summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
authorThomas Balling Sørensen <tball@tball-laptop.(none)>2010-10-26 12:49:41 +0200
committerThomas Balling Sørensen <tball@tball-laptop.(none)>2010-10-26 12:49:41 +0200
commitdbf3a15313eed930a3d8fdde12e457259c43651b (patch)
tree78bc54fa866ff1e97e61802f52dabe3f4f3380b1 /src/gallium/drivers
parent1dccc4cfaa423f15ab582d2a0253a84a0ae0b9fa (diff)
parent547e7619aac74ae13bdaa7fdf403a4ceb5212467 (diff)
Merge branch 'master' into pipe-video
Conflicts: src/gallium/include/pipe/p_format.h
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/galahad/glhd_context.c4
-rw-r--r--src/gallium/drivers/galahad/glhd_screen.c2
-rw-r--r--src/gallium/drivers/llvmpipe/Makefile7
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_alpha.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_alpha.h3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.c412
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.h22
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.c45
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.h32
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.h10
-rw-r--r--src/gallium/drivers/llvmpipe/lp_flush.h1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.c12
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_limits.h10
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.c64
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.h64
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_debug.c40
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_priv.h19
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri.c286
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h136
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene.h28
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c127
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.h29
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_coef.c279
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_coef.h64
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c228
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_context.h18
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_line.c184
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_point.c227
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_tri.c513
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_vbuf.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state.h3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_derived.c76
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c524
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.h7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_sampler.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_setup.c759
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_setup.h80
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_round.c29
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_sincos.c5
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.py267
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h3
-rw-r--r--src/gallium/drivers/nv50/nv50_surface.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_transfer.c10
-rw-r--r--src/gallium/drivers/r300/r300_debug.c1
-rw-r--r--src/gallium/drivers/r300/r300_fs.c5
-rw-r--r--src/gallium/drivers/r300/r300_screen.c11
-rw-r--r--src/gallium/drivers/r300/r300_screen.h1
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c13
-rw-r--r--src/gallium/drivers/r300/r300_vs.c5
-rw-r--r--src/gallium/drivers/r600/Makefile4
-rw-r--r--src/gallium/drivers/r600/SConscript1
-rw-r--r--src/gallium/drivers/r600/eg_asm.c7
-rw-r--r--src/gallium/drivers/r600/eg_state_inlines.h61
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c471
-rw-r--r--src/gallium/drivers/r600/evergreend.h45
-rw-r--r--src/gallium/drivers/r600/r600.h13
-rw-r--r--src/gallium/drivers/r600/r600_asm.c43
-rw-r--r--src/gallium/drivers/r600/r600_asm.h6
-rw-r--r--src/gallium/drivers/r600/r600_blit.c103
-rw-r--r--src/gallium/drivers/r600/r600_buffer.c2
-rw-r--r--src/gallium/drivers/r600/r600_formats.h56
-rw-r--r--src/gallium/drivers/r600/r600_opcodes.h8
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c28
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h81
-rw-r--r--src/gallium/drivers/r600/r600_resource.h24
-rw-r--r--src/gallium/drivers/r600/r600_shader.c224
-rw-r--r--src/gallium/drivers/r600/r600_shader.h3
-rw-r--r--src/gallium/drivers/r600/r600_state.c375
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c276
-rw-r--r--src/gallium/drivers/r600/r600_state_inlines.h59
-rw-r--r--src/gallium/drivers/r600/r600_texture.c380
-rw-r--r--src/gallium/drivers/r600/r600_translate.c211
-rw-r--r--src/gallium/drivers/r600/r600d.h47
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_exec.c12
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_sse.c12
-rw-r--r--src/gallium/drivers/softpipe/sp_quad.h1
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_depth_test.c61
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_pipe.c3
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c11
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.c57
-rw-r--r--src/gallium/drivers/softpipe/sp_tile_cache.h2
87 files changed, 4596 insertions, 2800 deletions
diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c
index ff6d2aa00ab..50f66079c2a 100644
--- a/src/gallium/drivers/galahad/glhd_context.c
+++ b/src/gallium/drivers/galahad/glhd_context.c
@@ -641,7 +641,7 @@ galahad_set_index_buffer(struct pipe_context *_pipe,
break;
default:
glhd_warn("index buffer %p has unrecognized index size %d",
- _ib->buffer, _ib->index_size);
+ (void *) _ib->buffer, _ib->index_size);
break;
}
}
@@ -1013,7 +1013,7 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
glhd_pipe->pipe = pipe;
- glhd_warn("Created context %p", glhd_pipe);
+ glhd_warn("Created context %p", (void *) glhd_pipe);
return &glhd_pipe->base;
}
diff --git a/src/gallium/drivers/galahad/glhd_screen.c b/src/gallium/drivers/galahad/glhd_screen.c
index 288941b1066..b6cc41d908b 100644
--- a/src/gallium/drivers/galahad/glhd_screen.c
+++ b/src/gallium/drivers/galahad/glhd_screen.c
@@ -370,7 +370,7 @@ galahad_screen_create(struct pipe_screen *screen)
glhd_screen->screen = screen;
- glhd_warn("Created screen %p", glhd_screen);
+ glhd_warn("Created screen %p", (void *) glhd_screen);
return &glhd_screen->base;
}
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 55b877b4ab9..08da2286b05 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -28,8 +28,6 @@ C_SOURCES = \
lp_scene_queue.c \
lp_screen.c \
lp_setup.c \
- lp_setup_coef.c \
- lp_setup_coef_intrin.c \
lp_setup_line.c \
lp_setup_point.c \
lp_setup_tri.c \
@@ -38,6 +36,7 @@ C_SOURCES = \
lp_state_clip.c \
lp_state_derived.c \
lp_state_fs.c \
+ lp_state_setup.c \
lp_state_gs.c \
lp_state_rasterizer.c \
lp_state_sampler.c \
@@ -63,12 +62,12 @@ PROGS := lp_test_format \
# Need this for the lp_test_*.o files
CLEAN_EXTRA = *.o
+include ../../Makefile.template
+
lp_test_sincos.o : sse_mathfun.h
PROGS_DEPS := ../../auxiliary/libgallium.a
-include ../../Makefile.template
-
lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_pack.py ../../auxiliary/util/u_format.csv
python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 650435f0f19..49950153a4f 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -27,13 +27,7 @@ env.Depends('lp_tile_soa.c', [
])
-# Only enable SSSE3 for lp_tile_soa_sse3.c
-ssse3_env = env.Clone()
-if env['gcc'] \
- and distutils.version.LooseVersion(env['CCVERSION']) >= distutils.version.LooseVersion('4.3') \
- and env['machine'] in ('x86', 'x86_64') :
- ssse3_env.Append(CCFLAGS = ['-mssse3'])
-lp_tile_soa_os = ssse3_env.SharedObject('lp_tile_soa.c')
+lp_tile_soa_os = env.SharedObject('lp_tile_soa.c')
llvmpipe = env.ConvenienceLibrary(
@@ -64,13 +58,12 @@ llvmpipe = env.ConvenienceLibrary(
'lp_setup_line.c',
'lp_setup_point.c',
'lp_setup_tri.c',
- 'lp_setup_coef.c',
- 'lp_setup_coef_intrin.c',
'lp_setup_vbuf.c',
'lp_state_blend.c',
'lp_state_clip.c',
'lp_state_derived.c',
'lp_state_fs.c',
+ 'lp_state_setup.c',
'lp_state_gs.c',
'lp_state_rasterizer.c',
'lp_state_sampler.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
index e28efe778f9..e50643790c8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
@@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder,
struct lp_type type,
struct lp_build_mask_context *mask,
LLVMValueRef alpha,
- LLVMValueRef ref)
+ LLVMValueRef ref,
+ boolean do_branch)
{
struct lp_build_context bld;
LLVMValueRef test;
@@ -60,4 +61,7 @@ lp_build_alpha_test(LLVMBuilderRef builder,
lp_build_name(test, "alpha_mask");
lp_build_mask_update(mask, test);
+
+ if (do_branch)
+ lp_build_mask_check(mask);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
index 44603b418c0..27ca8aad4d4 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
@@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder,
struct lp_type type,
struct lp_build_mask_context *mask,
LLVMValueRef alpha,
- LLVMValueRef ref);
+ LLVMValueRef ref,
+ boolean do_branch);
#endif /* !LP_BLD_ALPHA_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 7561899a74e..7eb76d4fb31 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2009 VMware, Inc.
+ * Copyright 2009-2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -53,15 +53,8 @@
* ... ... ... ... ... ... ... ... ...
*
*
- * Stencil test:
- * Two-sided stencil test is supported but probably not as efficient as
- * it could be. Currently, we use if/then/else constructs to do the
- * operations for front vs. back-facing polygons. We could probably do
- * both the front and back arithmetic then use a Select() instruction to
- * choose the result depending on polyon orientation. We'd have to
- * measure performance both ways and see which is better.
- *
* @author Jose Fonseca <[email protected]>
+ * @author Brian Paul <[email protected]>
*/
#include "pipe/p_state.h"
@@ -71,6 +64,7 @@
#include "gallivm/lp_bld_arit.h"
#include "gallivm/lp_bld_bitarit.h"
#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_conv.h"
#include "gallivm/lp_bld_logic.h"
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_intr.h"
@@ -128,57 +122,32 @@ lp_build_stencil_test_single(struct lp_build_context *bld,
/**
* Do the one or two-sided stencil test comparison.
* \sa lp_build_stencil_test_single
- * \param face an integer indicating front (+) or back (-) facing polygon.
- * If NULL, assume front-facing.
+ * \param front_facing an integer vector mask, indicating front (~0) or back
+ * (0) facing polygon. If NULL, assume front-facing.
*/
static LLVMValueRef
lp_build_stencil_test(struct lp_build_context *bld,
const struct pipe_stencil_state stencil[2],
LLVMValueRef stencilRefs[2],
LLVMValueRef stencilVals,
- LLVMValueRef face)
+ LLVMValueRef front_facing)
{
LLVMValueRef res;
assert(stencil[0].enabled);
- if (stencil[1].enabled && face) {
- /* do two-sided test */
- struct lp_build_flow_context *flow_ctx;
- struct lp_build_if_state if_ctx;
- LLVMValueRef front_facing;
- LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
- LLVMValueRef result = bld->undef;
-
- flow_ctx = lp_build_flow_create(bld->builder);
- lp_build_flow_scope_begin(flow_ctx);
+ /* do front face test */
+ res = lp_build_stencil_test_single(bld, &stencil[0],
+ stencilRefs[0], stencilVals);
- lp_build_flow_scope_declare(flow_ctx, &result);
+ if (stencil[1].enabled && front_facing) {
+ /* do back face test */
+ LLVMValueRef back_res;
- /* front_facing = face > 0.0 */
- front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, "");
-
- lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing);
- {
- result = lp_build_stencil_test_single(bld, &stencil[0],
- stencilRefs[0], stencilVals);
- }
- lp_build_else(&if_ctx);
- {
- result = lp_build_stencil_test_single(bld, &stencil[1],
- stencilRefs[1], stencilVals);
- }
- lp_build_endif(&if_ctx);
+ back_res = lp_build_stencil_test_single(bld, &stencil[1],
+ stencilRefs[1], stencilVals);
- lp_build_flow_scope_end(flow_ctx);
- lp_build_flow_destroy(flow_ctx);
-
- res = result;
- }
- else {
- /* do single-side test */
- res = lp_build_stencil_test_single(bld, &stencil[0],
- stencilRefs[0], stencilVals);
+ res = lp_build_select(bld, front_facing, res, back_res);
}
return res;
@@ -195,14 +164,12 @@ lp_build_stencil_op_single(struct lp_build_context *bld,
const struct pipe_stencil_state *stencil,
enum stencil_op op,
LLVMValueRef stencilRef,
- LLVMValueRef stencilVals,
- LLVMValueRef mask)
+ LLVMValueRef stencilVals)
{
- const unsigned stencilMax = 255; /* XXX fix */
struct lp_type type = bld->type;
LLVMValueRef res;
- LLVMValueRef max = lp_build_const_int_vec(type, stencilMax);
+ LLVMValueRef max = lp_build_const_int_vec(type, 0xff);
unsigned stencil_op;
assert(type.sign);
@@ -255,19 +222,7 @@ lp_build_stencil_op_single(struct lp_build_context *bld,
break;
default:
assert(0 && "bad stencil op mode");
- res = NULL;
- }
-
- if (stencil->writemask != stencilMax) {
- /* mask &= stencil->writemask */
- LLVMValueRef writemask = lp_build_const_int_vec(type, stencil->writemask);
- mask = LLVMBuildAnd(bld->builder, mask, writemask, "");
- /* res = (res & mask) | (stencilVals & ~mask) */
- res = lp_build_select_bitwise(bld, writemask, res, stencilVals);
- }
- else {
- /* res = mask ? res : stencilVals */
- res = lp_build_select(bld, mask, res, stencilVals);
+ res = bld->undef;
}
return res;
@@ -284,49 +239,40 @@ lp_build_stencil_op(struct lp_build_context *bld,
LLVMValueRef stencilRefs[2],
LLVMValueRef stencilVals,
LLVMValueRef mask,
- LLVMValueRef face)
+ LLVMValueRef front_facing)
{
- assert(stencil[0].enabled);
-
- if (stencil[1].enabled && face) {
- /* do two-sided op */
- struct lp_build_flow_context *flow_ctx;
- struct lp_build_if_state if_ctx;
- LLVMValueRef front_facing;
- LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
- LLVMValueRef result = bld->undef;
+ LLVMValueRef res;
- flow_ctx = lp_build_flow_create(bld->builder);
- lp_build_flow_scope_begin(flow_ctx);
+ assert(stencil[0].enabled);
- lp_build_flow_scope_declare(flow_ctx, &result);
+ /* do front face op */
+ res = lp_build_stencil_op_single(bld, &stencil[0], op,
+ stencilRefs[0], stencilVals);
- /* front_facing = face > 0.0 */
- front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, "");
+ if (stencil[1].enabled && front_facing) {
+ /* do back face op */
+ LLVMValueRef back_res;
- lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing);
- {
- result = lp_build_stencil_op_single(bld, &stencil[0], op,
- stencilRefs[0], stencilVals, mask);
- }
- lp_build_else(&if_ctx);
- {
- result = lp_build_stencil_op_single(bld, &stencil[1], op,
- stencilRefs[1], stencilVals, mask);
- }
- lp_build_endif(&if_ctx);
+ back_res = lp_build_stencil_op_single(bld, &stencil[1], op,
+ stencilRefs[1], stencilVals);
- lp_build_flow_scope_end(flow_ctx);
- lp_build_flow_destroy(flow_ctx);
+ res = lp_build_select(bld, front_facing, res, back_res);
+ }
- return result;
+ if (stencil->writemask != 0xff) {
+ /* mask &= stencil->writemask */
+ LLVMValueRef writemask = lp_build_const_int_vec(bld->type, stencil->writemask);
+ mask = LLVMBuildAnd(bld->builder, mask, writemask, "");
+ /* res = (res & mask) | (stencilVals & ~mask) */
+ res = lp_build_select_bitwise(bld, writemask, res, stencilVals);
}
else {
- /* do single-sided op */
- return lp_build_stencil_op_single(bld, &stencil[0], op,
- stencilRefs[0], stencilVals, mask);
+ /* res = mask ? res : stencilVals */
+ res = lp_build_select(bld, mask, res, stencilVals);
}
+
+ return res;
}
@@ -358,8 +304,13 @@ lp_depth_type(const struct util_format_description *format_desc,
}
else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
assert(format_desc->block.bits <= 32);
- if(format_desc->channel[swizzle].normalized)
- type.norm = TRUE;
+ assert(format_desc->channel[swizzle].normalized);
+ if (format_desc->channel[swizzle].size < format_desc->block.bits) {
+ /* Prefer signed integers when possible, as SSE has less support
+ * for unsigned comparison;
+ */
+ type.sign = TRUE;
+ }
}
else
assert(0);
@@ -381,7 +332,7 @@ lp_depth_type(const struct util_format_description *format_desc,
*/
static boolean
get_z_shift_and_mask(const struct util_format_description *format_desc,
- unsigned *shift, unsigned *mask)
+ unsigned *shift, unsigned *width, unsigned *mask)
{
const unsigned total_bits = format_desc->block.bits;
unsigned z_swizzle;
@@ -397,12 +348,14 @@ get_z_shift_and_mask(const struct util_format_description *format_desc,
if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
return FALSE;
+ *width = format_desc->channel[z_swizzle].size;
+
padding_right = 0;
for (chan = 0; chan < z_swizzle; ++chan)
padding_right += format_desc->channel[chan].size;
padding_left =
- total_bits - (padding_right + format_desc->channel[z_swizzle].size);
+ total_bits - (padding_right + *width);
if (padding_left || padding_right) {
unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1;
@@ -413,7 +366,7 @@ get_z_shift_and_mask(const struct util_format_description *format_desc,
*mask = 0xffffffff;
}
- *shift = padding_left;
+ *shift = padding_right;
return TRUE;
}
@@ -457,7 +410,7 @@ get_s_shift_and_mask(const struct util_format_description *format_desc,
* \param maskvalue is the depth test mask.
* \param counter is a pointer of the uint32 counter.
*/
-static void
+void
lp_build_occlusion_count(LLVMBuilderRef builder,
struct lp_type type,
LLVMValueRef maskvalue,
@@ -494,31 +447,57 @@ lp_build_occlusion_count(LLVMBuilderRef builder,
* \param format_desc description of the depth/stencil surface
* \param mask the alive/dead pixel mask for the quad (vector)
* \param stencil_refs the front/back stencil ref values (scalar)
- * \param z_src the incoming depth/stencil values (a 2x2 quad)
+ * \param z_src the incoming depth/stencil values (a 2x2 quad, float32)
* \param zs_dst_ptr pointer to depth/stencil values in framebuffer
- * \param facing contains float value indicating front/back facing polygon
+ * \param facing contains boolean value indicating front/back facing polygon
*/
void
lp_build_depth_stencil_test(LLVMBuilderRef builder,
const struct pipe_depth_state *depth,
const struct pipe_stencil_state stencil[2],
- struct lp_type type,
+ struct lp_type z_src_type,
const struct util_format_description *format_desc,
struct lp_build_mask_context *mask,
LLVMValueRef stencil_refs[2],
LLVMValueRef z_src,
LLVMValueRef zs_dst_ptr,
LLVMValueRef face,
- LLVMValueRef counter)
+ LLVMValueRef *zs_value,
+ boolean do_branch)
{
- struct lp_build_context bld;
- struct lp_build_context sbld;
+ struct lp_type z_type;
+ struct lp_build_context z_bld;
+ struct lp_build_context s_bld;
struct lp_type s_type;
+ unsigned z_shift = 0, z_width = 0, z_mask = 0;
LLVMValueRef zs_dst, z_dst = NULL;
LLVMValueRef stencil_vals = NULL;
LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
- LLVMValueRef orig_mask = mask->value;
+ LLVMValueRef orig_mask = lp_build_mask_value(mask);
+ LLVMValueRef front_facing = NULL;
+
+
+ /*
+ * Depths are expected to be between 0 and 1, even if they are stored in
+ * floats. Setting these bits here will ensure that the lp_build_conv() call
+ * below won't try to unnecessarily clamp the incoming values.
+ */
+ if(z_src_type.floating) {
+ z_src_type.sign = FALSE;
+ z_src_type.norm = TRUE;
+ }
+ else {
+ assert(!z_src_type.sign);
+ assert(z_src_type.norm);
+ }
+
+ /* Pick the depth type. */
+ z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
+
+ /* FIXME: Cope with a depth test type with a different bit width. */
+ assert(z_type.width == z_src_type.width);
+ assert(z_type.length == z_src_type.length);
/* Sanity checking */
{
@@ -540,8 +519,8 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
}
assert(z_swizzle < 4);
- assert(format_desc->block.bits == type.width);
- if (type.floating) {
+ assert(format_desc->block.bits == z_type.width);
+ if (z_type.floating) {
assert(z_swizzle == 0);
assert(format_desc->channel[z_swizzle].type ==
UTIL_FORMAT_TYPE_FLOAT);
@@ -552,54 +531,56 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
assert(format_desc->channel[z_swizzle].type ==
UTIL_FORMAT_TYPE_UNSIGNED);
assert(format_desc->channel[z_swizzle].normalized);
- assert(!type.fixed);
- assert(!type.sign);
- assert(type.norm);
+ assert(!z_type.fixed);
}
}
/* Setup build context for Z vals */
- lp_build_context_init(&bld, builder, type);
+ lp_build_context_init(&z_bld, builder, z_type);
/* Setup build context for stencil vals */
- s_type = lp_type_int_vec(type.width);
- lp_build_context_init(&sbld, builder, s_type);
+ s_type = lp_type_int_vec(z_type.width);
+ lp_build_context_init(&s_bld, builder, s_type);
/* Load current z/stencil value from z/stencil buffer */
+ zs_dst_ptr = LLVMBuildBitCast(builder,
+ zs_dst_ptr,
+ LLVMPointerType(z_bld.vec_type, 0), "");
zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
- lp_build_name(zs_dst, "zsbufval");
+ lp_build_name(zs_dst, "zs_dst");
/* Compute and apply the Z/stencil bitmasks and shifts.
*/
{
- unsigned z_shift, z_mask;
unsigned s_shift, s_mask;
- if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) {
- if (z_shift) {
- LLVMValueRef shift = lp_build_const_int_vec(type, z_shift);
- z_src = LLVMBuildLShr(builder, z_src, shift, "");
- }
-
+ if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) {
if (z_mask != 0xffffffff) {
- LLVMValueRef mask = lp_build_const_int_vec(type, z_mask);
- z_src = LLVMBuildAnd(builder, z_src, mask, "");
- z_dst = LLVMBuildAnd(builder, zs_dst, mask, "");
- z_bitmask = mask; /* used below */
+ z_bitmask = lp_build_const_int_vec(z_type, z_mask);
}
- else {
+
+ /*
+ * Align the framebuffer Z 's LSB to the right.
+ */
+ if (z_shift) {
+ LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift);
+ z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst");
+ } else if (z_bitmask) {
+ /* TODO: Instead of loading a mask from memory and ANDing, it's
+ * probably faster to just shake the bits with two shifts. */
+ z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst");
+ } else {
z_dst = zs_dst;
+ lp_build_name(z_dst, "z_dst");
}
-
- lp_build_name(z_dst, "zsbuf.z");
}
if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
if (s_shift) {
- LLVMValueRef shift = lp_build_const_int_vec(type, s_shift);
+ LLVMValueRef shift = lp_build_const_int_vec(s_type, s_shift);
stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
stencil_shift = shift; /* used below */
}
@@ -608,35 +589,85 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
}
if (s_mask != 0xffffffff) {
- LLVMValueRef mask = lp_build_const_int_vec(type, s_mask);
+ LLVMValueRef mask = lp_build_const_int_vec(s_type, s_mask);
stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
}
- lp_build_name(stencil_vals, "stencil");
+ lp_build_name(stencil_vals, "s_dst");
}
}
-
if (stencil[0].enabled) {
+
+ if (face) {
+ LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
+
+ /* front_facing = face != 0 ? ~0 : 0 */
+ front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
+ front_facing = LLVMBuildSExt(builder, front_facing,
+ LLVMIntType(s_bld.type.length*s_bld.type.width),
+ "");
+ front_facing = LLVMBuildBitCast(builder, front_facing,
+ s_bld.int_vec_type, "");
+ }
+
/* convert scalar stencil refs into vectors */
- stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]);
- stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]);
+ stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
+ stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);
- s_pass_mask = lp_build_stencil_test(&sbld, stencil,
- stencil_refs, stencil_vals, face);
+ s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
+ stencil_refs, stencil_vals,
+ front_facing);
/* apply stencil-fail operator */
{
- LLVMValueRef s_fail_mask = lp_build_andnot(&bld, orig_mask, s_pass_mask);
- stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP,
+ LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask);
+ stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
stencil_refs, stencil_vals,
- s_fail_mask, face);
+ s_fail_mask, front_facing);
}
}
if (depth->enabled) {
+ /*
+ * Convert fragment Z to the desired type, aligning the LSB to the right.
+ */
+
+ assert(z_type.width == z_src_type.width);
+ assert(z_type.length == z_src_type.length);
+ assert(lp_check_value(z_src_type, z_src));
+ if (z_src_type.floating) {
+ /*
+ * Convert from floating point values
+ */
+
+ if (!z_type.floating) {
+ z_src = lp_build_clamped_float_to_unsigned_norm(builder,
+ z_src_type,
+ z_width,
+ z_src);
+ }
+ } else {
+ /*
+ * Convert from unsigned normalized values.
+ */
+
+ assert(!z_src_type.sign);
+ assert(!z_src_type.fixed);
+ assert(z_src_type.norm);
+ assert(!z_type.floating);
+ if (z_src_type.width > z_width) {
+ LLVMValueRef shift = lp_build_const_int_vec(z_src_type,
+ z_src_type.width - z_width);
+ z_src = LLVMBuildLShr(builder, z_src, shift, "");
+ }
+ }
+ assert(lp_check_value(z_type, z_src));
+
+ lp_build_name(z_src, "z_src");
+
/* compare src Z to dst Z, returning 'pass' mask */
- z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst);
+ z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);
if (!stencil[0].enabled) {
/* We can potentially skip all remaining operations here, but only
@@ -644,28 +675,28 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
* buffer values. Don't need to update Z buffer values.
*/
lp_build_mask_update(mask, z_pass);
+
+ if (do_branch) {
+ lp_build_mask_check(mask);
+ do_branch = FALSE;
+ }
}
if (depth->writemask) {
- LLVMValueRef zselectmask = mask->value;
+ LLVMValueRef zselectmask;
/* mask off bits that failed Z test */
- zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, "");
+ zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
/* mask off bits that failed stencil test */
if (s_pass_mask) {
zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
}
- /* if combined Z/stencil format, mask off the stencil bits */
- if (z_bitmask) {
- zselectmask = LLVMBuildAnd(builder, zselectmask, z_bitmask, "");
- }
-
/* Mix the old and new Z buffer values.
- * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i])
+ * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
*/
- z_dst = lp_build_select_bitwise(&bld, zselectmask, z_src, z_dst);
+ z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
}
if (stencil[0].enabled) {
@@ -673,33 +704,35 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
LLVMValueRef z_fail_mask, z_pass_mask;
/* apply Z-fail operator */
- z_fail_mask = lp_build_andnot(&bld, orig_mask, z_pass);
- stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP,
+ z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass);
+ stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
stencil_refs, stencil_vals,
- z_fail_mask, face);
+ z_fail_mask, front_facing);
/* apply Z-pass operator */
- z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, "");
- stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
+ z_pass_mask = LLVMBuildAnd(z_bld.builder, orig_mask, z_pass, "");
+ stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
stencil_refs, stencil_vals,
- z_pass_mask, face);
+ z_pass_mask, front_facing);
}
}
else {
/* No depth test: apply Z-pass operator to stencil buffer values which
* passed the stencil test.
*/
- s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, "");
- stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
+ s_pass_mask = LLVMBuildAnd(s_bld.builder, orig_mask, s_pass_mask, "");
+ stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
stencil_refs, stencil_vals,
- s_pass_mask, face);
+ s_pass_mask, front_facing);
}
- /* The Z bits are already in the right place but we may need to shift the
- * stencil bits before ORing Z with Stencil to make the final pixel value.
- */
+ /* Put Z and ztencil bits in the right place */
+ if (z_dst && z_shift) {
+ LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift);
+ z_dst = LLVMBuildShl(builder, z_dst, shift, "");
+ }
if (stencil_vals && stencil_shift)
- stencil_vals = LLVMBuildShl(bld.builder, stencil_vals,
+ stencil_vals = LLVMBuildShl(s_bld.builder, stencil_vals,
stencil_shift, "");
/* Finally, merge/store the z/stencil values */
@@ -707,13 +740,13 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
(stencil[0].enabled && stencil[0].writemask)) {
if (z_dst && stencil_vals)
- zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, "");
+ zs_dst = LLVMBuildOr(z_bld.builder, z_dst, stencil_vals, "");
else if (z_dst)
zs_dst = z_dst;
else
zs_dst = stencil_vals;
- LLVMBuildStore(builder, zs_dst, zs_dst_ptr);
+ *zs_value = zs_dst;
}
if (s_pass_mask)
@@ -722,6 +755,47 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
if (depth->enabled && stencil[0].enabled)
lp_build_mask_update(mask, z_pass);
- if (counter)
- lp_build_occlusion_count(builder, type, mask->value, counter);
+ if (do_branch)
+ lp_build_mask_check(mask);
+
+}
+
+
+void
+lp_build_depth_write(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ LLVMValueRef zs_dst_ptr,
+ LLVMValueRef zs_value)
+{
+ zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
+ LLVMPointerType(LLVMTypeOf(zs_value), 0), "");
+
+ LLVMBuildStore(builder, zs_value, zs_dst_ptr);
+}
+
+
+void
+lp_build_deferred_depth_write(LLVMBuilderRef builder,
+ struct lp_type z_src_type,
+ const struct util_format_description *format_desc,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef zs_dst_ptr,
+ LLVMValueRef zs_value)
+{
+ struct lp_type z_type;
+ struct lp_build_context z_bld;
+ LLVMValueRef z_dst;
+
+ /* XXX: pointlessly redo type logic:
+ */
+ z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
+ lp_build_context_init(&z_bld, builder, z_type);
+
+ zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
+ LLVMPointerType(z_bld.vec_type, 0), "");
+
+ z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval");
+ z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), zs_value, z_dst);
+
+ LLVMBuildStore(builder, z_dst, zs_dst_ptr);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
index e257a5bd7d0..a54ef3a711e 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -61,7 +61,27 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
LLVMValueRef zs_src,
LLVMValueRef zs_dst_ptr,
LLVMValueRef facing,
- LLVMValueRef counter);
+ LLVMValueRef *zs_value,
+ boolean do_branch);
+void
+lp_build_depth_write(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ LLVMValueRef zs_dst_ptr,
+ LLVMValueRef zs_value);
+
+void
+lp_build_deferred_depth_write(LLVMBuilderRef builder,
+ struct lp_type z_src_type,
+ const struct util_format_description *format_desc,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef zs_dst_ptr,
+ LLVMValueRef zs_value);
+
+void
+lp_build_occlusion_count(LLVMBuilderRef builder,
+ struct lp_type type,
+ LLVMValueRef maskvalue,
+ LLVMValueRef counter);
#endif /* !LP_BLD_DEPTH_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index 2a374f8c390..c9da8900d0c 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -206,7 +206,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
/*
- * a = a0 + x * dadx + y * dady
+ * a = a0 + (x * dadx + y * dady)
*/
if (attrib == 0 && chan == 0) {
@@ -219,11 +219,11 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
a = a0;
if (interp != LP_INTERP_CONSTANT &&
interp != LP_INTERP_FACING) {
- LLVMValueRef tmp;
- tmp = LLVMBuildFMul(builder, bld->x, dadx, "");
- a = LLVMBuildFAdd(builder, a, tmp, "");
- tmp = LLVMBuildFMul(builder, bld->y, dady, "");
- a = LLVMBuildFAdd(builder, a, tmp, "");
+ LLVMValueRef ax, ay, axy;
+ ax = LLVMBuildFMul(builder, bld->x, dadx, "");
+ ay = LLVMBuildFMul(builder, bld->y, dady, "");
+ axy = LLVMBuildFAdd(builder, ax, ay, "");
+ a = LLVMBuildFAdd(builder, a, axy, "");
}
}
@@ -272,7 +272,10 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
* This is called when we move from one quad to the next.
*/
static void
-attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
+attribs_update(struct lp_build_interp_soa_context *bld,
+ int quad_index,
+ int start,
+ int end)
{
struct lp_build_context *coeff_bld = &bld->coeff_bld;
LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index);
@@ -282,7 +285,7 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
assert(quad_index < 4);
- for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+ for(attrib = start; attrib < end; ++attrib) {
const unsigned mask = bld->mask[attrib];
const unsigned interp = bld->interp[attrib];
for(chan = 0; chan < NUM_CHANNELS; ++chan) {
@@ -350,6 +353,14 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
}
#endif
+ if (attrib == 0 && chan == 2) {
+ /* FIXME: Depth values can exceed 1.0, due to the fact that
+ * setup interpolation coefficients refer to (0,0) which causes
+ * precision loss. So we must clamp to 1.0 here to avoid artifacts
+ */
+ a = lp_build_min(coeff_bld, a, coeff_bld->one);
+ }
+
attrib_name(a, attrib, chan, "");
}
bld->attribs[attrib][chan] = a;
@@ -434,8 +445,6 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
pos_init(bld, x0, y0);
coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
-
- attribs_update(bld, 0);
}
@@ -443,10 +452,20 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
* Advance the position and inputs to the given quad within the block.
*/
void
-lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
- int quad_index)
+lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
+ int quad_index)
+{
+ assert(quad_index < 4);
+
+ attribs_update(bld, quad_index, 1, bld->num_attribs);
+}
+
+void
+lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
+ int quad_index)
{
assert(quad_index < 4);
- attribs_update(bld, quad_index);
+ attribs_update(bld, quad_index, 0, 1);
}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
index 3054030f739..a7ebdd1bfa2 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -46,7 +46,31 @@
#include "tgsi/tgsi_exec.h"
-#include "lp_setup.h"
+/**
+ * Describes how to compute the interpolation coefficients (a0, dadx, dady)
+ * from the vertices passed into our triangle/line/point functions by the
+ * draw module.
+ *
+ * Vertices are treated as an array of float[4] values, indexed by
+ * src_index.
+ *
+ * LP_INTERP_COLOR is translated to either LP_INTERP_CONSTANT or
+ * LINEAR depending on flatshade state.
+ */
+enum lp_interp {
+ LP_INTERP_CONSTANT,
+ LP_INTERP_COLOR,
+ LP_INTERP_LINEAR,
+ LP_INTERP_PERSPECTIVE,
+ LP_INTERP_POSITION,
+ LP_INTERP_FACING
+};
+
+struct lp_shader_input {
+ ushort interp:4; /* enum lp_interp */
+ ushort usage_mask:4; /* bitmask of TGSI_WRITEMASK_x flags */
+ ushort src_index:8; /* where to find values in incoming vertices */
+};
struct lp_build_interp_soa_context
@@ -89,7 +113,11 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
LLVMValueRef y);
void
-lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
+lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
+ int quad_index);
+
+void
+lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
int quad_index);
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 39f2c6085ef..763432ed712 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -82,6 +82,8 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
}
}
+ lp_delete_setup_variants(llvmpipe);
+
align_free( llvmpipe );
}
@@ -108,6 +110,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
memset(llvmpipe, 0, sizeof *llvmpipe);
make_empty_list(&llvmpipe->fs_variants_list);
+ make_empty_list(&llvmpipe->setup_variants_list);
llvmpipe->pipe.winsys = screen->winsys;
llvmpipe->pipe.screen = screen;
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 34fa20e204a..db09c95b272 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -39,6 +39,7 @@
#include "lp_jit.h"
#include "lp_setup.h"
#include "lp_state_fs.h"
+#include "lp_state_setup.h"
struct llvmpipe_vbuf_render;
@@ -48,6 +49,7 @@ struct lp_fragment_shader;
struct lp_vertex_shader;
struct lp_blend_state;
struct lp_setup_context;
+struct lp_setup_variant;
struct lp_velems_state;
struct llvmpipe_context {
@@ -105,12 +107,9 @@ struct llvmpipe_context {
/** Which vertex shader output slot contains point size */
int psize_slot;
- /** Fragment shader input interpolation info */
- unsigned num_inputs;
- struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
-
/** The tiling engine */
struct lp_setup_context *setup;
+ struct lp_setup_variant setup_variant;
/** The primitive drawing context */
struct draw_context *draw;
@@ -120,6 +119,9 @@ struct llvmpipe_context {
struct lp_fs_variant_list_item fs_variants_list;
unsigned nr_fs_variants;
+
+ struct lp_setup_variant_list_item setup_variants_list;
+ unsigned nr_setup_variants;
};
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h
index bb538b2bd83..3626ce4a86c 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.h
+++ b/src/gallium/drivers/llvmpipe/lp_flush.h
@@ -32,6 +32,7 @@
struct pipe_context;
struct pipe_fence_handle;
+struct pipe_resource;
void
llvmpipe_flush(struct pipe_context *pipe,
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index 04b12dedccf..c540f9b3628 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -36,7 +36,6 @@
#include <llvm-c/Transforms/Scalar.h>
#include "util/u_memory.h"
-#include "util/u_cpu_detect.h"
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_debug.h"
#include "lp_screen.h"
@@ -162,9 +161,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
void
lp_jit_screen_cleanup(struct llvmpipe_screen *screen)
{
- if(screen->engine)
- LLVMDisposeExecutionEngine(screen->engine);
-
if(screen->pass)
LLVMDisposePassManager(screen->pass);
}
@@ -190,13 +186,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
LLVMAddCFGSimplificationPass(screen->pass);
LLVMAddPromoteMemoryToRegisterPass(screen->pass);
LLVMAddConstantPropagationPass(screen->pass);
- if(util_cpu_caps.has_sse4_1) {
- /* FIXME: There is a bug in this pass, whereby the combination of fptosi
- * and sitofp (necessary for trunc/floor/ceil/round implementation)
- * somehow becomes invalid code.
- */
- LLVMAddInstructionCombiningPass(screen->pass);
- }
+ LLVMAddInstructionCombiningPass(screen->pass);
LLVMAddGVNPass(screen->pass);
} else {
/* We need at least this pass to prevent the backends to fail in
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index 16e04fce0cd..114f21f2d16 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -144,7 +144,7 @@ typedef void
(*lp_jit_frag_func)(const struct lp_jit_context *context,
uint32_t x,
uint32_t y,
- float facing,
+ uint32_t facing,
const void *a0,
const void *dadx,
const void *dady,
diff --git a/src/gallium/drivers/llvmpipe/lp_limits.h b/src/gallium/drivers/llvmpipe/lp_limits.h
index d1c431475d8..2538164ffaa 100644
--- a/src/gallium/drivers/llvmpipe/lp_limits.h
+++ b/src/gallium/drivers/llvmpipe/lp_limits.h
@@ -72,4 +72,14 @@
*/
#define LP_MAX_SHADER_VARIANTS 1024
+/**
+ * Max number of setup variants that will be kept around.
+ *
+ * These are determined by the combination of the fragment shader
+ * input signature and a small amount of rasterization state (eg
+ * flatshading). It is likely that many active fragment shaders will
+ * share the same setup variant.
+ */
+#define LP_MAX_SETUP_VARIANTS 64
+
#endif /* LP_LIMITS_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index d7e6415e139..d358a983943 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -211,8 +211,8 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
const struct lp_scene *scene = task->scene;
- unsigned clear_value = arg.clear_zstencil.value;
- unsigned clear_mask = arg.clear_zstencil.mask;
+ uint32_t clear_value = arg.clear_zstencil.value;
+ uint32_t clear_mask = arg.clear_zstencil.mask;
const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT;
const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT;
const unsigned block_size = scene->zsbuf.blocksize;
@@ -220,7 +220,8 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
uint8_t *dst;
unsigned i, j;
- LP_DBG(DEBUG_RAST, "%s 0x%x%x\n", __FUNCTION__, clear_value, clear_mask);
+ LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n",
+ __FUNCTION__, clear_value, clear_mask);
/*
* Clear the aera of the swizzled depth/depth buffer matching this tile, in
@@ -232,16 +233,31 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
dst = task->depth_tile;
+ clear_value &= clear_mask;
+
switch (block_size) {
case 1:
+ assert(clear_mask == 0xff);
memset(dst, (uint8_t) clear_value, height * width);
break;
case 2:
- for (i = 0; i < height; i++) {
- uint16_t *row = (uint16_t *)dst;
- for (j = 0; j < width; j++)
- *row++ = (uint16_t) clear_value;
- dst += dst_stride;
+ if (clear_mask == 0xffff) {
+ for (i = 0; i < height; i++) {
+ uint16_t *row = (uint16_t *)dst;
+ for (j = 0; j < width; j++)
+ *row++ = (uint16_t) clear_value;
+ dst += dst_stride;
+ }
+ }
+ else {
+ for (i = 0; i < height; i++) {
+ uint16_t *row = (uint16_t *)dst;
+ for (j = 0; j < width; j++) {
+ uint16_t tmp = ~clear_mask & *row;
+ *row++ = clear_value | tmp;
+ }
+ dst += dst_stride;
+ }
}
break;
case 4:
@@ -258,7 +274,7 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
uint32_t *row = (uint32_t *)dst;
for (j = 0; j < width; j++) {
uint32_t tmp = ~clear_mask & *row;
- *row++ = (clear_value & clear_mask) | tmp;
+ *row++ = clear_value | tmp;
}
dst += dst_stride;
}
@@ -318,7 +334,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
{
const struct lp_scene *scene = task->scene;
const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
- const struct lp_rast_state *state = inputs->state;
+ const struct lp_rast_state *state = task->state;
struct lp_fragment_shader_variant *variant = state->variant;
const unsigned tile_x = task->x, tile_y = task->y;
unsigned x, y;
@@ -349,10 +365,10 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
BEGIN_JIT_CALL(state);
variant->jit_function[RAST_WHOLE]( &state->jit_context,
tile_x + x, tile_y + y,
- inputs->facing,
- inputs->a0,
- inputs->dadx,
- inputs->dady,
+ inputs->frontfacing,
+ GET_A0(inputs),
+ GET_DADX(inputs),
+ GET_DADY(inputs),
color,
depth,
0xffff,
@@ -398,7 +414,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
unsigned x, unsigned y,
unsigned mask)
{
- const struct lp_rast_state *state = inputs->state;
+ const struct lp_rast_state *state = task->state;
struct lp_fragment_shader_variant *variant = state->variant;
const struct lp_scene *scene = task->scene;
uint8_t *color[PIPE_MAX_COLOR_BUFS];
@@ -430,10 +446,10 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
BEGIN_JIT_CALL(state);
variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
x, y,
- inputs->facing,
- inputs->a0,
- inputs->dadx,
- inputs->dady,
+ inputs->frontfacing,
+ GET_A0(inputs),
+ GET_DADX(inputs),
+ GET_DADY(inputs),
color,
depth,
mask,
@@ -474,6 +490,14 @@ lp_rast_end_query(struct lp_rasterizer_task *task,
}
+void
+lp_rast_set_state(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ task->state = arg.state;
+}
+
+
/**
* Set top row and left column of the tile's pixels to white. For debugging.
@@ -581,10 +605,12 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
lp_rast_triangle_8,
lp_rast_triangle_3_4,
lp_rast_triangle_3_16,
+ lp_rast_triangle_4_16,
lp_rast_shade_tile,
lp_rast_shade_tile_opaque,
lp_rast_begin_query,
lp_rast_end_query,
+ lp_rast_set_state,
};
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index c55b97a9d13..a64c152cf83 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -78,30 +78,28 @@ struct lp_rast_state {
* These pointers point into the bin data buffer.
*/
struct lp_rast_shader_inputs {
- float facing; /** Positive for front-facing, negative for back-facing */
- unsigned disable:1; /** Partially binned, disable this command */
- unsigned opaque:1; /** Is opaque */
-
- float (*a0)[4];
- float (*dadx)[4];
- float (*dady)[4];
-
- const struct lp_rast_state *state;
+ unsigned frontfacing:1; /** True for front-facing */
+ unsigned disable:1; /** Partially binned, disable this command */
+ unsigned opaque:1; /** Is opaque */
+ unsigned pad0:29; /* wasted space */
+ unsigned stride; /* how much to advance data between a0, dadx, dady */
+ unsigned pad2; /* wasted space */
+ unsigned pad3; /* wasted space */
+ /* followed by a0, dadx, dady and planes[] */
};
-
+/* Note: the order of these values is important as they are loaded by
+ * sse code in rasterization:
+ */
struct lp_rast_plane {
- /* one-pixel sized trivial accept offsets for each plane */
- int ei;
-
- /* one-pixel sized trivial reject offsets for each plane */
- int eo;
-
/* edge function values at minx,miny ?? */
int c;
int dcdx;
int dcdy;
+
+ /* one-pixel sized trivial reject offsets for each plane */
+ int eo;
};
/**
@@ -111,17 +109,24 @@ struct lp_rast_plane {
* Objects of this type are put into the lp_setup_context::data buffer.
*/
struct lp_rast_triangle {
- /* inputs for the shader */
- struct lp_rast_shader_inputs inputs;
-
#ifdef DEBUG
float v[3][2];
+ float pad0;
+ float pad1;
#endif
- struct lp_rast_plane plane[8]; /* NOTE: may allocate fewer planes */
+ /* inputs for the shader */
+ struct lp_rast_shader_inputs inputs;
+ /* planes are also allocated here */
};
+#define GET_A0(inputs) ((float (*)[4])((inputs)+1))
+#define GET_DADX(inputs) ((float (*)[4])((char *)((inputs) + 1) + (inputs)->stride))
+#define GET_DADY(inputs) ((float (*)[4])((char *)((inputs) + 1) + 2 * (inputs)->stride))
+#define GET_PLANES(tri) ((struct lp_rast_plane *)((char *)(&(tri)->inputs + 1) + 3 * (tri)->inputs.stride))
+
+
struct lp_rasterizer *
lp_rast_create( unsigned num_threads );
@@ -149,9 +154,10 @@ union lp_rast_cmd_arg {
const struct lp_rast_state *set_state;
uint8_t clear_color[4];
struct {
- unsigned value;
- unsigned mask;
+ uint32_t value;
+ uint32_t mask;
} clear_zstencil;
+ const struct lp_rast_state *state;
struct lp_fence *fence;
struct llvmpipe_query *query_obj;
};
@@ -238,12 +244,14 @@ lp_rast_arg_null( void )
#define LP_RAST_OP_TRIANGLE_8 0x9
#define LP_RAST_OP_TRIANGLE_3_4 0xa
#define LP_RAST_OP_TRIANGLE_3_16 0xb
-#define LP_RAST_OP_SHADE_TILE 0xc
-#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xd
-#define LP_RAST_OP_BEGIN_QUERY 0xe
-#define LP_RAST_OP_END_QUERY 0xf
-
-#define LP_RAST_OP_MAX 0x10
+#define LP_RAST_OP_TRIANGLE_4_16 0xc
+#define LP_RAST_OP_SHADE_TILE 0xd
+#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xe
+#define LP_RAST_OP_BEGIN_QUERY 0xf
+#define LP_RAST_OP_END_QUERY 0x10
+#define LP_RAST_OP_SET_STATE 0x11
+
+#define LP_RAST_OP_MAX 0x12
#define LP_RAST_OP_MASK 0xff
void
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c b/src/gallium/drivers/llvmpipe/lp_rast_debug.c
index 9fc78645a3a..64ac616f629 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_debug.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c
@@ -12,6 +12,7 @@ static INLINE int u_bit_scan(unsigned *mask)
struct tile {
int coverage;
int overdraw;
+ const struct lp_rast_state *state;
char data[TILE_SIZE][TILE_SIZE];
};
@@ -42,10 +43,12 @@ static const char *cmd_names[LP_RAST_OP_MAX] =
"triangle_8",
"triangle_3_4",
"triangle_3_16",
+ "triangle_4_16",
"shade_tile",
"shade_tile_opaque",
"begin_query",
"end_query",
+ "set_state",
};
static const char *cmd_name(unsigned cmd)
@@ -55,31 +58,31 @@ static const char *cmd_name(unsigned cmd)
}
static const struct lp_fragment_shader_variant *
-get_variant( const struct cmd_block *block,
- int k )
+get_variant( const struct lp_rast_state *state,
+ const struct cmd_block *block,
+ int k )
{
if (block->cmd[k] == LP_RAST_OP_SHADE_TILE ||
- block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE)
- return block->arg[k].shade_tile->state->variant;
-
- if (block->cmd[k] == LP_RAST_OP_TRIANGLE_1 ||
+ block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE ||
+ block->cmd[k] == LP_RAST_OP_TRIANGLE_1 ||
block->cmd[k] == LP_RAST_OP_TRIANGLE_2 ||
block->cmd[k] == LP_RAST_OP_TRIANGLE_3 ||
block->cmd[k] == LP_RAST_OP_TRIANGLE_4 ||
block->cmd[k] == LP_RAST_OP_TRIANGLE_5 ||
block->cmd[k] == LP_RAST_OP_TRIANGLE_6 ||
block->cmd[k] == LP_RAST_OP_TRIANGLE_7)
- return block->arg[k].triangle.tri->inputs.state->variant;
+ return state->variant;
return NULL;
}
static boolean
-is_blend( const struct cmd_block *block,
+is_blend( const struct lp_rast_state *state,
+ const struct cmd_block *block,
int k )
{
- const struct lp_fragment_shader_variant *variant = get_variant(block, k);
+ const struct lp_fragment_shader_variant *variant = get_variant(state, block, k);
if (variant)
return variant->key.blend.rt[0].blend_enable;
@@ -92,6 +95,7 @@ is_blend( const struct cmd_block *block,
static void
debug_bin( const struct cmd_bin *bin )
{
+ const struct lp_rast_state *state = NULL;
const struct cmd_block *head = bin->head;
int i, j = 0;
@@ -99,9 +103,12 @@ debug_bin( const struct cmd_bin *bin )
while (head) {
for (i = 0; i < head->count; i++, j++) {
+ if (head->cmd[i] == LP_RAST_OP_SET_STATE)
+ state = head->arg[i].state;
+
debug_printf("%d: %s %s\n", j,
cmd_name(head->cmd[i]),
- is_blend(head, i) ? "blended" : "");
+ is_blend(state, head, i) ? "blended" : "");
}
head = head->next;
}
@@ -133,7 +140,7 @@ debug_shade_tile(int x, int y,
char val)
{
const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
- boolean blend = inputs->state->variant->key.blend.rt[0].blend_enable;
+ boolean blend = tile->state->variant->key.blend.rt[0].blend_enable;
unsigned i,j;
if (inputs->disable)
@@ -171,11 +178,12 @@ debug_triangle(int tilex, int tiley,
{
const struct lp_rast_triangle *tri = arg.triangle.tri;
unsigned plane_mask = arg.triangle.plane_mask;
+ const struct lp_rast_plane *tri_plane = GET_PLANES(tri);
struct lp_rast_plane plane[8];
int x, y;
int count = 0;
unsigned i, nr_planes = 0;
- boolean blend = tri->inputs.state->variant->key.blend.rt[0].blend_enable;
+ boolean blend = tile->state->variant->key.blend.rt[0].blend_enable;
if (tri->inputs.disable) {
/* This triangle was partially binned and has been disabled */
@@ -183,7 +191,7 @@ debug_triangle(int tilex, int tiley,
}
while (plane_mask) {
- plane[nr_planes] = tri->plane[u_bit_scan(&plane_mask)];
+ plane[nr_planes] = tri_plane[u_bit_scan(&plane_mask)];
plane[nr_planes].c = (plane[nr_planes].c +
plane[nr_planes].dcdy * tiley -
plane[nr_planes].dcdx * tilex);
@@ -232,15 +240,19 @@ do_debug_bin( struct tile *tile,
memset(tile->data, ' ', sizeof tile->data);
tile->coverage = 0;
tile->overdraw = 0;
+ tile->state = NULL;
for (block = bin->head; block; block = block->next) {
for (k = 0; k < block->count; k++, j++) {
- boolean blend = is_blend(block, k);
+ boolean blend = is_blend(tile->state, block, k);
char val = get_label(j);
int count = 0;
if (print_cmds)
debug_printf("%c: %15s", val, cmd_name(block->cmd[k]));
+
+ if (block->cmd[k] == LP_RAST_OP_SET_STATE)
+ tile->state = block->arg[k].state;
if (block->cmd[k] == LP_RAST_OP_CLEAR_COLOR ||
block->cmd[k] == LP_RAST_OP_CLEAR_ZSTENCIL)
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index 7370119e966..b30408f097b 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -77,6 +77,7 @@ struct cmd_bin;
struct lp_rasterizer_task
{
const struct cmd_bin *bin;
+ const struct lp_rast_state *state;
struct lp_scene *scene;
unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */
@@ -244,7 +245,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
unsigned x, unsigned y )
{
const struct lp_scene *scene = task->scene;
- const struct lp_rast_state *state = inputs->state;
+ const struct lp_rast_state *state = task->state;
struct lp_fragment_shader_variant *variant = state->variant;
uint8_t *color[PIPE_MAX_COLOR_BUFS];
void *depth;
@@ -260,10 +261,10 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
BEGIN_JIT_CALL(state);
variant->jit_function[RAST_WHOLE]( &state->jit_context,
x, y,
- inputs->facing,
- inputs->a0,
- inputs->dadx,
- inputs->dady,
+ inputs->frontfacing,
+ GET_A0(inputs),
+ GET_DADX(inputs),
+ GET_DADY(inputs),
color,
depth,
0xffff,
@@ -293,6 +294,14 @@ void lp_rast_triangle_3_4(struct lp_rasterizer_task *,
void lp_rast_triangle_3_16( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_4_16( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+void
+lp_rast_set_state(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg);
+
void
lp_debug_bin( const struct cmd_bin *bin );
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index a1f309d4b01..042c315635e 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -123,6 +123,16 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
}
void
+lp_rast_triangle_4_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ union lp_rast_cmd_arg arg2;
+ arg2.triangle.tri = arg.triangle.tri;
+ arg2.triangle.plane_mask = (1<<4)-1;
+ lp_rast_triangle_3(task, arg2);
+}
+
+void
lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
@@ -230,144 +240,207 @@ sign_bits4(const __m128i *cstep, int cdiff)
}
-/* Special case for 3 plane triangle which is contained entirely
- * within a 16x16 block.
- */
+#define NR_PLANES 3
+
+
+
+
+
+
+
void
lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
const struct lp_rast_triangle *tri = arg.triangle.tri;
- const struct lp_rast_plane *plane = tri->plane;
- unsigned mask = arg.triangle.plane_mask;
- const int x = task->x + (mask & 0xff);
- const int y = task->y + (mask >> 8);
- unsigned outmask, inmask, partmask, partial_mask;
- unsigned j;
- __m128i cstep4[3][4];
-
- outmask = 0; /* outside one or more trivial reject planes */
- partmask = 0; /* outside one or more trivial accept planes */
-
- for (j = 0; j < 3; j++) {
- const int dcdx = -plane[j].dcdx * 4;
- const int dcdy = plane[j].dcdy * 4;
- __m128i xdcdy = _mm_set1_epi32(dcdy);
-
- cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3);
- cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy);
- cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy);
- cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy);
-
- {
- const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
- const int cox = plane[j].eo * 4;
- const int cio = plane[j].ei * 4 - 1;
-
- outmask |= sign_bits4(cstep4[j], c + cox);
- partmask |= sign_bits4(cstep4[j], c + cio);
- }
- }
+ const struct lp_rast_plane *plane = GET_PLANES(tri);
+ int x = (arg.triangle.plane_mask & 0xff) + task->x;
+ int y = (arg.triangle.plane_mask >> 8) + task->y;
+ unsigned i, j;
+
+ struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16];
+ unsigned nr = 0;
+
+ __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */
+ __m128i p1 = _mm_load_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */
+ __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */
+ __m128i zero = _mm_setzero_si128();
+
+ __m128i c;
+ __m128i dcdx;
+ __m128i dcdy;
+ __m128i rej4;
+
+ __m128i dcdx2;
+ __m128i dcdx3;
+
+ __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
+ __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
+ __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
+ __m128i unused;
+
+ transpose4_epi32(&p0, &p1, &p2, &zero,
+ &c, &dcdx, &dcdy, &rej4);
+
+ /* Adjust dcdx;
+ */
+ dcdx = _mm_sub_epi32(zero, dcdx);
- if (outmask == 0xffff)
- return;
+ c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x)));
+ c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y)));
+ rej4 = _mm_slli_epi32(rej4, 2);
- /* Mask of sub-blocks which are inside all trivial accept planes:
- */
- inmask = ~partmask & 0xffff;
+ dcdx2 = _mm_add_epi32(dcdx, dcdx);
+ dcdx3 = _mm_add_epi32(dcdx2, dcdx);
- /* Mask of sub-blocks which are inside all trivial reject planes,
- * but outside at least one trivial accept plane:
- */
- partial_mask = partmask & ~outmask;
+ transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3,
+ &span_0, &span_1, &span_2, &unused);
- assert((partial_mask & inmask) == 0);
+ for (i = 0; i < 4; i++) {
+ __m128i cx = c;
- /* Iterate over partials:
- */
- while (partial_mask) {
- int i = ffs(partial_mask) - 1;
- int ix = (i & 3) * 4;
- int iy = (i >> 2) * 4;
- int px = x + ix;
- int py = y + iy;
- unsigned mask = 0xffff;
-
- partial_mask &= ~(1 << i);
-
- for (j = 0; j < 3; j++) {
- const int cx = (plane[j].c
- - plane[j].dcdx * px
- + plane[j].dcdy * py) * 4;
-
- mask &= ~sign_bits4(cstep4[j], cx);
- }
+ for (j = 0; j < 4; j++) {
+ __m128i c4rej = _mm_add_epi32(cx, rej4);
+ __m128i rej_masks = _mm_srai_epi32(c4rej, 31);
- if (mask)
- lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask);
- }
+ /* if (is_zero(rej_masks)) */
+ if (_mm_movemask_epi8(rej_masks) == 0) {
+ __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(cx, 0), span_0);
+ __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(cx, 1), span_1);
+ __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(cx, 2), span_2);
- /* Iterate over fulls:
- */
- while (inmask) {
- int i = ffs(inmask) - 1;
- int ix = (i & 3) * 4;
- int iy = (i >> 2) * 4;
- int px = x + ix;
- int py = y + iy;
+ __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0);
+
+ __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0));
+ __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1));
+ __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2));
+
+ __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1);
+ __m128i c_01 = _mm_packs_epi32(c_0, c_1);
+
+ __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0));
+ __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1));
+ __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2));
+
+ __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2);
- inmask &= ~(1 << i);
+ __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0));
+ __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1));
+ __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2));
- block_full_4(task, tri, px, py);
+ __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3);
+ __m128i c_23 = _mm_packs_epi32(c_2, c_3);
+ __m128i c_0123 = _mm_packs_epi16(c_01, c_23);
+
+ unsigned mask = _mm_movemask_epi8(c_0123);
+
+ out[nr].i = i;
+ out[nr].j = j;
+ out[nr].mask = mask;
+ if (mask != 0xffff)
+ nr++;
+ }
+ cx = _mm_add_epi32(cx, _mm_slli_epi32(dcdx, 2));
+ }
+
+ c = _mm_add_epi32(c, _mm_slli_epi32(dcdy, 2));
}
+
+ for (i = 0; i < nr; i++)
+ lp_rast_shade_quads_mask(task,
+ &tri->inputs,
+ x + 4 * out[i].j,
+ y + 4 * out[i].i,
+ 0xffff & ~out[i].mask);
}
+
+
+
void
lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
+ const union lp_rast_cmd_arg arg)
{
const struct lp_rast_triangle *tri = arg.triangle.tri;
- const struct lp_rast_plane *plane = tri->plane;
- unsigned mask = arg.triangle.plane_mask;
- const int x = task->x + (mask & 0xff);
- const int y = task->y + (mask >> 8);
- unsigned j;
-
- /* Iterate over partials:
+ const struct lp_rast_plane *plane = GET_PLANES(tri);
+ int x = (arg.triangle.plane_mask & 0xff) + task->x;
+ int y = (arg.triangle.plane_mask >> 8) + task->y;
+
+ __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */
+ __m128i p1 = _mm_load_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */
+ __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */
+ __m128i zero = _mm_setzero_si128();
+
+ __m128i c;
+ __m128i dcdx;
+ __m128i dcdy;
+
+ __m128i dcdx2;
+ __m128i dcdx3;
+
+ __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
+ __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
+ __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
+ __m128i unused;
+
+ transpose4_epi32(&p0, &p1, &p2, &zero,
+ &c, &dcdx, &dcdy, &unused);
+
+ /* Adjust dcdx;
*/
+ dcdx = _mm_sub_epi32(zero, dcdx);
+
+ c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x)));
+ c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y)));
+
+ dcdx2 = _mm_add_epi32(dcdx, dcdx);
+ dcdx3 = _mm_add_epi32(dcdx2, dcdx);
+
+ transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3,
+ &span_0, &span_1, &span_2, &unused);
+
+
{
- unsigned mask = 0xffff;
+ __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(c, 0), span_0);
+ __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(c, 1), span_1);
+ __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(c, 2), span_2);
+
+ __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0);
- for (j = 0; j < 3; j++) {
- const int cx = (plane[j].c
- - plane[j].dcdx * x
- + plane[j].dcdy * y);
+ __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0));
+ __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1));
+ __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2));
- const int dcdx = -plane[j].dcdx;
- const int dcdy = plane[j].dcdy;
- __m128i xdcdy = _mm_set1_epi32(dcdy);
+ __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1);
+ __m128i c_01 = _mm_packs_epi32(c_0, c_1);
- __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3);
- __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
- __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
- __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
+ __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0));
+ __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1));
+ __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2));
- __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
- __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
- __m128i result = _mm_packs_epi16(cstep01, cstep23);
+ __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2);
- /* Extract the sign bits
- */
- mask &= ~_mm_movemask_epi8(result);
- }
+ __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0));
+ __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1));
+ __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2));
- if (mask)
- lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask);
+ __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3);
+ __m128i c_23 = _mm_packs_epi32(c_2, c_3);
+ __m128i c_0123 = _mm_packs_epi16(c_01, c_23);
+
+ unsigned mask = _mm_movemask_epi8(c_0123);
+
+ if (mask != 0xffff)
+ lp_rast_shade_quads_mask(task,
+ &tri->inputs,
+ x,
+ y,
+ 0xffff & ~mask);
}
}
-
+#undef NR_PLANES
#endif
@@ -383,10 +456,13 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
#define TAG(x) x##_3
#define NR_PLANES 3
+/*#define TRI_4 lp_rast_triangle_3_4*/
+/*#define TRI_16 lp_rast_triangle_3_16*/
#include "lp_rast_tri_tmp.h"
#define TAG(x) x##_4
#define NR_PLANES 4
+#define TRI_16 lp_rast_triangle_4_16
#include "lp_rast_tri_tmp.h"
#define TAG(x) x##_5
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
index 9830a43ba55..4825d651c04 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
@@ -82,7 +82,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
const int dcdx = -plane[j].dcdx * 4;
const int dcdy = plane[j].dcdy * 4;
const int cox = plane[j].eo * 4;
- const int cio = plane[j].ei * 4 - 1;
+ const int ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo;
+ const int cio = ei * 4 - 1;
build_masks(c[j] + cox,
cio - cox,
@@ -156,6 +157,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
{
const struct lp_rast_triangle *tri = arg.triangle.tri;
unsigned plane_mask = arg.triangle.plane_mask;
+ const struct lp_rast_plane *tri_plane = GET_PLANES(tri);
const int x = task->x, y = task->y;
struct lp_rast_plane plane[NR_PLANES];
int c[NR_PLANES];
@@ -172,7 +174,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
while (plane_mask) {
int i = ffs(plane_mask) - 1;
- plane[j] = tri->plane[i];
+ plane[j] = tri_plane[i];
plane_mask &= ~(1 << i);
c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
@@ -180,7 +182,8 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
const int dcdx = -plane[j].dcdx * 16;
const int dcdy = plane[j].dcdy * 16;
const int cox = plane[j].eo * 16;
- const int cio = plane[j].ei * 16 - 1;
+ const int ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo;
+ const int cio = ei * 16 - 1;
build_masks(c[j] + cox,
cio - cox,
@@ -245,6 +248,133 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
}
}
+#if defined(PIPE_ARCH_SSE) && defined(TRI_16)
+/* XXX: special case this when intersection is not required.
+ * - tile completely within bbox,
+ * - bbox completely within tile.
+ */
+void
+TRI_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ const struct lp_rast_plane *plane = GET_PLANES(tri);
+ unsigned mask = arg.triangle.plane_mask;
+ unsigned outmask, partial_mask;
+ unsigned j;
+ __m128i cstep4[NR_PLANES][4];
+
+ int x = (mask & 0xff);
+ int y = (mask >> 8);
+
+ outmask = 0; /* outside one or more trivial reject planes */
+
+ x += task->x;
+ y += task->y;
+
+ for (j = 0; j < NR_PLANES; j++) {
+ const int dcdx = -plane[j].dcdx * 4;
+ const int dcdy = plane[j].dcdy * 4;
+ __m128i xdcdy = _mm_set1_epi32(dcdy);
+
+ cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3);
+ cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy);
+ cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy);
+ cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy);
+
+ {
+ const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
+ const int cox = plane[j].eo * 4;
+
+ outmask |= sign_bits4(cstep4[j], c + cox);
+ }
+ }
+
+ if (outmask == 0xffff)
+ return;
+
+
+ /* Mask of sub-blocks which are inside all trivial reject planes,
+ * but outside at least one trivial accept plane:
+ */
+ partial_mask = 0xffff & ~outmask;
+
+ /* Iterate over partials:
+ */
+ while (partial_mask) {
+ int i = ffs(partial_mask) - 1;
+ int ix = (i & 3) * 4;
+ int iy = (i >> 2) * 4;
+ int px = x + ix;
+ int py = y + iy;
+ unsigned mask = 0xffff;
+
+ partial_mask &= ~(1 << i);
+
+ for (j = 0; j < NR_PLANES; j++) {
+ const int cx = (plane[j].c - 1
+ - plane[j].dcdx * px
+ + plane[j].dcdy * py) * 4;
+
+ mask &= ~sign_bits4(cstep4[j], cx);
+ }
+
+ if (mask)
+ lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask);
+ }
+}
+#endif
+
+#if defined(PIPE_ARCH_SSE) && defined(TRI_4)
+void
+TRI_4(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ const struct lp_rast_plane *plane = GET_PLANES(tri);
+ unsigned mask = arg.triangle.plane_mask;
+ const int x = task->x + (mask & 0xff);
+ const int y = task->y + (mask >> 8);
+ unsigned j;
+
+ /* Iterate over partials:
+ */
+ {
+ unsigned mask = 0xffff;
+
+ for (j = 0; j < NR_PLANES; j++) {
+ const int cx = (plane[j].c
+ - plane[j].dcdx * x
+ + plane[j].dcdy * y);
+
+ const int dcdx = -plane[j].dcdx;
+ const int dcdy = plane[j].dcdy;
+ __m128i xdcdy = _mm_set1_epi32(dcdy);
+
+ __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3);
+ __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
+ __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
+ __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
+
+ __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
+ __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
+ __m128i result = _mm_packs_epi16(cstep01, cstep23);
+
+ /* Extract the sign bits
+ */
+ mask &= ~_mm_movemask_epi8(result);
+ }
+
+ if (mask)
+ lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask);
+ }
+}
+#endif
+
+
+
#undef TAG
+#undef TRI_4
+#undef TRI_16
#undef NR_PLANES
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
index 8b504f23a33..a4fdf7cff36 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -203,7 +203,9 @@ lp_scene_end_rasterization(struct lp_scene *scene )
for (i = 0; i < scene->tiles_x; i++) {
for (j = 0; j < scene->tiles_y; j++) {
struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
- bin->head = bin->tail = NULL;
+ bin->head = NULL;
+ bin->tail = NULL;
+ bin->last_state = NULL;
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
index dbef7692e42..622c522f11a 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.h
+++ b/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -41,6 +41,7 @@
#include "lp_debug.h"
struct lp_scene_queue;
+struct lp_rast_state;
/* We're limited to 2K by 2K for 32bit fixed point rasterization.
* Will need a 64-bit version for larger framebuffers.
@@ -94,6 +95,7 @@ struct data_block {
struct cmd_bin {
ushort x;
ushort y;
+ const struct lp_rast_state *last_state; /* most recent state set in bin */
struct cmd_block *head;
struct cmd_block *tail;
};
@@ -297,7 +299,7 @@ lp_scene_bin_command( struct lp_scene *scene,
assert(x < scene->tiles_x);
assert(y < scene->tiles_y);
- assert(cmd <= LP_RAST_OP_END_QUERY);
+ assert(cmd < LP_RAST_OP_MAX);
if (tail == NULL || tail->count == CMD_BLOCK_MAX) {
tail = lp_scene_new_cmd_block( scene, bin );
@@ -318,6 +320,30 @@ lp_scene_bin_command( struct lp_scene *scene,
}
+static INLINE boolean
+lp_scene_bin_cmd_with_state( struct lp_scene *scene,
+ unsigned x, unsigned y,
+ const struct lp_rast_state *state,
+ unsigned cmd,
+ union lp_rast_cmd_arg arg )
+{
+ struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);
+
+ if (state != bin->last_state) {
+ bin->last_state = state;
+ if (!lp_scene_bin_command(scene, x, y,
+ LP_RAST_OP_SET_STATE,
+ lp_rast_arg_state(state)))
+ return FALSE;
+ }
+
+ if (!lp_scene_bin_command( scene, x, y, cmd, arg ))
+ return FALSE;
+
+ return TRUE;
+}
+
+
/* Add a command to all active bins.
*/
static INLINE boolean
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 96633d93654..ad0ea75b3a3 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -158,6 +158,8 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
return 0;
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ return 1;
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
return 1;
case PIPE_CAP_DEPTH_CLAMP:
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 5ff11a33632..6118434d3d3 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -56,7 +56,7 @@
#include "draw/draw_vbuf.h"
-static void set_scene_state( struct lp_setup_context *, enum setup_state,
+static boolean set_scene_state( struct lp_setup_context *, enum setup_state,
const char *reason);
static boolean try_update_scene_state( struct lp_setup_context *setup );
@@ -167,7 +167,7 @@ lp_setup_rasterize_scene( struct lp_setup_context *setup )
-static void
+static boolean
begin_binning( struct lp_setup_context *setup )
{
struct lp_scene *scene = setup->scene;
@@ -181,6 +181,8 @@ begin_binning( struct lp_setup_context *setup )
/* Always create a fence:
*/
scene->fence = lp_fence_create(MAX2(1, setup->num_threads));
+ if (!scene->fence)
+ return FALSE;
/* Initialize the bin flags and x/y coords:
*/
@@ -192,7 +194,8 @@ begin_binning( struct lp_setup_context *setup )
}
ok = try_update_scene_state(setup);
- assert(ok);
+ if (!ok)
+ return FALSE;
if (setup->fb.zsbuf &&
((setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) &&
@@ -208,7 +211,8 @@ begin_binning( struct lp_setup_context *setup )
ok = lp_scene_bin_everywhere( scene,
LP_RAST_OP_CLEAR_COLOR,
setup->clear.color );
- assert(ok);
+ if (!ok)
+ return FALSE;
}
}
@@ -216,12 +220,14 @@ begin_binning( struct lp_setup_context *setup )
if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) {
if (!need_zsload)
scene->has_depthstencil_clear = TRUE;
+
ok = lp_scene_bin_everywhere( scene,
LP_RAST_OP_CLEAR_ZSTENCIL,
lp_rast_arg_clearzs(
setup->clear.zsvalue,
setup->clear.zsmask));
- assert(ok);
+ if (!ok)
+ return FALSE;
}
}
@@ -229,15 +235,16 @@ begin_binning( struct lp_setup_context *setup )
ok = lp_scene_bin_everywhere( scene,
LP_RAST_OP_BEGIN_QUERY,
lp_rast_arg_query(setup->active_query) );
- assert(ok);
+ if (!ok)
+ return FALSE;
}
-
setup->clear.flags = 0;
setup->clear.zsmask = 0;
setup->clear.zsvalue = 0;
LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__);
+ return TRUE;
}
@@ -246,12 +253,12 @@ begin_binning( struct lp_setup_context *setup )
*
* TODO: fast path for fullscreen clears and no triangles.
*/
-static void
+static boolean
execute_clears( struct lp_setup_context *setup )
{
LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
- begin_binning( setup );
+ return begin_binning( setup );
}
const char *states[] = {
@@ -262,7 +269,7 @@ const char *states[] = {
};
-static void
+static boolean
set_scene_state( struct lp_setup_context *setup,
enum setup_state new_state,
const char *reason)
@@ -270,7 +277,7 @@ set_scene_state( struct lp_setup_context *setup,
unsigned old_state = setup->state;
if (old_state == new_state)
- return;
+ return TRUE;
if (LP_DEBUG & DEBUG_SCENE) {
debug_printf("%s old %s new %s%s%s\n",
@@ -294,12 +301,14 @@ set_scene_state( struct lp_setup_context *setup,
break;
case SETUP_ACTIVE:
- begin_binning( setup );
+ if (!begin_binning( setup ))
+ goto fail;
break;
case SETUP_FLUSHED:
if (old_state == SETUP_CLEARED)
- execute_clears( setup );
+ if (!execute_clears( setup ))
+ goto fail;
lp_setup_rasterize_scene( setup );
assert(setup->scene == NULL);
@@ -307,9 +316,21 @@ set_scene_state( struct lp_setup_context *setup,
default:
assert(0 && "invalid setup state mode");
+ goto fail;
}
setup->state = new_state;
+ return TRUE;
+
+fail:
+ if (setup->scene) {
+ lp_scene_end_rasterization(setup->scene);
+ setup->scene = NULL;
+ }
+
+ setup->state = SETUP_FLUSHED;
+ lp_setup_reset( setup );
+ return FALSE;
}
@@ -377,16 +398,19 @@ lp_setup_try_clear( struct lp_setup_context *setup,
}
if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
- unsigned zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0;
- unsigned smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
+ uint32_t zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0;
+ uint32_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
zsvalue = util_pack_z_stencil(setup->fb.zsbuf->format,
depth,
stencil);
- zsmask = util_pack_uint_z_stencil(setup->fb.zsbuf->format,
+
+ zsmask = util_pack_mask_z_stencil(setup->fb.zsbuf->format,
zmask,
smask);
+
+ zsvalue &= zsmask;
}
if (setup->state == SETUP_ACTIVE) {
@@ -431,7 +455,7 @@ lp_setup_try_clear( struct lp_setup_context *setup,
if (flags & PIPE_CLEAR_COLOR) {
memcpy(setup->clear.color.clear_color,
&color_arg,
- sizeof color_arg);
+ sizeof setup->clear.color.clear_color);
}
}
@@ -502,14 +526,12 @@ lp_setup_set_point_state( struct lp_setup_context *setup,
}
void
-lp_setup_set_fs_inputs( struct lp_setup_context *setup,
- const struct lp_shader_input *input,
- unsigned nr )
+lp_setup_set_setup_variant( struct lp_setup_context *setup,
+ const struct lp_setup_variant *variant)
{
- LP_DBG(DEBUG_SETUP, "%s %p %u\n", __FUNCTION__, (void *) input, nr);
-
- memcpy( setup->fs.input, input, nr * sizeof input[0] );
- setup->fs.nr_inputs = nr;
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ setup->setup.variant = variant;
}
void
@@ -875,7 +897,7 @@ try_update_scene_state( struct lp_setup_context *setup )
return TRUE;
}
-void
+boolean
lp_setup_update_state( struct lp_setup_context *setup,
boolean update_scene )
{
@@ -897,22 +919,47 @@ lp_setup_update_state( struct lp_setup_context *setup,
setup->psize = lp->psize_slot;
assert(lp->dirty == 0);
+
+ assert(lp->setup_variant.key.size ==
+ setup->setup.variant->key.size);
+
+ assert(memcmp(&lp->setup_variant.key,
+ &setup->setup.variant->key,
+ setup->setup.variant->key.size) == 0);
}
- if (update_scene)
- set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ );
+ if (update_scene) {
+ if (!set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ ))
+ return FALSE;
+ }
/* Only call into update_scene_state() if we already have a
* scene:
*/
if (update_scene && setup->scene) {
assert(setup->state == SETUP_ACTIVE);
- if (!try_update_scene_state(setup)) {
- lp_setup_flush_and_restart(setup);
- if (!try_update_scene_state(setup))
- assert(0);
- }
+
+ if (try_update_scene_state(setup))
+ return TRUE;
+
+ /* Update failed, try to restart the scene.
+ *
+ * Cannot call lp_setup_flush_and_restart() directly here
+ * because of potential recursion.
+ */
+ if (!set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__))
+ return FALSE;
+
+ if (!set_scene_state(setup, SETUP_ACTIVE, __FUNCTION__))
+ return FALSE;
+
+ if (!setup->scene)
+ return FALSE;
+
+ return try_update_scene_state(setup);
}
+
+ return TRUE;
}
@@ -1016,12 +1063,12 @@ lp_setup_begin_query(struct lp_setup_context *setup,
LP_RAST_OP_BEGIN_QUERY,
lp_rast_arg_query(pq))) {
- lp_setup_flush_and_restart(setup);
+ if (!lp_setup_flush_and_restart(setup))
+ return;
if (!lp_scene_bin_everywhere(setup->scene,
LP_RAST_OP_BEGIN_QUERY,
lp_rast_arg_query(pq))) {
- assert(0);
return;
}
}
@@ -1065,14 +1112,20 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq)
}
-void
+boolean
lp_setup_flush_and_restart(struct lp_setup_context *setup)
{
if (0) debug_printf("%s\n", __FUNCTION__);
assert(setup->state == SETUP_ACTIVE);
- set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__);
- lp_setup_update_state(setup, TRUE);
+
+ if (!set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__))
+ return FALSE;
+
+ if (!lp_setup_update_state(setup, TRUE))
+ return FALSE;
+
+ return TRUE;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h
index 25dab78f64f..ebb18f81344 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup.h
@@ -33,28 +33,6 @@
struct draw_context;
struct vertex_info;
-enum lp_interp {
- LP_INTERP_CONSTANT,
- LP_INTERP_LINEAR,
- LP_INTERP_PERSPECTIVE,
- LP_INTERP_POSITION,
- LP_INTERP_FACING
-};
-
-
-/**
- * Describes how to compute the interpolation coefficients (a0, dadx, dady)
- * from the vertices passed into our triangle/line/point functions by the
- * draw module.
- *
- * Vertices are treated as an array of float[4] values, indexed by
- * src_index.
- */
-struct lp_shader_input {
- enum lp_interp interp; /* how to interpolate values */
- unsigned src_index; /* where to find values in incoming vertices */
- unsigned usage_mask; /* bitmask of TGSI_WRITEMASK_x flags */
-};
struct pipe_resource;
struct pipe_query;
@@ -66,7 +44,7 @@ struct lp_fragment_shader_variant;
struct lp_jit_context;
struct llvmpipe_query;
struct pipe_fence_handle;
-
+struct lp_setup_variant;
struct lp_setup_context *
lp_setup_create( struct pipe_context *pipe,
@@ -111,9 +89,8 @@ lp_setup_set_point_state( struct lp_setup_context *setup,
uint sprite_coord_origin);
void
-lp_setup_set_fs_inputs( struct lp_setup_context *setup,
- const struct lp_shader_input *interp,
- unsigned nr );
+lp_setup_set_setup_variant( struct lp_setup_context *setup,
+ const struct lp_setup_variant *variant );
void
lp_setup_set_fs_variant( struct lp_setup_context *setup,
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.c b/src/gallium/drivers/llvmpipe/lp_setup_coef.c
deleted file mode 100644
index 8dc2688ddb6..00000000000
--- a/src/gallium/drivers/llvmpipe/lp_setup_coef.c
+++ /dev/null
@@ -1,279 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2010, VMware.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/*
- * Binning code for triangles
- */
-
-#include "util/u_math.h"
-#include "util/u_memory.h"
-#include "lp_perf.h"
-#include "lp_setup_context.h"
-#include "lp_setup_coef.h"
-#include "lp_rast.h"
-#include "lp_state_fs.h"
-
-#if !defined(PIPE_ARCH_SSE)
-
-/**
- * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
- */
-static void constant_coef( struct lp_rast_shader_inputs *inputs,
- unsigned slot,
- const float value,
- unsigned i )
-{
- inputs->a0[slot][i] = value;
- inputs->dadx[slot][i] = 0.0f;
- inputs->dady[slot][i] = 0.0f;
-}
-
-
-
-static void linear_coef( struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info,
- unsigned slot,
- unsigned vert_attr,
- unsigned i)
-{
- float a0 = info->v0[vert_attr][i];
- float a1 = info->v1[vert_attr][i];
- float a2 = info->v2[vert_attr][i];
-
- float da01 = a0 - a1;
- float da20 = a2 - a0;
- float dadx = (da01 * info->dy20_ooa - info->dy01_ooa * da20);
- float dady = (da20 * info->dx01_ooa - info->dx20_ooa * da01);
-
- inputs->dadx[slot][i] = dadx;
- inputs->dady[slot][i] = dady;
-
- /* calculate a0 as the value which would be sampled for the
- * fragment at (0,0), taking into account that we want to sample at
- * pixel centers, in other words (0.5, 0.5).
- *
- * this is neat but unfortunately not a good way to do things for
- * triangles with very large values of dadx or dady as it will
- * result in the subtraction and re-addition from a0 of a very
- * large number, which means we'll end up loosing a lot of the
- * fractional bits and precision from a0. the way to fix this is
- * to define a0 as the sample at a pixel center somewhere near vmin
- * instead - i'll switch to this later.
- */
- inputs->a0[slot][i] = a0 - (dadx * info->x0_center +
- dady * info->y0_center);
-}
-
-
-/**
- * Compute a0, dadx and dady for a perspective-corrected interpolant,
- * for a triangle.
- * We basically multiply the vertex value by 1/w before computing
- * the plane coefficients (a0, dadx, dady).
- * Later, when we compute the value at a particular fragment position we'll
- * divide the interpolated value by the interpolated W at that fragment.
- */
-static void perspective_coef( struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info,
- unsigned slot,
- unsigned vert_attr,
- unsigned i)
-{
- /* premultiply by 1/w (v[0][3] is always 1/w):
- */
- float a0 = info->v0[vert_attr][i] * info->v0[0][3];
- float a1 = info->v1[vert_attr][i] * info->v1[0][3];
- float a2 = info->v2[vert_attr][i] * info->v2[0][3];
- float da01 = a0 - a1;
- float da20 = a2 - a0;
- float dadx = da01 * info->dy20_ooa - info->dy01_ooa * da20;
- float dady = da20 * info->dx01_ooa - info->dx20_ooa * da01;
-
- inputs->dadx[slot][i] = dadx;
- inputs->dady[slot][i] = dady;
- inputs->a0[slot][i] = a0 - (dadx * info->x0_center +
- dady * info->y0_center);
-}
-
-
-/**
- * Special coefficient setup for gl_FragCoord.
- * X and Y are trivial
- * Z and W are copied from position_coef which should have already been computed.
- * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
- */
-static void
-setup_fragcoord_coef(struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info,
- unsigned slot,
- unsigned usage_mask)
-{
- /*X*/
- if (usage_mask & TGSI_WRITEMASK_X) {
- inputs->a0[slot][0] = 0.0;
- inputs->dadx[slot][0] = 1.0;
- inputs->dady[slot][0] = 0.0;
- }
-
- /*Y*/
- if (usage_mask & TGSI_WRITEMASK_Y) {
- inputs->a0[slot][1] = 0.0;
- inputs->dadx[slot][1] = 0.0;
- inputs->dady[slot][1] = 1.0;
- }
-
- /*Z*/
- if (usage_mask & TGSI_WRITEMASK_Z) {
- linear_coef(inputs, info, slot, 0, 2);
- }
-
- /*W*/
- if (usage_mask & TGSI_WRITEMASK_W) {
- linear_coef(inputs, info, slot, 0, 3);
- }
-}
-
-
-/**
- * Setup the fragment input attribute with the front-facing value.
- * \param frontface is the triangle front facing?
- */
-static void setup_facing_coef( struct lp_rast_shader_inputs *inputs,
- unsigned slot,
- boolean frontface,
- unsigned usage_mask)
-{
- /* convert TRUE to 1.0 and FALSE to -1.0 */
- if (usage_mask & TGSI_WRITEMASK_X)
- constant_coef( inputs, slot, 2.0f * frontface - 1.0f, 0 );
-
- if (usage_mask & TGSI_WRITEMASK_Y)
- constant_coef( inputs, slot, 0.0f, 1 ); /* wasted */
-
- if (usage_mask & TGSI_WRITEMASK_Z)
- constant_coef( inputs, slot, 0.0f, 2 ); /* wasted */
-
- if (usage_mask & TGSI_WRITEMASK_W)
- constant_coef( inputs, slot, 0.0f, 3 ); /* wasted */
-}
-
-
-/**
- * Compute the tri->coef[] array dadx, dady, a0 values.
- */
-void lp_setup_tri_coef( struct lp_setup_context *setup,
- struct lp_rast_shader_inputs *inputs,
- const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4],
- boolean frontfacing)
-{
- unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
- unsigned slot;
- unsigned i;
- struct lp_tri_info info;
- float dx01 = v0[0][0] - v1[0][0];
- float dy01 = v0[0][1] - v1[0][1];
- float dx20 = v2[0][0] - v0[0][0];
- float dy20 = v2[0][1] - v0[0][1];
- float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01);
-
- info.v0 = v0;
- info.v1 = v1;
- info.v2 = v2;
- info.frontfacing = frontfacing;
- info.x0_center = v0[0][0] - setup->pixel_offset;
- info.y0_center = v0[0][1] - setup->pixel_offset;
- info.dx01_ooa = dx01 * oneoverarea;
- info.dx20_ooa = dx20 * oneoverarea;
- info.dy01_ooa = dy01 * oneoverarea;
- info.dy20_ooa = dy20 * oneoverarea;
-
-
- /* setup interpolation for all the remaining attributes:
- */
- for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
- unsigned vert_attr = setup->fs.input[slot].src_index;
- unsigned usage_mask = setup->fs.input[slot].usage_mask;
-
- switch (setup->fs.input[slot].interp) {
- case LP_INTERP_CONSTANT:
- if (setup->flatshade_first) {
- for (i = 0; i < NUM_CHANNELS; i++)
- if (usage_mask & (1 << i))
- constant_coef(inputs, slot+1, info.v0[vert_attr][i], i);
- }
- else {
- for (i = 0; i < NUM_CHANNELS; i++)
- if (usage_mask & (1 << i))
- constant_coef(inputs, slot+1, info.v2[vert_attr][i], i);
- }
- break;
-
- case LP_INTERP_LINEAR:
- for (i = 0; i < NUM_CHANNELS; i++)
- if (usage_mask & (1 << i))
- linear_coef(inputs, &info, slot+1, vert_attr, i);
- break;
-
- case LP_INTERP_PERSPECTIVE:
- for (i = 0; i < NUM_CHANNELS; i++)
- if (usage_mask & (1 << i))
- perspective_coef(inputs, &info, slot+1, vert_attr, i);
- fragcoord_usage_mask |= TGSI_WRITEMASK_W;
- break;
-
- case LP_INTERP_POSITION:
- /*
- * The generated pixel interpolators will pick up the coeffs from
- * slot 0, so all need to ensure that the usage mask is covers all
- * usages.
- */
- fragcoord_usage_mask |= usage_mask;
- break;
-
- case LP_INTERP_FACING:
- setup_facing_coef(inputs, slot+1, info.frontfacing, usage_mask);
- break;
-
- default:
- assert(0);
- }
- }
-
- /* The internal position input is in slot zero:
- */
- setup_fragcoord_coef(inputs, &info, 0, fragcoord_usage_mask);
-}
-
-#else
-extern void lp_setup_coef_dummy(void);
-void lp_setup_coef_dummy(void)
-{
-}
-
-#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.h b/src/gallium/drivers/llvmpipe/lp_setup_coef.h
deleted file mode 100644
index 87a3255ccc6..00000000000
--- a/src/gallium/drivers/llvmpipe/lp_setup_coef.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2010 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * The setup code is concerned with point/line/triangle setup and
- * putting commands/data into the bins.
- */
-
-
-#ifndef LP_SETUP_COEF_H
-#define LP_SETUP_COEF_H
-
-
-struct lp_tri_info {
-
- float x0_center;
- float y0_center;
-
- /* turn these into an aligned float[4] */
- float dy01_ooa;
- float dy20_ooa;
- float dx01_ooa;
- float dx20_ooa;
-
- const float (*v0)[4];
- const float (*v1)[4];
- const float (*v2)[4];
-
- boolean frontfacing; /* remove eventually */
-};
-
-void lp_setup_tri_coef( struct lp_setup_context *setup,
- struct lp_rast_shader_inputs *inputs,
- const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4],
- boolean frontfacing);
-
-#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c
deleted file mode 100644
index 3742fd672b2..00000000000
--- a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2010 VMware.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/*
- * Binning code for triangles
- */
-
-#include "util/u_math.h"
-#include "util/u_memory.h"
-#include "lp_perf.h"
-#include "lp_setup_context.h"
-#include "lp_setup_coef.h"
-#include "lp_rast.h"
-
-#if defined(PIPE_ARCH_SSE)
-#include <emmintrin.h>
-
-
-static void constant_coef4( struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info,
- unsigned slot,
- const float *attr)
-{
- *(__m128 *)inputs->a0[slot] = *(__m128 *)attr;
- *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0);
- *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0);
-}
-
-
-
-/**
- * Setup the fragment input attribute with the front-facing value.
- * \param frontface is the triangle front facing?
- */
-static void setup_facing_coef( struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info,
- unsigned slot )
-{
- /* XXX: just pass frontface directly to the shader, don't bother
- * treating it as an input.
- */
- __m128 a0 = _mm_setr_ps(info->frontfacing ? 1.0 : -1.0,
- 0, 0, 0);
-
- *(__m128 *)inputs->a0[slot] = a0;
- *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0);
- *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0);
-}
-
-
-
-static void calc_coef4( struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info,
- unsigned slot,
- __m128 a0,
- __m128 a1,
- __m128 a2)
-{
- __m128 da01 = _mm_sub_ps(a0, a1);
- __m128 da20 = _mm_sub_ps(a2, a0);
-
- __m128 da01_dy20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dy20_ooa));
- __m128 da20_dy01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dy01_ooa));
- __m128 dadx = _mm_sub_ps(da01_dy20_ooa, da20_dy01_ooa);
-
- __m128 da01_dx20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dx20_ooa));
- __m128 da20_dx01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dx01_ooa));
- __m128 dady = _mm_sub_ps(da20_dx01_ooa, da01_dx20_ooa);
-
- __m128 dadx_x0 = _mm_mul_ps(dadx, _mm_set1_ps(info->x0_center));
- __m128 dady_y0 = _mm_mul_ps(dady, _mm_set1_ps(info->y0_center));
- __m128 attr_v0 = _mm_add_ps(dadx_x0, dady_y0);
- __m128 attr_0 = _mm_sub_ps(a0, attr_v0);
-
- *(__m128 *)inputs->a0[slot] = attr_0;
- *(__m128 *)inputs->dadx[slot] = dadx;
- *(__m128 *)inputs->dady[slot] = dady;
-}
-
-
-static void linear_coef( struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info,
- unsigned slot,
- unsigned vert_attr)
-{
- __m128 a0 = *(const __m128 *)info->v0[vert_attr];
- __m128 a1 = *(const __m128 *)info->v1[vert_attr];
- __m128 a2 = *(const __m128 *)info->v2[vert_attr];
-
- calc_coef4(inputs, info, slot, a0, a1, a2);
-}
-
-
-
-/**
- * Compute a0, dadx and dady for a perspective-corrected interpolant,
- * for a triangle.
- * We basically multiply the vertex value by 1/w before computing
- * the plane coefficients (a0, dadx, dady).
- * Later, when we compute the value at a particular fragment position we'll
- * divide the interpolated value by the interpolated W at that fragment.
- */
-static void perspective_coef( struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info,
- unsigned slot,
- unsigned vert_attr)
-{
- /* premultiply by 1/w (v[0][3] is always 1/w):
- */
- __m128 a0 = *(const __m128 *)info->v0[vert_attr];
- __m128 a1 = *(const __m128 *)info->v1[vert_attr];
- __m128 a2 = *(const __m128 *)info->v2[vert_attr];
-
- __m128 a0_oow = _mm_mul_ps(a0, _mm_set1_ps(info->v0[0][3]));
- __m128 a1_oow = _mm_mul_ps(a1, _mm_set1_ps(info->v1[0][3]));
- __m128 a2_oow = _mm_mul_ps(a2, _mm_set1_ps(info->v2[0][3]));
-
- calc_coef4(inputs, info, slot, a0_oow, a1_oow, a2_oow);
-}
-
-
-
-
-
-/**
- * Compute the inputs-> dadx, dady, a0 values.
- */
-void lp_setup_tri_coef( struct lp_setup_context *setup,
- struct lp_rast_shader_inputs *inputs,
- const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4],
- boolean frontfacing)
-{
- unsigned slot;
- struct lp_tri_info info;
- float dx01 = v0[0][0] - v1[0][0];
- float dy01 = v0[0][1] - v1[0][1];
- float dx20 = v2[0][0] - v0[0][0];
- float dy20 = v2[0][1] - v0[0][1];
- float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01);
-
- info.v0 = v0;
- info.v1 = v1;
- info.v2 = v2;
- info.frontfacing = frontfacing;
- info.x0_center = v0[0][0] - setup->pixel_offset;
- info.y0_center = v0[0][1] - setup->pixel_offset;
- info.dx01_ooa = dx01 * oneoverarea;
- info.dx20_ooa = dx20 * oneoverarea;
- info.dy01_ooa = dy01 * oneoverarea;
- info.dy20_ooa = dy20 * oneoverarea;
-
-
- /* The internal position input is in slot zero:
- */
- linear_coef(inputs, &info, 0, 0);
-
- /* setup interpolation for all the remaining attributes:
- */
- for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
- unsigned vert_attr = setup->fs.input[slot].src_index;
-
- switch (setup->fs.input[slot].interp) {
- case LP_INTERP_CONSTANT:
- if (setup->flatshade_first) {
- constant_coef4(inputs, &info, slot+1, info.v0[vert_attr]);
- }
- else {
- constant_coef4(inputs, &info, slot+1, info.v2[vert_attr]);
- }
- break;
-
- case LP_INTERP_LINEAR:
- linear_coef(inputs, &info, slot+1, vert_attr);
- break;
-
- case LP_INTERP_PERSPECTIVE:
- perspective_coef(inputs, &info, slot+1, vert_attr);
- break;
-
- case LP_INTERP_POSITION:
- /*
- * The generated pixel interpolators will pick up the coeffs from
- * slot 0.
- */
- break;
-
- case LP_INTERP_FACING:
- setup_facing_coef(inputs, &info, slot+1);
- break;
-
- default:
- assert(0);
- }
- }
-}
-
-#else
-extern void lp_setup_coef_dummy(void);
-void lp_setup_coef_dummy(void)
-{
-}
-#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index 8506ed2dc9e..dc2533bedc5 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -39,6 +39,7 @@
#include "lp_rast.h"
#include "lp_tile_soa.h" /* for TILE_SIZE */
#include "lp_scene.h"
+#include "lp_bld_interp.h" /* for struct lp_shader_input */
#include "draw/draw_vbuf.h"
#include "util/u_rect.h"
@@ -49,6 +50,8 @@
#define LP_SETUP_NEW_SCISSOR 0x08
+struct lp_setup_variant;
+
/** Max number of scenes */
#define MAX_SCENES 2
@@ -118,9 +121,6 @@ struct lp_setup_context
} state;
struct {
- struct lp_shader_input input[PIPE_MAX_ATTRIBS];
- unsigned nr_inputs;
-
const struct lp_rast_state *stored; /**< what's in the scene */
struct lp_rast_state current; /**< currently set state */
struct pipe_resource *current_tex[PIPE_MAX_SAMPLERS];
@@ -139,6 +139,10 @@ struct lp_setup_context
} blend_color;
+ struct {
+ const struct lp_setup_variant *variant;
+ } setup;
+
unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */
void (*point)( struct lp_setup_context *,
@@ -160,12 +164,12 @@ void lp_setup_choose_point( struct lp_setup_context *setup );
void lp_setup_init_vbuf(struct lp_setup_context *setup);
-void lp_setup_update_state( struct lp_setup_context *setup,
+boolean lp_setup_update_state( struct lp_setup_context *setup,
boolean update_scene);
void lp_setup_destroy( struct lp_setup_context *setup );
-void lp_setup_flush_and_restart(struct lp_setup_context *setup);
+boolean lp_setup_flush_and_restart(struct lp_setup_context *setup);
void
lp_setup_print_triangle(struct lp_setup_context *setup,
@@ -181,7 +185,7 @@ lp_setup_print_vertex(struct lp_setup_context *setup,
struct lp_rast_triangle *
lp_setup_alloc_triangle(struct lp_scene *scene,
- unsigned nr_inputs,
+ unsigned num_inputs,
unsigned nr_planes,
unsigned *tri_size);
@@ -191,6 +195,4 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
const struct u_rect *bbox,
int nr_planes );
-void lp_setup_flush_and_restart(struct lp_setup_context *setup);
-
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c
index 156bd633754..827413bb33e 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -35,6 +35,7 @@
#include "lp_setup_context.h"
#include "lp_rast.h"
#include "lp_state_fs.h"
+#include "lp_state_setup.h"
#define NUM_CHANNELS 4
@@ -46,6 +47,10 @@ struct lp_line_info {
const float (*v1)[4];
const float (*v2)[4];
+
+ float (*a0)[4];
+ float (*dadx)[4];
+ float (*dady)[4];
};
@@ -53,14 +58,14 @@ struct lp_line_info {
* Compute a0 for a constant-valued coefficient (GL_FLAT shading).
*/
static void constant_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
+ struct lp_line_info *info,
unsigned slot,
const float value,
unsigned i )
{
- tri->inputs.a0[slot][i] = value;
- tri->inputs.dadx[slot][i] = 0.0f;
- tri->inputs.dady[slot][i] = 0.0f;
+ info->a0[slot][i] = value;
+ info->dadx[slot][i] = 0.0f;
+ info->dady[slot][i] = 0.0f;
}
@@ -69,7 +74,6 @@ static void constant_coef( struct lp_setup_context *setup,
* for a triangle.
*/
static void linear_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
struct lp_line_info *info,
unsigned slot,
unsigned vert_attr,
@@ -82,10 +86,10 @@ static void linear_coef( struct lp_setup_context *setup,
float dadx = da21 * info->dx * info->oneoverarea;
float dady = da21 * info->dy * info->oneoverarea;
- tri->inputs.dadx[slot][i] = dadx;
- tri->inputs.dady[slot][i] = dady;
+ info->dadx[slot][i] = dadx;
+ info->dady[slot][i] = dady;
- tri->inputs.a0[slot][i] = (a1 -
+ info->a0[slot][i] = (a1 -
(dadx * (info->v1[0][0] - setup->pixel_offset) +
dady * (info->v1[0][1] - setup->pixel_offset)));
}
@@ -100,7 +104,6 @@ static void linear_coef( struct lp_setup_context *setup,
* divide the interpolated value by the interpolated W at that fragment.
*/
static void perspective_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
struct lp_line_info *info,
unsigned slot,
unsigned vert_attr,
@@ -115,43 +118,42 @@ static void perspective_coef( struct lp_setup_context *setup,
float dadx = da21 * info->dx * info->oneoverarea;
float dady = da21 * info->dy * info->oneoverarea;
- tri->inputs.dadx[slot][i] = dadx;
- tri->inputs.dady[slot][i] = dady;
+ info->dadx[slot][i] = dadx;
+ info->dady[slot][i] = dady;
- tri->inputs.a0[slot][i] = (a1 -
- (dadx * (info->v1[0][0] - setup->pixel_offset) +
- dady * (info->v1[0][1] - setup->pixel_offset)));
+ info->a0[slot][i] = (a1 -
+ (dadx * (info->v1[0][0] - setup->pixel_offset) +
+ dady * (info->v1[0][1] - setup->pixel_offset)));
}
static void
setup_fragcoord_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
struct lp_line_info *info,
unsigned slot,
unsigned usage_mask)
{
/*X*/
if (usage_mask & TGSI_WRITEMASK_X) {
- tri->inputs.a0[slot][0] = 0.0;
- tri->inputs.dadx[slot][0] = 1.0;
- tri->inputs.dady[slot][0] = 0.0;
+ info->a0[slot][0] = 0.0;
+ info->dadx[slot][0] = 1.0;
+ info->dady[slot][0] = 0.0;
}
/*Y*/
if (usage_mask & TGSI_WRITEMASK_Y) {
- tri->inputs.a0[slot][1] = 0.0;
- tri->inputs.dadx[slot][1] = 0.0;
- tri->inputs.dady[slot][1] = 1.0;
+ info->a0[slot][1] = 0.0;
+ info->dadx[slot][1] = 0.0;
+ info->dady[slot][1] = 1.0;
}
/*Z*/
if (usage_mask & TGSI_WRITEMASK_Z) {
- linear_coef(setup, tri, info, slot, 0, 2);
+ linear_coef(setup, info, slot, 0, 2);
}
/*W*/
if (usage_mask & TGSI_WRITEMASK_W) {
- linear_coef(setup, tri, info, slot, 0, 3);
+ linear_coef(setup, info, slot, 0, 3);
}
}
@@ -159,43 +161,43 @@ setup_fragcoord_coef( struct lp_setup_context *setup,
* Compute the tri->coef[] array dadx, dady, a0 values.
*/
static void setup_line_coefficients( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
struct lp_line_info *info)
{
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
unsigned slot;
/* setup interpolation for all the remaining attributes:
*/
- for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
- unsigned vert_attr = setup->fs.input[slot].src_index;
- unsigned usage_mask = setup->fs.input[slot].usage_mask;
+ for (slot = 0; slot < key->num_inputs; slot++) {
+ unsigned vert_attr = key->inputs[slot].src_index;
+ unsigned usage_mask = key->inputs[slot].usage_mask;
unsigned i;
- switch (setup->fs.input[slot].interp) {
+ switch (key->inputs[slot].interp) {
case LP_INTERP_CONSTANT:
- if (setup->flatshade_first) {
+ if (key->flatshade_first) {
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- constant_coef(setup, tri, slot+1, info->v1[vert_attr][i], i);
+ constant_coef(setup, info, slot+1, info->v1[vert_attr][i], i);
}
else {
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- constant_coef(setup, tri, slot+1, info->v2[vert_attr][i], i);
+ constant_coef(setup, info, slot+1, info->v2[vert_attr][i], i);
}
break;
case LP_INTERP_LINEAR:
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- linear_coef(setup, tri, info, slot+1, vert_attr, i);
+ linear_coef(setup, info, slot+1, vert_attr, i);
break;
case LP_INTERP_PERSPECTIVE:
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- perspective_coef(setup, tri, info, slot+1, vert_attr, i);
+ perspective_coef(setup, info, slot+1, vert_attr, i);
fragcoord_usage_mask |= TGSI_WRITEMASK_W;
break;
@@ -211,7 +213,7 @@ static void setup_line_coefficients( struct lp_setup_context *setup,
case LP_INTERP_FACING:
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- constant_coef(setup, tri, slot+1, 1.0, i);
+ constant_coef(setup, info, slot+1, 1.0, i);
break;
default:
@@ -221,7 +223,7 @@ static void setup_line_coefficients( struct lp_setup_context *setup,
/* The internal position input is in slot zero:
*/
- setup_fragcoord_coef(setup, tri, info, 0,
+ setup_fragcoord_coef(setup, info, 0,
fragcoord_usage_mask);
}
@@ -241,14 +243,15 @@ print_line(struct lp_setup_context *setup,
const float (*v1)[4],
const float (*v2)[4])
{
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
uint i;
debug_printf("llvmpipe line\n");
- for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
+ for (i = 0; i < 1 + key->num_inputs; i++) {
debug_printf(" v1[%d]: %f %f %f %f\n", i,
v1[i][0], v1[i][1], v1[i][2], v1[i][3]);
}
- for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
+ for (i = 0; i < 1 + key->num_inputs; i++) {
debug_printf(" v2[%d]: %f %f %f %f\n", i,
v2[i][0], v2[i][1], v2[i][2], v2[i][3]);
}
@@ -275,7 +278,9 @@ try_setup_line( struct lp_setup_context *setup,
const float (*v2)[4])
{
struct lp_scene *scene = setup->scene;
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
struct lp_rast_triangle *line;
+ struct lp_rast_plane *plane;
struct lp_line_info info;
float width = MAX2(1.0, setup->line_width);
struct u_rect bbox;
@@ -475,7 +480,7 @@ try_setup_line( struct lp_setup_context *setup,
else {
/* do intersection test */
float xintersect = fracf(v2[0][0]) + y2diff * dxdy;
- draw_end = (xintersect < 1.0 && xintersect > 0.0);
+ draw_end = (xintersect < 1.0 && xintersect >= 0.0);
}
/* Are we already drawing start/end?
@@ -513,7 +518,7 @@ try_setup_line( struct lp_setup_context *setup,
x_offset_end = y_offset_end * dxdy;
}
}
-
+
/* x/y positions in fixed point */
x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) - fixed_width/2;
x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2;
@@ -567,7 +572,7 @@ try_setup_line( struct lp_setup_context *setup,
u_rect_find_intersection(&setup->draw_region, &bbox);
line = lp_setup_alloc_triangle(scene,
- setup->fs.nr_inputs,
+ key->num_inputs,
nr_planes,
&tri_bytes);
if (!line)
@@ -581,33 +586,35 @@ try_setup_line( struct lp_setup_context *setup,
#endif
/* calculate the deltas */
- line->plane[0].dcdy = x[0] - x[1];
- line->plane[1].dcdy = x[1] - x[2];
- line->plane[2].dcdy = x[2] - x[3];
- line->plane[3].dcdy = x[3] - x[0];
+ plane = GET_PLANES(line);
+ plane[0].dcdy = x[0] - x[1];
+ plane[1].dcdy = x[1] - x[2];
+ plane[2].dcdy = x[2] - x[3];
+ plane[3].dcdy = x[3] - x[0];
- line->plane[0].dcdx = y[0] - y[1];
- line->plane[1].dcdx = y[1] - y[2];
- line->plane[2].dcdx = y[2] - y[3];
- line->plane[3].dcdx = y[3] - y[0];
+ plane[0].dcdx = y[0] - y[1];
+ plane[1].dcdx = y[1] - y[2];
+ plane[2].dcdx = y[2] - y[3];
+ plane[3].dcdx = y[3] - y[0];
/* Setup parameter interpolants:
*/
- setup_line_coefficients( setup, line, &info);
+ info.a0 = GET_A0(&line->inputs);
+ info.dadx = GET_DADX(&line->inputs);
+ info.dady = GET_DADY(&line->inputs);
+ setup_line_coefficients(setup, &info);
- line->inputs.facing = 1.0F;
- line->inputs.state = setup->fs.stored;
+ line->inputs.frontfacing = TRUE;
line->inputs.disable = FALSE;
line->inputs.opaque = FALSE;
for (i = 0; i < 4; i++) {
- struct lp_rast_plane *plane = &line->plane[i];
/* half-edge constants, will be interated over the whole render
* target.
*/
- plane->c = plane->dcdx * x[i] - plane->dcdy * y[i];
+ plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i];
/* correct for top-left vs. bottom-left fill convention.
@@ -623,38 +630,34 @@ try_setup_line( struct lp_setup_context *setup,
* to its usual method, in which case it will probably want
* to use the opposite, top-left convention.
*/
- if (plane->dcdx < 0) {
+ if (plane[i].dcdx < 0) {
/* both fill conventions want this - adjust for left edges */
- plane->c++;
+ plane[i].c++;
}
- else if (plane->dcdx == 0) {
+ else if (plane[i].dcdx == 0) {
if (setup->pixel_offset == 0) {
/* correct for top-left fill convention:
*/
- if (plane->dcdy > 0) plane->c++;
+ if (plane[i].dcdy > 0) plane[i].c++;
}
else {
/* correct for bottom-left fill convention:
*/
- if (plane->dcdy < 0) plane->c++;
+ if (plane[i].dcdy < 0) plane[i].c++;
}
}
- plane->dcdx *= FIXED_ONE;
- plane->dcdy *= FIXED_ONE;
+ plane[i].dcdx *= FIXED_ONE;
+ plane[i].dcdy *= FIXED_ONE;
/* find trivial reject offsets for each edge for a single-pixel
* sized block. These will be scaled up at each recursive level to
* match the active blocksize. Scaling in this way works best if
* the blocks are square.
*/
- plane->eo = 0;
- if (plane->dcdx < 0) plane->eo -= plane->dcdx;
- if (plane->dcdy > 0) plane->eo += plane->dcdy;
-
- /* Calculate trivial accept offsets from the above.
- */
- plane->ei = plane->dcdy - plane->dcdx - plane->eo;
+ plane[i].eo = 0;
+ if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx;
+ if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy;
}
@@ -677,29 +680,25 @@ try_setup_line( struct lp_setup_context *setup,
* these planes elsewhere.
*/
if (nr_planes == 8) {
- line->plane[4].dcdx = -1;
- line->plane[4].dcdy = 0;
- line->plane[4].c = 1-bbox.x0;
- line->plane[4].ei = 0;
- line->plane[4].eo = 1;
-
- line->plane[5].dcdx = 1;
- line->plane[5].dcdy = 0;
- line->plane[5].c = bbox.x1+1;
- line->plane[5].ei = -1;
- line->plane[5].eo = 0;
-
- line->plane[6].dcdx = 0;
- line->plane[6].dcdy = 1;
- line->plane[6].c = 1-bbox.y0;
- line->plane[6].ei = 0;
- line->plane[6].eo = 1;
-
- line->plane[7].dcdx = 0;
- line->plane[7].dcdy = -1;
- line->plane[7].c = bbox.y1+1;
- line->plane[7].ei = -1;
- line->plane[7].eo = 0;
+ plane[4].dcdx = -1;
+ plane[4].dcdy = 0;
+ plane[4].c = 1-bbox.x0;
+ plane[4].eo = 1;
+
+ plane[5].dcdx = 1;
+ plane[5].dcdy = 0;
+ plane[5].c = bbox.x1+1;
+ plane[5].eo = 0;
+
+ plane[6].dcdx = 0;
+ plane[6].dcdy = 1;
+ plane[6].c = 1-bbox.y0;
+ plane[6].eo = 1;
+
+ plane[7].dcdx = 0;
+ plane[7].dcdy = -1;
+ plane[7].c = bbox.y1+1;
+ plane[7].eo = 0;
}
return lp_setup_bin_triangle(setup, line, &bbox, nr_planes);
@@ -712,10 +711,11 @@ static void lp_setup_line( struct lp_setup_context *setup,
{
if (!try_setup_line( setup, v0, v1 ))
{
- lp_setup_flush_and_restart(setup);
+ if (!lp_setup_flush_and_restart(setup))
+ return;
if (!try_setup_line( setup, v0, v1 ))
- assert(0);
+ return;
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c
index 3b217f95447..146f1bd07ca 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_point.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c
@@ -35,6 +35,7 @@
#include "lp_perf.h"
#include "lp_rast.h"
#include "lp_state_fs.h"
+#include "lp_state_setup.h"
#include "tgsi/tgsi_scan.h"
#define NUM_CHANNELS 4
@@ -45,6 +46,10 @@ struct point_info {
int dx01, dx12;
const float (*v0)[4];
+
+ float (*a0)[4];
+ float (*dadx)[4];
+ float (*dady)[4];
};
@@ -53,14 +58,37 @@ struct point_info {
*/
static void
constant_coef(struct lp_setup_context *setup,
- struct lp_rast_triangle *point,
+ struct point_info *info,
unsigned slot,
const float value,
unsigned i)
{
- point->inputs.a0[slot][i] = value;
- point->inputs.dadx[slot][i] = 0.0f;
- point->inputs.dady[slot][i] = 0.0f;
+ info->a0[slot][i] = value;
+ info->dadx[slot][i] = 0.0f;
+ info->dady[slot][i] = 0.0f;
+}
+
+
+static void
+point_persp_coeff(struct lp_setup_context *setup,
+ const struct point_info *info,
+ unsigned slot,
+ unsigned i)
+{
+ /*
+ * Fragment shader expects pre-multiplied w for LP_INTERP_PERSPECTIVE. A
+ * better stratergy would be to take the primitive in consideration when
+ * generating the fragment shader key, and therefore avoid the per-fragment
+ * perspective divide.
+ */
+
+ float w0 = info->v0[0][3];
+
+ assert(i < 4);
+
+ info->a0[slot][i] = info->v0[slot][i]*w0;
+ info->dadx[slot][i] = 0.0f;
+ info->dady[slot][i] = 0.0f;
}
@@ -69,17 +97,19 @@ constant_coef(struct lp_setup_context *setup,
* \param slot the vertex attribute slot to setup
* \param i the attribute channel in [0,3]
* \param sprite_coord_origin one of PIPE_SPRITE_COORD_x
- * \param perspective_proj will the TEX instruction do a divide by Q?
+ * \param perspective does the shader expects pre-multiplied w, i.e.,
+ * LP_INTERP_PERSPECTIVE is specified in the shader key
*/
static void
texcoord_coef(struct lp_setup_context *setup,
- struct lp_rast_triangle *point,
const struct point_info *info,
unsigned slot,
unsigned i,
unsigned sprite_coord_origin,
- boolean perspective_proj)
+ boolean perspective)
{
+ float w0 = info->v0[0][3];
+
assert(i < 4);
if (i == 0) {
@@ -88,21 +118,14 @@ texcoord_coef(struct lp_setup_context *setup,
float x0 = info->v0[0][0] - setup->pixel_offset;
float y0 = info->v0[0][1] - setup->pixel_offset;
- point->inputs.dadx[slot][0] = dadx;
- point->inputs.dady[slot][0] = dady;
- point->inputs.a0[slot][0] = 0.5 - (dadx * x0 + dady * y0);
+ info->dadx[slot][0] = dadx;
+ info->dady[slot][0] = dady;
+ info->a0[slot][0] = 0.5 - (dadx * x0 + dady * y0);
- if (!perspective_proj) {
- /* Divide coefficients by vertex.w here.
- *
- * It would be clearer to always multiply by w0 above and
- * then divide it out for perspective projection here, but
- * doing it this way involves less algebra.
- */
- float w0 = info->v0[0][3];
- point->inputs.dadx[slot][0] *= w0;
- point->inputs.dady[slot][0] *= w0;
- point->inputs.a0[slot][0] *= w0;
+ if (perspective) {
+ info->dadx[slot][0] *= w0;
+ info->dady[slot][0] *= w0;
+ info->a0[slot][0] *= w0;
}
}
else if (i == 1) {
@@ -115,26 +138,25 @@ texcoord_coef(struct lp_setup_context *setup,
dady = -dady;
}
- point->inputs.dadx[slot][1] = dadx;
- point->inputs.dady[slot][1] = dady;
- point->inputs.a0[slot][1] = 0.5 - (dadx * x0 + dady * y0);
+ info->dadx[slot][1] = dadx;
+ info->dady[slot][1] = dady;
+ info->a0[slot][1] = 0.5 - (dadx * x0 + dady * y0);
- if (!perspective_proj) {
- float w0 = info->v0[0][3];
- point->inputs.dadx[slot][1] *= w0;
- point->inputs.dady[slot][1] *= w0;
- point->inputs.a0[slot][1] *= w0;
+ if (perspective) {
+ info->dadx[slot][1] *= w0;
+ info->dady[slot][1] *= w0;
+ info->a0[slot][1] *= w0;
}
}
else if (i == 2) {
- point->inputs.a0[slot][2] = 0.0f;
- point->inputs.dadx[slot][2] = 0.0f;
- point->inputs.dady[slot][2] = 0.0f;
+ info->a0[slot][2] = 0.0f;
+ info->dadx[slot][2] = 0.0f;
+ info->dady[slot][2] = 0.0f;
}
else {
- point->inputs.a0[slot][3] = 1.0f;
- point->inputs.dadx[slot][3] = 0.0f;
- point->inputs.dady[slot][3] = 0.0f;
+ info->a0[slot][3] = perspective ? w0 : 1.0f;
+ info->dadx[slot][3] = 0.0f;
+ info->dady[slot][3] = 0.0f;
}
}
@@ -147,33 +169,32 @@ texcoord_coef(struct lp_setup_context *setup,
*/
static void
setup_point_fragcoord_coef(struct lp_setup_context *setup,
- struct lp_rast_triangle *point,
- const struct point_info *info,
+ struct point_info *info,
unsigned slot,
unsigned usage_mask)
{
/*X*/
if (usage_mask & TGSI_WRITEMASK_X) {
- point->inputs.a0[slot][0] = 0.0;
- point->inputs.dadx[slot][0] = 1.0;
- point->inputs.dady[slot][0] = 0.0;
+ info->a0[slot][0] = 0.0;
+ info->dadx[slot][0] = 1.0;
+ info->dady[slot][0] = 0.0;
}
/*Y*/
if (usage_mask & TGSI_WRITEMASK_Y) {
- point->inputs.a0[slot][1] = 0.0;
- point->inputs.dadx[slot][1] = 0.0;
- point->inputs.dady[slot][1] = 1.0;
+ info->a0[slot][1] = 0.0;
+ info->dadx[slot][1] = 0.0;
+ info->dady[slot][1] = 1.0;
}
/*Z*/
if (usage_mask & TGSI_WRITEMASK_Z) {
- constant_coef(setup, point, slot, info->v0[0][2], 2);
+ constant_coef(setup, info, slot, info->v0[0][2], 2);
}
/*W*/
if (usage_mask & TGSI_WRITEMASK_W) {
- constant_coef(setup, point, slot, info->v0[0][3], 3);
+ constant_coef(setup, info, slot, info->v0[0][3], 3);
}
}
@@ -183,21 +204,27 @@ setup_point_fragcoord_coef(struct lp_setup_context *setup,
*/
static void
setup_point_coefficients( struct lp_setup_context *setup,
- struct lp_rast_triangle *point,
- const struct point_info *info)
+ struct point_info *info)
{
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
const struct lp_fragment_shader *shader = setup->fs.current.variant->shader;
unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
unsigned slot;
/* setup interpolation for all the remaining attributes:
*/
- for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
- unsigned vert_attr = setup->fs.input[slot].src_index;
- unsigned usage_mask = setup->fs.input[slot].usage_mask;
+ for (slot = 0; slot < key->num_inputs; slot++) {
+ unsigned vert_attr = key->inputs[slot].src_index;
+ unsigned usage_mask = key->inputs[slot].usage_mask;
+ enum lp_interp interp = key->inputs[slot].interp;
+ boolean perspective = !!(interp == LP_INTERP_PERSPECTIVE);
unsigned i;
+
+ if (perspective & usage_mask) {
+ fragcoord_usage_mask |= TGSI_WRITEMASK_W;
+ }
- switch (setup->fs.input[slot].interp) {
+ switch (interp) {
case LP_INTERP_POSITION:
/*
* The generated pixel interpolators will pick up the coeffs from
@@ -210,39 +237,45 @@ setup_point_coefficients( struct lp_setup_context *setup,
case LP_INTERP_LINEAR:
/* Sprite tex coords may use linear interpolation someday */
/* fall-through */
-
case LP_INTERP_PERSPECTIVE:
/* check if the sprite coord flag is set for this attribute.
* If so, set it up so it up so x and y vary from 0 to 1.
*/
- if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_GENERIC) {
- const int index = shader->info.input_semantic_index[slot];
+ if (shader->info.base.input_semantic_name[slot] == TGSI_SEMANTIC_GENERIC) {
+ unsigned semantic_index = shader->info.base.input_semantic_index[slot];
/* Note that sprite_coord enable is a bitfield of
* PIPE_MAX_SHADER_OUTPUTS bits.
*/
- if (index < PIPE_MAX_SHADER_OUTPUTS &&
- (setup->sprite_coord_enable & (1 << index))) {
- for (i = 0; i < NUM_CHANNELS; i++)
- if (usage_mask & (1 << i))
- texcoord_coef(setup, point, info, slot + 1, i,
+ if (semantic_index < PIPE_MAX_SHADER_OUTPUTS &&
+ (setup->sprite_coord_enable & (1 << semantic_index))) {
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ if (usage_mask & (1 << i)) {
+ texcoord_coef(setup, info, slot + 1, i,
setup->sprite_coord_origin,
- (usage_mask & TGSI_WRITEMASK_W));
- fragcoord_usage_mask |= TGSI_WRITEMASK_W;
- break;
+ perspective);
+ }
+ }
+ break;
}
}
- /* FALLTHROUGH */
+ /* fall-through */
case LP_INTERP_CONSTANT:
for (i = 0; i < NUM_CHANNELS; i++) {
- if (usage_mask & (1 << i))
- constant_coef(setup, point, slot+1, info->v0[vert_attr][i], i);
+ if (usage_mask & (1 << i)) {
+ if (perspective) {
+ point_persp_coeff(setup, info, slot+1, i);
+ }
+ else {
+ constant_coef(setup, info, slot+1, info->v0[vert_attr][i], i);
+ }
+ }
}
break;
case LP_INTERP_FACING:
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- constant_coef(setup, point, slot+1, 1.0, i);
+ constant_coef(setup, info, slot+1, 1.0, i);
break;
default:
@@ -253,7 +286,7 @@ setup_point_coefficients( struct lp_setup_context *setup,
/* The internal position input is in slot zero:
*/
- setup_point_fragcoord_coef(setup, point, info, 0,
+ setup_point_fragcoord_coef(setup, info, 0,
fragcoord_usage_mask);
}
@@ -270,6 +303,7 @@ try_setup_point( struct lp_setup_context *setup,
const float (*v0)[4] )
{
/* x/y positions in fixed point */
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
const int sizeAttr = setup->psize;
const float size
= (setup->point_size_per_vertex && sizeAttr > 0) ? v0[sizeAttr][0]
@@ -321,7 +355,7 @@ try_setup_point( struct lp_setup_context *setup,
u_rect_find_intersection(&setup->draw_region, &bbox);
point = lp_setup_alloc_triangle(scene,
- setup->fs.nr_inputs,
+ key->num_inputs,
nr_planes,
&bytes);
if (!point)
@@ -337,40 +371,40 @@ try_setup_point( struct lp_setup_context *setup,
info.dx12 = fixed_width;
info.dy01 = fixed_width;
info.dy12 = 0;
+ info.a0 = GET_A0(&point->inputs);
+ info.dadx = GET_DADX(&point->inputs);
+ info.dady = GET_DADY(&point->inputs);
/* Setup parameter interpolants:
*/
- setup_point_coefficients(setup, point, &info);
+ setup_point_coefficients(setup, &info);
- point->inputs.facing = 1.0F;
- point->inputs.state = setup->fs.stored;
+ point->inputs.frontfacing = TRUE;
point->inputs.disable = FALSE;
point->inputs.opaque = FALSE;
{
- point->plane[0].dcdx = -1;
- point->plane[0].dcdy = 0;
- point->plane[0].c = 1-bbox.x0;
- point->plane[0].ei = 0;
- point->plane[0].eo = 1;
-
- point->plane[1].dcdx = 1;
- point->plane[1].dcdy = 0;
- point->plane[1].c = bbox.x1+1;
- point->plane[1].ei = -1;
- point->plane[1].eo = 0;
-
- point->plane[2].dcdx = 0;
- point->plane[2].dcdy = 1;
- point->plane[2].c = 1-bbox.y0;
- point->plane[2].ei = 0;
- point->plane[2].eo = 1;
-
- point->plane[3].dcdx = 0;
- point->plane[3].dcdy = -1;
- point->plane[3].c = bbox.y1+1;
- point->plane[3].ei = -1;
- point->plane[3].eo = 0;
+ struct lp_rast_plane *plane = GET_PLANES(point);
+
+ plane[0].dcdx = -1;
+ plane[0].dcdy = 0;
+ plane[0].c = 1-bbox.x0;
+ plane[0].eo = 1;
+
+ plane[1].dcdx = 1;
+ plane[1].dcdy = 0;
+ plane[1].c = bbox.x1+1;
+ plane[1].eo = 0;
+
+ plane[2].dcdx = 0;
+ plane[2].dcdy = 1;
+ plane[2].c = 1-bbox.y0;
+ plane[2].eo = 1;
+
+ plane[3].dcdx = 0;
+ plane[3].dcdy = -1;
+ plane[3].c = bbox.y1+1;
+ plane[3].eo = 0;
}
return lp_setup_bin_triangle(setup, point, &bbox, nr_planes);
@@ -383,10 +417,11 @@ lp_setup_point(struct lp_setup_context *setup,
{
if (!try_setup_point( setup, v0 ))
{
- lp_setup_flush_and_restart(setup);
+ if (!lp_setup_flush_and_restart(setup))
+ return;
if (!try_setup_point( setup, v0 ))
- assert(0);
+ return;
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 9016bb8e249..4ab0b72a574 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -32,15 +32,18 @@
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_rect.h"
+#include "util/u_sse.h"
#include "lp_perf.h"
#include "lp_setup_context.h"
-#include "lp_setup_coef.h"
#include "lp_rast.h"
#include "lp_state_fs.h"
+#include "lp_state_setup.h"
#define NUM_CHANNELS 4
-
+#if defined(PIPE_ARCH_SSE)
+#include <emmintrin.h>
+#endif
static INLINE int
subpixel_snap(float a)
@@ -65,7 +68,7 @@ fixed_to_float(int a)
* immediately after it.
* The memory is allocated from the per-scene pool, not per-tile.
* \param tri_size returns number of bytes allocated
- * \param nr_inputs number of fragment shader inputs
+ * \param num_inputs number of fragment shader inputs
* \return pointer to triangle space
*/
struct lp_rast_triangle *
@@ -75,22 +78,23 @@ lp_setup_alloc_triangle(struct lp_scene *scene,
unsigned *tri_size)
{
unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
+ unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane);
struct lp_rast_triangle *tri;
- unsigned tri_bytes, bytes;
- char *inputs;
- tri_bytes = align(Offset(struct lp_rast_triangle, plane[nr_planes]), 16);
- bytes = tri_bytes + (3 * input_array_sz);
+ *tri_size = (sizeof(struct lp_rast_triangle) +
+ 3 * input_array_sz +
+ plane_sz);
- tri = lp_scene_alloc_aligned( scene, bytes, 16 );
+ tri = lp_scene_alloc_aligned( scene, *tri_size, 16 );
+ if (tri == NULL)
+ return NULL;
- if (tri) {
- inputs = ((char *)tri) + tri_bytes;
- tri->inputs.a0 = (float (*)[4]) inputs;
- tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz);
- tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz);
+ tri->inputs.stride = input_array_sz;
- *tri_size = bytes;
+ {
+ char *a = (char *)tri;
+ char *b = (char *)&GET_PLANES(tri)[nr_planes];
+ assert(b - a == *tri_size);
}
return tri;
@@ -101,25 +105,26 @@ lp_setup_print_vertex(struct lp_setup_context *setup,
const char *name,
const float (*v)[4])
{
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
int i, j;
debug_printf(" wpos (%s[0]) xyzw %f %f %f %f\n",
name,
v[0][0], v[0][1], v[0][2], v[0][3]);
- for (i = 0; i < setup->fs.nr_inputs; i++) {
- const float *in = v[setup->fs.input[i].src_index];
+ for (i = 0; i < key->num_inputs; i++) {
+ const float *in = v[key->inputs[i].src_index];
debug_printf(" in[%d] (%s[%d]) %s%s%s%s ",
i,
- name, setup->fs.input[i].src_index,
- (setup->fs.input[i].usage_mask & 0x1) ? "x" : " ",
- (setup->fs.input[i].usage_mask & 0x2) ? "y" : " ",
- (setup->fs.input[i].usage_mask & 0x4) ? "z" : " ",
- (setup->fs.input[i].usage_mask & 0x8) ? "w" : " ");
+ name, key->inputs[i].src_index,
+ (key->inputs[i].usage_mask & 0x1) ? "x" : " ",
+ (key->inputs[i].usage_mask & 0x2) ? "y" : " ",
+ (key->inputs[i].usage_mask & 0x4) ? "z" : " ",
+ (key->inputs[i].usage_mask & 0x8) ? "w" : " ");
for (j = 0; j < 4; j++)
- if (setup->fs.input[i].usage_mask & (1<<j))
+ if (key->inputs[i].usage_mask & (1<<j))
debug_printf("%.5f ", in[j]);
debug_printf("\n");
@@ -200,14 +205,16 @@ lp_setup_whole_tile(struct lp_setup_context *setup,
}
LP_COUNT(nr_shade_opaque_64);
- return lp_scene_bin_command( scene, tx, ty,
- LP_RAST_OP_SHADE_TILE_OPAQUE,
- lp_rast_arg_inputs(inputs) );
+ return lp_scene_bin_cmd_with_state( scene, tx, ty,
+ setup->fs.stored,
+ LP_RAST_OP_SHADE_TILE_OPAQUE,
+ lp_rast_arg_inputs(inputs) );
} else {
LP_COUNT(nr_shade_64);
- return lp_scene_bin_command( scene, tx, ty,
- LP_RAST_OP_SHADE_TILE,
- lp_rast_arg_inputs(inputs) );
+ return lp_scene_bin_cmd_with_state( scene, tx, ty,
+ setup->fs.stored,
+ LP_RAST_OP_SHADE_TILE,
+ lp_rast_arg_inputs(inputs) );
}
}
@@ -225,13 +232,13 @@ do_triangle_ccw(struct lp_setup_context *setup,
boolean frontfacing )
{
struct lp_scene *scene = setup->scene;
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
struct lp_rast_triangle *tri;
- int x[3];
- int y[3];
- int area;
+ struct lp_rast_plane *plane;
+ int x[4];
+ int y[4];
struct u_rect bbox;
unsigned tri_bytes;
- int i;
int nr_planes = 3;
if (0)
@@ -248,10 +255,12 @@ do_triangle_ccw(struct lp_setup_context *setup,
x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset);
x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset);
x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset);
+ x[3] = 0;
y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset);
y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset);
y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset);
-
+ y[3] = 0;
+
/* Bounding rectangle (in pixels) */
{
@@ -289,13 +298,13 @@ do_triangle_ccw(struct lp_setup_context *setup,
u_rect_find_intersection(&setup->draw_region, &bbox);
tri = lp_setup_alloc_triangle(scene,
- setup->fs.nr_inputs,
+ key->num_inputs,
nr_planes,
&tri_bytes);
if (!tri)
return FALSE;
-#ifdef DEBUG
+#if 0
tri->v[0][0] = v0[0][0];
tri->v[1][0] = v1[0][0];
tri->v[2][0] = v2[0][0];
@@ -304,92 +313,172 @@ do_triangle_ccw(struct lp_setup_context *setup,
tri->v[2][1] = v2[0][1];
#endif
- tri->plane[0].dcdy = x[0] - x[1];
- tri->plane[1].dcdy = x[1] - x[2];
- tri->plane[2].dcdy = x[2] - x[0];
-
- tri->plane[0].dcdx = y[0] - y[1];
- tri->plane[1].dcdx = y[1] - y[2];
- tri->plane[2].dcdx = y[2] - y[0];
-
- area = (tri->plane[0].dcdy * tri->plane[2].dcdx -
- tri->plane[2].dcdy * tri->plane[0].dcdx);
-
LP_COUNT(nr_tris);
- /* Cull non-ccw and zero-sized triangles.
- *
- * XXX: subject to overflow??
- */
- if (area <= 0) {
- lp_scene_putback_data( scene, tri_bytes );
- LP_COUNT(nr_culled_tris);
- return TRUE;
- }
-
/* Setup parameter interpolants:
*/
- lp_setup_tri_coef( setup, &tri->inputs, v0, v1, v2, frontfacing );
-
- tri->inputs.facing = frontfacing ? 1.0F : -1.0F;
+ setup->setup.variant->jit_function( v0,
+ v1,
+ v2,
+ frontfacing,
+ GET_A0(&tri->inputs),
+ GET_DADX(&tri->inputs),
+ GET_DADY(&tri->inputs) );
+
+ tri->inputs.frontfacing = frontfacing;
tri->inputs.disable = FALSE;
tri->inputs.opaque = setup->fs.current.variant->opaque;
- tri->inputs.state = setup->fs.stored;
-
- for (i = 0; i < 3; i++) {
- struct lp_rast_plane *plane = &tri->plane[i];
+ if (0)
+ lp_dump_setup_coef(&setup->setup.variant->key,
+ (const float (*)[4])GET_A0(&tri->inputs),
+ (const float (*)[4])GET_DADX(&tri->inputs),
+ (const float (*)[4])GET_DADY(&tri->inputs));
+
+ plane = GET_PLANES(tri);
+
+#if defined(PIPE_ARCH_SSE)
+ {
+ __m128i vertx, verty;
+ __m128i shufx, shufy;
+ __m128i dcdx, dcdy, c;
+ __m128i unused;
+ __m128i dcdx_neg_mask;
+ __m128i dcdy_neg_mask;
+ __m128i dcdx_zero_mask;
+ __m128i top_left_flag;
+ __m128i c_inc_mask, c_inc;
+ __m128i eo, p0, p1, p2;
+ __m128i zero = _mm_setzero_si128();
+
+ vertx = _mm_loadu_si128((__m128i *)x); /* vertex x coords */
+ verty = _mm_loadu_si128((__m128i *)y); /* vertex y coords */
+
+ shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1));
+ shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1));
+
+ dcdx = _mm_sub_epi32(verty, shufy);
+ dcdy = _mm_sub_epi32(vertx, shufx);
+
+ dcdx_neg_mask = _mm_srai_epi32(dcdx, 31);
+ dcdx_zero_mask = _mm_cmpeq_epi32(dcdx, zero);
+ dcdy_neg_mask = _mm_srai_epi32(dcdy, 31);
+
+ top_left_flag = _mm_set1_epi32((setup->pixel_offset == 0) ? ~0 : 0);
- /* half-edge constants, will be interated over the whole render
- * target.
+ c_inc_mask = _mm_or_si128(dcdx_neg_mask,
+ _mm_and_si128(dcdx_zero_mask,
+ _mm_xor_si128(dcdy_neg_mask,
+ top_left_flag)));
+
+ c_inc = _mm_srli_epi32(c_inc_mask, 31);
+
+ c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
+ mm_mullo_epi32(dcdy, verty));
+
+ c = _mm_add_epi32(c, c_inc);
+
+ /* Scale up to match c:
*/
- plane->c = plane->dcdx * x[i] - plane->dcdy * y[i];
-
- /* correct for top-left vs. bottom-left fill convention.
- *
- * note that we're overloading gl_rasterization_rules to mean
- * both (0.5,0.5) pixel centers *and* bottom-left filling
- * convention.
- *
- * GL actually has a top-left filling convention, but GL's
- * notion of "top" differs from gallium's...
- *
- * Also, sometimes (in FBO cases) GL will render upside down
- * to its usual method, in which case it will probably want
- * to use the opposite, top-left convention.
- */
- if (plane->dcdx < 0) {
- /* both fill conventions want this - adjust for left edges */
- plane->c++;
- }
- else if (plane->dcdx == 0) {
- if (setup->pixel_offset == 0) {
- /* correct for top-left fill convention:
- */
- if (plane->dcdy > 0) plane->c++;
+ dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER);
+ dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER);
+
+ /* Calculate trivial reject values:
+ */
+ eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
+ _mm_and_si128(dcdx_neg_mask, dcdx));
+
+ /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
+
+ /* Pointless transpose which gets undone immediately in
+ * rasterization:
+ */
+ transpose4_epi32(&c, &dcdx, &dcdy, &eo,
+ &p0, &p1, &p2, &unused);
+
+ _mm_store_si128((__m128i *)&plane[0], p0);
+ _mm_store_si128((__m128i *)&plane[1], p1);
+ _mm_store_si128((__m128i *)&plane[2], p2);
+ }
+#else
+ {
+ int i;
+ plane[0].dcdy = x[0] - x[1];
+ plane[1].dcdy = x[1] - x[2];
+ plane[2].dcdy = x[2] - x[0];
+ plane[0].dcdx = y[0] - y[1];
+ plane[1].dcdx = y[1] - y[2];
+ plane[2].dcdx = y[2] - y[0];
+
+ for (i = 0; i < 3; i++) {
+ /* half-edge constants, will be interated over the whole render
+ * target.
+ */
+ plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i];
+
+ /* correct for top-left vs. bottom-left fill convention.
+ *
+ * note that we're overloading gl_rasterization_rules to mean
+ * both (0.5,0.5) pixel centers *and* bottom-left filling
+ * convention.
+ *
+ * GL actually has a top-left filling convention, but GL's
+ * notion of "top" differs from gallium's...
+ *
+ * Also, sometimes (in FBO cases) GL will render upside down
+ * to its usual method, in which case it will probably want
+ * to use the opposite, top-left convention.
+ */
+ if (plane[i].dcdx < 0) {
+ /* both fill conventions want this - adjust for left edges */
+ plane[i].c++;
}
- else {
- /* correct for bottom-left fill convention:
- */
- if (plane->dcdy < 0) plane->c++;
+ else if (plane[i].dcdx == 0) {
+ if (setup->pixel_offset == 0) {
+ /* correct for top-left fill convention:
+ */
+ if (plane[i].dcdy > 0) plane[i].c++;
+ }
+ else {
+ /* correct for bottom-left fill convention:
+ */
+ if (plane[i].dcdy < 0) plane[i].c++;
+ }
}
- }
- plane->dcdx *= FIXED_ONE;
- plane->dcdy *= FIXED_ONE;
+ plane[i].dcdx *= FIXED_ONE;
+ plane[i].dcdy *= FIXED_ONE;
- /* find trivial reject offsets for each edge for a single-pixel
- * sized block. These will be scaled up at each recursive level to
- * match the active blocksize. Scaling in this way works best if
- * the blocks are square.
- */
- plane->eo = 0;
- if (plane->dcdx < 0) plane->eo -= plane->dcdx;
- if (plane->dcdy > 0) plane->eo += plane->dcdy;
+ /* find trivial reject offsets for each edge for a single-pixel
+ * sized block. These will be scaled up at each recursive level to
+ * match the active blocksize. Scaling in this way works best if
+ * the blocks are square.
+ */
+ plane[i].eo = 0;
+ if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx;
+ if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy;
+ }
+ }
+#endif
- /* Calculate trivial accept offsets from the above.
- */
- plane->ei = plane->dcdy - plane->dcdx - plane->eo;
+ if (0) {
+ debug_printf("p0: %08x/%08x/%08x/%08x\n",
+ plane[0].c,
+ plane[0].dcdx,
+ plane[0].dcdy,
+ plane[0].eo);
+
+ debug_printf("p1: %08x/%08x/%08x/%08x\n",
+ plane[1].c,
+ plane[1].dcdx,
+ plane[1].dcdy,
+ plane[1].eo);
+
+ debug_printf("p0: %08x/%08x/%08x/%08x\n",
+ plane[2].c,
+ plane[2].dcdx,
+ plane[2].dcdy,
+ plane[2].eo);
}
@@ -412,29 +501,25 @@ do_triangle_ccw(struct lp_setup_context *setup,
* these planes elsewhere.
*/
if (nr_planes == 7) {
- tri->plane[3].dcdx = -1;
- tri->plane[3].dcdy = 0;
- tri->plane[3].c = 1-bbox.x0;
- tri->plane[3].ei = 0;
- tri->plane[3].eo = 1;
-
- tri->plane[4].dcdx = 1;
- tri->plane[4].dcdy = 0;
- tri->plane[4].c = bbox.x1+1;
- tri->plane[4].ei = -1;
- tri->plane[4].eo = 0;
-
- tri->plane[5].dcdx = 0;
- tri->plane[5].dcdy = 1;
- tri->plane[5].c = 1-bbox.y0;
- tri->plane[5].ei = 0;
- tri->plane[5].eo = 1;
-
- tri->plane[6].dcdx = 0;
- tri->plane[6].dcdy = -1;
- tri->plane[6].c = bbox.y1+1;
- tri->plane[6].ei = -1;
- tri->plane[6].eo = 0;
+ plane[3].dcdx = -1;
+ plane[3].dcdy = 0;
+ plane[3].c = 1-bbox.x0;
+ plane[3].eo = 1;
+
+ plane[4].dcdx = 1;
+ plane[4].dcdy = 0;
+ plane[4].c = bbox.x1+1;
+ plane[4].eo = 0;
+
+ plane[5].dcdx = 0;
+ plane[5].dcdy = 1;
+ plane[5].c = 1-bbox.y0;
+ plane[5].eo = 1;
+
+ plane[6].dcdx = 0;
+ plane[6].dcdy = -1;
+ plane[6].c = bbox.y1+1;
+ plane[6].eo = 0;
}
return lp_setup_bin_triangle( setup, tri, &bbox, nr_planes );
@@ -487,51 +572,58 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
int sz = floor_pot((bbox->x1 - (bbox->x0 & ~3)) |
(bbox->y1 - (bbox->y0 & ~3)));
- if (nr_planes == 3) {
- if (sz < 4 && dx < 64)
- {
- /* Triangle is contained in a single 4x4 stamp:
- */
- int mask = (bbox->x0 & 63 & ~3) | ((bbox->y0 & 63 & ~3) << 8);
-
- return lp_scene_bin_command( scene,
- bbox->x0/64, bbox->y0/64,
- LP_RAST_OP_TRIANGLE_3_4,
- lp_rast_arg_triangle(tri, mask) );
- }
-
- if (sz < 16 && dx < 64)
- {
- int mask = (bbox->x0 & 63 & ~3) | ((bbox->y0 & 63 & ~3) << 8);
-
- /* Triangle is contained in a single 16x16 block:
- */
- return lp_scene_bin_command( scene,
- bbox->x0/64, bbox->y0/64,
- LP_RAST_OP_TRIANGLE_3_16,
- lp_rast_arg_triangle(tri, mask) );
- }
- }
-
-
/* Determine which tile(s) intersect the triangle's bounding box
*/
if (dx < TILE_SIZE)
{
int ix0 = bbox->x0 / TILE_SIZE;
int iy0 = bbox->y0 / TILE_SIZE;
+ int px = bbox->x0 & 63 & ~3;
+ int py = bbox->y0 & 63 & ~3;
+ int mask = px | (py << 8);
assert(iy0 == bbox->y1 / TILE_SIZE &&
ix0 == bbox->x1 / TILE_SIZE);
+ if (nr_planes == 3) {
+ if (sz < 4)
+ {
+ /* Triangle is contained in a single 4x4 stamp:
+ */
+ return lp_scene_bin_cmd_with_state( scene, ix0, iy0,
+ setup->fs.stored,
+ LP_RAST_OP_TRIANGLE_3_4,
+ lp_rast_arg_triangle(tri, mask) );
+ }
+
+ if (sz < 16)
+ {
+ /* Triangle is contained in a single 16x16 block:
+ */
+ return lp_scene_bin_cmd_with_state( scene, ix0, iy0,
+ setup->fs.stored,
+ LP_RAST_OP_TRIANGLE_3_16,
+ lp_rast_arg_triangle(tri, mask) );
+ }
+ }
+ else if (nr_planes == 4 && sz < 16)
+ {
+ return lp_scene_bin_cmd_with_state(scene, ix0, iy0,
+ setup->fs.stored,
+ LP_RAST_OP_TRIANGLE_4_16,
+ lp_rast_arg_triangle(tri, mask) );
+ }
+
+
/* Triangle is contained in a single tile:
*/
- return lp_scene_bin_command( scene, ix0, iy0,
- lp_rast_tri_tab[nr_planes],
- lp_rast_arg_triangle(tri, (1<<nr_planes)-1) );
+ return lp_scene_bin_cmd_with_state( scene, ix0, iy0, setup->fs.stored,
+ lp_rast_tri_tab[nr_planes],
+ lp_rast_arg_triangle(tri, (1<<nr_planes)-1) );
}
else
{
+ struct lp_rast_plane *plane = GET_PLANES(tri);
int c[MAX_PLANES];
int ei[MAX_PLANES];
int eo[MAX_PLANES];
@@ -545,14 +637,17 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
int iy1 = bbox->y1 / TILE_SIZE;
for (i = 0; i < nr_planes; i++) {
- c[i] = (tri->plane[i].c +
- tri->plane[i].dcdy * iy0 * TILE_SIZE -
- tri->plane[i].dcdx * ix0 * TILE_SIZE);
-
- ei[i] = tri->plane[i].ei << TILE_ORDER;
- eo[i] = tri->plane[i].eo << TILE_ORDER;
- xstep[i] = -(tri->plane[i].dcdx << TILE_ORDER);
- ystep[i] = tri->plane[i].dcdy << TILE_ORDER;
+ c[i] = (plane[i].c +
+ plane[i].dcdy * iy0 * TILE_SIZE -
+ plane[i].dcdx * ix0 * TILE_SIZE);
+
+ ei[i] = (plane[i].dcdy -
+ plane[i].dcdx -
+ plane[i].eo) << TILE_ORDER;
+
+ eo[i] = plane[i].eo << TILE_ORDER;
+ xstep[i] = -(plane[i].dcdx << TILE_ORDER);
+ ystep[i] = plane[i].dcdy << TILE_ORDER;
}
@@ -594,9 +689,11 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
*/
int count = util_bitcount(partial);
in = TRUE;
- if (!lp_scene_bin_command( scene, x, y,
- lp_rast_tri_tab[count],
- lp_rast_arg_triangle(tri, partial) ))
+
+ if (!lp_scene_bin_cmd_with_state( scene, x, y,
+ setup->fs.stored,
+ lp_rast_tri_tab[count],
+ lp_rast_arg_triangle(tri, partial) ))
goto fail;
LP_COUNT(nr_partially_covered_64);
@@ -635,40 +732,62 @@ fail:
/**
- * Draw triangle if it's CW, cull otherwise.
+ * Try to draw the triangle, restart the scene on failure.
*/
-static void triangle_cw( struct lp_setup_context *setup,
- const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4] )
+static void retry_triangle_ccw( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4],
+ boolean front)
{
- if (!do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface ))
+ if (!do_triangle_ccw( setup, v0, v1, v2, front ))
{
- lp_setup_flush_and_restart(setup);
+ if (!lp_setup_flush_and_restart(setup))
+ return;
- if (!do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface ))
- assert(0);
+ if (!do_triangle_ccw( setup, v0, v1, v2, front ))
+ return;
}
}
+static INLINE float
+calc_area(const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
+{
+ float dx01 = v0[0][0] - v1[0][0];
+ float dy01 = v0[0][1] - v1[0][1];
+ float dx20 = v2[0][0] - v0[0][0];
+ float dy20 = v2[0][1] - v0[0][1];
+ return dx01 * dy20 - dx20 * dy01;
+}
+
/**
- * Draw triangle if it's CCW, cull otherwise.
+ * Draw triangle if it's CW, cull otherwise.
*/
-static void triangle_ccw( struct lp_setup_context *setup,
+static void triangle_cw( struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4],
const float (*v2)[4] )
{
- if (!do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ))
- {
- lp_setup_flush_and_restart(setup);
- if (!do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ))
- assert(0);
- }
+ float area = calc_area(v0, v1, v2);
+
+ if (area < 0.0f)
+ retry_triangle_ccw(setup, v0, v2, v1, !setup->ccw_is_frontface);
}
+static void triangle_ccw( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
+{
+ float area = calc_area(v0, v1, v2);
+
+ if (area > 0.0f)
+ retry_triangle_ccw(setup, v0, v1, v2, setup->ccw_is_frontface);
+}
/**
* Draw triangle whether it's CW or CCW.
@@ -678,18 +797,12 @@ static void triangle_both( struct lp_setup_context *setup,
const float (*v1)[4],
const float (*v2)[4] )
{
- /* edge vectors e = v0 - v2, f = v1 - v2 */
- const float ex = v0[0][0] - v2[0][0];
- const float ey = v0[0][1] - v2[0][1];
- const float fx = v1[0][0] - v2[0][0];
- const float fy = v1[0][1] - v2[0][1];
-
- /* det = cross(e,f).z */
- const float det = ex * fy - ey * fx;
- if (det < 0.0f)
- triangle_ccw( setup, v0, v1, v2 );
- else if (det > 0.0f)
- triangle_cw( setup, v0, v1, v2 );
+ float area = calc_area(v0, v1, v2);
+
+ if (area > 0.0f)
+ retry_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface );
+ else if (area < 0.0f)
+ retry_triangle_ccw( setup, v0, v2, v1, !setup->ccw_is_frontface );
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
index 6308561f242..9c1f0fe7939 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
@@ -141,7 +141,8 @@ lp_setup_draw_elements(struct vbuf_render *vbr, const ushort *indices, uint nr)
const boolean flatshade_first = setup->flatshade_first;
unsigned i;
- lp_setup_update_state(setup, TRUE);
+ if (!lp_setup_update_state(setup, TRUE))
+ return;
switch (setup->prim) {
case PIPE_PRIM_POINTS:
@@ -338,7 +339,8 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
const boolean flatshade_first = setup->flatshade_first;
unsigned i;
- lp_setup_update_state(setup, TRUE);
+ if (!lp_setup_update_state(setup, TRUE))
+ return;
switch (setup->prim) {
case PIPE_PRIM_POINTS:
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 86313e1c484..7893e9cdc0c 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -97,6 +97,9 @@ llvmpipe_set_framebuffer_state(struct pipe_context *,
void
llvmpipe_update_fs(struct llvmpipe_context *lp);
+void
+llvmpipe_update_setup(struct llvmpipe_context *lp);
+
void
llvmpipe_update_derived(struct llvmpipe_context *llvmpipe);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index bb059d04599..0f5f7369e04 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -50,12 +50,13 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
{
const struct lp_fragment_shader *lpfs = llvmpipe->fs;
struct vertex_info *vinfo = &llvmpipe->vertex_info;
- struct lp_shader_input *inputs = llvmpipe->inputs;
unsigned vs_index;
uint i;
/*
- * Match FS inputs against VS outputs, emitting the necessary attributes.
+ * Match FS inputs against VS outputs, emitting the necessary
+ * attributes. Could cache these structs and look them up with a
+ * combination of fragment shader, vertex shader ids.
*/
vinfo->num_attribs = 0;
@@ -66,72 +67,18 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
- for (i = 0; i < lpfs->info.num_inputs; i++) {
+ for (i = 0; i < lpfs->info.base.num_inputs; i++) {
/*
* Search for each input in current vs output:
*/
vs_index = draw_find_shader_output(llvmpipe->draw,
- lpfs->info.input_semantic_name[i],
- lpfs->info.input_semantic_index[i]);
- if (vs_index < 0) {
- /*
- * This can happen with sprite coordinates - the vertex
- * shader doesn't need to provide an output as we generate
- * them internally. However, lets keep pretending that there
- * is something there to not confuse other code.
- */
- vs_index = 0;
- }
-
- /* This can be pre-computed, except for flatshade:
- */
- inputs[i].usage_mask = lpfs->info.input_usage_mask[i];
-
- switch (lpfs->info.input_interpolate[i]) {
- case TGSI_INTERPOLATE_CONSTANT:
- inputs[i].interp = LP_INTERP_CONSTANT;
- break;
- case TGSI_INTERPOLATE_LINEAR:
- inputs[i].interp = LP_INTERP_LINEAR;
- break;
- case TGSI_INTERPOLATE_PERSPECTIVE:
- inputs[i].interp = LP_INTERP_PERSPECTIVE;
- break;
- default:
- assert(0);
- break;
- }
-
- switch (lpfs->info.input_semantic_name[i]) {
- case TGSI_SEMANTIC_FACE:
- inputs[i].interp = LP_INTERP_FACING;
- break;
- case TGSI_SEMANTIC_POSITION:
- /* Position was already emitted above
- */
- inputs[i].interp = LP_INTERP_POSITION;
- inputs[i].src_index = 0;
- continue;
- case TGSI_SEMANTIC_COLOR:
- /* Colors are linearly inputs[i].interpolated in the fragment shader
- * even when flatshading is active. This just tells the
- * setup module to use coefficients with ddx==0 and
- * ddy==0.
- */
- if (llvmpipe->rasterizer->flatshade)
- inputs[i].interp = LP_INTERP_CONSTANT;
- break;
-
- default:
- break;
- }
+ lpfs->info.base.input_semantic_name[i],
+ lpfs->info.base.input_semantic_index[i]);
/*
* Emit the requested fs attribute for all but position.
*/
-
- inputs[i].src_index = vinfo->num_attribs;
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
}
@@ -145,15 +92,8 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
}
- llvmpipe->num_inputs = lpfs->info.num_inputs;
-
draw_compute_vertex_size(vinfo);
-
lp_setup_set_vertex_info(llvmpipe->setup, vinfo);
-
- lp_setup_set_fs_inputs(llvmpipe->setup,
- inputs,
- lpfs->info.num_inputs);
}
@@ -190,6 +130,10 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
LP_NEW_QUERY))
llvmpipe_update_fs( llvmpipe );
+ if (llvmpipe->dirty & (LP_NEW_FS |
+ LP_NEW_RASTERIZER))
+ llvmpipe_update_setup( llvmpipe );
+
if (llvmpipe->dirty & LP_NEW_BLEND_COLOR)
lp_setup_set_blend_color(llvmpipe->setup,
&llvmpipe->blend_color);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index f0a15e11b9b..9fbedac165f 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -99,74 +99,12 @@
#include <llvm-c/Analysis.h>
+#include <llvm-c/BitWriter.h>
static unsigned fs_no = 0;
-/**
- * Generate the depth /stencil test code.
- */
-static void
-generate_depth_stencil(LLVMBuilderRef builder,
- const struct lp_fragment_shader_variant_key *key,
- struct lp_type src_type,
- struct lp_build_mask_context *mask,
- LLVMValueRef stencil_refs[2],
- LLVMValueRef src,
- LLVMValueRef dst_ptr,
- LLVMValueRef facing,
- LLVMValueRef counter)
-{
- const struct util_format_description *format_desc;
- struct lp_type dst_type;
-
- if (!key->depth.enabled && !key->stencil[0].enabled && !key->stencil[1].enabled)
- return;
-
- format_desc = util_format_description(key->zsbuf_format);
- assert(format_desc);
-
- /*
- * Depths are expected to be between 0 and 1, even if they are stored in
- * floats. Setting these bits here will ensure that the lp_build_conv() call
- * below won't try to unnecessarily clamp the incoming values.
- */
- if(src_type.floating) {
- src_type.sign = FALSE;
- src_type.norm = TRUE;
- }
- else {
- assert(!src_type.sign);
- assert(src_type.norm);
- }
-
- /* Pick the depth type. */
- dst_type = lp_depth_type(format_desc, src_type.width*src_type.length);
-
- /* FIXME: Cope with a depth test type with a different bit width. */
- assert(dst_type.width == src_type.width);
- assert(dst_type.length == src_type.length);
-
- /* Convert fragment Z from float to integer */
- lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1);
-
- dst_ptr = LLVMBuildBitCast(builder,
- dst_ptr,
- LLVMPointerType(lp_build_vec_type(dst_type), 0), "");
- lp_build_depth_stencil_test(builder,
- &key->depth,
- key->stencil,
- dst_type,
- format_desc,
- mask,
- stencil_refs,
- src,
- dst_ptr,
- facing,
- counter);
-}
-
/**
* Expand the relevent bits of mask_input to a 4-dword mask for the
@@ -248,6 +186,26 @@ generate_quad_mask(LLVMBuilderRef builder,
}
+#define EARLY_DEPTH_TEST 0x1
+#define LATE_DEPTH_TEST 0x2
+#define EARLY_DEPTH_WRITE 0x4
+#define LATE_DEPTH_WRITE 0x8
+
+static int
+find_output_by_semantic( const struct tgsi_shader_info *info,
+ unsigned semantic,
+ unsigned index )
+{
+ int i;
+
+ for (i = 0; i < info->num_outputs; i++)
+ if (info->output_semantic_name[i] == semantic &&
+ info->output_semantic_index[i] == index)
+ return i;
+
+ return -1;
+}
+
/**
* Generate the fragment shader, depth/stencil test, and alpha tests.
@@ -255,14 +213,13 @@ generate_quad_mask(LLVMBuilderRef builder,
* \param partial_mask if 1, do mask_input testing
*/
static void
-generate_fs(struct llvmpipe_context *lp,
- struct lp_fragment_shader *shader,
+generate_fs(struct lp_fragment_shader *shader,
const struct lp_fragment_shader_variant_key *key,
LLVMBuilderRef builder,
struct lp_type type,
LLVMValueRef context_ptr,
unsigned i,
- const struct lp_build_interp_soa_context *interp,
+ struct lp_build_interp_soa_context *interp,
struct lp_build_sampler_soa *sampler,
LLVMValueRef *pmask,
LLVMValueRef (*color)[4],
@@ -272,18 +229,52 @@ generate_fs(struct llvmpipe_context *lp,
LLVMValueRef mask_input,
LLVMValueRef counter)
{
+ const struct util_format_description *zs_format_desc = NULL;
const struct tgsi_token *tokens = shader->base.tokens;
LLVMTypeRef vec_type;
LLVMValueRef consts_ptr;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
- LLVMValueRef z = interp->pos[2];
+ LLVMValueRef z;
+ LLVMValueRef zs_value = NULL;
LLVMValueRef stencil_refs[2];
- struct lp_build_flow_context *flow;
struct lp_build_mask_context mask;
- boolean early_depth_stencil_test;
+ boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 &&
+ shader->info.base.num_inputs < 3 &&
+ shader->info.base.num_instructions < 8);
unsigned attrib;
unsigned chan;
unsigned cbuf;
+ unsigned depth_mode;
+
+ if (key->depth.enabled ||
+ key->stencil[0].enabled ||
+ key->stencil[1].enabled) {
+
+ zs_format_desc = util_format_description(key->zsbuf_format);
+ assert(zs_format_desc);
+
+ if (!shader->info.base.writes_z) {
+ if (key->alpha.enabled || shader->info.base.uses_kill)
+ /* With alpha test and kill, can do the depth test early
+ * and hopefully eliminate some quads. But need to do a
+ * special deferred depth write once the final mask value
+ * is known.
+ */
+ depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE;
+ else
+ depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE;
+ }
+ else {
+ depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
+ }
+
+ if (!(key->depth.enabled && key->depth.writemask) &&
+ !(key->stencil[0].enabled && key->stencil[0].writemask))
+ depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE);
+ }
+ else {
+ depth_mode = 0;
+ }
assert(i < 4);
@@ -294,20 +285,14 @@ generate_fs(struct llvmpipe_context *lp,
consts_ptr = lp_jit_context_constants(builder, context_ptr);
- flow = lp_build_flow_create(builder);
-
memset(outputs, 0, sizeof outputs);
- lp_build_flow_scope_begin(flow);
-
/* Declare the color and z variables */
for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
for(chan = 0; chan < NUM_CHANNELS; ++chan) {
- color[cbuf][chan] = LLVMGetUndef(vec_type);
- lp_build_flow_scope_declare(flow, &color[cbuf][chan]);
+ color[cbuf][chan] = lp_build_alloca(builder, vec_type, "color");
}
}
- lp_build_flow_scope_declare(flow, &z);
/* do triangle edge testing */
if (partial_mask) {
@@ -319,74 +304,126 @@ generate_fs(struct llvmpipe_context *lp,
}
/* 'mask' will control execution based on quad's pixel alive/killed state */
- lp_build_mask_begin(&mask, flow, type, *pmask);
-
- early_depth_stencil_test =
- (key->depth.enabled || key->stencil[0].enabled) &&
- !key->alpha.enabled &&
- !shader->info.uses_kill &&
- !shader->info.writes_z;
-
- if (early_depth_stencil_test)
- generate_depth_stencil(builder, key,
- type, &mask,
- stencil_refs, z, depth_ptr, facing, counter);
+ lp_build_mask_begin(&mask, builder, type, *pmask);
+
+ if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader)
+ lp_build_mask_check(&mask);
+
+ lp_build_interp_soa_update_pos(interp, i);
+ z = interp->pos[2];
+
+ if (depth_mode & EARLY_DEPTH_TEST) {
+ lp_build_depth_stencil_test(builder,
+ &key->depth,
+ key->stencil,
+ type,
+ zs_format_desc,
+ &mask,
+ stencil_refs,
+ z,
+ depth_ptr, facing,
+ &zs_value,
+ !simple_shader);
+
+ if (depth_mode & EARLY_DEPTH_WRITE) {
+ lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value);
+ }
+ }
+ lp_build_interp_soa_update_inputs(interp, i);
+
+ /* Build the actual shader */
lp_build_tgsi_soa(builder, tokens, type, &mask,
consts_ptr, interp->pos, interp->inputs,
- outputs, sampler, &shader->info);
+ outputs, sampler, &shader->info.base);
- /* loop over fragment shader outputs/results */
- for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
- for(chan = 0; chan < NUM_CHANNELS; ++chan) {
- if(outputs[attrib][chan]) {
- LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
- lp_build_name(out, "output%u.%u.%c", i, attrib, "xyzw"[chan]);
-
- switch (shader->info.output_semantic_name[attrib]) {
- case TGSI_SEMANTIC_COLOR:
- {
- unsigned cbuf = shader->info.output_semantic_index[attrib];
-
- lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]);
-
- /* Alpha test */
- /* XXX: should only test the final assignment to alpha */
- if (cbuf == 0 && chan == 3 && key->alpha.enabled) {
- LLVMValueRef alpha = out;
- LLVMValueRef alpha_ref_value;
- alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr);
- alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value);
- lp_build_alpha_test(builder, key->alpha.func, type,
- &mask, alpha, alpha_ref_value);
- }
-
- color[cbuf][chan] = out;
- break;
- }
-
- case TGSI_SEMANTIC_POSITION:
- if(chan == 2)
- z = out;
- break;
- }
- }
+
+ /* Alpha test */
+ if (key->alpha.enabled) {
+ int color0 = find_output_by_semantic(&shader->info.base,
+ TGSI_SEMANTIC_COLOR,
+ 0);
+
+ if (color0 != -1 && outputs[color0][3]) {
+ LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha");
+ LLVMValueRef alpha_ref_value;
+
+ alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr);
+ alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value);
+
+ lp_build_alpha_test(builder, key->alpha.func, type,
+ &mask, alpha, alpha_ref_value,
+ (depth_mode & LATE_DEPTH_TEST) != 0);
}
}
- if (!early_depth_stencil_test)
- generate_depth_stencil(builder, key,
- type, &mask,
- stencil_refs, z, depth_ptr, facing, counter);
+ /* Late Z test */
+ if (depth_mode & LATE_DEPTH_TEST) {
+ int pos0 = find_output_by_semantic(&shader->info.base,
+ TGSI_SEMANTIC_POSITION,
+ 0);
+
+ if (pos0 != -1 && outputs[pos0][2]) {
+ z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
+ }
- lp_build_mask_end(&mask);
+ lp_build_depth_stencil_test(builder,
+ &key->depth,
+ key->stencil,
+ type,
+ zs_format_desc,
+ &mask,
+ stencil_refs,
+ z,
+ depth_ptr, facing,
+ &zs_value,
+ !simple_shader);
+ /* Late Z write */
+ if (depth_mode & LATE_DEPTH_WRITE) {
+ lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value);
+ }
+ }
+ else if ((depth_mode & EARLY_DEPTH_TEST) &&
+ (depth_mode & LATE_DEPTH_WRITE))
+ {
+ /* Need to apply a reduced mask to the depth write. Reload the
+ * depth value, update from zs_value with the new mask value and
+ * write that out.
+ */
+ lp_build_deferred_depth_write(builder,
+ type,
+ zs_format_desc,
+ &mask,
+ depth_ptr,
+ zs_value);
+ }
- lp_build_flow_scope_end(flow);
- lp_build_flow_destroy(flow);
+ /* Color write */
+ for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
+ {
+ if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR &&
+ shader->info.base.output_semantic_index[attrib] < key->nr_cbufs)
+ {
+ unsigned cbuf = shader->info.base.output_semantic_index[attrib];
+ for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+ if(outputs[attrib][chan]) {
+ /* XXX: just initialize outputs to point at colors[] and
+ * skip this.
+ */
+ LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
+ lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]);
+ LLVMBuildStore(builder, out, color[cbuf][chan]);
+ }
+ }
+ }
+ }
- *pmask = mask.value;
+ if (counter)
+ lp_build_occlusion_count(builder, type,
+ lp_build_mask_value(&mask), counter);
+ *pmask = lp_build_mask_end(&mask);
}
@@ -407,10 +444,10 @@ generate_blend(const struct pipe_blend_state *blend,
LLVMValueRef context_ptr,
LLVMValueRef mask,
LLVMValueRef *src,
- LLVMValueRef dst_ptr)
+ LLVMValueRef dst_ptr,
+ boolean do_branch)
{
struct lp_build_context bld;
- struct lp_build_flow_context *flow;
struct lp_build_mask_context mask_ctx;
LLVMTypeRef vec_type;
LLVMValueRef const_ptr;
@@ -421,10 +458,9 @@ generate_blend(const struct pipe_blend_state *blend,
lp_build_context_init(&bld, builder, type);
- flow = lp_build_flow_create(builder);
-
- /* we'll use this mask context to skip blending if all pixels are dead */
- lp_build_mask_begin(&mask_ctx, flow, type, mask);
+ lp_build_mask_begin(&mask_ctx, builder, type, mask);
+ if (do_branch)
+ lp_build_mask_check(&mask_ctx);
vec_type = lp_build_vec_type(type);
@@ -457,7 +493,6 @@ generate_blend(const struct pipe_blend_state *blend,
}
lp_build_mask_end(&mask_ctx);
- lp_build_flow_destroy(flow);
}
@@ -468,13 +503,13 @@ generate_blend(const struct pipe_blend_state *blend,
* 2x2 pixels.
*/
static void
-generate_fragment(struct llvmpipe_context *lp,
+generate_fragment(struct llvmpipe_screen *screen,
struct lp_fragment_shader *shader,
struct lp_fragment_shader_variant *variant,
unsigned partial_mask)
{
- struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
const struct lp_fragment_shader_variant_key *key = &variant->key;
+ struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
char func_name[256];
struct lp_type fs_type;
struct lp_type blend_type;
@@ -502,11 +537,24 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMValueRef blend_mask;
LLVMValueRef function;
LLVMValueRef facing;
+ const struct util_format_description *zs_format_desc;
unsigned num_fs;
unsigned i;
unsigned chan;
unsigned cbuf;
+ /* Adjust color input interpolation according to flatshade state:
+ */
+ memcpy(inputs, shader->inputs, shader->info.base.num_inputs * sizeof inputs[0]);
+ for (i = 0; i < shader->info.base.num_inputs; i++) {
+ if (inputs[i].interp == LP_INTERP_COLOR) {
+ if (key->flatshade)
+ inputs[i].interp = LP_INTERP_CONSTANT;
+ else
+ inputs[i].interp = LP_INTERP_LINEAR;
+ }
+ }
+
/* TODO: actually pick these based on the fs and color buffer
* characteristics. */
@@ -542,12 +590,12 @@ generate_fragment(struct llvmpipe_context *lp,
arg_types[0] = screen->context_ptr_type; /* context */
arg_types[1] = LLVMInt32Type(); /* x */
arg_types[2] = LLVMInt32Type(); /* y */
- arg_types[3] = LLVMFloatType(); /* facing */
+ arg_types[3] = LLVMInt32Type(); /* facing */
arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* a0 */
arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dadx */
arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */
arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */
- arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */
+ arg_types[8] = LLVMPointerType(LLVMInt8Type(), 0); /* depth */
arg_types[9] = LLVMInt32Type(); /* mask_input */
arg_types[10] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */
@@ -558,7 +606,6 @@ generate_fragment(struct llvmpipe_context *lp,
variant->function[partial_mask] = function;
-
/* XXX: need to propagate noalias down into color param now we are
* passing a pointer-to-pointer?
*/
@@ -606,8 +653,8 @@ generate_fragment(struct llvmpipe_context *lp,
* already included in the shader key.
*/
lp_build_interp_soa_init(&interp,
- lp->num_inputs,
- lp->inputs,
+ shader->info.base.num_inputs,
+ inputs,
builder, fs_type,
a0_ptr, dadx_ptr, dady_ptr,
x, y);
@@ -616,17 +663,18 @@ generate_fragment(struct llvmpipe_context *lp,
sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
/* loop over quads in the block */
+ zs_format_desc = util_format_description(key->zsbuf_format);
+
for(i = 0; i < num_fs; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef depth_offset = LLVMConstInt(LLVMInt32Type(),
+ i*fs_type.length*zs_format_desc->block.bits/8,
+ 0);
LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS];
LLVMValueRef depth_ptr_i;
- if(i != 0)
- lp_build_interp_soa_update(&interp, i);
+ depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, "");
- depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, "");
-
- generate_fs(lp, shader, key,
+ generate_fs(shader, key,
builder,
fs_type,
context_ptr,
@@ -660,9 +708,18 @@ generate_fragment(struct llvmpipe_context *lp,
* Convert the fs's output color and mask to fit to the blending type.
*/
for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+ LLVMValueRef fs_color_vals[LP_MAX_VECTOR_LENGTH];
+
+ for (i = 0; i < num_fs; i++) {
+ fs_color_vals[i] =
+ LLVMBuildLoad(builder, fs_out_color[cbuf][chan][i], "fs_color_vals");
+ }
+
lp_build_conv(builder, fs_type, blend_type,
- fs_out_color[cbuf][chan], num_fs,
+ fs_color_vals,
+ num_fs,
&blend_in_color[chan], 1);
+
lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]);
}
@@ -685,14 +742,23 @@ generate_fragment(struct llvmpipe_context *lp,
/*
* Blending.
*/
- generate_blend(&key->blend,
- rt,
- builder,
- blend_type,
- context_ptr,
- blend_mask,
- blend_in_color,
- color_ptr);
+ {
+ /* Could the 4x4 have been killed?
+ */
+ boolean do_branch = ((key->depth.enabled || key->stencil[0].enabled) &&
+ !key->alpha.enabled &&
+ !shader->info.base.uses_kill);
+
+ generate_blend(&key->blend,
+ rt,
+ builder,
+ blend_type,
+ context_ptr,
+ blend_mask,
+ blend_in_color,
+ color_ptr,
+ do_branch);
+ }
}
#ifdef PIPE_ARCH_X86
@@ -717,12 +783,17 @@ generate_fragment(struct llvmpipe_context *lp,
/* Apply optimizations to LLVM IR */
LLVMRunFunctionPassManager(screen->pass, function);
- if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ if ((gallivm_debug & GALLIVM_DEBUG_IR) || (LP_DEBUG & DEBUG_FS)) {
/* Print the LLVM IR to stderr */
lp_debug_dump_value(function);
debug_printf("\n");
}
+ /* Dump byte code to a file */
+ if (0) {
+ LLVMWriteBitcodeToFile(lp_build_module, "llvmpipe.bc");
+ }
+
/*
* Translate the LLVM IR into machine code.
*/
@@ -731,7 +802,7 @@ generate_fragment(struct llvmpipe_context *lp,
variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f);
- if (gallivm_debug & GALLIVM_DEBUG_ASM) {
+ if ((gallivm_debug & GALLIVM_DEBUG_ASM) || (LP_DEBUG & DEBUG_FS)) {
lp_disassemble(f);
}
lp_func_delete_body(function);
@@ -746,6 +817,9 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
debug_printf("fs variant %p:\n", (void *) key);
+ if (key->flatshade) {
+ debug_printf("flatshade = 1\n");
+ }
for (i = 0; i < key->nr_cbufs; ++i) {
debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i]));
}
@@ -770,6 +844,10 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE));
}
+ if (key->occlusion_count) {
+ debug_printf("occlusion_count = 1\n");
+ }
+
if (key->blend.logicop_enable) {
debug_printf("blend.logicop_func = %s\n", util_dump_logicop(key->blend.logicop_func, TRUE));
}
@@ -782,31 +860,33 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
debug_printf("blend.alpha_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE));
}
debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask);
- for (i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
- if (key->sampler[i].format) {
- debug_printf("sampler[%u] = \n", i);
- debug_printf(" .format = %s\n",
- util_format_name(key->sampler[i].format));
- debug_printf(" .target = %s\n",
- util_dump_tex_target(key->sampler[i].target, TRUE));
- debug_printf(" .pot = %u %u %u\n",
- key->sampler[i].pot_width,
- key->sampler[i].pot_height,
- key->sampler[i].pot_depth);
- debug_printf(" .wrap = %s %s %s\n",
- util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
- util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
- util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
- debug_printf(" .min_img_filter = %s\n",
- util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
- debug_printf(" .min_mip_filter = %s\n",
- util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
- debug_printf(" .mag_img_filter = %s\n",
- util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
- if (key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE)
- debug_printf(" .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE));
- debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords);
- }
+ for (i = 0; i < key->nr_samplers; ++i) {
+ debug_printf("sampler[%u] = \n", i);
+ debug_printf(" .format = %s\n",
+ util_format_name(key->sampler[i].format));
+ debug_printf(" .target = %s\n",
+ util_dump_tex_target(key->sampler[i].target, TRUE));
+ debug_printf(" .pot = %u %u %u\n",
+ key->sampler[i].pot_width,
+ key->sampler[i].pot_height,
+ key->sampler[i].pot_depth);
+ debug_printf(" .wrap = %s %s %s\n",
+ util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
+ util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
+ util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
+ debug_printf(" .min_img_filter = %s\n",
+ util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
+ debug_printf(" .min_mip_filter = %s\n",
+ util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
+ debug_printf(" .mag_img_filter = %s\n",
+ util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
+ if (key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE)
+ debug_printf(" .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE));
+ debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords);
+ debug_printf(" .min_max_lod_equal = %u\n", key->sampler[i].min_max_lod_equal);
+ debug_printf(" .lod_bias_non_zero = %u\n", key->sampler[i].lod_bias_non_zero);
+ debug_printf(" .apply_min_lod = %u\n", key->sampler[i].apply_min_lod);
+ debug_printf(" .apply_max_lod = %u\n", key->sampler[i].apply_max_lod);
}
}
@@ -823,7 +903,7 @@ lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant)
}
static struct lp_fragment_shader_variant *
-generate_variant(struct llvmpipe_context *lp,
+generate_variant(struct llvmpipe_screen *screen,
struct lp_fragment_shader *shader,
const struct lp_fragment_shader_variant_key *key)
{
@@ -861,7 +941,7 @@ generate_variant(struct llvmpipe_context *lp,
!key->stencil[0].enabled &&
!key->alpha.enabled &&
!key->depth.enabled &&
- !shader->info.uses_kill
+ !shader->info.base.uses_kill
? TRUE : FALSE;
@@ -869,11 +949,11 @@ generate_variant(struct llvmpipe_context *lp,
lp_debug_fs_variant(variant);
}
- generate_fragment(lp, shader, variant, RAST_EDGE_TEST);
+ generate_fragment(screen, shader, variant, RAST_EDGE_TEST);
if (variant->opaque) {
/* Specialized shader, which doesn't need to read the color buffer. */
- generate_fragment(lp, shader, variant, RAST_WHOLE);
+ generate_fragment(screen, shader, variant, RAST_WHOLE);
} else {
variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST];
}
@@ -889,6 +969,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
struct lp_fragment_shader *shader;
int nr_samplers;
+ int i;
shader = CALLOC_STRUCT(lp_fragment_shader);
if (!shader)
@@ -898,7 +979,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
make_empty_list(&shader->variants);
/* get/save the summary info for this shader */
- tgsi_scan_shader(templ->tokens, &shader->info);
+ lp_build_tgsi_info(templ->tokens, &shader->info);
/* we need to keep a local copy of the tokens */
shader->base.tokens = tgsi_dup_tokens(templ->tokens);
@@ -910,18 +991,58 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
return NULL;
}
- nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
+ nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key,
sampler[nr_samplers]);
+ for (i = 0; i < shader->info.base.num_inputs; i++) {
+ shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i];
+
+ switch (shader->info.base.input_interpolate[i]) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ shader->inputs[i].interp = LP_INTERP_CONSTANT;
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ shader->inputs[i].interp = LP_INTERP_LINEAR;
+ break;
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ shader->inputs[i].interp = LP_INTERP_PERSPECTIVE;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (shader->info.base.input_semantic_name[i]) {
+ case TGSI_SEMANTIC_COLOR:
+ /* Colors may be either linearly or constant interpolated in
+ * the fragment shader, but that information isn't available
+ * here. Mark color inputs and fix them up later.
+ */
+ shader->inputs[i].interp = LP_INTERP_COLOR;
+ break;
+ case TGSI_SEMANTIC_FACE:
+ shader->inputs[i].interp = LP_INTERP_FACING;
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ /* Position was already emitted above
+ */
+ shader->inputs[i].interp = LP_INTERP_POSITION;
+ shader->inputs[i].src_index = 0;
+ continue;
+ }
+
+ shader->inputs[i].src_index = i+1;
+ }
+
if (LP_DEBUG & DEBUG_TGSI) {
unsigned attrib;
debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader);
tgsi_dump(templ->tokens, 0);
debug_printf("usage masks:\n");
- for (attrib = 0; attrib < shader->info.num_inputs; ++attrib) {
- unsigned usage_mask = shader->info.input_usage_mask[attrib];
+ for (attrib = 0; attrib < shader->info.base.num_inputs; ++attrib) {
+ unsigned usage_mask = shader->info.base.input_usage_mask[attrib];
debug_printf(" IN[%u].%s%s%s%s\n",
attrib,
usage_mask & TGSI_WRITEMASK_X ? "x" : "",
@@ -1150,10 +1271,10 @@ make_variant_key(struct llvmpipe_context *lp,
/* This value will be the same for all the variants of a given shader:
*/
- key->nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
+ key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
for(i = 0; i < key->nr_samplers; ++i) {
- if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
+ if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
lp_sampler_static_state(&key->sampler[i],
lp->fragment_sampler_views[i],
lp->sampler[i]);
@@ -1168,6 +1289,7 @@ make_variant_key(struct llvmpipe_context *lp,
void
llvmpipe_update_fs(struct llvmpipe_context *lp)
{
+ struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
struct lp_fragment_shader *shader = lp->fs;
struct lp_fragment_shader_variant_key key;
struct lp_fragment_shader_variant *variant = NULL;
@@ -1208,7 +1330,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
}
t0 = os_time_get();
- variant = generate_variant(lp, shader, &key);
+ variant = generate_variant(screen, shader, &key);
t1 = os_time_get();
dt = t1 - t0;
@@ -1228,6 +1350,10 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
+
+
+
+
void
llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe)
{
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h
index 4999b8dca1a..7d58c4936c7 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.h
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h
@@ -34,6 +34,8 @@
#include "pipe/p_state.h"
#include "tgsi/tgsi_scan.h" /* for tgsi_shader_info */
#include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */
+#include "gallivm/lp_bld_tgsi.h" /* for lp_tgsi_info */
+#include "lp_bld_interp.h" /* for struct lp_shader_input */
struct tgsi_token;
@@ -96,7 +98,7 @@ struct lp_fragment_shader
{
struct pipe_shader_state base;
- struct tgsi_shader_info info;
+ struct lp_tgsi_info info;
struct lp_fs_variant_list_item variants;
@@ -107,6 +109,9 @@ struct lp_fragment_shader
unsigned no;
unsigned variants_created;
unsigned variants_cached;
+
+ /** Fragment shader input interpolation info */
+ struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
};
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index 17a4a0ed02d..1dd866195d3 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -246,9 +246,9 @@ llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *lp,
struct pipe_sampler_view **views)
{
unsigned i;
- uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS];
- uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS];
- const void *data[DRAW_MAX_TEXTURE_LEVELS];
+ uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS];
+ uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS];
+ const void *data[PIPE_MAX_TEXTURE_LEVELS];
assert(num <= PIPE_MAX_VERTEX_SAMPLERS);
if (!num)
diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c
new file mode 100644
index 00000000000..2c8b8b9a928
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -0,0 +1,759 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
+#include "os/os_time.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_intr.h"
+#include <llvm-c/Analysis.h> /* for LLVMVerifyFunction */
+
+#include "lp_perf.h"
+#include "lp_debug.h"
+#include "lp_flush.h"
+#include "lp_screen.h"
+#include "lp_context.h"
+#include "lp_setup_context.h"
+#include "lp_rast.h"
+#include "lp_state.h"
+#include "lp_state_fs.h"
+#include "lp_state_setup.h"
+
+
+
+/* currently organized to interpolate full float[4] attributes even
+ * when some elements are unused. Later, can pack vertex data more
+ * closely.
+ */
+
+
+struct lp_setup_args
+{
+ /* Function arguments:
+ */
+ LLVMValueRef v0;
+ LLVMValueRef v1;
+ LLVMValueRef v2;
+ LLVMValueRef facing; /* boolean */
+ LLVMValueRef a0;
+ LLVMValueRef dadx;
+ LLVMValueRef dady;
+
+ /* Derived:
+ */
+ LLVMValueRef x0_center;
+ LLVMValueRef y0_center;
+ LLVMValueRef dy20_ooa;
+ LLVMValueRef dy01_ooa;
+ LLVMValueRef dx20_ooa;
+ LLVMValueRef dx01_ooa;
+};
+
+static LLVMTypeRef type4f(void)
+{
+ return LLVMVectorType(LLVMFloatType(), 4);
+}
+
+
+/* Equivalent of _mm_setr_ps(a,b,c,d)
+ */
+static LLVMValueRef vec4f(LLVMBuilderRef bld,
+ LLVMValueRef a, LLVMValueRef b, LLVMValueRef c, LLVMValueRef d,
+ const char *name)
+{
+ LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
+ LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
+ LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
+
+ LLVMValueRef res = LLVMGetUndef(type4f());
+
+ res = LLVMBuildInsertElement(bld, res, a, i0, "");
+ res = LLVMBuildInsertElement(bld, res, b, i1, "");
+ res = LLVMBuildInsertElement(bld, res, c, i2, "");
+ res = LLVMBuildInsertElement(bld, res, d, i3, name);
+
+ return res;
+}
+
+/* Equivalent of _mm_set1_ps(a)
+ */
+static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld,
+ LLVMValueRef a,
+ const char *name)
+{
+ LLVMValueRef res = LLVMGetUndef(type4f());
+ int i;
+
+ for(i = 0; i < 4; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : "");
+ }
+
+ return res;
+}
+
+static void
+store_coef(LLVMBuilderRef builder,
+ struct lp_setup_args *args,
+ unsigned slot,
+ LLVMValueRef a0,
+ LLVMValueRef dadx,
+ LLVMValueRef dady)
+{
+ LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), slot, 0);
+
+ LLVMBuildStore(builder,
+ a0,
+ LLVMBuildGEP(builder, args->a0, &idx, 1, ""));
+
+ LLVMBuildStore(builder,
+ dadx,
+ LLVMBuildGEP(builder, args->dadx, &idx, 1, ""));
+
+ LLVMBuildStore(builder,
+ dady,
+ LLVMBuildGEP(builder, args->dady, &idx, 1, ""));
+}
+
+
+
+static void
+emit_constant_coef4( LLVMBuilderRef builder,
+ struct lp_setup_args *args,
+ unsigned slot,
+ LLVMValueRef vert,
+ unsigned attr)
+{
+ LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
+ LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero");
+ LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), attr, 0);
+ LLVMValueRef attr_ptr = LLVMBuildGEP(builder, vert, &idx, 1, "attr_ptr");
+ LLVMValueRef vert_attr = LLVMBuildLoad(builder, attr_ptr, "vert_attr");
+
+ store_coef(builder, args, slot, vert_attr, zerovec, zerovec);
+}
+
+
+
+/**
+ * Setup the fragment input attribute with the front-facing value.
+ * \param frontface is the triangle front facing?
+ */
+static void
+emit_facing_coef( LLVMBuilderRef builder,
+ struct lp_setup_args *args,
+ unsigned slot )
+{
+ LLVMValueRef a0_0 = args->facing;
+ LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, LLVMFloatType(), "");
+ LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
+ LLVMValueRef a0 = vec4f(builder, a0_0f, zero, zero, zero, "facing");
+ LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero");
+
+ store_coef(builder, args, slot, a0, zerovec, zerovec);
+}
+
+
+static LLVMValueRef
+vert_attrib(LLVMBuilderRef b,
+ LLVMValueRef vert,
+ int attr,
+ int elem,
+ const char *name)
+{
+ LLVMValueRef idx[2];
+ idx[0] = LLVMConstInt(LLVMInt32Type(), attr, 0);
+ idx[1] = LLVMConstInt(LLVMInt32Type(), elem, 0);
+ return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name);
+}
+
+
+
+static void
+emit_coef4( LLVMBuilderRef b,
+ struct lp_setup_args *args,
+ unsigned slot,
+ LLVMValueRef a0,
+ LLVMValueRef a1,
+ LLVMValueRef a2)
+{
+ LLVMValueRef dy20_ooa = args->dy20_ooa;
+ LLVMValueRef dy01_ooa = args->dy01_ooa;
+ LLVMValueRef dx20_ooa = args->dx20_ooa;
+ LLVMValueRef dx01_ooa = args->dx01_ooa;
+ LLVMValueRef x0_center = args->x0_center;
+ LLVMValueRef y0_center = args->y0_center;
+
+ /* XXX: using fsub, fmul on vector types -- does this work??
+ */
+ LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01");
+ LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20");
+
+ /* Calculate dadx (vec4f)
+ */
+ LLVMValueRef da01_dy20_ooa = LLVMBuildFMul(b, da01, dy20_ooa, "da01_dy20_ooa");
+ LLVMValueRef da20_dy01_ooa = LLVMBuildFMul(b, da20, dy01_ooa, "da20_dy01_ooa");
+ LLVMValueRef dadx = LLVMBuildFSub(b, da01_dy20_ooa, da20_dy01_ooa, "dadx");
+
+ /* Calculate dady (vec4f)
+ */
+ LLVMValueRef da01_dx20_ooa = LLVMBuildFMul(b, da01, dx20_ooa, "da01_dx20_ooa");
+ LLVMValueRef da20_dx01_ooa = LLVMBuildFMul(b, da20, dx01_ooa, "da20_dx01_ooa");
+ LLVMValueRef dady = LLVMBuildFSub(b, da20_dx01_ooa, da01_dx20_ooa, "dady");
+
+ /* Calculate a0 - the attribute value at the origin
+ */
+ LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0");
+ LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, "dady_y0");
+ LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0");
+ LLVMValueRef attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0");
+
+ store_coef(b, args, slot, attr_0, dadx, dady);
+}
+
+
+static void
+emit_linear_coef( LLVMBuilderRef b,
+ struct lp_setup_args *args,
+ unsigned slot,
+ unsigned vert_attr)
+{
+ LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0);
+
+ LLVMValueRef a0 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a");
+ LLVMValueRef a1 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a");
+ LLVMValueRef a2 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a");
+
+ emit_coef4(b, args, slot, a0, a1, a2);
+}
+
+
+
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a triangle.
+ * We basically multiply the vertex value by 1/w before computing
+ * the plane coefficients (a0, dadx, dady).
+ * Later, when we compute the value at a particular fragment position we'll
+ * divide the interpolated value by the interpolated W at that fragment.
+ */
+static void
+emit_perspective_coef( LLVMBuilderRef b,
+ struct lp_setup_args *args,
+ unsigned slot,
+ unsigned vert_attr)
+{
+ /* premultiply by 1/w (v[0][3] is always 1/w):
+ */
+ LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0);
+
+ LLVMValueRef v0a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a");
+ LLVMValueRef v1a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a");
+ LLVMValueRef v2a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a");
+
+ LLVMValueRef v0_oow = vec4f_from_scalar(b, vert_attrib(b, args->v0, 0, 3, ""), "v0_oow");
+ LLVMValueRef v1_oow = vec4f_from_scalar(b, vert_attrib(b, args->v1, 0, 3, ""), "v1_oow");
+ LLVMValueRef v2_oow = vec4f_from_scalar(b, vert_attrib(b, args->v2, 0, 3, ""), "v2_oow");
+
+ LLVMValueRef v0_oow_v0a = LLVMBuildFMul(b, v0a, v0_oow, "v0_oow_v0a");
+ LLVMValueRef v1_oow_v1a = LLVMBuildFMul(b, v1a, v1_oow, "v1_oow_v1a");
+ LLVMValueRef v2_oow_v2a = LLVMBuildFMul(b, v2a, v2_oow, "v2_oow_v2a");
+
+ emit_coef4(b, args, slot, v0_oow_v0a, v1_oow_v1a, v2_oow_v2a);
+}
+
+
+static void
+emit_position_coef( LLVMBuilderRef builder,
+ struct lp_setup_args *args,
+ int slot, int attrib )
+{
+ emit_linear_coef(builder, args, slot, attrib);
+}
+
+
+
+
+/**
+ * Compute the inputs-> dadx, dady, a0 values.
+ */
+static void
+emit_tri_coef( LLVMBuilderRef builder,
+ const struct lp_setup_variant_key *key,
+ struct lp_setup_args *args )
+{
+ unsigned slot;
+
+ /* The internal position input is in slot zero:
+ */
+ emit_position_coef(builder, args, 0, 0);
+
+ /* setup interpolation for all the remaining attributes:
+ */
+ for (slot = 0; slot < key->num_inputs; slot++) {
+ unsigned vert_attr = key->inputs[slot].src_index;
+
+ switch (key->inputs[slot].interp) {
+ case LP_INTERP_CONSTANT:
+ if (key->flatshade_first) {
+ emit_constant_coef4(builder, args, slot+1, args->v0, vert_attr);
+ }
+ else {
+ emit_constant_coef4(builder, args, slot+1, args->v2, vert_attr);
+ }
+ break;
+
+ case LP_INTERP_LINEAR:
+ emit_linear_coef(builder, args, slot+1, vert_attr);
+ break;
+
+ case LP_INTERP_PERSPECTIVE:
+ emit_perspective_coef(builder, args, slot+1, vert_attr);
+ break;
+
+ case LP_INTERP_POSITION:
+ /*
+ * The generated pixel interpolators will pick up the coeffs from
+ * slot 0.
+ */
+ break;
+
+ case LP_INTERP_FACING:
+ emit_facing_coef(builder, args, slot+1);
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+}
+
+
+/* XXX: This is generic code, share with fs/vs codegen:
+ */
+static lp_jit_setup_triangle
+finalize_function(struct llvmpipe_screen *screen,
+ LLVMBuilderRef builder,
+ LLVMValueRef function)
+{
+ void *f;
+
+ /* Verify the LLVM IR. If invalid, dump and abort */
+#ifdef DEBUG
+ if (LLVMVerifyFunction(function, LLVMPrintMessageAction)) {
+ if (1)
+ lp_debug_dump_value(function);
+ abort();
+ }
+#endif
+
+ /* Apply optimizations to LLVM IR */
+ LLVMRunFunctionPassManager(screen->pass, function);
+
+ if (gallivm_debug & GALLIVM_DEBUG_IR)
+ {
+ /* Print the LLVM IR to stderr */
+ lp_debug_dump_value(function);
+ debug_printf("\n");
+ }
+
+ /*
+ * Translate the LLVM IR into machine code.
+ */
+ f = LLVMGetPointerToGlobal(screen->engine, function);
+
+ if (gallivm_debug & GALLIVM_DEBUG_ASM)
+ {
+ lp_disassemble(f);
+ }
+
+ lp_func_delete_body(function);
+
+ return f;
+}
+
+/* XXX: Generic code:
+ */
+static void
+lp_emit_emms(LLVMBuilderRef builder)
+{
+#ifdef PIPE_ARCH_X86
+ /* Avoid corrupting the FPU stack on 32bit OSes. */
+ lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
+#endif
+}
+
+
+/* XXX: generic code:
+ */
+static void
+set_noalias(LLVMBuilderRef builder,
+ LLVMValueRef function,
+ const LLVMTypeRef *arg_types,
+ int nr_args)
+{
+ int i;
+ for(i = 0; i < Elements(arg_types); ++i)
+ if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
+ LLVMAddAttribute(LLVMGetParam(function, i),
+ LLVMNoAliasAttribute);
+}
+
+static void
+init_args(LLVMBuilderRef b,
+ struct lp_setup_args *args,
+ const struct lp_setup_variant *variant)
+{
+ LLVMValueRef v0_x = vert_attrib(b, args->v0, 0, 0, "v0_x");
+ LLVMValueRef v0_y = vert_attrib(b, args->v0, 0, 1, "v0_y");
+
+ LLVMValueRef v1_x = vert_attrib(b, args->v1, 0, 0, "v1_x");
+ LLVMValueRef v1_y = vert_attrib(b, args->v1, 0, 1, "v1_y");
+
+ LLVMValueRef v2_x = vert_attrib(b, args->v2, 0, 0, "v2_x");
+ LLVMValueRef v2_y = vert_attrib(b, args->v2, 0, 1, "v2_y");
+
+ LLVMValueRef pixel_center = LLVMConstReal(LLVMFloatType(),
+ variant->key.pixel_center_half ? 0.5 : 0);
+
+ LLVMValueRef x0_center = LLVMBuildFSub(b, v0_x, pixel_center, "x0_center" );
+ LLVMValueRef y0_center = LLVMBuildFSub(b, v0_y, pixel_center, "y0_center" );
+
+ LLVMValueRef dx01 = LLVMBuildFSub(b, v0_x, v1_x, "dx01");
+ LLVMValueRef dy01 = LLVMBuildFSub(b, v0_y, v1_y, "dy01");
+ LLVMValueRef dx20 = LLVMBuildFSub(b, v2_x, v0_x, "dx20");
+ LLVMValueRef dy20 = LLVMBuildFSub(b, v2_y, v0_y, "dy20");
+
+ LLVMValueRef one = LLVMConstReal(LLVMFloatType(), 1.0);
+ LLVMValueRef e = LLVMBuildFMul(b, dx01, dy20, "e");
+ LLVMValueRef f = LLVMBuildFMul(b, dx20, dy01, "f");
+ LLVMValueRef ooa = LLVMBuildFDiv(b, one, LLVMBuildFSub(b, e, f, ""), "ooa");
+
+ LLVMValueRef dy20_ooa = LLVMBuildFMul(b, dy20, ooa, "dy20_ooa");
+ LLVMValueRef dy01_ooa = LLVMBuildFMul(b, dy01, ooa, "dy01_ooa");
+ LLVMValueRef dx20_ooa = LLVMBuildFMul(b, dx20, ooa, "dx20_ooa");
+ LLVMValueRef dx01_ooa = LLVMBuildFMul(b, dx01, ooa, "dx01_ooa");
+
+ args->dy20_ooa = vec4f_from_scalar(b, dy20_ooa, "dy20_ooa_4f");
+ args->dy01_ooa = vec4f_from_scalar(b, dy01_ooa, "dy01_ooa_4f");
+
+ args->dx20_ooa = vec4f_from_scalar(b, dx20_ooa, "dx20_ooa_4f");
+ args->dx01_ooa = vec4f_from_scalar(b, dx01_ooa, "dx01_ooa_4f");
+
+ args->x0_center = vec4f_from_scalar(b, x0_center, "x0_center_4f");
+ args->y0_center = vec4f_from_scalar(b, y0_center, "y0_center_4f");
+}
+
+/**
+ * Generate the runtime callable function for the coefficient calculation.
+ *
+ */
+static struct lp_setup_variant *
+generate_setup_variant(struct llvmpipe_screen *screen,
+ struct lp_setup_variant_key *key)
+{
+ struct lp_setup_variant *variant = NULL;
+ struct lp_setup_args args;
+ char func_name[256];
+ LLVMTypeRef vec4f_type;
+ LLVMTypeRef func_type;
+ LLVMTypeRef arg_types[7];
+ LLVMBasicBlockRef block;
+ LLVMBuilderRef builder;
+ int64_t t0, t1;
+
+ if (0)
+ goto fail;
+
+ variant = CALLOC_STRUCT(lp_setup_variant);
+ if (variant == NULL)
+ goto fail;
+
+ if (LP_DEBUG & DEBUG_COUNTERS) {
+ t0 = os_time_get();
+ }
+
+ memcpy(&variant->key, key, key->size);
+ variant->list_item_global.base = variant;
+
+ util_snprintf(func_name, sizeof(func_name), "fs%u_setup%u",
+ 0,
+ variant->no);
+
+ /* Currently always deal with full 4-wide vertex attributes from
+ * the vertices.
+ */
+
+ vec4f_type = LLVMVectorType(LLVMFloatType(), 4);
+
+ arg_types[0] = LLVMPointerType(vec4f_type, 0); /* v0 */
+ arg_types[1] = LLVMPointerType(vec4f_type, 0); /* v1 */
+ arg_types[2] = LLVMPointerType(vec4f_type, 0); /* v2 */
+ arg_types[3] = LLVMInt32Type(); /* facing */
+ arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */
+ arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */
+ arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */
+
+ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
+
+ variant->function = LLVMAddFunction(screen->module, func_name, func_type);
+ if (!variant->function)
+ goto fail;
+
+ LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
+
+ args.v0 = LLVMGetParam(variant->function, 0);
+ args.v1 = LLVMGetParam(variant->function, 1);
+ args.v2 = LLVMGetParam(variant->function, 2);
+ args.facing = LLVMGetParam(variant->function, 3);
+ args.a0 = LLVMGetParam(variant->function, 4);
+ args.dadx = LLVMGetParam(variant->function, 5);
+ args.dady = LLVMGetParam(variant->function, 6);
+
+ lp_build_name(args.v0, "in_v0");
+ lp_build_name(args.v1, "in_v1");
+ lp_build_name(args.v2, "in_v2");
+ lp_build_name(args.facing, "in_facing");
+ lp_build_name(args.a0, "out_a0");
+ lp_build_name(args.dadx, "out_dadx");
+ lp_build_name(args.dady, "out_dady");
+
+ /*
+ * Function body
+ */
+ block = LLVMAppendBasicBlock(variant->function, "entry");
+ builder = LLVMCreateBuilder();
+ LLVMPositionBuilderAtEnd(builder, block);
+
+ set_noalias(builder, variant->function, arg_types, Elements(arg_types));
+ init_args(builder, &args, variant);
+ emit_tri_coef(builder, &variant->key, &args);
+
+ lp_emit_emms(builder);
+ LLVMBuildRetVoid(builder);
+ LLVMDisposeBuilder(builder);
+
+ variant->jit_function = finalize_function(screen, builder,
+ variant->function);
+ if (!variant->jit_function)
+ goto fail;
+
+ /*
+ * Update timing information:
+ */
+ if (LP_DEBUG & DEBUG_COUNTERS) {
+ t1 = os_time_get();
+ LP_COUNT_ADD(llvm_compile_time, t1 - t0);
+ LP_COUNT_ADD(nr_llvm_compiles, 1);
+ }
+
+ return variant;
+
+fail:
+ if (variant) {
+ if (variant->function) {
+ if (variant->jit_function)
+ LLVMFreeMachineCodeForFunction(screen->engine,
+ variant->function);
+ LLVMDeleteFunction(variant->function);
+ }
+ FREE(variant);
+ }
+
+ return NULL;
+}
+
+
+
+static void
+lp_make_setup_variant_key(struct llvmpipe_context *lp,
+ struct lp_setup_variant_key *key)
+{
+ struct lp_fragment_shader *fs = lp->fs;
+ unsigned i;
+
+ assert(sizeof key->inputs[0] == sizeof(ushort));
+
+ key->num_inputs = fs->info.base.num_inputs;
+ key->flatshade_first = lp->rasterizer->flatshade_first;
+ key->pixel_center_half = lp->rasterizer->gl_rasterization_rules;
+ key->size = Offset(struct lp_setup_variant_key,
+ inputs[key->num_inputs]);
+ key->pad = 0;
+
+ memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]);
+ for (i = 0; i < key->num_inputs; i++) {
+ if (key->inputs[i].interp == LP_INTERP_COLOR) {
+ if (lp->rasterizer->flatshade)
+ key->inputs[i].interp = LP_INTERP_CONSTANT;
+ else
+ key->inputs[i].interp = LP_INTERP_LINEAR;
+ }
+ }
+
+}
+
+
+static void
+remove_setup_variant(struct llvmpipe_context *lp,
+ struct lp_setup_variant *variant)
+{
+ struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
+
+ if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ debug_printf("llvmpipe: del setup_variant #%u total %u\n",
+ variant->no, lp->nr_setup_variants);
+ }
+
+ if (variant->function) {
+ if (variant->jit_function)
+ LLVMFreeMachineCodeForFunction(screen->engine,
+ variant->function);
+ LLVMDeleteFunction(variant->function);
+ }
+
+ remove_from_list(&variant->list_item_global);
+ lp->nr_setup_variants--;
+ FREE(variant);
+}
+
+
+
+/* When the number of setup variants exceeds a threshold, cull a
+ * fraction (currently a quarter) of them.
+ */
+static void
+cull_setup_variants(struct llvmpipe_context *lp)
+{
+ struct pipe_context *pipe = &lp->pipe;
+ int i;
+
+ /*
+ * XXX: we need to flush the context until we have some sort of reference
+ * counting in fragment shaders as they may still be binned
+ * Flushing alone might not be sufficient we need to wait on it too.
+ */
+ llvmpipe_finish(pipe, __FUNCTION__);
+
+ for (i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) {
+ struct lp_setup_variant_list_item *item = last_elem(&lp->setup_variants_list);
+ remove_setup_variant(lp, item->base);
+ }
+}
+
+
+/**
+ * Update fragment/vertex shader linkage state. This is called just
+ * prior to drawing something when some fragment-related state has
+ * changed.
+ */
+void
+llvmpipe_update_setup(struct llvmpipe_context *lp)
+{
+ struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
+
+ struct lp_setup_variant_key *key = &lp->setup_variant.key;
+ struct lp_setup_variant *variant = NULL;
+ struct lp_setup_variant_list_item *li;
+
+ lp_make_setup_variant_key(lp, key);
+
+ foreach(li, &lp->setup_variants_list) {
+ if(li->base->key.size == key->size &&
+ memcmp(&li->base->key, key, key->size) == 0) {
+ variant = li->base;
+ break;
+ }
+ }
+
+ if (variant) {
+ move_to_head(&lp->setup_variants_list, &variant->list_item_global);
+ }
+ else {
+ if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) {
+ cull_setup_variants(lp);
+ }
+
+ variant = generate_setup_variant(screen, key);
+ insert_at_head(&lp->setup_variants_list, &variant->list_item_global);
+ lp->nr_setup_variants++;
+ }
+
+ lp_setup_set_setup_variant(lp->setup,
+ variant);
+}
+
+void
+lp_delete_setup_variants(struct llvmpipe_context *lp)
+{
+ struct lp_setup_variant_list_item *li;
+ li = first_elem(&lp->setup_variants_list);
+ while(!at_end(&lp->setup_variants_list, li)) {
+ struct lp_setup_variant_list_item *next = next_elem(li);
+ remove_setup_variant(lp, li->base);
+ li = next;
+ }
+}
+
+void
+lp_dump_setup_coef( const struct lp_setup_variant_key *key,
+ const float (*sa0)[4],
+ const float (*sdadx)[4],
+ const float (*sdady)[4])
+{
+ int i, slot;
+
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ float a0 = sa0 [0][i];
+ float dadx = sdadx[0][i];
+ float dady = sdady[0][i];
+
+ debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
+ "xyzw"[i],
+ a0, dadx, dady);
+ }
+
+ for (slot = 0; slot < key->num_inputs; slot++) {
+ unsigned usage_mask = key->inputs[slot].usage_mask;
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ if (usage_mask & (1 << i)) {
+ float a0 = sa0 [1 + slot][i];
+ float dadx = sdadx[1 + slot][i];
+ float dady = sdady[1 + slot][i];
+
+ debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",
+ slot,
+ "xyzw"[i],
+ a0, dadx, dady);
+ }
+ }
+ }
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.h b/src/gallium/drivers/llvmpipe/lp_state_setup.h
new file mode 100644
index 00000000000..b0c81baa75f
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.h
@@ -0,0 +1,80 @@
+#ifndef LP_STATE_SETUP_H
+#define LP_STATE_SETUP_H
+
+#include "lp_bld_interp.h"
+
+
+struct llvmpipe_context;
+struct lp_setup_variant;
+
+struct lp_setup_variant_list_item
+{
+ struct lp_setup_variant *base;
+ struct lp_setup_variant_list_item *next, *prev;
+};
+
+
+struct lp_setup_variant_key {
+ unsigned num_inputs:8;
+ unsigned flatshade_first:1;
+ unsigned pixel_center_half:1;
+ unsigned pad:7;
+ unsigned size:16;
+ struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
+};
+
+
+typedef void (*lp_jit_setup_triangle)( const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4],
+ boolean front_facing,
+ float (*a0)[4],
+ float (*dadx)[4],
+ float (*dady)[4] );
+
+
+
+
+/* At this stage, for a given variant key, we create a
+ * draw_vertex_info struct telling the draw module how to format the
+ * vertices, and an llvm-generated function which calculates the
+ * attribute interpolants (a0, dadx, dady) from three of those
+ * vertices.
+ */
+struct lp_setup_variant {
+ struct lp_setup_variant_key key;
+
+ struct lp_setup_variant_list_item list_item_global;
+
+ /* XXX: this is a pointer to the LLVM IR. Once jit_function is
+ * generated, we never need to use the IR again - need to find a
+ * way to release this data without destroying the generated
+ * assembly.
+ */
+ LLVMValueRef function;
+
+ /* The actual generated setup function:
+ */
+ lp_jit_setup_triangle jit_function;
+
+ unsigned no;
+};
+
+void lp_setup_tri_fallback( const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4],
+ boolean front_facing,
+ float (*a0)[4],
+ float (*dadx)[4],
+ float (*dady)[4],
+ const struct lp_setup_variant_key *key );
+
+void lp_delete_setup_variants(struct llvmpipe_context *lp);
+
+void
+lp_dump_setup_coef( const struct lp_setup_variant_key *key,
+ const float (*sa0)[4],
+ const float (*sdadx)[4],
+ const float (*sdady)[4]);
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c
index 57b0ee57767..816518e5081 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_round.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_round.c
@@ -75,10 +75,7 @@ add_test(LLVMModuleRef module, const char *name, lp_func_t lp_func)
LLVMValueRef ret;
struct lp_build_context bld;
- bld.builder = builder;
- bld.type.floating = 1;
- bld.type.width = 32;
- bld.type.length = 4;
+ lp_build_context_init(&bld, builder, lp_float32_vec4_type());
LLVMSetFunctionCallConv(func, LLVMCCallConv);
@@ -100,9 +97,10 @@ printv(char* string, v4sf value)
f[0], f[1], f[2], f[3]);
}
-static void
+static boolean
compare(v4sf x, v4sf y)
{
+ boolean success = TRUE;
float *xp = (float *) &x;
float *yp = (float *) &y;
if (xp[0] != yp[0] ||
@@ -110,7 +108,9 @@ compare(v4sf x, v4sf y)
xp[2] != yp[2] ||
xp[3] != yp[3]) {
printf(" Incorrect result! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ \n");
+ success = FALSE;
}
+ return success;
}
@@ -171,9 +171,12 @@ test_round(unsigned verbose, FILE *fp)
LLVMDumpModule(module);
for (i = 0; i < 3; i++) {
+ /* NOTE: There are several acceptable rules for x.5 rounding: ceiling,
+ * nearest even, etc. So we avoid testing such corner cases here.
+ */
v4sf xvals[3] = {
{-10.0, -1, 0, 12.0},
- {-1.5, -0.25, 1.25, 2.5},
+ {-1.49, -0.25, 1.25, 2.51},
{-0.99, -0.01, 0.01, 0.99}
};
v4sf x = xvals[i];
@@ -191,7 +194,7 @@ test_round(unsigned verbose, FILE *fp)
y = round_func(x);
printv("C round(x) ", ref);
printv("LLVM round(x)", y);
- compare(ref, y);
+ success = success && compare(ref, y);
refp[0] = trunc(xp[0]);
refp[1] = trunc(xp[1]);
@@ -200,7 +203,7 @@ test_round(unsigned verbose, FILE *fp)
y = trunc_func(x);
printv("C trunc(x) ", ref);
printv("LLVM trunc(x)", y);
- compare(ref, y);
+ success = success && compare(ref, y);
refp[0] = floor(xp[0]);
refp[1] = floor(xp[1]);
@@ -209,7 +212,7 @@ test_round(unsigned verbose, FILE *fp)
y = floor_func(x);
printv("C floor(x) ", ref);
printv("LLVM floor(x)", y);
- compare(ref, y);
+ success = success && compare(ref, y);
refp[0] = ceil(xp[0]);
refp[1] = ceil(xp[1]);
@@ -218,7 +221,7 @@ test_round(unsigned verbose, FILE *fp)
y = ceil_func(x);
printv("C ceil(x) ", ref);
printv("LLVM ceil(x) ", y);
- compare(ref, y);
+ success = success && compare(ref, y);
}
LLVMFreeMachineCodeForFunction(engine, test_round);
@@ -247,11 +250,7 @@ test_round(unsigned verbose, FILE *fp)
boolean
test_all(unsigned verbose, FILE *fp)
{
- boolean success = TRUE;
-
- test_round(verbose, fp);
-
- return success;
+ return test_round(verbose, fp);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c
index 7ab357f162e..79939b1a393 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_sincos.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c
@@ -72,10 +72,7 @@ add_sincos_test(LLVMModuleRef module, boolean sin)
LLVMValueRef ret;
struct lp_build_context bld;
- bld.builder = builder;
- bld.type.floating = 1;
- bld.type.width = 32;
- bld.type.length = 4;
+ lp_build_context_init(&bld, builder, lp_float32_vec4_type());
LLVMSetFunctionCallConv(func, LLVMCCallConv);
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
index 2ba39052aba..e49f9c62fe7 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
@@ -289,172 +289,141 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix):
print
-def generate_ssse3():
+def generate_sse2():
print '''
#if defined(PIPE_ARCH_SSE)
#include "util/u_sse.h"
-static void
-lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst,
- const uint8_t *src, unsigned src_stride,
- unsigned x0, unsigned y0)
+static ALWAYS_INLINE void
+swz4( const __m128i * restrict x,
+ const __m128i * restrict y,
+ const __m128i * restrict z,
+ const __m128i * restrict w,
+ __m128i * restrict a,
+ __m128i * restrict b,
+ __m128i * restrict c,
+ __m128i * restrict d)
{
+ __m128i i, j, k, l;
+ __m128i m, n, o, p;
+ __m128i e, f, g, h;
+
+ m = _mm_unpacklo_epi8(*x,*y);
+ n = _mm_unpackhi_epi8(*x,*y);
+ o = _mm_unpacklo_epi8(*z,*w);
+ p = _mm_unpackhi_epi8(*z,*w);
+
+ i = _mm_unpacklo_epi16(m,n);
+ j = _mm_unpackhi_epi16(m,n);
+ k = _mm_unpacklo_epi16(o,p);
+ l = _mm_unpackhi_epi16(o,p);
+
+ e = _mm_unpacklo_epi8(i,j);
+ f = _mm_unpackhi_epi8(i,j);
+ g = _mm_unpacklo_epi8(k,l);
+ h = _mm_unpackhi_epi8(k,l);
+
+ *a = _mm_unpacklo_epi64(e,g);
+ *b = _mm_unpackhi_epi64(e,g);
+ *c = _mm_unpacklo_epi64(f,h);
+ *d = _mm_unpackhi_epi64(f,h);
+}
+
+static ALWAYS_INLINE void
+unswz4( const __m128i * restrict a,
+ const __m128i * restrict b,
+ const __m128i * restrict c,
+ const __m128i * restrict d,
+ __m128i * restrict x,
+ __m128i * restrict y,
+ __m128i * restrict z,
+ __m128i * restrict w)
+{
+ __m128i i, j, k, l;
+ __m128i m, n, o, p;
+
+ i = _mm_unpacklo_epi8(*a,*b);
+ j = _mm_unpackhi_epi8(*a,*b);
+ k = _mm_unpacklo_epi8(*c,*d);
+ l = _mm_unpackhi_epi8(*c,*d);
+
+ m = _mm_unpacklo_epi16(i,k);
+ n = _mm_unpackhi_epi16(i,k);
+ o = _mm_unpacklo_epi16(j,l);
+ p = _mm_unpackhi_epi16(j,l);
+
+ *x = _mm_unpacklo_epi64(m,n);
+ *y = _mm_unpackhi_epi64(m,n);
+ *z = _mm_unpacklo_epi64(o,p);
+ *w = _mm_unpackhi_epi64(o,p);
+}
+static void
+lp_tile_b8g8r8a8_unorm_swizzle_4ub_sse2(uint8_t * restrict dst,
+ const uint8_t * restrict src, unsigned src_stride,
+ unsigned x0, unsigned y0)
+{
+ __m128i *dst128 = (__m128i *) dst;
unsigned x, y;
- __m128i *pdst = (__m128i*) dst;
- const uint8_t *ysrc0 = src + y0*src_stride + x0*sizeof(uint32_t);
- unsigned int tile_stridex = src_stride*(TILE_VECTOR_HEIGHT - 1) - sizeof(uint32_t)*TILE_VECTOR_WIDTH;
- unsigned int tile_stridey = src_stride*TILE_VECTOR_HEIGHT;
-
- const __m128i shuffle00 = _mm_setr_epi8(0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
- const __m128i shuffle01 = _mm_setr_epi8(0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
- const __m128i shuffle02 = _mm_setr_epi8(0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
- const __m128i shuffle03 = _mm_setr_epi8(0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
-
- const __m128i shuffle10 = _mm_setr_epi8(0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
- const __m128i shuffle11 = _mm_setr_epi8(0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
- const __m128i shuffle12 = _mm_setr_epi8(0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
- const __m128i shuffle13 = _mm_setr_epi8(0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
-
- const __m128i shuffle20 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff);
- const __m128i shuffle21 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff);
- const __m128i shuffle22 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff);
- const __m128i shuffle23 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff);
-
- const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e);
- const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d);
- const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c);
- const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f);
-
- for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) {
- __m128i line0 = *(__m128i*)ysrc0;
- const uint8_t *ysrc = ysrc0 + src_stride;
- ysrc0 += tile_stridey;
-
- for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) {
- __m128i r, g, b, a, line1;
- line1 = *(__m128i*)ysrc;
- PIPE_READ_WRITE_BARRIER();
- ysrc += src_stride;
- r = _mm_shuffle_epi8(line0, shuffle00);
- g = _mm_shuffle_epi8(line0, shuffle01);
- b = _mm_shuffle_epi8(line0, shuffle02);
- a = _mm_shuffle_epi8(line0, shuffle03);
-
- line0 = *(__m128i*)ysrc;
- PIPE_READ_WRITE_BARRIER();
- ysrc += src_stride;
- r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle10));
- g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle11));
- b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle12));
- a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle13));
-
- line1 = *(__m128i*)ysrc;
- PIPE_READ_WRITE_BARRIER();
- ysrc -= tile_stridex;
- r = _mm_or_si128(r, _mm_shuffle_epi8(line0, shuffle20));
- g = _mm_or_si128(g, _mm_shuffle_epi8(line0, shuffle21));
- b = _mm_or_si128(b, _mm_shuffle_epi8(line0, shuffle22));
- a = _mm_or_si128(a, _mm_shuffle_epi8(line0, shuffle23));
-
- if (x + 1 < TILE_SIZE) {
- line0 = *(__m128i*)ysrc;
- ysrc += src_stride;
- }
-
- PIPE_READ_WRITE_BARRIER();
- r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle30));
- g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle31));
- b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle32));
- a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle33));
-
- *pdst++ = r;
- *pdst++ = g;
- *pdst++ = b;
- *pdst++ = a;
+
+ src += y0 * src_stride;
+ src += x0 * sizeof(uint32_t);
+
+ for (y = 0; y < TILE_SIZE; y += 4) {
+ const uint8_t *src_row = src;
+
+ for (x = 0; x < TILE_SIZE; x += 4) {
+ swz4((const __m128i *) (src_row + 0 * src_stride),
+ (const __m128i *) (src_row + 1 * src_stride),
+ (const __m128i *) (src_row + 2 * src_stride),
+ (const __m128i *) (src_row + 3 * src_stride),
+ dst128 + 2, /* b */
+ dst128 + 1, /* g */
+ dst128 + 0, /* r */
+ dst128 + 3); /* a */
+
+ dst128 += 4;
+ src_row += sizeof(__m128i);
}
- }
+ src += 4 * src_stride;
+ }
}
static void
-lp_tile_b8g8r8a8_unorm_unswizzle_4ub_ssse3(const uint8_t *src,
- uint8_t *dst, unsigned dst_stride,
+lp_tile_b8g8r8a8_unorm_unswizzle_4ub_sse2(const uint8_t * restrict src,
+ uint8_t * restrict dst, unsigned dst_stride,
unsigned x0, unsigned y0)
{
unsigned int x, y;
- const __m128i *psrc = (__m128i*) src;
- const __m128i *end = (__m128i*) (src + (y0 + TILE_SIZE - 1)*dst_stride + (x0 + TILE_SIZE - 1)*sizeof(uint32_t));
- uint8_t *pdst = dst + y0 * dst_stride + x0 * sizeof(uint32_t);
- __m128i c0 = *psrc++;
- __m128i c1;
-
- const __m128i shuffle00 = _mm_setr_epi8(0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff);
- const __m128i shuffle01 = _mm_setr_epi8(0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff);
- const __m128i shuffle02 = _mm_setr_epi8(0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff);
- const __m128i shuffle03 = _mm_setr_epi8(0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff);
-
- const __m128i shuffle10 = _mm_setr_epi8(0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff);
- const __m128i shuffle11 = _mm_setr_epi8(0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff);
- const __m128i shuffle12 = _mm_setr_epi8(0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff);
- const __m128i shuffle13 = _mm_setr_epi8(0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff);
-
- const __m128i shuffle20 = _mm_setr_epi8(0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff,0xff);
- const __m128i shuffle21 = _mm_setr_epi8(0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff,0xff);
- const __m128i shuffle22 = _mm_setr_epi8(0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff,0xff);
- const __m128i shuffle23 = _mm_setr_epi8(0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff,0xff);
-
- const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05);
- const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07);
- const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d);
- const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f);
-
- for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) {
- __m128i *tile = (__m128i*) pdst;
- pdst += dst_stride * TILE_VECTOR_HEIGHT;
- for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) {
- uint8_t *linep = (uint8_t*) (tile++);
- __m128i line0, line1, line2, line3;
-
- c1 = *psrc++; /* r */
- PIPE_READ_WRITE_BARRIER();
- line0 = _mm_shuffle_epi8(c0, shuffle00);
- line1 = _mm_shuffle_epi8(c0, shuffle01);
- line2 = _mm_shuffle_epi8(c0, shuffle02);
- line3 = _mm_shuffle_epi8(c0, shuffle03);
-
- c0 = *psrc++; /* g */
- PIPE_READ_WRITE_BARRIER();
- line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle10));
- line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle11));
- line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle12));
- line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle13));
-
- c1 = *psrc++; /* b */
- PIPE_READ_WRITE_BARRIER();
- line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c0, shuffle20));
- line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c0, shuffle21));
- line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c0, shuffle22));
- line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c0, shuffle23));
-
- if (psrc != end)
- c0 = *psrc++; /* a */
- PIPE_READ_WRITE_BARRIER();
- line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle30));
- line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle31));
- line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle32));
- line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle33));
-
- *(__m128i*) (linep) = line0;
- *(__m128i*) (((char*)linep) + dst_stride) = line1;
- *(__m128i*) (((char*)linep) + 2 * dst_stride) = line2;
- *(__m128i*) (((char*)linep) + 3 * dst_stride) = line3;
+ const __m128i *src128 = (const __m128i *) src;
+
+ dst += y0 * dst_stride;
+ dst += x0 * sizeof(uint32_t);
+
+ for (y = 0; y < TILE_SIZE; y += 4) {
+ const uint8_t *dst_row = dst;
+
+ for (x = 0; x < TILE_SIZE; x += 4) {
+ unswz4( &src128[2], /* b */
+ &src128[1], /* g */
+ &src128[0], /* r */
+ &src128[3], /* a */
+ (__m128i *) (dst_row + 0 * dst_stride),
+ (__m128i *) (dst_row + 1 * dst_stride),
+ (__m128i *) (dst_row + 2 * dst_stride),
+ (__m128i *) (dst_row + 3 * dst_stride));
+
+ src128 += 4;
+ dst_row += sizeof(__m128i);;
}
+
+ dst += 4 * dst_stride;
}
}
-#endif /* PIPE_ARCH_SSSE3 */
+#endif /* PIPE_ARCH_SSE */
'''
@@ -479,7 +448,7 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix):
func_name = 'lp_tile_%s_swizzle_%s' % (format.short_name(), dst_suffix)
if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM':
print '#ifdef PIPE_ARCH_SSE'
- print ' func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name)
+ print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name)
print '#else'
print ' func = %s;' % (func_name,)
print '#endif'
@@ -517,7 +486,7 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix):
func_name = 'lp_tile_%s_unswizzle_%s' % (format.short_name(), src_suffix)
if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM':
print '#ifdef PIPE_ARCH_SSE'
- print ' func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name)
+ print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name)
print '#else'
print ' func = %s;' % (func_name,)
print '#endif'
@@ -577,7 +546,7 @@ def main():
print '};'
print
- generate_ssse3()
+ generate_sse2()
channel = Channel(UNSIGNED, True, 8)
native_type = 'uint8_t'
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index ebb21a6e5a3..a9426df686f 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -236,7 +236,7 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
int ret;
ret = nouveau_channel_alloc(dev, 0xbeef0201, 0xbeef0202,
- &screen->channel);
+ 512*1024, &screen->channel);
if (ret)
return ret;
screen->device = dev;
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index ac69c7848e9..bf6a577188b 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -26,6 +26,9 @@
#define NOUVEAU_MSG(fmt, args...) \
fprintf(stderr, "nouveau: "fmt, ##args);
+#define nouveau_bo_tile_layout(nvbo) \
+ ((nvbo)->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK)
+
/* Constant buffer assignment */
#define NV50_CB_PMISC 0
#define NV50_CB_PVP 1
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index 3f3166261b1..f70c138fe1a 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -92,7 +92,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
return 1;
}
- if (!bo->tile_flags) {
+ if (!nouveau_bo_tile_layout(bo)) {
BEGIN_RING(chan, eng2d, mthd, 2);
OUT_RING (chan, format);
OUT_RING (chan, 1);
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index f973cf24b98..0cc2f4a837f 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -45,7 +45,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
WAIT_RING (chan, 14);
- if (!src_bo->tile_flags) {
+ if (!nouveau_bo_tile_layout(src_bo)) {
BEGIN_RING(chan, m2mf,
NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1);
OUT_RING (chan, 1);
@@ -64,7 +64,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
OUT_RING (chan, sz); /* copying only 1 zslice per call */
}
- if (!dst_bo->tile_flags) {
+ if (!nouveau_bo_tile_layout(dst_bo)) {
BEGIN_RING(chan, m2mf,
NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1);
OUT_RING (chan, 1);
@@ -95,14 +95,14 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2);
OUT_RELOCl(chan, src_bo, src_offset, src_reloc);
OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc);
- if (src_bo->tile_flags) {
+ if (nouveau_bo_tile_layout(src_bo)) {
BEGIN_RING(chan, m2mf,
NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN, 1);
OUT_RING (chan, (sy << 16) | (sx * cpp));
} else {
src_offset += (line_count * src_pitch);
}
- if (dst_bo->tile_flags) {
+ if (nouveau_bo_tile_layout(dst_bo)) {
BEGIN_RING(chan, m2mf,
NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT, 1);
OUT_RING (chan, (dy << 16) | (dx * cpp));
@@ -280,7 +280,7 @@ nv50_upload_sifc(struct nv50_context *nv50,
MARK_RING (chan, 32, 2); /* flush on lack of space or relocs */
- if (bo->tile_flags) {
+ if (nouveau_bo_tile_layout(bo)) {
BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5);
OUT_RING (chan, dst_format);
OUT_RING (chan, 0);
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index 145a7985da3..f78fe34790c 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -29,6 +29,7 @@
static const struct debug_named_value debug_options[] = {
{ "fp", DBG_FP, "Log fragment program compilation" },
{ "vp", DBG_VP, "Log vertex program compilation" },
+ { "pstat", DBG_P_STAT, "Log vertex/fragment program stats" },
{ "draw", DBG_DRAW, "Log draw calls" },
{ "swtcl", DBG_SWTCL, "Log SWTCL-specific info" },
{ "rsblock", DBG_RS_BLOCK, "Log rasterizer registers" },
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index d9d4a9304df..c91532eb7b3 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -378,7 +378,8 @@ static void r300_translate_fragment_shader(
/* Setup the compiler. */
memset(&compiler, 0, sizeof(compiler));
rc_init(&compiler.Base);
- compiler.Base.Debug = DBG_ON(r300, DBG_FP);
+ DBG_ON(r300, DBG_FP) ? compiler.Base.Debug |= RC_DBG_LOG : 0;
+ DBG_ON(r300, DBG_P_STAT) ? compiler.Base.Debug |= RC_DBG_STATS : 0;
compiler.code = &shader->code;
compiler.state = shader->compare_state;
@@ -395,7 +396,7 @@ static void r300_translate_fragment_shader(
find_output_registers(&compiler, shader);
- if (compiler.Base.Debug) {
+ if (compiler.Base.Debug & RC_DBG_LOG) {
DBG(r300, DBG_FP, "r300: Initial fragment program\n");
tgsi_dump(tokens, 0);
}
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 7f41ff0e2ec..d445df408dd 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -124,6 +124,9 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_INDEP_BLEND_FUNC:
case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ case PIPE_CAP_STREAM_OUTPUT:
+ case PIPE_CAP_PRIMITIVE_RESTART:
return 0;
/* Texturing. */
@@ -153,8 +156,8 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
return 0;
default:
- fprintf(stderr, "r300: Implementation error: Bad param %d\n",
- param);
+ debug_printf("r300: Warning: Unknown CAP %d in get_param.\n",
+ param);
return 0;
}
}
@@ -264,8 +267,8 @@ static float r300_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
return 16.0f;
default:
- fprintf(stderr, "r300: Implementation error: Bad paramf %d\n",
- param);
+ debug_printf("r300: Warning: Unknown CAP %d in get_paramf.\n",
+ param);
return 0.0f;
}
}
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index dc2bc7e8279..8b7f1fab61b 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -102,6 +102,7 @@ r300_winsys_screen(struct pipe_screen *screen) {
#define DBG_NO_CBZB (1 << 21)
/* Statistics. */
#define DBG_STATS (1 << 24)
+#define DBG_P_STAT (1 << 25)
/*@}*/
static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags)
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 904736ef06d..730ef7a3ee2 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -620,14 +620,19 @@ static uint32_t r300_get_border_color(enum pipe_format format,
}
break;
- default:
- /* I think the fat formats (16, 32) are specified
- * as the 8-bit ones. I am not sure how compressed formats
- * work here. */
+ case 8:
r = ((float_to_ubyte(border_swizzled[0]) & 0xff) << 0) |
((float_to_ubyte(border_swizzled[1]) & 0xff) << 8) |
((float_to_ubyte(border_swizzled[2]) & 0xff) << 16) |
((float_to_ubyte(border_swizzled[3]) & 0xff) << 24);
+ break;
+
+ case 16:
+ r = ((float_to_ubyte(border_swizzled[2]) & 0xff) << 0) |
+ ((float_to_ubyte(border_swizzled[1]) & 0xff) << 8) |
+ ((float_to_ubyte(border_swizzled[0]) & 0xff) << 16) |
+ ((float_to_ubyte(border_swizzled[3]) & 0xff) << 24);
+ break;
}
return r;
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index e2b9af9d018..65696555ac3 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -202,7 +202,8 @@ void r300_translate_vertex_shader(struct r300_context *r300,
memset(&compiler, 0, sizeof(compiler));
rc_init(&compiler.Base);
- compiler.Base.Debug = DBG_ON(r300, DBG_VP);
+ DBG_ON(r300, DBG_VP) ? compiler.Base.Debug |= RC_DBG_LOG : 0;
+ DBG_ON(r300, DBG_P_STAT) ? compiler.Base.Debug |= RC_DBG_STATS : 0;
compiler.code = &vs->code;
compiler.UserData = vs;
compiler.Base.is_r500 = r300->screen->caps.is_r500;
@@ -214,7 +215,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
compiler.Base.max_alu_insts = r300->screen->caps.is_r500 ? 1024 : 256;
compiler.Base.remove_unused_constants = TRUE;
- if (compiler.Base.Debug) {
+ if (compiler.Base.Debug & RC_DBG_LOG) {
DBG(r300, DBG_VP, "r300: Initial vertex program\n");
tgsi_dump(vs->state.tokens, 0);
}
diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile
index ede0bb2ec45..a484f38e9f1 100644
--- a/src/gallium/drivers/r600/Makefile
+++ b/src/gallium/drivers/r600/Makefile
@@ -19,6 +19,8 @@ C_SOURCES = \
r600_texture.c \
r700_asm.c \
evergreen_state.c \
- eg_asm.c
+ eg_asm.c \
+ r600_translate.c \
+ r600_state_common.c
include ../../Makefile.template
diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript
index b6d9b7d8d22..bf0ad8571ba 100644
--- a/src/gallium/drivers/r600/SConscript
+++ b/src/gallium/drivers/r600/SConscript
@@ -18,6 +18,7 @@ r600 = env.ConvenienceLibrary(
source = [
'r600_asm.c',
'r600_buffer.c',
+ 'r600_blit.c',
'r600_helper.c',
'r600_pipe.c',
'r600_query.c',
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 52b7189e9e5..c30f09c394b 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -36,8 +36,13 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
- S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode);
+ S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank);
bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
+ S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) |
S_SQ_CF_ALU_WORD1_BARRIER(1) |
S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
break;
diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h
index 81094904485..be81c28b43f 100644
--- a/src/gallium/drivers/r600/eg_state_inlines.h
+++ b/src/gallium/drivers/r600/eg_state_inlines.h
@@ -25,6 +25,7 @@
#include "util/u_format.h"
#include "evergreend.h"
+#include "r600_formats.h"
static INLINE uint32_t r600_translate_blend_function(int blend_func)
{
@@ -276,6 +277,14 @@ static inline uint32_t r600_translate_dbformat(enum pipe_format format)
}
}
+static inline uint32_t r600_translate_stencilformat(enum pipe_format format)
+{
+ if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED)
+ return 1;
+ else
+ return 0;
+}
+
static inline uint32_t r600_translate_colorswap(enum pipe_format format)
{
switch (format) {
@@ -301,6 +310,12 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_Z16_UNORM:
return V_028C70_SWAP_STD;
+
+ case PIPE_FORMAT_R8G8_UNORM:
+ return V_028C70_SWAP_STD;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return V_028C70_SWAP_STD;
/* 32-bit buffers. */
case PIPE_FORMAT_A8B8G8R8_SRGB:
@@ -338,6 +353,9 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
return V_028C70_SWAP_STD_REV;
+ case PIPE_FORMAT_R16G16_UNORM:
+ return V_028C70_SWAP_STD;
+
/* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
@@ -382,6 +400,12 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_Z16_UNORM:
return V_028C70_COLOR_16;
+ case PIPE_FORMAT_R8G8_UNORM:
+ return V_028C70_COLOR_8_8;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return V_028C70_COLOR_16;
+
/* 32-bit buffers. */
case PIPE_FORMAT_A8B8G8R8_SRGB:
case PIPE_FORMAT_A8B8G8R8_UNORM:
@@ -419,6 +443,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
return V_028C70_COLOR_16_16_FLOAT;
case PIPE_FORMAT_R16G16_SSCALED:
+ case PIPE_FORMAT_R16G16_UNORM:
return V_028C70_COLOR_16_16;
/* 64-bit buffers. */
@@ -499,32 +524,32 @@ static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format)
case 16:
switch (desc->nr_channels) {
case 1:
- result = V_030008_FMT_16_FLOAT;
+ result = FMT_16_FLOAT;
break;
case 2:
- result = V_030008_FMT_16_16_FLOAT;
+ result = FMT_16_16_FLOAT;
break;
case 3:
- result = V_030008_FMT_16_16_16_FLOAT;
+ result = FMT_16_16_16_FLOAT;
break;
case 4:
- result = V_030008_FMT_16_16_16_16_FLOAT;
+ result = FMT_16_16_16_16_FLOAT;
break;
}
break;
case 32:
switch (desc->nr_channels) {
case 1:
- result = V_030008_FMT_32_FLOAT;
+ result = FMT_32_FLOAT;
break;
case 2:
- result = V_030008_FMT_32_32_FLOAT;
+ result = FMT_32_32_FLOAT;
break;
case 3:
- result = V_030008_FMT_32_32_32_FLOAT;
+ result = FMT_32_32_32_FLOAT;
break;
case 4:
- result = V_030008_FMT_32_32_32_32_FLOAT;
+ result = FMT_32_32_32_32_FLOAT;
break;
}
break;
@@ -540,48 +565,48 @@ static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format)
case 8:
switch (desc->nr_channels) {
case 1:
- result = V_030008_FMT_8;
+ result = FMT_8;
break;
case 2:
- result = V_030008_FMT_8_8;
+ result = FMT_8_8;
break;
case 3:
// result = V_038008_FMT_8_8_8; /* fails piglit draw-vertices test */
// break;
case 4:
- result = V_030008_FMT_8_8_8_8;
+ result = FMT_8_8_8_8;
break;
}
break;
case 16:
switch (desc->nr_channels) {
case 1:
- result = V_030008_FMT_16;
+ result = FMT_16;
break;
case 2:
- result = V_030008_FMT_16_16;
+ result = FMT_16_16;
break;
case 3:
// result = V_038008_FMT_16_16_16; /* fails piglit draw-vertices test */
// break;
case 4:
- result = V_030008_FMT_16_16_16_16;
+ result = FMT_16_16_16_16;
break;
}
break;
case 32:
switch (desc->nr_channels) {
case 1:
- result = V_030008_FMT_32;
+ result = FMT_32;
break;
case 2:
- result = V_030008_FMT_32_32;
+ result = FMT_32_32;
break;
case 3:
- result = V_030008_FMT_32_32_32;
+ result = FMT_32_32_32;
break;
case 4:
- result = V_030008_FMT_32_32_32_32;
+ result = FMT_32_32_32_32;
break;
}
break;
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 147d4f372e6..72223485067 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -39,6 +39,7 @@
#include <util/u_pack_color.h>
#include <util/u_memory.h>
#include <util/u_inlines.h>
+#include <util/u_framebuffer.h>
#include <pipebuffer/pb_buffer.h>
#include "r600.h"
#include "evergreend.h"
@@ -136,20 +137,6 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx,
return rstate;
}
-static void evergreen_bind_blend_state(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state;
- struct r600_pipe_state *rstate;
-
- if (state == NULL)
- return;
- rstate = &blend->rstate;
- rctx->states[rstate->id] = rstate;
- rctx->cb_target_mask = blend->cb_target_mask;
- r600_context_pipe_state_set(&rctx->ctx, rstate);
-}
-
static void *evergreen_create_dsa_state(struct pipe_context *ctx,
const struct pipe_depth_stencil_alpha_state *state)
{
@@ -241,6 +228,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
struct r600_pipe_state *rstate;
unsigned tmp;
unsigned prov_vtx = 1, polygon_dual_mode;
+ unsigned clip_rule;
if (rs == NULL) {
return NULL;
@@ -250,6 +238,8 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
rs->flatshade = state->flatshade;
rs->sprite_coord_enable = state->sprite_coord_enable;
+ clip_rule = state->scissor ? 0xAAAA : 0xFFFF;
+
/* offset */
rs->offset_units = state->offset_units;
rs->offset_scale = state->offset_scale * 12.0f;
@@ -257,7 +247,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
rstate->id = R600_PIPE_STATE_RASTERIZER;
if (state->flatshade_first)
prov_vtx = 0;
- tmp = 0x00000001;
+ tmp = S_0286D4_FLAT_SHADE_ENA(1);
if (state->sprite_coord_enable) {
tmp |= S_0286D4_PNT_SPRITE_ENA(1) |
S_0286D4_PNT_SPRITE_OVRD_X(2) |
@@ -299,39 +289,10 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, 0x00000005, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL);
return rstate;
}
-static void evergreen_bind_rs_state(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state;
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- if (state == NULL)
- return;
-
- rctx->flatshade = rs->flatshade;
- rctx->sprite_coord_enable = rs->sprite_coord_enable;
- rctx->rasterizer = rs;
-
- rctx->states[rs->rstate.id] = &rs->rstate;
- r600_context_pipe_state_set(&rctx->ctx, &rs->rstate);
-}
-
-static void evergreen_delete_rs_state(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state;
-
- if (rctx->rasterizer == rs) {
- rctx->rasterizer = NULL;
- }
- if (rctx->states[rs->rstate.id] == &rs->rstate) {
- rctx->states[rs->rstate.id] = NULL;
- }
- free(rs);
-}
-
static void *evergreen_create_sampler_state(struct pipe_context *ctx,
const struct pipe_sampler_state *state)
{
@@ -372,28 +333,6 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
return rstate;
}
-static void *evergreen_create_vertex_elements(struct pipe_context *ctx,
- unsigned count,
- const struct pipe_vertex_element *elements)
-{
- struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element);
-
- assert(count < 32);
- v->count = count;
- v->refcount = 1;
- memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element));
- return v;
-}
-
-static void evergreen_sampler_view_destroy(struct pipe_context *ctx,
- struct pipe_sampler_view *state)
-{
- struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state;
-
- pipe_resource_reference(&state->texture, NULL);
- FREE(resource);
-}
-
static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_context *ctx,
struct pipe_resource *texture,
const struct pipe_sampler_view *state)
@@ -424,15 +363,15 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
swizzle[1] = state->swizzle_g;
swizzle[2] = state->swizzle_b;
swizzle[3] = state->swizzle_a;
- format = r600_translate_texformat(texture->format,
+ format = r600_translate_texformat(state->format,
swizzle,
&word4, &yuv_format);
if (format == ~0) {
format = 0;
}
- desc = util_format_description(texture->format);
+ desc = util_format_description(state->format);
if (desc == NULL) {
- R600_ERR("unknow format %d\n", texture->format);
+ R600_ERR("unknow format %d\n", state->format);
}
tmp = (struct r600_resource_texture*)texture;
rbuffer = &tmp->resource;
@@ -446,7 +385,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
bo[0] = rbuffer->bo;
bo[1] = rbuffer->bo;
}
- pitch = align(tmp->pitch[0] / tmp->bpt, 8);
+ pitch = align(tmp->pitch_in_pixels[0], 8);
/* FIXME properly handle first level != 0 */
r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0,
@@ -464,7 +403,6 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
word4 | S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) |
S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) |
- S_030010_REQUEST_SIZE(1) |
S_030010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
S_030014_LAST_LEVEL(state->last_level) |
@@ -481,32 +419,42 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned count,
struct pipe_sampler_view **views)
{
- /* TODO */
- assert(1);
-}
-
-static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
- struct pipe_sampler_view **views)
-{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views;
for (int i = 0; i < count; i++) {
if (resource[i]) {
- evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i);
+ evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS);
}
}
}
-static void evergreen_bind_state(struct pipe_context *ctx, void *state)
+static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
+ struct pipe_sampler_view **views)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state *rstate = (struct r600_pipe_state *)state;
-
- if (state == NULL)
- return;
- rctx->states[rstate->id] = rstate;
- r600_context_pipe_state_set(&rctx->ctx, rstate);
+ struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ if (&rctx->ps_samplers.views[i]->base != views[i]) {
+ if (resource[i])
+ evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i);
+ else
+ evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+
+ pipe_sampler_view_reference(
+ (struct pipe_sampler_view **)&rctx->ps_samplers.views[i],
+ views[i]);
+ }
+ }
+ for (i = count; i < NUM_TEX_UNITS; i++) {
+ if (rctx->ps_samplers.views[i]) {
+ evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+ pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL);
+ }
+ }
+ rctx->ps_samplers.n_views = count;
}
static void evergreen_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states)
@@ -514,6 +462,10 @@ static void evergreen_bind_ps_sampler(struct pipe_context *ctx, unsigned count,
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state **rstates = (struct r600_pipe_state **)states;
+
+ memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count);
+ rctx->ps_samplers.n_samplers = count;
+
for (int i = 0; i < count; i++) {
evergreen_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i);
}
@@ -524,37 +476,11 @@ static void evergreen_bind_vs_sampler(struct pipe_context *ctx, unsigned count,
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state **rstates = (struct r600_pipe_state **)states;
- /* TODO implement */
for (int i = 0; i < count; i++) {
evergreen_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i);
}
}
-static void evergreen_delete_state(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state *rstate = (struct r600_pipe_state *)state;
-
- if (rctx->states[rstate->id] == rstate) {
- rctx->states[rstate->id] = NULL;
- }
- for (int i = 0; i < rstate->nregs; i++) {
- r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL);
- }
- free(rstate);
-}
-
-static void evergreen_delete_vertex_element(struct pipe_context *ctx, void *state)
-{
- struct r600_vertex_element *v = (struct r600_vertex_element*)state;
-
- if (v == NULL)
- return;
- if (--v->refcount)
- return;
- free(v);
-}
-
static void evergreen_set_clip_state(struct pipe_context *ctx,
const struct pipe_clip_state *state)
{
@@ -590,19 +516,6 @@ static void evergreen_set_clip_state(struct pipe_context *ctx,
r600_context_pipe_state_set(&rctx->ctx, rstate);
}
-static void evergreen_bind_vertex_elements(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_vertex_element *v = (struct r600_vertex_element*)state;
-
- evergreen_delete_vertex_element(ctx, rctx->vertex_elements);
- rctx->vertex_elements = v;
- if (v) {
- v->refcount++;
-// rctx->vs_rebuild = TRUE;
- }
-}
-
static void evergreen_set_polygon_stipple(struct pipe_context *ctx,
const struct pipe_poly_stipple *state)
{
@@ -626,18 +539,6 @@ static void evergreen_set_scissor_state(struct pipe_context *ctx,
tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny);
br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy);
r600_pipe_state_add_reg(rstate,
- R_028030_PA_SC_SCREEN_SCISSOR_TL, tl,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028034_PA_SC_SCREEN_SCISSOR_BR, br,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028204_PA_SC_WINDOW_SCISSOR_TL, tl,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028208_PA_SC_WINDOW_SCISSOR_BR, br,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
R_028210_PA_SC_CLIPRECT_0_TL, tl,
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
@@ -661,15 +562,6 @@ static void evergreen_set_scissor_state(struct pipe_context *ctx,
r600_pipe_state_add_reg(rstate,
R_02822C_PA_SC_CLIPRECT_3_BR, br,
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028200_PA_SC_WINDOW_OFFSET, 0x00000000,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_02820C_PA_SC_CLIPRECT_RULE, 0x0000FFFF,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028230_PA_SC_EDGERULE, 0xAAAAAAAA,
- 0xFFFFFFFF, NULL);
free(rctx->states[R600_PIPE_STATE_SCISSOR]);
rctx->states[R600_PIPE_STATE_SCISSOR] = rstate;
@@ -733,6 +625,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
{
struct r600_resource_texture *rtex;
struct r600_resource *rbuffer;
+ struct r600_surface *surf;
unsigned level = state->cbufs[cb]->level;
unsigned pitch, slice;
unsigned color_info;
@@ -740,14 +633,15 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
const struct util_format_description *desc;
struct r600_bo *bo[3];
+ surf = (struct r600_surface *)state->cbufs[cb];
rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
rbuffer = &rtex->resource;
bo[0] = rbuffer->bo;
bo[1] = rbuffer->bo;
bo[2] = rbuffer->bo;
- pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
- slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1;
+ pitch = rtex->pitch_in_pixels[level] / 8 - 1;
+ slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
ntype = 0;
desc = util_format_description(rtex->resource.base.b.format);
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
@@ -794,32 +688,49 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
{
struct r600_resource_texture *rtex;
struct r600_resource *rbuffer;
+ struct r600_surface *surf;
unsigned level;
- unsigned pitch, slice, format;
+ unsigned pitch, slice, format, stencil_format;
if (state->zsbuf == NULL)
return;
+ level = state->zsbuf->level;
+
+ surf = (struct r600_surface *)state->zsbuf;
rtex = (struct r600_resource_texture*)state->zsbuf->texture;
rtex->tiled = 1;
- rtex->array_mode = 2;
+ rtex->array_mode[level] = 2;
rtex->tile_type = 1;
rtex->depth = 1;
rbuffer = &rtex->resource;
- level = state->zsbuf->level;
- pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
- slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1;
+ pitch = rtex->pitch_in_pixels[level] / 8 - 1;
+ slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
format = r600_translate_dbformat(state->zsbuf->texture->format);
+ stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format);
r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE,
(state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE,
(state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
-// r600_pipe_state_add_reg(rstate, R_028014_DB_HTILE_DATA_BASE, state->zsbuf->offset >> 8, 0xFFFFFFFF, rbuffer->bo);
+
+ if (stencil_format) {
+ uint32_t stencil_offset;
+
+ stencil_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255;
+ r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE,
+ (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE,
+ (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ }
+
r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO,
+ S_028044_FORMAT(stencil_format), 0xFFFFFFFF, rbuffer->bo);
+
r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO,
- S_028040_ARRAY_MODE(rtex->array_mode) | S_028040_FORMAT(format),
+ S_028040_ARRAY_MODE(rtex->array_mode[level]) | S_028040_FORMAT(format),
0xFFFFFFFF, rbuffer->bo);
r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE,
S_028058_PITCH_TILE_MAX(pitch),
@@ -841,14 +752,9 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
/* unreference old buffer and reference new one */
rstate->id = R600_PIPE_STATE_FRAMEBUFFER;
- for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
- pipe_surface_reference(&rctx->framebuffer.cbufs[i], NULL);
- }
- for (int i = 0; i < state->nr_cbufs; i++) {
- pipe_surface_reference(&rctx->framebuffer.cbufs[i], state->cbufs[i]);
- }
- pipe_surface_reference(&rctx->framebuffer.zsbuf, state->zsbuf);
- rctx->framebuffer = *state;
+
+ util_copy_framebuffer_state(&rctx->framebuffer, state);
+
rctx->pframebuffer = &rctx->framebuffer;
/* build states */
@@ -881,6 +787,24 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
r600_pipe_state_add_reg(rstate,
R_028254_PA_SC_VPORT_SCISSOR_0_BR, br,
0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028030_PA_SC_SCREEN_SCISSOR_TL, tl,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028034_PA_SC_SCREEN_SCISSOR_BR, br,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028204_PA_SC_WINDOW_SCISSOR_TL, tl,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028208_PA_SC_WINDOW_SCISSOR_BR, br,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028200_PA_SC_WINDOW_OFFSET, 0x00000000,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028230_PA_SC_EDGERULE, 0xAAAAAAAA,
+ 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK,
0x00000000, target_mask, NULL);
@@ -896,40 +820,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
r600_context_pipe_state_set(&rctx->ctx, rstate);
}
-static void evergreen_set_index_buffer(struct pipe_context *ctx,
- const struct pipe_index_buffer *ib)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- if (ib) {
- pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer);
- memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer));
- } else {
- pipe_resource_reference(&rctx->index_buffer.buffer, NULL);
- memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer));
- }
-
- /* TODO make this more like a state */
-}
-
-static void evergreen_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
- const struct pipe_vertex_buffer *buffers)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- for (int i = 0; i < rctx->nvertex_buffer; i++) {
- pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL);
- }
- memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count);
- for (int i = 0; i < count; i++) {
- rctx->vertex_buffer[i].buffer = NULL;
- if (r600_buffer_is_user_buffer(buffers[i].buffer))
- rctx->any_user_vbs = TRUE;
- pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer);
- }
- rctx->nvertex_buffer = count;
-}
-
static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
struct pipe_resource *buffer)
{
@@ -965,84 +855,31 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader,
}
}
-static void *evergreen_create_shader_state(struct pipe_context *ctx,
- const struct pipe_shader_state *state)
-{
- struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader);
- int r;
-
- r = r600_pipe_shader_create(ctx, shader, state->tokens);
- if (r) {
- return NULL;
- }
- return shader;
-}
-
-static void evergreen_bind_ps_shader(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- /* TODO delete old shader */
- rctx->ps_shader = (struct r600_pipe_shader *)state;
-}
-
-static void evergreen_bind_vs_shader(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- /* TODO delete old shader */
- rctx->vs_shader = (struct r600_pipe_shader *)state;
-}
-
-static void evergreen_delete_ps_shader(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state;
-
- if (rctx->ps_shader == shader) {
- rctx->ps_shader = NULL;
- }
- /* TODO proper delete */
- free(shader);
-}
-
-static void evergreen_delete_vs_shader(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state;
-
- if (rctx->vs_shader == shader) {
- rctx->vs_shader = NULL;
- }
- /* TODO proper delete */
- free(shader);
-}
-
void evergreen_init_state_functions(struct r600_pipe_context *rctx)
{
rctx->context.create_blend_state = evergreen_create_blend_state;
rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state;
- rctx->context.create_fs_state = evergreen_create_shader_state;
+ rctx->context.create_fs_state = r600_create_shader_state;
rctx->context.create_rasterizer_state = evergreen_create_rs_state;
rctx->context.create_sampler_state = evergreen_create_sampler_state;
rctx->context.create_sampler_view = evergreen_create_sampler_view;
- rctx->context.create_vertex_elements_state = evergreen_create_vertex_elements;
- rctx->context.create_vs_state = evergreen_create_shader_state;
- rctx->context.bind_blend_state = evergreen_bind_blend_state;
- rctx->context.bind_depth_stencil_alpha_state = evergreen_bind_state;
+ rctx->context.create_vertex_elements_state = r600_create_vertex_elements;
+ rctx->context.create_vs_state = r600_create_shader_state;
+ rctx->context.bind_blend_state = r600_bind_blend_state;
+ rctx->context.bind_depth_stencil_alpha_state = r600_bind_state;
rctx->context.bind_fragment_sampler_states = evergreen_bind_ps_sampler;
- rctx->context.bind_fs_state = evergreen_bind_ps_shader;
- rctx->context.bind_rasterizer_state = evergreen_bind_rs_state;
- rctx->context.bind_vertex_elements_state = evergreen_bind_vertex_elements;
+ rctx->context.bind_fs_state = r600_bind_ps_shader;
+ rctx->context.bind_rasterizer_state = r600_bind_rs_state;
+ rctx->context.bind_vertex_elements_state = r600_bind_vertex_elements;
rctx->context.bind_vertex_sampler_states = evergreen_bind_vs_sampler;
- rctx->context.bind_vs_state = evergreen_bind_vs_shader;
- rctx->context.delete_blend_state = evergreen_delete_state;
- rctx->context.delete_depth_stencil_alpha_state = evergreen_delete_state;
- rctx->context.delete_fs_state = evergreen_delete_ps_shader;
- rctx->context.delete_rasterizer_state = evergreen_delete_rs_state;
- rctx->context.delete_sampler_state = evergreen_delete_state;
- rctx->context.delete_vertex_elements_state = evergreen_delete_vertex_element;
- rctx->context.delete_vs_state = evergreen_delete_vs_shader;
+ rctx->context.bind_vs_state = r600_bind_vs_shader;
+ rctx->context.delete_blend_state = r600_delete_state;
+ rctx->context.delete_depth_stencil_alpha_state = r600_delete_state;
+ rctx->context.delete_fs_state = r600_delete_ps_shader;
+ rctx->context.delete_rasterizer_state = r600_delete_rs_state;
+ rctx->context.delete_sampler_state = r600_delete_state;
+ rctx->context.delete_vertex_elements_state = r600_delete_vertex_element;
+ rctx->context.delete_vs_state = r600_delete_vs_shader;
rctx->context.set_blend_color = evergreen_set_blend_color;
rctx->context.set_clip_state = evergreen_set_clip_state;
rctx->context.set_constant_buffer = evergreen_set_constant_buffer;
@@ -1052,11 +889,11 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx)
rctx->context.set_sample_mask = evergreen_set_sample_mask;
rctx->context.set_scissor_state = evergreen_set_scissor_state;
rctx->context.set_stencil_ref = evergreen_set_stencil_ref;
- rctx->context.set_vertex_buffers = evergreen_set_vertex_buffers;
- rctx->context.set_index_buffer = evergreen_set_index_buffer;
+ rctx->context.set_vertex_buffers = r600_set_vertex_buffers;
+ rctx->context.set_index_buffer = r600_set_index_buffer;
rctx->context.set_vertex_sampler_views = evergreen_set_vs_sampler_view;
rctx->context.set_viewport_state = evergreen_set_viewport_state;
- rctx->context.sampler_view_destroy = evergreen_sampler_view_destroy;
+ rctx->context.sampler_view_destroy = r600_sampler_view_destroy;
}
void evergreen_init_config(struct r600_pipe_context *rctx)
@@ -1339,6 +1176,12 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
struct r600_draw rdraw;
struct r600_pipe_state vgt;
struct r600_drawl draw;
+ boolean translate = FALSE;
+
+ if (rctx->vertex_elements->incompatible_layout) {
+ r600_begin_vertex_translate(rctx);
+ translate = TRUE;
+ }
if (rctx->any_user_vbs) {
r600_upload_user_buffers(rctx);
@@ -1415,11 +1258,11 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
vertex_buffer->buffer_offset +
r600_bo_offset(rbuffer->bo);
- format = r600_translate_vertex_data_type(rctx->vertex_elements->elements[i].src_format);
+ format = r600_translate_vertex_data_type(rctx->vertex_elements->hw_format[i]);
word2 = format | S_030008_STRIDE(vertex_buffer->stride);
- word3 = r600_translate_vertex_data_swizzle(rctx->vertex_elements->elements[i].src_format);
+ word3 = r600_translate_vertex_data_swizzle(rctx->vertex_elements->hw_format[i]);
r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo);
r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL);
@@ -1500,6 +1343,9 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
}
evergreen_context_draw(&rctx->ctx, &rdraw);
+ if (translate)
+ r600_end_vertex_translate(rctx);
+
pipe_resource_reference(&draw.index_buffer, NULL);
}
@@ -1508,23 +1354,39 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state *rstate = &shader->rstate;
struct r600_shader *rshader = &shader->shader;
- unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z;
- boolean have_pos = FALSE, have_face = FALSE;
+ unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
+ int pos_index = -1, face_index = -1;
+ int ninterp = 0;
+ boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE;
+ unsigned spi_baryc_cntl;
/* clear previous register */
rstate->nregs = 0;
for (i = 0; i < rshader->ninput; i++) {
tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
+ /* evergreen NUM_INTERP only contains values interpolated into the LDS,
+ POSITION goes via GPRs from the SC so isn't counted */
if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
- have_pos = TRUE;
+ pos_index = i;
+ else if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
+ face_index = i;
+ else {
+ if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR ||
+ rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
+ ninterp++;
+ if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
+ have_linear = TRUE;
+ if (rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
+ have_perspective = TRUE;
+ if (rshader->input[i].centroid)
+ have_centroid = TRUE;
+ }
if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
}
- if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
- have_face = TRUE;
if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
tmp |= S_028644_PT_SPRITE_TEX(1);
@@ -1532,17 +1394,23 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
}
for (i = 0; i < rshader->noutput; i++) {
- if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
+ if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
r600_pipe_state_add_reg(rstate,
R_02880C_DB_SHADER_CONTROL,
S_02880C_Z_EXPORT_ENABLE(1),
S_02880C_Z_EXPORT_ENABLE(1), NULL);
+ if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
+ r600_pipe_state_add_reg(rstate,
+ R_02880C_DB_SHADER_CONTROL,
+ S_02880C_STENCIL_EXPORT_ENABLE(1),
+ S_02880C_STENCIL_EXPORT_ENABLE(1), NULL);
}
exports_ps = 0;
num_cout = 0;
for (i = 0; i < rshader->noutput; i++) {
- if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
+ if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
+ rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
exports_ps |= 1;
else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
num_cout++;
@@ -1554,19 +1422,49 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
exports_ps = 2;
}
- spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
- S_0286CC_PERSP_GRADIENT_ENA(1);
+ if (ninterp == 0) {
+ ninterp = 1;
+ have_perspective = TRUE;
+ }
+
+ spi_ps_in_control_0 = S_0286CC_NUM_INTERP(ninterp) |
+ S_0286CC_PERSP_GRADIENT_ENA(have_perspective) |
+ S_0286CC_LINEAR_GRADIENT_ENA(have_linear);
spi_input_z = 0;
- if (have_pos) {
- spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1);
+ if (pos_index != -1) {
+ spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) |
+ S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
+ S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr);
spi_input_z |= 1;
}
+
+ spi_ps_in_control_1 = 0;
+ if (face_index != -1) {
+ spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
+ S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
+ }
+
+ spi_baryc_cntl = 0;
+ if (have_perspective)
+ spi_baryc_cntl |= S_0286E0_PERSP_CENTER_ENA(1) |
+ S_0286E0_PERSP_CENTROID_ENA(have_centroid);
+ if (have_linear)
+ spi_baryc_cntl |= S_0286E0_LINEAR_CENTER_ENA(1) |
+ S_0286E0_LINEAR_CENTROID_ENA(have_centroid);
+
r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0,
spi_ps_in_control_0, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1,
- S_0286D0_FRONT_FACE_ENA(have_face), 0xFFFFFFFF, NULL);
+ spi_ps_in_control_1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0286E4_SPI_PS_IN_CONTROL_2,
+ 0, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
+ R_0286E0_SPI_BARYC_CNTL,
+ spi_baryc_cntl,
+ 0xFFFFFFFF, NULL);
+
+ r600_pipe_state_add_reg(rstate,
R_028840_SQ_PGM_START_PS,
(r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo);
r600_pipe_state_add_reg(rstate,
@@ -1581,11 +1479,6 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
r600_pipe_state_add_reg(rstate,
R_02884C_SQ_PGM_EXPORTS_PS,
exports_ps, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_0286E0_SPI_BARYC_CNTL,
- S_0286E0_PERSP_CENTROID_ENA(1) |
- S_0286E0_LINEAR_CENTROID_ENA(1),
- 0xFFFFFFFF, NULL);
if (rshader->uses_kill) {
/* only set some bits here, the other bits are set in the dsa state */
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index 9971dded782..8e96f9355e6 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -686,6 +686,9 @@
#define S_02880C_Z_EXPORT_ENABLE(x) (((x) & 0x1) << 0)
#define G_02880C_Z_EXPORT_ENABLE(x) (((x) >> 0) & 0x1)
#define C_02880C_Z_EXPORT_ENABLE 0xFFFFFFFE
+#define S_02880C_STENCIL_EXPORT_ENABLE(x) (((x) & 0x1) << 1)
+#define G_02880C_STENCIL_EXPORT_ENABLE(x) (((x) >> 1) & 0x1)
+#define C_02880C_STENCIL_EXPORT_ENABLE 0xFFFFFFFD
#define S_02880C_Z_ORDER(x) (((x) & 0x3) << 4)
#define G_02880C_Z_ORDER(x) (((x) >> 4) & 0x3)
#define C_02880C_Z_ORDER 0xFFFFFCFF
@@ -984,9 +987,6 @@
#define S_030010_ENDIAN_SWAP(x) (((x) & 0x3) << 12)
#define G_030010_ENDIAN_SWAP(x) (((x) >> 12) & 0x3)
#define C_030010_ENDIAN_SWAP 0xFFFFCFFF
-#define S_030010_REQUEST_SIZE(x) (((x) & 0x3) << 14)
-#define G_030010_REQUEST_SIZE(x) (((x) >> 14) & 0x3)
-#define C_030010_REQUEST_SIZE 0xFFFF3FFF
#define S_030010_DST_SEL_X(x) (((x) & 0x7) << 16)
#define G_030010_DST_SEL_X(x) (((x) >> 16) & 0x7)
#define C_030010_DST_SEL_X 0xFFF8FFFF
@@ -1050,45 +1050,6 @@
#define S_030008_DATA_FORMAT(x) (((x) & 0x3F) << 20)
#define G_030008_DATA_FORMAT(x) (((x) >> 20) & 0x3F)
#define C_030008_DATA_FORMAT 0xFC0FFFFF
-#define V_030008_FMT_INVALID 0x00000000
-#define V_030008_FMT_8 0x00000001
-#define V_030008_FMT_4_4 0x00000002
-#define V_030008_FMT_3_3_2 0x00000003
-#define V_030008_FMT_16 0x00000005
-#define V_030008_FMT_16_FLOAT 0x00000006
-#define V_030008_FMT_8_8 0x00000007
-#define V_030008_FMT_5_6_5 0x00000008
-#define V_030008_FMT_6_5_5 0x00000009
-#define V_030008_FMT_1_5_5_5 0x0000000A
-#define V_030008_FMT_4_4_4_4 0x0000000B
-#define V_030008_FMT_5_5_5_1 0x0000000C
-#define V_030008_FMT_32 0x0000000D
-#define V_030008_FMT_32_FLOAT 0x0000000E
-#define V_030008_FMT_16_16 0x0000000F
-#define V_030008_FMT_16_16_FLOAT 0x00000010
-#define V_030008_FMT_8_24 0x00000011
-#define V_030008_FMT_8_24_FLOAT 0x00000012
-#define V_030008_FMT_24_8 0x00000013
-#define V_030008_FMT_24_8_FLOAT 0x00000014
-#define V_030008_FMT_10_11_11 0x00000015
-#define V_030008_FMT_10_11_11_FLOAT 0x00000016
-#define V_030008_FMT_11_11_10 0x00000017
-#define V_030008_FMT_11_11_10_FLOAT 0x00000018
-#define V_030008_FMT_2_10_10_10 0x00000019
-#define V_030008_FMT_8_8_8_8 0x0000001A
-#define V_030008_FMT_10_10_10_2 0x0000001B
-#define V_030008_FMT_X24_8_32_FLOAT 0x0000001C
-#define V_030008_FMT_32_32 0x0000001D
-#define V_030008_FMT_32_32_FLOAT 0x0000001E
-#define V_030008_FMT_16_16_16_16 0x0000001F
-#define V_030008_FMT_16_16_16_16_FLOAT 0x00000020
-#define V_030008_FMT_32_32_32_32 0x00000022
-#define V_030008_FMT_32_32_32_32_FLOAT 0x00000023
-#define V_030008_FMT_8_8_8 0x0000002c
-#define V_030008_FMT_16_16_16 0x0000002d
-#define V_030008_FMT_16_16_16_FLOAT 0x0000002e
-#define V_030008_FMT_32_32_32 0x0000002f
-#define V_030008_FMT_32_32_32_FLOAT 0x00000030
#define S_030008_NUM_FORMAT_ALL(x) (((x) & 0x3) << 26)
#define G_030008_NUM_FORMAT_ALL(x) (((x) >> 26) & 0x3)
#define C_030008_NUM_FORMAT_ALL 0xF3FFFFFF
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 630177d6add..62d983269f5 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -99,15 +99,22 @@ enum chip_class {
EVERGREEN,
};
+struct r600_tiling_info {
+ unsigned num_channels;
+ unsigned num_banks;
+ unsigned group_bytes;
+};
+
enum radeon_family r600_get_family(struct radeon *rw);
enum chip_class r600_get_family_class(struct radeon *radeon);
+struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon);
/* r600_bo.c */
struct r600_bo;
struct r600_bo *r600_bo(struct radeon *radeon,
unsigned size, unsigned alignment, unsigned usage);
struct r600_bo *r600_bo_handle(struct radeon *radeon,
- unsigned handle);
+ unsigned handle, unsigned *array_mode);
void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx);
void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo);
void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst,
@@ -233,6 +240,10 @@ struct r600_context {
u32 *pm4;
struct list_head query_list;
unsigned num_query_running;
+ unsigned fence;
+ struct list_head fenced_bo;
+ unsigned *cfence;
+ struct r600_bo *fence_bo;
};
struct r600_draw {
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index d13da0ef638..c22628423bd 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -701,9 +701,14 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
- S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode);
+ S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank);
bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
+ S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) |
S_SQ_CF_ALU_WORD1_BARRIER(1) |
S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == 0 ? cf->r6xx_uses_waterfall : 0) |
S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
@@ -871,3 +876,39 @@ int r600_bc_build(struct r600_bc *bc)
}
return 0;
}
+
+void r600_bc_clear(struct r600_bc *bc)
+{
+ struct r600_bc_cf *cf, *next_cf;
+
+ free(bc->bytecode);
+ bc->bytecode = NULL;
+
+ LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) {
+ struct r600_bc_alu *alu, *next_alu;
+ struct r600_bc_tex *tex, *next_tex;
+ struct r600_bc_tex *vtx, *next_vtx;
+
+ LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
+ free(alu);
+ }
+
+ LIST_INITHEAD(&cf->alu);
+
+ LIST_FOR_EACH_ENTRY_SAFE(tex, next_tex, &cf->tex, list) {
+ free(tex);
+ }
+
+ LIST_INITHEAD(&cf->tex);
+
+ LIST_FOR_EACH_ENTRY_SAFE(vtx, next_vtx, &cf->vtx, list) {
+ free(vtx);
+ }
+
+ LIST_INITHEAD(&cf->vtx);
+
+ free(cf);
+ }
+
+ LIST_INITHEAD(&cf->list);
+}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index bebc7c15b00..25cda16837d 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -132,6 +132,11 @@ struct r600_bc_cf {
unsigned pop_count;
unsigned cf_addr; /* control flow addr */
unsigned kcache0_mode;
+ unsigned kcache1_mode;
+ unsigned kcache0_addr;
+ unsigned kcache1_addr;
+ unsigned kcache0_bank;
+ unsigned kcache1_bank;
unsigned r6xx_uses_waterfall;
struct list_head alu;
struct list_head tex;
@@ -185,6 +190,7 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf);
/* r600_asm.c */
int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
+void r600_bc_clear(struct r600_bc *bc);
int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu);
int r600_bc_add_literal(struct r600_bc *bc, const u32 *value);
int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx);
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 4bf44a171af..50d47060c1a 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -22,12 +22,22 @@
*/
#include <util/u_surface.h>
#include <util/u_blitter.h>
+#include <util/u_format.h>
#include "r600_pipe.h"
-static void r600_blitter_save_states(struct pipe_context *ctx)
+enum r600_blitter_op /* bitmask */
+{
+ R600_CLEAR = 1,
+ R600_CLEAR_SURFACE = 2,
+ R600_COPY = 4
+};
+
+static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ r600_context_queries_suspend(&rctx->ctx);
+
util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]);
util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->states[R600_PIPE_STATE_DSA]);
if (rctx->states[R600_PIPE_STATE_STENCIL_REF]) {
@@ -47,25 +57,34 @@ static void r600_blitter_save_states(struct pipe_context *ctx)
rctx->vertex_elements = NULL;
- /* TODO queries */
+ if (op & (R600_CLEAR_SURFACE | R600_COPY))
+ util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer);
+
+ if (op & R600_COPY) {
+ util_blitter_save_fragment_sampler_states(
+ rctx->blitter, rctx->ps_samplers.n_samplers,
+ (void**)rctx->ps_samplers.samplers);
+
+ util_blitter_save_fragment_sampler_views(
+ rctx->blitter, rctx->ps_samplers.n_views,
+ (struct pipe_sampler_view**)rctx->ps_samplers.views);
+ }
+
+}
+
+static void r600_blitter_end(struct pipe_context *ctx)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ r600_context_queries_resume(&rctx->ctx);
}
int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct pipe_framebuffer_state fb = *rctx->pframebuffer;
struct pipe_surface *zsurf, *cbsurf;
int level = 0;
float depth = 1.0f;
- r600_context_queries_suspend(&rctx->ctx);
- for (int i = 0; i < fb.nr_cbufs; i++) {
- fb.cbufs[i] = NULL;
- pipe_surface_reference(&fb.cbufs[i], rctx->pframebuffer->cbufs[i]);
- }
- fb.zsbuf = NULL;
- pipe_surface_reference(&fb.zsbuf, rctx->pframebuffer->zsbuf);
-
zsurf = ctx->screen->get_tex_surface(ctx->screen, &texture->resource.base.b, 0, level, 0,
PIPE_BIND_DEPTH_STENCIL);
@@ -73,22 +92,17 @@ int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_te
(struct pipe_resource*)texture->flushed_depth_texture,
0, level, 0, PIPE_BIND_RENDER_TARGET);
- r600_blitter_save_states(ctx);
- util_blitter_save_framebuffer(rctx->blitter, &fb);
-
if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 ||
- rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635)
+ rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635)
depth = 0.0f;
+ r600_blitter_begin(ctx, R600_CLEAR_SURFACE);
util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, rctx->custom_dsa_flush, depth);
+ r600_blitter_end(ctx);
pipe_surface_reference(&zsurf, NULL);
pipe_surface_reference(&cbsurf, NULL);
- for (int i = 0; i < fb.nr_cbufs; i++) {
- pipe_surface_reference(&fb.cbufs[i], NULL);
- }
- pipe_surface_reference(&fb.zsbuf, NULL);
- r600_context_queries_resume(&rctx->ctx);
+
return 0;
}
@@ -99,12 +113,11 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct pipe_framebuffer_state *fb = &rctx->framebuffer;
- r600_context_queries_suspend(&rctx->ctx);
- r600_blitter_save_states(ctx);
+ r600_blitter_begin(ctx, R600_CLEAR);
util_blitter_clear(rctx->blitter, fb->width, fb->height,
fb->nr_cbufs, buffers, rgba, depth,
stencil);
- r600_context_queries_resume(&rctx->ctx);
+ r600_blitter_end(ctx);
}
static void r600_clear_render_target(struct pipe_context *ctx,
@@ -114,13 +127,11 @@ static void r600_clear_render_target(struct pipe_context *ctx,
unsigned width, unsigned height)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct pipe_framebuffer_state *fb = &rctx->framebuffer;
- r600_context_queries_suspend(&rctx->ctx);
- util_blitter_save_framebuffer(rctx->blitter, fb);
+ r600_blitter_begin(ctx, R600_CLEAR_SURFACE);
util_blitter_clear_render_target(rctx->blitter, dst, rgba,
dstx, dsty, width, height);
- r600_context_queries_resume(&rctx->ctx);
+ r600_blitter_end(ctx);
}
static void r600_clear_depth_stencil(struct pipe_context *ctx,
@@ -132,16 +143,34 @@ static void r600_clear_depth_stencil(struct pipe_context *ctx,
unsigned width, unsigned height)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct pipe_framebuffer_state *fb = &rctx->framebuffer;
- r600_context_queries_suspend(&rctx->ctx);
- util_blitter_save_framebuffer(rctx->blitter, fb);
+ r600_blitter_begin(ctx, R600_CLEAR_SURFACE);
util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil,
dstx, dsty, width, height);
- r600_context_queries_resume(&rctx->ctx);
+ r600_blitter_end(ctx);
}
+
+/* Copy a block of pixels from one surface to another using HW. */
+static void r600_hw_copy_region(struct pipe_context *ctx,
+ struct pipe_resource *dst,
+ struct pipe_subresource subdst,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ struct pipe_subresource subsrc,
+ unsigned srcx, unsigned srcy, unsigned srcz,
+ unsigned width, unsigned height)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+
+ r600_blitter_begin(ctx, R600_COPY);
+ util_blitter_copy_region(rctx->blitter, dst, subdst, dstx, dsty, dstz,
+ src, subsrc, srcx, srcy, srcz, width, height,
+ TRUE);
+ r600_blitter_end(ctx);
+}
+
static void r600_resource_copy_region(struct pipe_context *ctx,
struct pipe_resource *dst,
struct pipe_subresource subdst,
@@ -151,8 +180,16 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
unsigned srcx, unsigned srcy, unsigned srcz,
unsigned width, unsigned height)
{
- util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz,
- src, subsrc, srcx, srcy, srcz, width, height);
+ boolean is_depth;
+ /* there is something wrong with depth resource copies at the moment so avoid them for now */
+ is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
+ if (is_depth)
+ util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz,
+ src, subsrc, srcx, srcy, srcz, width, height);
+ else
+ r600_hw_copy_region(ctx, dst, subdst, dstx, dsty, dstz,
+ src, subsrc, srcx, srcy, srcz, width, height);
+
}
void r600_init_blit_functions(struct r600_pipe_context *rctx)
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 2bfa4e22fec..455aa2e81f6 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -227,7 +227,7 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
struct r600_resource *rbuffer;
struct r600_bo *bo = NULL;
- bo = r600_bo_handle(rw, whandle->handle);
+ bo = r600_bo_handle(rw, whandle->handle, NULL);
if (bo == NULL) {
return NULL;
}
diff --git a/src/gallium/drivers/r600/r600_formats.h b/src/gallium/drivers/r600/r600_formats.h
new file mode 100644
index 00000000000..0c91a212384
--- /dev/null
+++ b/src/gallium/drivers/r600/r600_formats.h
@@ -0,0 +1,56 @@
+#ifndef R600_FORMATS_H
+#define R600_FORMATS_H
+
+/* list of formats from R700 ISA document - apply across GPUs in different registers */
+#define FMT_INVALID 0x00000000
+#define FMT_8 0x00000001
+#define FMT_4_4 0x00000002
+#define FMT_3_3_2 0x00000003
+#define FMT_16 0x00000005
+#define FMT_16_FLOAT 0x00000006
+#define FMT_8_8 0x00000007
+#define FMT_5_6_5 0x00000008
+#define FMT_6_5_5 0x00000009
+#define FMT_1_5_5_5 0x0000000A
+#define FMT_4_4_4_4 0x0000000B
+#define FMT_5_5_5_1 0x0000000C
+#define FMT_32 0x0000000D
+#define FMT_32_FLOAT 0x0000000E
+#define FMT_16_16 0x0000000F
+#define FMT_16_16_FLOAT 0x00000010
+#define FMT_8_24 0x00000011
+#define FMT_8_24_FLOAT 0x00000012
+#define FMT_24_8 0x00000013
+#define FMT_24_8_FLOAT 0x00000014
+#define FMT_10_11_11 0x00000015
+#define FMT_10_11_11_FLOAT 0x00000016
+#define FMT_11_11_10 0x00000017
+#define FMT_11_11_10_FLOAT 0x00000018
+#define FMT_2_10_10_10 0x00000019
+#define FMT_8_8_8_8 0x0000001A
+#define FMT_10_10_10_2 0x0000001B
+#define FMT_X24_8_32_FLOAT 0x0000001C
+#define FMT_32_32 0x0000001D
+#define FMT_32_32_FLOAT 0x0000001E
+#define FMT_16_16_16_16 0x0000001F
+#define FMT_16_16_16_16_FLOAT 0x00000020
+#define FMT_32_32_32_32 0x00000022
+#define FMT_32_32_32_32_FLOAT 0x00000023
+#define FMT_1 0x00000025
+#define FMT_GB_GR 0x00000027
+#define FMT_BG_RG 0x00000028
+#define FMT_32_AS_8 0x00000029
+#define FMT_32_AS_8_8 0x0000002a
+#define FMT_5_9_9_9_SHAREDEXP 0x0000002b
+#define FMT_8_8_8 0x0000002c
+#define FMT_16_16_16 0x0000002d
+#define FMT_16_16_16_FLOAT 0x0000002e
+#define FMT_32_32_32 0x0000002f
+#define FMT_32_32_32_FLOAT 0x00000030
+#define FMT_BC1 0x00000031
+#define FMT_BC2 0x00000032
+#define FMT_BC3 0x00000033
+#define FMT_BC4 0x00000034
+#define FMT_BC5 0x00000035
+
+#endif
diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h
index 0cf9c1c401c..4f9b39a7fdc 100644
--- a/src/gallium/drivers/r600/r600_opcodes.h
+++ b/src/gallium/drivers/r600/r600_opcodes.h
@@ -233,12 +233,6 @@
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL 0x00000012
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE 0x00000013
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR 0x00000014
-/* same up to here */
-/*
-#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA 0x00000015
-#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR 0x00000016
-#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x00000018
-*/
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT 0x00000015
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT 0x00000016
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT 0x00000017
@@ -336,9 +330,11 @@
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED_64 0x00000098
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_64 0x00000099
/* TODO Fill in more ALU */
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR 0x000000B1
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 0x000000BE
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE 0x000000BF
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE 0x000000C0
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x000000CC
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY 0x000000D6
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW 0x000000D7
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 0589652f705..bea7ef5df84 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -85,6 +85,11 @@ static void r600_destroy_context(struct pipe_context *context)
u_upload_destroy(rctx->upload_vb);
u_upload_destroy(rctx->upload_ib);
+ if (rctx->tran.translate_cache)
+ translate_cache_destroy(rctx->tran.translate_cache);
+
+ FREE(rctx->ps_resource);
+ FREE(rctx->vs_resource);
FREE(rctx);
}
@@ -171,6 +176,24 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
return NULL;
}
+ rctx->tran.translate_cache = translate_cache_create();
+ if (rctx->tran.translate_cache == NULL) {
+ FREE(rctx);
+ return NULL;
+ }
+
+ rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state));
+ if (!rctx->vs_resource) {
+ FREE(rctx);
+ return NULL;
+ }
+
+ rctx->ps_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state));
+ if (!rctx->ps_resource) {
+ FREE(rctx);
+ return NULL;
+ }
+
class = r600_get_family_class(rctx->radeon);
if (class == R600 || class == R700)
rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx);
@@ -242,6 +265,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
case PIPE_CAP_DEPTH_CLAMP:
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
return 1;
/* Unsupported features (boolean caps). */
@@ -257,7 +281,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
return 14;
case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
/* FIXME allow this once infrastructure is there */
- return 0;
+ return 16;
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
case PIPE_CAP_MAX_COMBINED_SAMPLERS:
return 16;
@@ -428,5 +452,7 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon)
r600_init_screen_texture_functions(&rscreen->screen);
r600_init_screen_resource_functions(&rscreen->screen);
+ rscreen->tiling_info = r600_get_tiling_info(radeon);
+
return &rscreen->screen;
}
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index c46029a5617..1c691f6b764 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -30,6 +30,7 @@
#include <pipe/p_screen.h>
#include <pipe/p_context.h>
#include <util/u_math.h>
+#include "translate/translate_cache.h"
#include "r600.h"
#include "r600_public.h"
#include "r600_shader.h"
@@ -58,6 +59,7 @@ enum r600_pipe_state_id {
struct r600_screen {
struct pipe_screen screen;
struct radeon *radeon;
+ struct r600_tiling_info *tiling_info;
};
struct r600_pipe_sampler_view {
@@ -81,8 +83,10 @@ struct r600_pipe_blend {
struct r600_vertex_element
{
unsigned count;
- unsigned refcount;
- struct pipe_vertex_element elements[32];
+ struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS];
+ enum pipe_format hw_format[PIPE_MAX_ATTRIBS];
+ unsigned hw_format_size[PIPE_MAX_ATTRIBS];
+ boolean incompatible_layout;
};
struct r600_pipe_shader {
@@ -92,6 +96,29 @@ struct r600_pipe_shader {
struct r600_vertex_element vertex_elements;
};
+/* needed for blitter save */
+#define NUM_TEX_UNITS 16
+
+struct r600_textures_info {
+ struct r600_pipe_sampler_view *views[NUM_TEX_UNITS];
+ unsigned n_views;
+ void *samplers[NUM_TEX_UNITS];
+ unsigned n_samplers;
+};
+
+struct r600_translate_context {
+ /* Translate cache for incompatible vertex offset/stride/format fallback. */
+ struct translate_cache *translate_cache;
+
+ /* The vertex buffer slot containing the translated buffer. */
+ unsigned vb_slot;
+ /* Saved and new vertex element state. */
+ void *saved_velems, *new_velems;
+};
+
+#define R600_CONSTANT_ARRAY_SIZE 256
+#define R600_RESOURCE_ARRAY_SIZE 160
+
struct r600_pipe_context {
struct pipe_context context;
struct blitter_context *blitter;
@@ -112,12 +139,8 @@ struct r600_pipe_context {
struct pipe_stencil_ref stencil_ref;
struct pipe_viewport_state viewport;
struct pipe_clip_state clip;
- unsigned vs_nconst;
- unsigned ps_nconst;
- struct r600_pipe_state vs_const[256];
- struct r600_pipe_state ps_const[256];
- struct r600_pipe_state vs_resource[160];
- struct r600_pipe_state ps_resource[160];
+ struct r600_pipe_state *vs_resource;
+ struct r600_pipe_state *ps_resource;
struct r600_pipe_state config;
struct r600_pipe_shader *ps_shader;
struct r600_pipe_shader *vs_shader;
@@ -130,6 +153,11 @@ struct r600_pipe_context {
struct u_upload_mgr *upload_vb;
struct u_upload_mgr *upload_ib;
unsigned any_user_vbs;
+ struct r600_textures_info ps_samplers;
+
+ unsigned vb_max_index;
+ struct r600_translate_context tran;
+
};
struct r600_drawl {
@@ -180,6 +208,7 @@ void r600_init_context_resource_functions(struct r600_pipe_context *r600);
/* r600_shader.c */
int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader);
int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens);
+void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader);
int r600_find_vs_semantic_index(struct r600_shader *vs,
struct r600_shader *ps, int id);
@@ -187,10 +216,6 @@ int r600_find_vs_semantic_index(struct r600_shader *vs,
void r600_init_state_functions(struct r600_pipe_context *rctx);
void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info);
void r600_init_config(struct r600_pipe_context *rctx);
-void r600_translate_index_buffer(struct r600_pipe_context *r600,
- struct pipe_resource **index_buffer,
- unsigned *index_size,
- unsigned *start, unsigned count);
void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx);
/* r600_helper.h */
int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
@@ -201,6 +226,38 @@ uint32_t r600_translate_texformat(enum pipe_format format,
const unsigned char *swizzle_view,
uint32_t *word4_p, uint32_t *yuv_format_p);
+/* r600_translate.c */
+void r600_begin_vertex_translate(struct r600_pipe_context *rctx);
+void r600_end_vertex_translate(struct r600_pipe_context *rctx);
+void r600_translate_index_buffer(struct r600_pipe_context *r600,
+ struct pipe_resource **index_buffer,
+ unsigned *index_size,
+ unsigned *start, unsigned count);
+
+/* r600_state_common.c */
+void r600_set_index_buffer(struct pipe_context *ctx,
+ const struct pipe_index_buffer *ib);
+void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
+ const struct pipe_vertex_buffer *buffers);
+void *r600_create_vertex_elements(struct pipe_context *ctx,
+ unsigned count,
+ const struct pipe_vertex_element *elements);
+void r600_delete_vertex_element(struct pipe_context *ctx, void *state);
+void r600_bind_blend_state(struct pipe_context *ctx, void *state);
+void r600_bind_rs_state(struct pipe_context *ctx, void *state);
+void r600_delete_rs_state(struct pipe_context *ctx, void *state);
+void r600_sampler_view_destroy(struct pipe_context *ctx,
+ struct pipe_sampler_view *state);
+void r600_bind_state(struct pipe_context *ctx, void *state);
+void r600_delete_state(struct pipe_context *ctx, void *state);
+void r600_bind_vertex_elements(struct pipe_context *ctx, void *state);
+
+void *r600_create_shader_state(struct pipe_context *ctx,
+ const struct pipe_shader_state *state);
+void r600_bind_ps_shader(struct pipe_context *ctx, void *state);
+void r600_bind_vs_shader(struct pipe_context *ctx, void *state);
+void r600_delete_ps_shader(struct pipe_context *ctx, void *state);
+void r600_delete_vs_shader(struct pipe_context *ctx, void *state);
/*
* common helpers
*/
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index f6377ea802b..d152285815c 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -25,6 +25,9 @@
#include "util/u_transfer.h"
+/* flag to indicate a resource is to be used as a transfer so should not be tiled */
+#define R600_RESOURCE_FLAG_TRANSFER PIPE_RESOURCE_FLAG_DRV_PRIV
+
/* Texture transfer. */
struct r600_transfer {
/* Base class. */
@@ -49,16 +52,14 @@ struct r600_resource {
struct r600_resource_texture {
struct r600_resource resource;
- unsigned long offset[PIPE_MAX_TEXTURE_LEVELS];
- unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS];
- unsigned long width[PIPE_MAX_TEXTURE_LEVELS];
- unsigned long height[PIPE_MAX_TEXTURE_LEVELS];
- unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS];
- unsigned long pitch_override;
- unsigned long bpt;
- unsigned long size;
+ unsigned offset[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned pitch_in_pixels[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned array_mode[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned pitch_override;
+ unsigned size;
unsigned tiled;
- unsigned array_mode;
unsigned tile_type;
unsigned depth;
unsigned dirty;
@@ -126,4 +127,9 @@ void* r600_texture_transfer_map(struct pipe_context *ctx,
void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer* transfer);
+struct r600_surface {
+ struct pipe_surface base;
+ unsigned aligned_height;
+};
+
#endif
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 016e75bd52b..4106587398b 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -107,24 +107,28 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state *rstate = &shader->rstate;
struct r600_shader *rshader = &shader->shader;
- unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z;
- boolean have_pos = FALSE, have_face = FALSE;
+ unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
+ int pos_index = -1, face_index = -1;
/* clear previous register */
rstate->nregs = 0;
for (i = 0; i < rshader->ninput; i++) {
tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
- tmp |= S_028644_SEL_CENTROID(1);
+ if (rshader->input[i].centroid)
+ tmp |= S_028644_SEL_CENTROID(1);
+ if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
+ tmp |= S_028644_SEL_LINEAR(1);
+
if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
- have_pos = TRUE;
+ pos_index = i;
if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
}
if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
- have_face = TRUE;
+ face_index = i;
if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
tmp |= S_028644_PT_SPRITE_TEX(1);
@@ -132,17 +136,22 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade
r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
}
for (i = 0; i < rshader->noutput; i++) {
- if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
+ if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
r600_pipe_state_add_reg(rstate,
R_02880C_DB_SHADER_CONTROL,
S_02880C_Z_EXPORT_ENABLE(1),
S_02880C_Z_EXPORT_ENABLE(1), NULL);
+ if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
+ r600_pipe_state_add_reg(rstate,
+ R_02880C_DB_SHADER_CONTROL,
+ S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
+ S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
}
exports_ps = 0;
num_cout = 0;
for (i = 0; i < rshader->noutput; i++) {
- if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
+ if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
exports_ps |= 1;
else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
num_cout++;
@@ -157,13 +166,22 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade
spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
S_0286CC_PERSP_GRADIENT_ENA(1);
spi_input_z = 0;
- if (have_pos) {
- spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) |
- S_0286CC_BARYC_SAMPLE_CNTL(1);
+ if (pos_index != -1) {
+ spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
+ S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
+ S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
+ S_0286CC_BARYC_SAMPLE_CNTL(1));
spi_input_z |= 1;
}
+
+ spi_ps_in_control_1 = 0;
+ if (face_index != -1) {
+ spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
+ S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
+ }
+
r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, S_0286D0_FRONT_FACE_ENA(have_face), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
R_028840_SQ_PGM_START_PS,
@@ -252,7 +270,7 @@ static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader
}
rshader->vertex_elements = *rctx->vertex_elements;
for (i = 0; i < rctx->vertex_elements->count; i++) {
- resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
+ resource_format[nresources++] = rctx->vertex_elements->hw_format[i];
}
r600_bo_reference(rctx->radeon, &rshader->bo, NULL);
LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
@@ -320,6 +338,18 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s
return 0;
}
+void
+r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+
+ r600_bo_reference(rctx->radeon, &shader->bo, NULL);
+
+ r600_bc_clear(&shader->shader.bc);
+
+ /* FIXME: is there more stuff to free? */
+}
+
/*
* tgsi -> r600 shader
*/
@@ -339,6 +369,11 @@ struct r600_shader_ctx {
u32 *literals;
u32 nliterals;
u32 max_driver_temp_used;
+ /* needed for evergreen interpolation */
+ boolean input_centroid;
+ boolean input_linear;
+ boolean input_perspective;
+ int num_interp_gpr;
};
struct r600_shader_tgsi_instruction {
@@ -371,11 +406,9 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
}
#endif
for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
- if (i->Src[j].Register.Dimension ||
- i->Src[j].Register.Absolute) {
- R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j,
- i->Src[j].Register.Dimension,
- i->Src[j].Register.Absolute);
+ if (i->Src[j].Register.Dimension) {
+ R600_ERR("unsupported src %d (dimension %d)\n", j,
+ i->Src[j].Register.Dimension);
return -EINVAL;
}
}
@@ -388,10 +421,33 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
return 0;
}
-static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr)
+static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
{
int i, r;
struct r600_bc_alu alu;
+ int gpr = 0, base_chan = 0;
+ int ij_index = 0;
+
+ if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
+ ij_index = 0;
+ if (ctx->shader->input[input].centroid)
+ ij_index++;
+ } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
+ ij_index = 0;
+ /* if we have perspective add one */
+ if (ctx->input_perspective) {
+ ij_index++;
+ /* if we have perspective centroid */
+ if (ctx->input_centroid)
+ ij_index++;
+ }
+ if (ctx->shader->input[input].centroid)
+ ij_index++;
+ }
+
+ /* work out gpr and base_chan from index */
+ gpr = ij_index / 2;
+ base_chan = (2 * (ij_index % 2)) + 1;
for (i = 0; i < 8; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
@@ -402,13 +458,16 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr)
alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
if ((i > 1) && (i < 6)) {
- alu.dst.sel = ctx->shader->input[gpr].gpr;
+ alu.dst.sel = ctx->shader->input[input].gpr;
alu.dst.write = 1;
}
alu.dst.chan = i % 4;
- alu.src[0].chan = (1 - (i % 2));
- alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + gpr;
+
+ alu.src[0].sel = gpr;
+ alu.src[0].chan = (base_chan - (i % 2));
+
+ alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
alu.bank_swizzle_force = SQ_ALU_VEC_210;
if ((i % 4) == 3)
@@ -434,6 +493,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
ctx->shader->input[i].name = d->Semantic.Name;
ctx->shader->input[i].sid = d->Semantic.Index;
ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
+ ctx->shader->input[i].centroid = d->Declaration.Centroid;
ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
if (ctx->type == TGSI_PROCESSOR_VERTEX) {
/* turn input into fetch */
@@ -457,7 +517,12 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
}
if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) {
/* turn input into interpolate on EG */
- evergreen_interp_alu(ctx, i);
+ if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
+ if (ctx->shader->input[i].interpolate > 0) {
+ ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
+ evergreen_interp_alu(ctx, i);
+ }
+ }
}
break;
case TGSI_FILE_OUTPUT:
@@ -484,6 +549,53 @@ static int r600_get_temp(struct r600_shader_ctx *ctx)
return ctx->temp_reg + ctx->max_driver_temp_used++;
}
+/*
+ * for evergreen we need to scan the shader to find the number of GPRs we need to
+ * reserve for interpolation.
+ *
+ * we need to know if we are going to emit
+ * any centroid inputs
+ * if perspective and linear are required
+*/
+static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
+{
+ int i;
+ int num_baryc;
+
+ ctx->input_linear = FALSE;
+ ctx->input_perspective = FALSE;
+ ctx->input_centroid = FALSE;
+ ctx->num_interp_gpr = 1;
+
+ /* any centroid inputs */
+ for (i = 0; i < ctx->info.num_inputs; i++) {
+ /* skip position/face */
+ if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
+ ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
+ continue;
+ if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
+ ctx->input_linear = TRUE;
+ if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
+ ctx->input_perspective = TRUE;
+ if (ctx->info.input_centroid[i])
+ ctx->input_centroid = TRUE;
+ }
+
+ num_baryc = 0;
+ /* ignoring sample for now */
+ if (ctx->input_perspective)
+ num_baryc++;
+ if (ctx->input_linear)
+ num_baryc++;
+ if (ctx->input_centroid)
+ num_baryc *= 2;
+
+ ctx->num_interp_gpr += (num_baryc + 1) >> 1;
+
+ /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
+ return ctx->num_interp_gpr;
+}
+
int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
{
struct tgsi_full_immediate *immediate;
@@ -529,6 +641,9 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
if (ctx.type == TGSI_PROCESSOR_VERTEX) {
ctx.file_offset[TGSI_FILE_INPUT] = 1;
}
+ if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == 2) {
+ ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
+ }
ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
ctx.info.file_count[TGSI_FILE_INPUT];
ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
@@ -625,7 +740,14 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
output[i].array_base = 61;
output[i].swizzle_x = 2;
- output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7;
+ output[i].swizzle_y = 7;
+ output[i].swizzle_z = output[i].swizzle_w = 7;
+ output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
+ output[i].array_base = 61;
+ output[i].swizzle_x = 7;
+ output[i].swizzle_y = 1;
+ output[i].swizzle_z = output[i].swizzle_w = 7;
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
} else {
R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
@@ -731,6 +853,7 @@ static int tgsi_src(struct r600_shader_ctx *ctx,
if (tgsi_src->Register.Indirect)
r600_src->rel = V_SQ_REL_RELATIVE;
r600_src->neg = tgsi_src->Register.Negate;
+ r600_src->abs = tgsi_src->Register.Absolute;
r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
return 0;
}
@@ -793,6 +916,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = r600_src[i].sel;
alu.src[0].chan = k;
+ alu.src[0].rel = r600_src[i].rel;
alu.dst.sel = treg;
alu.dst.chan = k;
alu.dst.write = 1;
@@ -803,6 +927,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s
return r;
}
r600_src[i].sel = treg;
+ r600_src[i].rel =0;
j--;
}
}
@@ -1843,7 +1968,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = src_gpr;
- alu.src[0].chan = i;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
if (i == 3)
@@ -1863,8 +1988,10 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
memset(&tex, 0, sizeof(struct r600_bc_tex));
tex.inst = opcode;
- tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
- tex.sampler_id = tex.resource_id;
+ tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
+ tex.resource_id = tex.sampler_id;
+ if (ctx->shader->processor_type == TGSI_PROCESSOR_VERTEX)
+ tex.resource_id += PIPE_MAX_ATTRIBS;
tex.src_gpr = src_gpr;
tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
@@ -2172,7 +2299,7 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
static int tgsi_exp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
+ struct r600_bc_alu_src r600_src[3] = { { 0 } };
struct r600_bc_alu alu;
int r;
@@ -2495,7 +2622,40 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
}
/* r6/7 only for now */
-static int tgsi_arl(struct r600_shader_ctx *ctx)
+static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int r;
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ alu.last = 1;
+ alu.dst.chan = 0;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+ r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
+ if (r)
+ return r;
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ alu.last = 1;
+ r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
+ if (r)
+ return r;
+ return 0;
+}
+static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
{
/* TODO from r600c, ar values don't persist between clauses */
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -2840,7 +3000,7 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
}
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
- {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl},
+ {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
{TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
{TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
@@ -2921,7 +3081,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
- {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
{TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
{TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
/* gap */
@@ -3004,7 +3164,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
};
static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
- {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
{TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
{TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
{TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
@@ -3079,7 +3239,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
- {TGSI_OPCODE_TXL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
{TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
{TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
/* gap */
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index 6e2620f2012..f8bc5951395 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -31,6 +31,8 @@ struct r600_shader_io {
unsigned done;
int sid;
unsigned interpolate;
+ boolean centroid;
+ unsigned lds_pos; /* for evergreen */
};
struct r600_shader {
@@ -39,6 +41,7 @@ struct r600_shader {
boolean flat_shade;
unsigned ninput;
unsigned noutput;
+ unsigned nlds;
struct r600_shader_io input[32];
struct r600_shader_io output[32];
enum radeon_family family;
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index b55c3450059..b3e0d493217 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -37,7 +37,7 @@
#include <util/u_memory.h>
#include <util/u_inlines.h>
#include <util/u_upload_mgr.h>
-#include <util/u_index_modify.h>
+#include <util/u_framebuffer.h>
#include <pipebuffer/pb_buffer.h>
#include "r600.h"
#include "r600d.h"
@@ -98,7 +98,7 @@ static void r600_draw_common(struct r600_drawl *draw)
vertex_buffer->buffer_offset +
r600_bo_offset(rbuffer->bo);
- format = r600_translate_vertex_data_type(rctx->vertex_elements->elements[i].src_format);
+ format = r600_translate_vertex_data_type(rctx->vertex_elements->hw_format[i]);
word2 = format | S_038008_STRIDE(vertex_buffer->stride);
@@ -181,40 +181,21 @@ static void r600_draw_common(struct r600_drawl *draw)
r600_context_draw(&rctx->ctx, &rdraw);
}
-void r600_translate_index_buffer(struct r600_pipe_context *r600,
- struct pipe_resource **index_buffer,
- unsigned *index_size,
- unsigned *start, unsigned count)
-{
- switch (*index_size) {
- case 1:
- util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count);
- *index_size = 2;
- *start = 0;
- break;
-
- case 2:
- if (*start % 2 != 0) {
- util_rebuild_ushort_elts(&r600->context, index_buffer, 0, *start, count);
- *start = 0;
- }
- break;
-
- case 4:
- break;
- }
-}
-
void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_drawl draw;
+ boolean translate = FALSE;
+
+ if (rctx->vertex_elements->incompatible_layout) {
+ r600_begin_vertex_translate(rctx);
+ translate = TRUE;
+ }
if (rctx->any_user_vbs) {
r600_upload_user_buffers(rctx);
rctx->any_user_vbs = FALSE;
}
-
memset(&draw, 0, sizeof(struct r600_drawl));
draw.ctx = ctx;
draw.mode = info->mode;
@@ -245,6 +226,9 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
}
r600_draw_common(&draw);
+ if (translate)
+ r600_end_vertex_translate(rctx);
+
pipe_resource_reference(&draw.index_buffer, NULL);
}
@@ -340,20 +324,6 @@ static void *r600_create_blend_state(struct pipe_context *ctx,
return rstate;
}
-static void r600_bind_blend_state(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state;
- struct r600_pipe_state *rstate;
-
- if (state == NULL)
- return;
- rstate = &blend->rstate;
- rctx->states[rstate->id] = rstate;
- rctx->cb_target_mask = blend->cb_target_mask;
- r600_context_pipe_state_set(&rctx->ctx, rstate);
-}
-
static void *r600_create_dsa_state(struct pipe_context *ctx,
const struct pipe_depth_stencil_alpha_state *state)
{
@@ -446,6 +416,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
struct r600_pipe_state *rstate;
unsigned tmp;
unsigned prov_vtx = 1, polygon_dual_mode;
+ unsigned clip_rule;
if (rs == NULL) {
return NULL;
@@ -455,6 +426,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
rs->flatshade = state->flatshade;
rs->sprite_coord_enable = state->sprite_coord_enable;
+ clip_rule = state->scissor ? 0xAAAA : 0xFFFF;
/* offset */
rs->offset_units = state->offset_units;
rs->offset_scale = state->offset_scale * 12.0f;
@@ -462,7 +434,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
rstate->id = R600_PIPE_STATE_RASTERIZER;
if (state->flatshade_first)
prov_vtx = 0;
- tmp = 0x00000001;
+ tmp = S_0286D4_FLAT_SHADE_ENA(1);
if (state->sprite_coord_enable) {
tmp |= S_0286D4_PNT_SPRITE_ENA(1) |
S_0286D4_PNT_SPRITE_OVRD_X(2) |
@@ -496,7 +468,10 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
tmp = (unsigned)(state->point_size * 8.0);
r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, 0x00000008, 0xFFFFFFFF, NULL);
+
+ tmp = (unsigned)(state->line_width * 8.0);
+ r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL);
+
r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL);
@@ -505,37 +480,9 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL);
- return rstate;
-}
-
-static void r600_bind_rs_state(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state;
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- if (state == NULL)
- return;
-
- rctx->flatshade = rs->flatshade;
- rctx->sprite_coord_enable = rs->sprite_coord_enable;
- rctx->rasterizer = rs;
+ r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL);
- rctx->states[rs->rstate.id] = &rs->rstate;
- r600_context_pipe_state_set(&rctx->ctx, &rs->rstate);
-}
-
-static void r600_delete_rs_state(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state;
-
- if (rctx->rasterizer == rs) {
- rctx->rasterizer = NULL;
- }
- if (rctx->states[rs->rstate.id] == &rs->rstate) {
- rctx->states[rs->rstate.id] = NULL;
- }
- free(rs);
+ return rstate;
}
static void *r600_create_sampler_state(struct pipe_context *ctx,
@@ -574,28 +521,6 @@ static void *r600_create_sampler_state(struct pipe_context *ctx,
return rstate;
}
-static void *r600_create_vertex_elements(struct pipe_context *ctx,
- unsigned count,
- const struct pipe_vertex_element *elements)
-{
- struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element);
-
- assert(count < 32);
- v->count = count;
- v->refcount = 1;
- memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element));
- return v;
-}
-
-static void r600_sampler_view_destroy(struct pipe_context *ctx,
- struct pipe_sampler_view *state)
-{
- struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state;
-
- pipe_resource_reference(&state->texture, NULL);
- FREE(resource);
-}
-
static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx,
struct pipe_resource *texture,
const struct pipe_sampler_view *state)
@@ -626,15 +551,15 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
swizzle[1] = state->swizzle_g;
swizzle[2] = state->swizzle_b;
swizzle[3] = state->swizzle_a;
- format = r600_translate_texformat(texture->format,
+ format = r600_translate_texformat(state->format,
swizzle,
&word4, &yuv_format);
if (format == ~0) {
format = 0;
}
- desc = util_format_description(texture->format);
+ desc = util_format_description(state->format);
if (desc == NULL) {
- R600_ERR("unknow format %d\n", texture->format);
+ R600_ERR("unknow format %d\n", state->format);
}
tmp = (struct r600_resource_texture*)texture;
rbuffer = &tmp->resource;
@@ -648,7 +573,11 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
bo[0] = rbuffer->bo;
bo[1] = rbuffer->bo;
}
- pitch = align(tmp->pitch[0] / tmp->bpt, 8);
+ pitch = align(tmp->pitch_in_pixels[0], 8);
+ if (tmp->tiled) {
+ array_mode = tmp->array_mode[0];
+ tile_type = tmp->tile_type;
+ }
/* FIXME properly handle first level != 0 */
r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0,
@@ -683,32 +612,43 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count,
struct pipe_sampler_view **views)
{
- /* TODO */
- assert(1);
-}
-
-static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
- struct pipe_sampler_view **views)
-{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views;
for (int i = 0; i < count; i++) {
if (resource[i]) {
- r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i);
+ r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS);
}
}
}
-static void r600_bind_state(struct pipe_context *ctx, void *state)
+static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
+ struct pipe_sampler_view **views)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state *rstate = (struct r600_pipe_state *)state;
+ struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views;
+ int i;
- if (state == NULL)
- return;
- rctx->states[rstate->id] = rstate;
- r600_context_pipe_state_set(&rctx->ctx, rstate);
+ for (i = 0; i < count; i++) {
+ if (&rctx->ps_samplers.views[i]->base != views[i]) {
+ if (resource[i])
+ r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i);
+ else
+ r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+
+ pipe_sampler_view_reference(
+ (struct pipe_sampler_view **)&rctx->ps_samplers.views[i],
+ views[i]);
+
+ }
+ }
+ for (i = count; i < NUM_TEX_UNITS; i++) {
+ if (rctx->ps_samplers.views[i]) {
+ r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+ pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL);
+ }
+ }
+ rctx->ps_samplers.n_views = count;
}
static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states)
@@ -716,6 +656,9 @@ static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state **rstates = (struct r600_pipe_state **)states;
+ memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count);
+ rctx->ps_samplers.n_samplers = count;
+
for (int i = 0; i < count; i++) {
r600_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i);
}
@@ -726,37 +669,11 @@ static void r600_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state **rstates = (struct r600_pipe_state **)states;
- /* TODO implement */
for (int i = 0; i < count; i++) {
r600_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i);
}
}
-static void r600_delete_state(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state *rstate = (struct r600_pipe_state *)state;
-
- if (rctx->states[rstate->id] == rstate) {
- rctx->states[rstate->id] = NULL;
- }
- for (int i = 0; i < rstate->nregs; i++) {
- r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL);
- }
- free(rstate);
-}
-
-static void r600_delete_vertex_element(struct pipe_context *ctx, void *state)
-{
- struct r600_vertex_element *v = (struct r600_vertex_element*)state;
-
- if (v == NULL)
- return;
- if (--v->refcount)
- return;
- free(v);
-}
-
static void r600_set_clip_state(struct pipe_context *ctx,
const struct pipe_clip_state *state)
{
@@ -792,19 +709,6 @@ static void r600_set_clip_state(struct pipe_context *ctx,
r600_context_pipe_state_set(&rctx->ctx, rstate);
}
-static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_vertex_element *v = (struct r600_vertex_element*)state;
-
- r600_delete_vertex_element(ctx, rctx->vertex_elements);
- rctx->vertex_elements = v;
- if (v) {
- v->refcount++;
-// rctx->vs_rebuild = TRUE;
- }
-}
-
static void r600_set_polygon_stipple(struct pipe_context *ctx,
const struct pipe_poly_stipple *state)
{
@@ -828,18 +732,6 @@ static void r600_set_scissor_state(struct pipe_context *ctx,
tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1);
br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy);
r600_pipe_state_add_reg(rstate,
- R_028030_PA_SC_SCREEN_SCISSOR_TL, tl,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028034_PA_SC_SCREEN_SCISSOR_BR, br,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028204_PA_SC_WINDOW_SCISSOR_TL, tl,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028208_PA_SC_WINDOW_SCISSOR_BR, br,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
R_028210_PA_SC_CLIPRECT_0_TL, tl,
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
@@ -863,17 +755,6 @@ static void r600_set_scissor_state(struct pipe_context *ctx,
r600_pipe_state_add_reg(rstate,
R_02822C_PA_SC_CLIPRECT_3_BR, br,
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028200_PA_SC_WINDOW_OFFSET, 0x00000000,
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_02820C_PA_SC_CLIPRECT_RULE, 0x0000FFFF,
- 0xFFFFFFFF, NULL);
- if (rctx->family >= CHIP_RV770) {
- r600_pipe_state_add_reg(rstate,
- R_028230_PA_SC_EDGERULE, 0xAAAAAAAA,
- 0xFFFFFFFF, NULL);
- }
free(rctx->states[R600_PIPE_STATE_SCISSOR]);
rctx->states[R600_PIPE_STATE_SCISSOR] = rstate;
@@ -937,6 +818,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
{
struct r600_resource_texture *rtex;
struct r600_resource *rbuffer;
+ struct r600_surface *surf;
unsigned level = state->cbufs[cb]->level;
unsigned pitch, slice;
unsigned color_info;
@@ -944,14 +826,15 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
const struct util_format_description *desc;
struct r600_bo *bo[3];
+ surf = (struct r600_surface *)state->cbufs[cb];
rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
rbuffer = &rtex->resource;
bo[0] = rbuffer->bo;
bo[1] = rbuffer->bo;
bo[2] = rbuffer->bo;
- pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
- slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1;
+ pitch = rtex->pitch_in_pixels[level] / 8 - 1;
+ slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
ntype = 0;
desc = util_format_description(rtex->resource.base.b.format);
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
@@ -961,6 +844,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
swap = r600_translate_colorswap(rtex->resource.base.b.format);
color_info = S_0280A0_FORMAT(format) |
S_0280A0_COMP_SWAP(swap) |
+ S_0280A0_ARRAY_MODE(rtex->array_mode[level]) |
S_0280A0_BLEND_CLAMP(1) |
S_0280A0_NUMBER_TYPE(ntype);
if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
@@ -996,22 +880,25 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
{
struct r600_resource_texture *rtex;
struct r600_resource *rbuffer;
+ struct r600_surface *surf;
unsigned level;
unsigned pitch, slice, format;
if (state->zsbuf == NULL)
return;
+ level = state->zsbuf->level;
+
+ surf = (struct r600_surface *)state->zsbuf;
rtex = (struct r600_resource_texture*)state->zsbuf->texture;
rtex->tiled = 1;
- rtex->array_mode = 2;
+ rtex->array_mode[level] = 2;
rtex->tile_type = 1;
rtex->depth = 1;
rbuffer = &rtex->resource;
- level = state->zsbuf->level;
- pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
- slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1;
+ pitch = rtex->pitch_in_pixels[level] / 8 - 1;
+ slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
format = r600_translate_dbformat(state->zsbuf->texture->format);
r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE,
@@ -1021,10 +908,10 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028010_DB_DEPTH_INFO,
- S_028010_ARRAY_MODE(rtex->array_mode) | S_028010_FORMAT(format),
+ S_028010_ARRAY_MODE(rtex->array_mode[level]) | S_028010_FORMAT(format),
0xFFFFFFFF, rbuffer->bo);
r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT,
- (state->zsbuf->height / 8) - 1, 0xFFFFFFFF, NULL);
+ (surf->aligned_height / 8) - 1, 0xFFFFFFFF, NULL);
}
static void r600_set_framebuffer_state(struct pipe_context *ctx,
@@ -1039,14 +926,9 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
/* unreference old buffer and reference new one */
rstate->id = R600_PIPE_STATE_FRAMEBUFFER;
- for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
- pipe_surface_reference(&rctx->framebuffer.cbufs[i], NULL);
- }
- for (int i = 0; i < state->nr_cbufs; i++) {
- pipe_surface_reference(&rctx->framebuffer.cbufs[i], state->cbufs[i]);
- }
- pipe_surface_reference(&rctx->framebuffer.zsbuf, state->zsbuf);
- rctx->framebuffer = *state;
+
+ util_copy_framebuffer_state(&rctx->framebuffer, state);
+
rctx->pframebuffer = &rctx->framebuffer;
/* build states */
@@ -1070,6 +952,18 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
br = S_028244_BR_X(state->width) | S_028244_BR_Y(state->height);
r600_pipe_state_add_reg(rstate,
+ R_028030_PA_SC_SCREEN_SCISSOR_TL, tl,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028034_PA_SC_SCREEN_SCISSOR_BR, br,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028204_PA_SC_WINDOW_SCISSOR_TL, tl,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028208_PA_SC_WINDOW_SCISSOR_BR, br,
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
R_028240_PA_SC_GENERIC_SCISSOR_TL, tl,
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
@@ -1081,6 +975,14 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
r600_pipe_state_add_reg(rstate,
R_028254_PA_SC_VPORT_SCISSOR_0_BR, br,
0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028200_PA_SC_WINDOW_OFFSET, 0x00000000,
+ 0xFFFFFFFF, NULL);
+ if (rctx->family >= CHIP_RV770) {
+ r600_pipe_state_add_reg(rstate,
+ R_028230_PA_SC_EDGERULE, 0xAAAAAAAA,
+ 0xFFFFFFFF, NULL);
+ }
r600_pipe_state_add_reg(rstate, R_0287A0_CB_SHADER_CONTROL,
shader_control, 0xFFFFFFFF, NULL);
@@ -1110,40 +1012,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
r600_context_pipe_state_set(&rctx->ctx, rstate);
}
-static void r600_set_index_buffer(struct pipe_context *ctx,
- const struct pipe_index_buffer *ib)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- if (ib) {
- pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer);
- memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer));
- } else {
- pipe_resource_reference(&rctx->index_buffer.buffer, NULL);
- memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer));
- }
-
- /* TODO make this more like a state */
-}
-
-static void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
- const struct pipe_vertex_buffer *buffers)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- for (int i = 0; i < rctx->nvertex_buffer; i++) {
- pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL);
- }
- memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count);
- for (int i = 0; i < count; i++) {
- rctx->vertex_buffer[i].buffer = NULL;
- if (r600_buffer_is_user_buffer(buffers[i].buffer))
- rctx->any_user_vbs = TRUE;
- pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer);
- }
- rctx->nvertex_buffer = count;
-}
-
static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
struct pipe_resource *buffer)
{
@@ -1179,59 +1047,6 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint
}
}
-static void *r600_create_shader_state(struct pipe_context *ctx,
- const struct pipe_shader_state *state)
-{
- struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader);
- int r;
-
- r = r600_pipe_shader_create(ctx, shader, state->tokens);
- if (r) {
- return NULL;
- }
- return shader;
-}
-
-static void r600_bind_ps_shader(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- /* TODO delete old shader */
- rctx->ps_shader = (struct r600_pipe_shader *)state;
-}
-
-static void r600_bind_vs_shader(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
- /* TODO delete old shader */
- rctx->vs_shader = (struct r600_pipe_shader *)state;
-}
-
-static void r600_delete_ps_shader(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state;
-
- if (rctx->ps_shader == shader) {
- rctx->ps_shader = NULL;
- }
- /* TODO proper delete */
- free(shader);
-}
-
-static void r600_delete_vs_shader(struct pipe_context *ctx, void *state)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state;
-
- if (rctx->vs_shader == shader) {
- rctx->vs_shader = NULL;
- }
- /* TODO proper delete */
- free(shader);
-}
-
void r600_init_state_functions(struct r600_pipe_context *rctx)
{
rctx->context.create_blend_state = r600_create_blend_state;
@@ -1480,14 +1295,14 @@ void r600_init_config(struct r600_pipe_context *rctx)
r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL);
} else {
r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000003, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004010, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004012, 0xFFFFFFFF, NULL);
}
r600_pipe_state_add_reg(rstate, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
new file mode 100644
index 00000000000..210420e823b
--- /dev/null
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -0,0 +1,276 @@
+/*
+ * Copyright 2010 Red Hat Inc.
+ * 2010 Jerome Glisse
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie <[email protected]>
+ * Jerome Glisse <[email protected]>
+ */
+#include <util/u_memory.h>
+#include <util/u_format.h>
+#include <pipebuffer/pb_buffer.h>
+#include "r600_pipe.h"
+
+/* common state between evergreen and r600 */
+void r600_bind_blend_state(struct pipe_context *ctx, void *state)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state;
+ struct r600_pipe_state *rstate;
+
+ if (state == NULL)
+ return;
+ rstate = &blend->rstate;
+ rctx->states[rstate->id] = rstate;
+ rctx->cb_target_mask = blend->cb_target_mask;
+ r600_context_pipe_state_set(&rctx->ctx, rstate);
+}
+
+void r600_bind_rs_state(struct pipe_context *ctx, void *state)
+{
+ struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state;
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+
+ if (state == NULL)
+ return;
+
+ rctx->flatshade = rs->flatshade;
+ rctx->sprite_coord_enable = rs->sprite_coord_enable;
+ rctx->rasterizer = rs;
+
+ rctx->states[rs->rstate.id] = &rs->rstate;
+ r600_context_pipe_state_set(&rctx->ctx, &rs->rstate);
+}
+
+void r600_delete_rs_state(struct pipe_context *ctx, void *state)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state;
+
+ if (rctx->rasterizer == rs) {
+ rctx->rasterizer = NULL;
+ }
+ if (rctx->states[rs->rstate.id] == &rs->rstate) {
+ rctx->states[rs->rstate.id] = NULL;
+ }
+ free(rs);
+}
+
+void r600_sampler_view_destroy(struct pipe_context *ctx,
+ struct pipe_sampler_view *state)
+{
+ struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state;
+
+ pipe_resource_reference(&state->texture, NULL);
+ FREE(resource);
+}
+
+void r600_bind_state(struct pipe_context *ctx, void *state)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_state *rstate = (struct r600_pipe_state *)state;
+
+ if (state == NULL)
+ return;
+ rctx->states[rstate->id] = rstate;
+ r600_context_pipe_state_set(&rctx->ctx, rstate);
+}
+
+void r600_delete_state(struct pipe_context *ctx, void *state)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_state *rstate = (struct r600_pipe_state *)state;
+
+ if (rctx->states[rstate->id] == rstate) {
+ rctx->states[rstate->id] = NULL;
+ }
+ for (int i = 0; i < rstate->nregs; i++) {
+ r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL);
+ }
+ free(rstate);
+}
+
+void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_vertex_element *v = (struct r600_vertex_element*)state;
+
+ rctx->vertex_elements = v;
+ if (v) {
+// rctx->vs_rebuild = TRUE;
+ }
+}
+
+void r600_delete_vertex_element(struct pipe_context *ctx, void *state)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+
+ FREE(state);
+
+ if (rctx->vertex_elements == state)
+ rctx->vertex_elements = NULL;
+}
+
+
+void r600_set_index_buffer(struct pipe_context *ctx,
+ const struct pipe_index_buffer *ib)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+
+ if (ib) {
+ pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer);
+ memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer));
+ } else {
+ pipe_resource_reference(&rctx->index_buffer.buffer, NULL);
+ memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer));
+ }
+
+ /* TODO make this more like a state */
+}
+
+void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
+ const struct pipe_vertex_buffer *buffers)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct pipe_vertex_buffer *vbo;
+ unsigned max_index = (unsigned)-1;
+
+ for (int i = 0; i < rctx->nvertex_buffer; i++) {
+ pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL);
+ }
+ memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count);
+
+ for (int i = 0; i < count; i++) {
+ vbo = (struct pipe_vertex_buffer*)&buffers[i];
+
+ rctx->vertex_buffer[i].buffer = NULL;
+ if (r600_buffer_is_user_buffer(buffers[i].buffer))
+ rctx->any_user_vbs = TRUE;
+ pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer);
+
+ if (vbo->max_index == ~0) {
+ if (!vbo->stride)
+ vbo->max_index = 1;
+ else
+ vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride;
+ }
+ max_index = MIN2(vbo->max_index, max_index);
+ }
+ rctx->nvertex_buffer = count;
+ rctx->vb_max_index = max_index;
+}
+
+
+#define FORMAT_REPLACE(what, withwhat) \
+ case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break
+
+void *r600_create_vertex_elements(struct pipe_context *ctx,
+ unsigned count,
+ const struct pipe_vertex_element *elements)
+{
+ struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element);
+ int i;
+ enum pipe_format *format;
+
+ assert(count < 32);
+ if (!v)
+ return NULL;
+
+ v->count = count;
+ memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element));
+
+ for (i = 0; i < count; i++) {
+ v->hw_format[i] = v->elements[i].src_format;
+ format = &v->hw_format[i];
+
+ switch (*format) {
+ FORMAT_REPLACE(R64_FLOAT, R32_FLOAT);
+ FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT);
+ FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT);
+ default:;
+ }
+ v->incompatible_layout =
+ v->incompatible_layout ||
+ v->elements[i].src_format != v->hw_format[i] ||
+ v->elements[i].src_offset % 4 != 0;
+
+ v->hw_format_size[i] =
+ align(util_format_get_blocksize(v->hw_format[i]), 4);
+ }
+
+ return v;
+}
+
+void *r600_create_shader_state(struct pipe_context *ctx,
+ const struct pipe_shader_state *state)
+{
+ struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader);
+ int r;
+
+ r = r600_pipe_shader_create(ctx, shader, state->tokens);
+ if (r) {
+ return NULL;
+ }
+ return shader;
+}
+
+void r600_bind_ps_shader(struct pipe_context *ctx, void *state)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+
+ /* TODO delete old shader */
+ rctx->ps_shader = (struct r600_pipe_shader *)state;
+}
+
+void r600_bind_vs_shader(struct pipe_context *ctx, void *state)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+
+ /* TODO delete old shader */
+ rctx->vs_shader = (struct r600_pipe_shader *)state;
+}
+
+void r600_delete_ps_shader(struct pipe_context *ctx, void *state)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state;
+
+ if (rctx->ps_shader == shader) {
+ rctx->ps_shader = NULL;
+ }
+
+ r600_pipe_shader_destroy(ctx, shader);
+ free(shader);
+}
+
+void r600_delete_vs_shader(struct pipe_context *ctx, void *state)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state;
+
+ if (rctx->vs_shader == shader) {
+ rctx->vs_shader = NULL;
+ }
+
+ r600_pipe_shader_destroy(ctx, shader);
+ free(shader);
+}
diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index 81f285027e6..1c1978f8abb 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -25,6 +25,7 @@
#include "util/u_format.h"
#include "r600d.h"
+#include "r600_formats.h"
static INLINE uint32_t r600_translate_blend_function(int blend_func)
{
@@ -303,6 +304,10 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
return V_0280A0_SWAP_STD;
case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_R8G8_UNORM:
+ return V_0280A0_SWAP_STD;
+
+ case PIPE_FORMAT_R16_UNORM:
return V_0280A0_SWAP_STD;
/* 32-bit buffers. */
@@ -342,16 +347,19 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
return V_0280A0_SWAP_STD_REV;
+ case PIPE_FORMAT_R16G16_UNORM:
+ return V_0280A0_SWAP_STD;
+
/* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
- // return V_0280A0_COLOR_16_16_16_16;
+ // return FMT_16_16_16_16;
case PIPE_FORMAT_R16G16B16A16_FLOAT:
- // return V_0280A0_COLOR_16_16_16_16_FLOAT;
+ // return FMT_16_16_16_16_FLOAT;
/* 128-bit buffers. */
case PIPE_FORMAT_R32G32B32A32_FLOAT:
- // return V_0280A0_COLOR_32_32_32_32_FLOAT;
+ // return FMT_32_32_32_32_FLOAT;
return 0;
default:
R600_ERR("unsupported colorswap format %d\n", format);
@@ -387,8 +395,12 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
return V_0280A0_COLOR_16;
case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_R8G8_UNORM:
return V_0280A0_COLOR_8_8;
+ case PIPE_FORMAT_R16_UNORM:
+ return V_0280A0_COLOR_16;
+
/* 32-bit buffers. */
case PIPE_FORMAT_A8B8G8R8_SRGB:
case PIPE_FORMAT_A8B8G8R8_UNORM:
@@ -426,6 +438,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
return V_0280A0_COLOR_16_16_FLOAT;
case PIPE_FORMAT_R16G16_SSCALED:
+ case PIPE_FORMAT_R16G16_UNORM:
return V_0280A0_COLOR_16_16;
@@ -510,32 +523,32 @@ static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format)
case 16:
switch (desc->nr_channels) {
case 1:
- result = V_038008_FMT_16_FLOAT;
+ result = FMT_16_FLOAT;
break;
case 2:
- result = V_038008_FMT_16_16_FLOAT;
+ result = FMT_16_16_FLOAT;
break;
case 3:
- result = V_038008_FMT_16_16_16_FLOAT;
+ result = FMT_16_16_16_FLOAT;
break;
case 4:
- result = V_038008_FMT_16_16_16_16_FLOAT;
+ result = FMT_16_16_16_16_FLOAT;
break;
}
break;
case 32:
switch (desc->nr_channels) {
case 1:
- result = V_038008_FMT_32_FLOAT;
+ result = FMT_32_FLOAT;
break;
case 2:
- result = V_038008_FMT_32_32_FLOAT;
+ result = FMT_32_32_FLOAT;
break;
case 3:
- result = V_038008_FMT_32_32_32_FLOAT;
+ result = FMT_32_32_32_FLOAT;
break;
case 4:
- result = V_038008_FMT_32_32_32_32_FLOAT;
+ result = FMT_32_32_32_32_FLOAT;
break;
}
break;
@@ -551,48 +564,48 @@ static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format)
case 8:
switch (desc->nr_channels) {
case 1:
- result = V_038008_FMT_8;
+ result = FMT_8;
break;
case 2:
- result = V_038008_FMT_8_8;
+ result = FMT_8_8;
break;
case 3:
- // result = V_038008_FMT_8_8_8; /* fails piglit draw-vertices test */
+ // result = FMT_8_8_8; /* fails piglit draw-vertices test */
// break;
case 4:
- result = V_038008_FMT_8_8_8_8;
+ result = FMT_8_8_8_8;
break;
}
break;
case 16:
switch (desc->nr_channels) {
case 1:
- result = V_038008_FMT_16;
+ result = FMT_16;
break;
case 2:
- result = V_038008_FMT_16_16;
+ result = FMT_16_16;
break;
case 3:
- // result = V_038008_FMT_16_16_16; /* fails piglit draw-vertices test */
+ // result = FMT_16_16_16; /* fails piglit draw-vertices test */
// break;
case 4:
- result = V_038008_FMT_16_16_16_16;
+ result = FMT_16_16_16_16;
break;
}
break;
case 32:
switch (desc->nr_channels) {
case 1:
- result = V_038008_FMT_32;
+ result = FMT_32;
break;
case 2:
- result = V_038008_FMT_32_32;
+ result = FMT_32_32;
break;
case 3:
- result = V_038008_FMT_32_32_32;
+ result = FMT_32_32_32;
break;
case 4:
- result = V_038008_FMT_32_32_32_32;
+ result = FMT_32_32_32_32;
break;
}
break;
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index c46bfa66ba1..4ebd5b754b3 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -35,6 +35,7 @@
#include "r600_resource.h"
#include "r600_state_inlines.h"
#include "r600d.h"
+#include "r600_formats.h"
extern struct u_resource_vtbl r600_texture_vtbl;
@@ -53,11 +54,30 @@ static void r600_copy_from_tiled_texture(struct pipe_context *ctx, struct r600_t
transfer->box.width, transfer->box.height);
}
-static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex,
+
+/* Copy from a detiled texture to a tiled one. */
+static void r600_copy_into_tiled_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
+{
+ struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
+ struct pipe_resource *texture = transfer->resource;
+ struct pipe_subresource subsrc;
+
+ subsrc.face = 0;
+ subsrc.level = 0;
+ ctx->resource_copy_region(ctx, texture, transfer->sr,
+ transfer->box.x, transfer->box.y, transfer->box.z,
+ rtransfer->linear_texture, subsrc,
+ 0, 0, 0,
+ transfer->box.width, transfer->box.height);
+
+ ctx->flush(ctx, 0, NULL);
+}
+
+static unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
unsigned level, unsigned zslice,
unsigned face)
{
- unsigned long offset = rtex->offset[level];
+ unsigned offset = rtex->offset[level];
switch (rtex->resource.base.b.target) {
case PIPE_TEXTURE_3D:
@@ -72,22 +92,158 @@ static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex,
}
}
-static void r600_setup_miptree(struct r600_resource_texture *rtex, enum chip_class chipc)
+static unsigned r600_get_pixel_alignment(struct pipe_screen *screen,
+ enum pipe_format format,
+ unsigned array_mode)
+{
+ struct r600_screen* rscreen = (struct r600_screen *)screen;
+ unsigned pixsize = util_format_get_blocksize(format);
+ int p_align;
+
+ switch(array_mode) {
+ case V_038000_ARRAY_1D_TILED_THIN1:
+ p_align = MAX2(8,
+ ((rscreen->tiling_info->group_bytes / 8 / pixsize)));
+ break;
+ case V_038000_ARRAY_2D_TILED_THIN1:
+ p_align = MAX2(rscreen->tiling_info->num_banks,
+ (((rscreen->tiling_info->group_bytes / 8 / pixsize)) *
+ rscreen->tiling_info->num_banks));
+ break;
+ case 0:
+ default:
+ p_align = 64;
+ break;
+ }
+ return p_align;
+}
+
+static unsigned r600_get_height_alignment(struct pipe_screen *screen,
+ unsigned array_mode)
+{
+ struct r600_screen* rscreen = (struct r600_screen *)screen;
+ int h_align;
+
+ switch (array_mode) {
+ case V_038000_ARRAY_2D_TILED_THIN1:
+ h_align = rscreen->tiling_info->num_channels * 8;
+ break;
+ case V_038000_ARRAY_1D_TILED_THIN1:
+ h_align = 8;
+ break;
+ default:
+ h_align = 1;
+ break;
+ }
+ return h_align;
+}
+
+static unsigned mip_minify(unsigned size, unsigned level)
+{
+ unsigned val;
+ val = u_minify(size, level);
+ if (level > 0)
+ val = util_next_power_of_two(val);
+ return val;
+}
+
+static unsigned r600_texture_get_stride(struct pipe_screen *screen,
+ struct r600_resource_texture *rtex,
+ unsigned level)
+{
+ struct pipe_resource *ptex = &rtex->resource.base.b;
+ struct radeon *radeon = (struct radeon *)screen->winsys;
+ enum chip_class chipc = r600_get_family_class(radeon);
+ unsigned width, stride, tile_width;
+
+ if (rtex->pitch_override)
+ return rtex->pitch_override;
+
+ width = mip_minify(ptex->width0, level);
+ if (util_format_is_plain(ptex->format)) {
+ tile_width = r600_get_pixel_alignment(screen, ptex->format,
+ rtex->array_mode[level]);
+ width = align(width, tile_width);
+ }
+ stride = util_format_get_stride(ptex->format, width);
+ if (chipc == EVERGREEN)
+ stride = align(stride, 512);
+ return stride;
+}
+
+static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen,
+ struct r600_resource_texture *rtex,
+ unsigned level)
{
struct pipe_resource *ptex = &rtex->resource.base.b;
- unsigned long w, h, pitch, size, layer_size, i, offset;
+ unsigned height, tile_height;
- rtex->bpt = util_format_get_blocksize(ptex->format);
- for (i = 0, offset = 0; i <= ptex->last_level; i++) {
- w = u_minify(ptex->width0, i);
- h = u_minify(ptex->height0, i);
- h = util_next_power_of_two(h);
- pitch = util_format_get_stride(ptex->format, align(w, 64));
- if (chipc == EVERGREEN)
- pitch = align(pitch, 512);
+ height = mip_minify(ptex->height0, level);
+ if (util_format_is_plain(ptex->format)) {
+ tile_height = r600_get_height_alignment(screen,
+ rtex->array_mode[level]);
+ height = align(height, tile_height);
+ }
+ return util_format_get_nblocksy(ptex->format, height);
+}
+
+/* Get a width in pixels from a stride in bytes. */
+static unsigned pitch_to_width(enum pipe_format format,
+ unsigned pitch_in_bytes)
+{
+ return (pitch_in_bytes / util_format_get_blocksize(format)) *
+ util_format_get_blockwidth(format);
+}
+
+static void r600_texture_set_array_mode(struct pipe_screen *screen,
+ struct r600_resource_texture *rtex,
+ unsigned level, unsigned array_mode)
+{
+ struct pipe_resource *ptex = &rtex->resource.base.b;
+
+ switch (array_mode) {
+ case V_0280A0_ARRAY_LINEAR_GENERAL:
+ case V_0280A0_ARRAY_LINEAR_ALIGNED:
+ case V_0280A0_ARRAY_1D_TILED_THIN1:
+ default:
+ rtex->array_mode[level] = array_mode;
+ break;
+ case V_0280A0_ARRAY_2D_TILED_THIN1:
+ {
+ unsigned w, h, tile_height, tile_width;
+
+ tile_height = r600_get_height_alignment(screen, array_mode);
+ tile_width = r600_get_pixel_alignment(screen, ptex->format, array_mode);
+
+ w = mip_minify(ptex->width0, level);
+ h = mip_minify(ptex->height0, level);
+ if (w < tile_width || h < tile_height)
+ rtex->array_mode[level] = V_0280A0_ARRAY_1D_TILED_THIN1;
else
- pitch = align(pitch, 256);
- layer_size = pitch * h;
+ rtex->array_mode[level] = array_mode;
+ }
+ break;
+ }
+}
+
+static void r600_setup_miptree(struct pipe_screen *screen,
+ struct r600_resource_texture *rtex,
+ unsigned array_mode)
+{
+ struct pipe_resource *ptex = &rtex->resource.base.b;
+ struct radeon *radeon = (struct radeon *)screen->winsys;
+ enum chip_class chipc = r600_get_family_class(radeon);
+ unsigned pitch, size, layer_size, i, offset;
+ unsigned nblocksy;
+
+ for (i = 0, offset = 0; i <= ptex->last_level; i++) {
+ r600_texture_set_array_mode(screen, rtex, i, array_mode);
+
+ pitch = r600_texture_get_stride(screen, rtex, i);
+ nblocksy = r600_texture_get_nblocksy(screen, rtex, i);
+
+ layer_size = pitch * nblocksy;
+
if (ptex->target == PIPE_TEXTURE_CUBE) {
if (chipc >= R700)
size = layer_size * 8;
@@ -98,41 +254,69 @@ static void r600_setup_miptree(struct r600_resource_texture *rtex, enum chip_cla
size = layer_size * u_minify(ptex->depth0, i);
rtex->offset[i] = offset;
rtex->layer_size[i] = layer_size;
- rtex->pitch[i] = pitch;
- rtex->width[i] = w;
- rtex->height[i] = h;
+ rtex->pitch_in_bytes[i] = pitch;
+ rtex->pitch_in_pixels[i] = pitch_to_width(ptex->format, pitch);
offset += size;
}
rtex->size = offset;
}
-struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
- const struct pipe_resource *templ)
+static struct r600_resource_texture *
+r600_texture_create_object(struct pipe_screen *screen,
+ const struct pipe_resource *base,
+ unsigned array_mode,
+ unsigned pitch_in_bytes_override,
+ unsigned max_buffer_size,
+ struct r600_bo *bo)
{
struct r600_resource_texture *rtex;
struct r600_resource *resource;
struct radeon *radeon = (struct radeon *)screen->winsys;
rtex = CALLOC_STRUCT(r600_resource_texture);
- if (!rtex) {
+ if (rtex == NULL)
return NULL;
- }
+
resource = &rtex->resource;
- resource->base.b = *templ;
+ resource->base.b = *base;
resource->base.vtbl = &r600_texture_vtbl;
pipe_reference_init(&resource->base.b.reference, 1);
resource->base.b.screen = screen;
- r600_setup_miptree(rtex, r600_get_family_class(radeon));
-
- /* FIXME alignment 4096 enought ? too much ? */
+ resource->bo = bo;
resource->domain = r600_domain_from_usage(resource->base.b.bind);
+ rtex->pitch_override = pitch_in_bytes_override;
+
+ if (array_mode)
+ rtex->tiled = 1;
+ r600_setup_miptree(screen, rtex, array_mode);
+
resource->size = rtex->size;
- resource->bo = r600_bo(radeon, rtex->size, 4096, 0);
- if (resource->bo == NULL) {
- FREE(rtex);
- return NULL;
+
+ if (!resource->bo) {
+ resource->bo = r600_bo(radeon, rtex->size, 4096, 0);
+ if (!resource->bo) {
+ FREE(rtex);
+ return NULL;
+ }
+ }
+ return rtex;
+}
+
+struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ)
+{
+ unsigned array_mode = 0;
+
+ if (debug_get_bool_option("R600_FORCE_TILING", FALSE)) {
+ if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
+ !(templ->bind & PIPE_BIND_SCANOUT)) {
+ array_mode = V_038000_ARRAY_2D_TILED_THIN1;
+ }
}
- return &resource->base.b;
+
+ return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode,
+ 0, 0, NULL);
+
}
static void r600_texture_destroy(struct pipe_screen *screen,
@@ -157,24 +341,27 @@ static struct pipe_surface *r600_get_tex_surface(struct pipe_screen *screen,
unsigned zslice, unsigned flags)
{
struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
- struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface);
- unsigned long offset;
+ struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
+ unsigned offset, tile_height;
if (surface == NULL)
return NULL;
offset = r600_texture_get_offset(rtex, level, zslice, face);
- pipe_reference_init(&surface->reference, 1);
- pipe_resource_reference(&surface->texture, texture);
- surface->format = texture->format;
- surface->width = u_minify(texture->width0, level);
- surface->height = u_minify(texture->height0, level);
- surface->offset = offset;
- surface->usage = flags;
- surface->zslice = zslice;
- surface->texture = texture;
- surface->face = face;
- surface->level = level;
- return surface;
+ pipe_reference_init(&surface->base.reference, 1);
+ pipe_resource_reference(&surface->base.texture, texture);
+ surface->base.format = texture->format;
+ surface->base.width = mip_minify(texture->width0, level);
+ surface->base.height = mip_minify(texture->height0, level);
+ surface->base.offset = offset;
+ surface->base.usage = flags;
+ surface->base.zslice = zslice;
+ surface->base.texture = texture;
+ surface->base.face = face;
+ surface->base.level = level;
+
+ tile_height = r600_get_height_alignment(screen, rtex->array_mode[level]);
+ surface->aligned_height = align(surface->base.height, tile_height);
+ return &surface->base;
}
static void r600_tex_surface_destroy(struct pipe_surface *surface)
@@ -183,46 +370,29 @@ static void r600_tex_surface_destroy(struct pipe_surface *surface)
FREE(surface);
}
+
struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
const struct pipe_resource *templ,
struct winsys_handle *whandle)
{
struct radeon *rw = (struct radeon*)screen->winsys;
- struct r600_resource_texture *rtex;
- struct r600_resource *resource;
struct r600_bo *bo = NULL;
+ unsigned array_mode = 0;
/* Support only 2D textures without mipmaps */
if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
templ->depth0 != 1 || templ->last_level != 0)
return NULL;
- rtex = CALLOC_STRUCT(r600_resource_texture);
- if (rtex == NULL)
- return NULL;
-
- bo = r600_bo_handle(rw, whandle->handle);
+ bo = r600_bo_handle(rw, whandle->handle, &array_mode);
if (bo == NULL) {
- FREE(rtex);
return NULL;
}
- resource = &rtex->resource;
- resource->base.b = *templ;
- resource->base.vtbl = &r600_texture_vtbl;
- pipe_reference_init(&resource->base.b.reference, 1);
- resource->base.b.screen = screen;
- resource->bo = bo;
- rtex->depth = 0;
- rtex->pitch_override = whandle->stride;
- rtex->bpt = util_format_get_blocksize(templ->format);
- rtex->pitch[0] = whandle->stride;
- rtex->width[0] = templ->width0;
- rtex->height[0] = templ->height0;
- rtex->offset[0] = 0;
- rtex->size = align(rtex->pitch[0] * templ->height0, 64);
-
- return &resource->base.b;
+ return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode,
+ whandle->stride,
+ 0,
+ bo);
}
static unsigned int r600_texture_is_referenced(struct pipe_context *context,
@@ -248,14 +418,14 @@ int r600_texture_depth_flush(struct pipe_context *ctx,
resource.format = texture->format;
resource.width0 = texture->width0;
resource.height0 = texture->height0;
- resource.depth0 = 0;
+ resource.depth0 = 1;
resource.last_level = 0;
resource.nr_samples = 0;
resource.usage = PIPE_USAGE_DYNAMIC;
resource.bind = 0;
- resource.flags = 0;
+ resource.flags = R600_RESOURCE_FLAG_TRANSFER;
- resource.bind |= PIPE_BIND_RENDER_TARGET;
+ resource.bind |= PIPE_BIND_DEPTH_STENCIL;
rtex->flushed_depth_texture = (struct r600_resource_texture *)ctx->screen->resource_create(ctx->screen, &resource);
if (rtex->flushed_depth_texture == NULL) {
@@ -286,8 +456,6 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
trans->transfer.sr = sr;
trans->transfer.usage = usage;
trans->transfer.box = *box;
- trans->transfer.stride = rtex->pitch[sr.level];
- trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face);
if (rtex->depth) {
r = r600_texture_depth_flush(ctx, texture);
if (r < 0) {
@@ -301,12 +469,12 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
resource.format = texture->format;
resource.width0 = box->width;
resource.height0 = box->height;
- resource.depth0 = 0;
+ resource.depth0 = 1;
resource.last_level = 0;
resource.nr_samples = 0;
resource.usage = PIPE_USAGE_DYNAMIC;
resource.bind = 0;
- resource.flags = 0;
+ resource.flags = R600_RESOURCE_FLAG_TRANSFER;
/* For texture reading, the temporary (detiled) texture is used as
* a render target when blitting from a tiled texture. */
if (usage & PIPE_TRANSFER_READ) {
@@ -325,6 +493,9 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
FREE(trans);
return NULL;
}
+
+ trans->transfer.stride =
+ ((struct r600_resource_texture *)trans->linear_texture)->pitch_in_bytes[0];
if (usage & PIPE_TRANSFER_READ) {
/* We cannot map a tiled texture directly because the data is
* in a different order, therefore we do detiling using a blit. */
@@ -332,7 +503,10 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
/* Always referenced in the blit. */
ctx->flush(ctx, 0, NULL);
}
+ return &trans->transfer;
}
+ trans->transfer.stride = rtex->pitch_in_bytes[sr.level];
+ trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face);
return &trans->transfer;
}
@@ -343,12 +517,12 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx,
struct r600_resource_texture *rtex = (struct r600_resource_texture*)transfer->resource;
if (rtransfer->linear_texture) {
+ if (transfer->usage & PIPE_TRANSFER_WRITE) {
+ r600_copy_into_tiled_texture(ctx, rtransfer);
+ }
pipe_resource_reference(&rtransfer->linear_texture, NULL);
}
if (rtex->flushed_depth_texture) {
- if (transfer->usage & PIPE_TRANSFER_WRITE) {
- // TODO
- }
pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);
}
pipe_resource_reference(&transfer->resource, NULL);
@@ -362,7 +536,7 @@ void* r600_texture_transfer_map(struct pipe_context *ctx,
struct r600_bo *bo;
enum pipe_format format = transfer->resource->format;
struct radeon *radeon = (struct radeon *)ctx->screen->winsys;
- unsigned long offset = 0;
+ unsigned offset = 0;
char *map;
if (rtransfer->linear_texture) {
@@ -500,15 +674,23 @@ uint32_t r600_translate_texformat(enum pipe_format format,
case UTIL_FORMAT_COLORSPACE_ZS:
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
- result = V_0280A0_COLOR_16;
+ result = FMT_16;
goto out_word4;
+ case PIPE_FORMAT_X24S8_USCALED:
+ word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT);
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- result = V_0280A0_COLOR_8_24;
+ result = FMT_8_24;
goto out_word4;
+ case PIPE_FORMAT_S8X24_USCALED:
+ word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT);
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- result = V_0280A0_COLOR_24_8;
+ result = FMT_24_8;
+ goto out_word4;
+ case PIPE_FORMAT_S8_USCALED:
+ result = V_0280A0_COLOR_8;
+ word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT);
goto out_word4;
default:
goto out_unknown;
@@ -562,7 +744,7 @@ uint32_t r600_translate_texformat(enum pipe_format format,
if (desc->channel[0].size == 5 &&
desc->channel[1].size == 6 &&
desc->channel[2].size == 5) {
- result = V_0280A0_COLOR_5_6_5;
+ result = FMT_5_6_5;
goto out_word4;
}
goto out_unknown;
@@ -571,14 +753,14 @@ uint32_t r600_translate_texformat(enum pipe_format format,
desc->channel[1].size == 5 &&
desc->channel[2].size == 5 &&
desc->channel[3].size == 1) {
- result = V_0280A0_COLOR_1_5_5_5;
+ result = FMT_1_5_5_5;
goto out_word4;
}
if (desc->channel[0].size == 10 &&
desc->channel[1].size == 10 &&
desc->channel[2].size == 10 &&
desc->channel[3].size == 2) {
- result = V_0280A0_COLOR_10_10_10_2;
+ result = FMT_10_10_10_2;
goto out_word4;
}
goto out_unknown;
@@ -609,36 +791,36 @@ uint32_t r600_translate_texformat(enum pipe_format format,
case 4:
switch (desc->nr_channels) {
case 2:
- result = V_0280A0_COLOR_4_4;
+ result = FMT_4_4;
goto out_word4;
case 4:
- result = V_0280A0_COLOR_4_4_4_4;
+ result = FMT_4_4_4_4;
goto out_word4;
}
goto out_unknown;
case 8:
switch (desc->nr_channels) {
case 1:
- result = V_0280A0_COLOR_8;
+ result = FMT_8;
goto out_word4;
case 2:
- result = V_0280A0_COLOR_8_8;
+ result = FMT_8_8;
goto out_word4;
case 4:
- result = V_0280A0_COLOR_8_8_8_8;
+ result = FMT_8_8_8_8;
goto out_word4;
}
goto out_unknown;
case 16:
switch (desc->nr_channels) {
case 1:
- result = V_0280A0_COLOR_16;
+ result = FMT_16;
goto out_word4;
case 2:
- result = V_0280A0_COLOR_16_16;
+ result = FMT_16_16;
goto out_word4;
case 4:
- result = V_0280A0_COLOR_16_16_16_16;
+ result = FMT_16_16_16_16;
goto out_word4;
}
}
@@ -649,26 +831,26 @@ uint32_t r600_translate_texformat(enum pipe_format format,
case 16:
switch (desc->nr_channels) {
case 1:
- result = V_0280A0_COLOR_16_FLOAT;
+ result = FMT_16_FLOAT;
goto out_word4;
case 2:
- result = V_0280A0_COLOR_16_16_FLOAT;
+ result = FMT_16_16_FLOAT;
goto out_word4;
case 4:
- result = V_0280A0_COLOR_16_16_16_16_FLOAT;
+ result = FMT_16_16_16_16_FLOAT;
goto out_word4;
}
goto out_unknown;
case 32:
switch (desc->nr_channels) {
case 1:
- result = V_0280A0_COLOR_32_FLOAT;
+ result = FMT_32_FLOAT;
goto out_word4;
case 2:
- result = V_0280A0_COLOR_32_32_FLOAT;
+ result = FMT_32_32_FLOAT;
goto out_word4;
case 4:
- result = V_0280A0_COLOR_32_32_32_32_FLOAT;
+ result = FMT_32_32_32_32_FLOAT;
goto out_word4;
}
}
diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c
new file mode 100644
index 00000000000..9a07cf2073f
--- /dev/null
+++ b/src/gallium/drivers/r600/r600_translate.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright 2010 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie <[email protected]>
+ */
+#include "translate/translate_cache.h"
+#include "translate/translate.h"
+#include <pipebuffer/pb_buffer.h>
+#include <util/u_index_modify.h>
+#include "r600_pipe.h"
+
+void r600_begin_vertex_translate(struct r600_pipe_context *rctx)
+{
+ struct pipe_context *pipe = &rctx->context;
+ struct translate_key key = {0};
+ struct translate_element *te;
+ unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0};
+ struct translate *tr;
+ struct r600_vertex_element *ve = rctx->vertex_elements;
+ boolean vb_translated[PIPE_MAX_ATTRIBS] = {0};
+ void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map;
+ struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer;
+ struct pipe_resource *out_buffer;
+ unsigned i, num_verts;
+
+ /* Initialize the translate key, i.e. the recipe how vertices should be
+ * translated. */
+ for (i = 0; i < ve->count; i++) {
+ struct pipe_vertex_buffer *vb =
+ &rctx->vertex_buffer[ve->elements[i].vertex_buffer_index];
+ enum pipe_format output_format = ve->hw_format[i];
+ unsigned output_format_size = ve->hw_format_size[i];
+
+ /* Check for support. */
+ if (ve->elements[i].src_format == ve->hw_format[i] &&
+ (vb->buffer_offset + ve->elements[i].src_offset) % 4 == 0 &&
+ vb->stride % 4 == 0) {
+ continue;
+ }
+
+ /* Workaround for translate: output floats instead of halfs. */
+ switch (output_format) {
+ case PIPE_FORMAT_R16_FLOAT:
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ output_format_size = 4;
+ break;
+ case PIPE_FORMAT_R16G16_FLOAT:
+ output_format = PIPE_FORMAT_R32G32_FLOAT;
+ output_format_size = 8;
+ break;
+ case PIPE_FORMAT_R16G16B16_FLOAT:
+ output_format = PIPE_FORMAT_R32G32B32_FLOAT;
+ output_format_size = 12;
+ break;
+ case PIPE_FORMAT_R16G16B16A16_FLOAT:
+ output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ output_format_size = 16;
+ break;
+ default:;
+ }
+
+ /* Add this vertex element. */
+ te = &key.element[key.nr_elements];
+ /*te->type;
+ te->instance_divisor;*/
+ te->input_buffer = ve->elements[i].vertex_buffer_index;
+ te->input_format = ve->elements[i].src_format;
+ te->input_offset = vb->buffer_offset + ve->elements[i].src_offset;
+ te->output_format = output_format;
+ te->output_offset = key.output_stride;
+
+ key.output_stride += output_format_size;
+ vb_translated[ve->elements[i].vertex_buffer_index] = TRUE;
+ tr_elem_index[i] = key.nr_elements;
+ key.nr_elements++;
+ }
+
+ /* Get a translate object. */
+ tr = translate_cache_find(rctx->tran.translate_cache, &key);
+
+ /* Map buffers we want to translate. */
+ for (i = 0; i < rctx->nvertex_buffer; i++) {
+ if (vb_translated[i]) {
+ struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i];
+
+ vb_map[i] = pipe_buffer_map(pipe, vb->buffer,
+ PIPE_TRANSFER_READ, &vb_transfer[i]);
+
+ tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index);
+ }
+ }
+
+ /* Create and map the output buffer. */
+ num_verts = rctx->vb_max_index + 1;
+
+ out_buffer = pipe_buffer_create(&rctx->screen->screen,
+ PIPE_BIND_VERTEX_BUFFER,
+ key.output_stride * num_verts);
+
+ out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE,
+ &out_transfer);
+
+ /* Translate. */
+ tr->run(tr, 0, num_verts, 0, out_map);
+
+ /* Unmap all buffers. */
+ for (i = 0; i < rctx->nvertex_buffer; i++) {
+ if (vb_translated[i]) {
+ pipe_buffer_unmap(pipe, rctx->vertex_buffer[i].buffer,
+ vb_transfer[i]);
+ }
+ }
+
+ pipe_buffer_unmap(pipe, out_buffer, out_transfer);
+
+ /* Setup the new vertex buffer in the first free slot. */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i];
+
+ if (!vb->buffer) {
+ pipe_resource_reference(&vb->buffer, out_buffer);
+ vb->buffer_offset = 0;
+ vb->max_index = num_verts - 1;
+ vb->stride = key.output_stride;
+ rctx->tran.vb_slot = i;
+ break;
+ }
+ }
+
+ /* Save and replace vertex elements. */
+ {
+ struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS];
+
+ rctx->tran.saved_velems = rctx->vertex_elements;
+
+ for (i = 0; i < ve->count; i++) {
+ if (vb_translated[ve->elements[i].vertex_buffer_index]) {
+ te = &key.element[tr_elem_index[i]];
+ new_velems[i].instance_divisor = ve->elements[i].instance_divisor;
+ new_velems[i].src_format = te->output_format;
+ new_velems[i].src_offset = te->output_offset;
+ new_velems[i].vertex_buffer_index = rctx->tran.vb_slot;
+ } else {
+ memcpy(&new_velems[i], &ve->elements[i],
+ sizeof(struct pipe_vertex_element));
+ }
+ }
+
+ rctx->tran.new_velems =
+ pipe->create_vertex_elements_state(pipe, ve->count, new_velems);
+ pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems);
+ }
+
+ pipe_resource_reference(&out_buffer, NULL);
+}
+
+void r600_end_vertex_translate(struct r600_pipe_context *rctx)
+{
+ struct pipe_context *pipe = &rctx->context;
+
+ /* Restore vertex elements. */
+ pipe->bind_vertex_elements_state(pipe, rctx->tran.saved_velems);
+ pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems);
+
+ /* Delete the now-unused VBO. */
+ pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer,
+ NULL);
+}
+
+void r600_translate_index_buffer(struct r600_pipe_context *r600,
+ struct pipe_resource **index_buffer,
+ unsigned *index_size,
+ unsigned *start, unsigned count)
+{
+ switch (*index_size) {
+ case 1:
+ util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count);
+ *index_size = 2;
+ *start = 0;
+ break;
+
+ case 2:
+ if (*start % 2 != 0) {
+ util_rebuild_ushort_elts(&r600->context, index_buffer, 0, *start, count);
+ *start = 0;
+ }
+ break;
+
+ case 4:
+ break;
+ }
+}
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index a96d2ce26c1..a3cb5b86004 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -667,6 +667,9 @@
#define S_02880C_Z_EXPORT_ENABLE(x) (((x) & 0x1) << 0)
#define G_02880C_Z_EXPORT_ENABLE(x) (((x) >> 0) & 0x1)
#define C_02880C_Z_EXPORT_ENABLE 0xFFFFFFFE
+#define S_02880C_STENCIL_REF_EXPORT_ENABLE(x) (((x) & 0x1) << 1)
+#define G_02880C_STENCIL_REF_EXPORT_ENABLE(x) (((x) >> 1) & 0x1)
+#define C_02880C_STENCIL_REF_EXPORT_ENABLE 0xFFFFFFFD
#define S_02880C_Z_ORDER(x) (((x) & 0x3) << 4)
#define G_02880C_Z_ORDER(x) (((x) >> 4) & 0x3)
#define C_02880C_Z_ORDER 0xFFFFFCFF
@@ -901,6 +904,10 @@
#define S_038000_TILE_MODE(x) (((x) & 0xF) << 3)
#define G_038000_TILE_MODE(x) (((x) >> 3) & 0xF)
#define C_038000_TILE_MODE 0xFFFFFF87
+#define V_038000_ARRAY_LINEAR_GENERAL 0x00000000
+#define V_038000_ARRAY_LINEAR_ALIGNED 0x00000001
+#define V_038000_ARRAY_1D_TILED_THIN1 0x00000002
+#define V_038000_ARRAY_2D_TILED_THIN1 0x00000004
#define S_038000_TILE_TYPE(x) (((x) & 0x1) << 7)
#define G_038000_TILE_TYPE(x) (((x) >> 7) & 0x1)
#define C_038000_TILE_TYPE 0xFFFFFF7F
@@ -1025,45 +1032,7 @@
#define S_038008_DATA_FORMAT(x) (((x) & 0x3F) << 20)
#define G_038008_DATA_FORMAT(x) (((x) >> 20) & 0x3F)
#define C_038008_DATA_FORMAT 0xFC0FFFFF
-#define V_038008_FMT_INVALID 0x00000000
-#define V_038008_FMT_8 0x00000001
-#define V_038008_FMT_4_4 0x00000002
-#define V_038008_FMT_3_3_2 0x00000003
-#define V_038008_FMT_16 0x00000005
-#define V_038008_FMT_16_FLOAT 0x00000006
-#define V_038008_FMT_8_8 0x00000007
-#define V_038008_FMT_5_6_5 0x00000008
-#define V_038008_FMT_6_5_5 0x00000009
-#define V_038008_FMT_1_5_5_5 0x0000000A
-#define V_038008_FMT_4_4_4_4 0x0000000B
-#define V_038008_FMT_5_5_5_1 0x0000000C
-#define V_038008_FMT_32 0x0000000D
-#define V_038008_FMT_32_FLOAT 0x0000000E
-#define V_038008_FMT_16_16 0x0000000F
-#define V_038008_FMT_16_16_FLOAT 0x00000010
-#define V_038008_FMT_8_24 0x00000011
-#define V_038008_FMT_8_24_FLOAT 0x00000012
-#define V_038008_FMT_24_8 0x00000013
-#define V_038008_FMT_24_8_FLOAT 0x00000014
-#define V_038008_FMT_10_11_11 0x00000015
-#define V_038008_FMT_10_11_11_FLOAT 0x00000016
-#define V_038008_FMT_11_11_10 0x00000017
-#define V_038008_FMT_11_11_10_FLOAT 0x00000018
-#define V_038008_FMT_2_10_10_10 0x00000019
-#define V_038008_FMT_8_8_8_8 0x0000001A
-#define V_038008_FMT_10_10_10_2 0x0000001B
-#define V_038008_FMT_X24_8_32_FLOAT 0x0000001C
-#define V_038008_FMT_32_32 0x0000001D
-#define V_038008_FMT_32_32_FLOAT 0x0000001E
-#define V_038008_FMT_16_16_16_16 0x0000001F
-#define V_038008_FMT_16_16_16_16_FLOAT 0x00000020
-#define V_038008_FMT_32_32_32_32 0x00000022
-#define V_038008_FMT_32_32_32_32_FLOAT 0x00000023
-#define V_038008_FMT_8_8_8 0x0000002c
-#define V_038008_FMT_16_16_16 0x0000002d
-#define V_038008_FMT_16_16_16_FLOAT 0x0000002e
-#define V_038008_FMT_32_32_32 0x0000002f
-#define V_038008_FMT_32_32_32_FLOAT 0x00000030
+
#define S_038008_NUM_FORMAT_ALL(x) (((x) & 0x3) << 26)
#define G_038008_NUM_FORMAT_ALL(x) (((x) >> 26) & 0x3)
#define C_038008_NUM_FORMAT_ALL 0xF3FFFFFF
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index 67e2c8f8bc4..346e1b402ba 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -158,9 +158,17 @@ exec_run( const struct sp_fragment_shader *base,
case TGSI_SEMANTIC_POSITION:
{
uint j;
- for (j = 0; j < 4; j++) {
+
+ for (j = 0; j < 4; j++)
quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j];
- }
+ }
+ break;
+ case TGSI_SEMANTIC_STENCIL:
+ {
+ uint j;
+
+ for (j = 0; j < 4; j++)
+ quad->output.stencil[j] = (unsigned)machine->Outputs[i].xyzw[1].f[j];
}
break;
}
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index daa158df7c4..5b18cd035e3 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -169,9 +169,15 @@ fs_sse_run( const struct sp_fragment_shader *base,
case TGSI_SEMANTIC_POSITION:
{
uint j;
- for (j = 0; j < 4; j++) {
- quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j];
- }
+ for (j = 0; j < 4; j++)
+ quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j];
+ }
+ break;
+ case TGSI_SEMANTIC_STENCIL:
+ {
+ uint j;
+ for (j = 0; j < 4; j++)
+ quad->output.stencil[j] = machine->Outputs[i].xyzw[1].f[j];
}
break;
}
diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h
index a3236bd1169..e745aa80619 100644
--- a/src/gallium/drivers/softpipe/sp_quad.h
+++ b/src/gallium/drivers/softpipe/sp_quad.h
@@ -85,6 +85,7 @@ struct quad_header_output
/** colors in SOA format (rrrr, gggg, bbbb, aaaa) */
float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE];
float depth[QUAD_SIZE];
+ uint8_t stencil[QUAD_SIZE];
};
diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
index e9b92626176..c8f5f89568a 100644
--- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c
+++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
@@ -47,6 +47,8 @@ struct depth_data {
unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */
unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */
ubyte stencilVals[QUAD_SIZE];
+ boolean use_shader_stencil_refs;
+ ubyte shader_stencil_refs[QUAD_SIZE];
struct softpipe_cached_tile *tile;
};
@@ -186,6 +188,33 @@ convert_quad_depth( struct depth_data *data,
}
+/**
+ * Compute the depth_data::shader_stencil_refs[] values from the float fragment stencil values.
+ */
+static void
+convert_quad_stencil( struct depth_data *data,
+ const struct quad_header *quad )
+{
+ unsigned j;
+
+ data->use_shader_stencil_refs = TRUE;
+ /* Copy quads stencil values
+ */
+ switch (data->format) {
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ {
+ for (j = 0; j < QUAD_SIZE; j++) {
+ data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j]));
+ }
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
/**
* Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer.
@@ -272,8 +301,14 @@ do_stencil_test(struct depth_data *data,
{
unsigned passMask = 0x0;
unsigned j;
+ ubyte refs[QUAD_SIZE];
- ref &= valMask;
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (data->use_shader_stencil_refs)
+ refs[j] = data->shader_stencil_refs[j] & valMask;
+ else
+ refs[j] = ref & valMask;
+ }
switch (func) {
case PIPE_FUNC_NEVER:
@@ -281,42 +316,42 @@ do_stencil_test(struct depth_data *data,
break;
case PIPE_FUNC_LESS:
for (j = 0; j < QUAD_SIZE; j++) {
- if (ref < (data->stencilVals[j] & valMask)) {
+ if (refs[j] < (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_EQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
- if (ref == (data->stencilVals[j] & valMask)) {
+ if (refs[j] == (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_LEQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
- if (ref <= (data->stencilVals[j] & valMask)) {
+ if (refs[j] <= (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_GREATER:
for (j = 0; j < QUAD_SIZE; j++) {
- if (ref > (data->stencilVals[j] & valMask)) {
+ if (refs[j] > (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_NOTEQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
- if (ref != (data->stencilVals[j] & valMask)) {
+ if (refs[j] != (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_GEQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
- if (ref >= (data->stencilVals[j] & valMask)) {
+ if (refs[j] >= (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
@@ -348,9 +383,14 @@ apply_stencil_op(struct depth_data *data,
{
unsigned j;
ubyte newstencil[QUAD_SIZE];
+ ubyte refs[QUAD_SIZE];
for (j = 0; j < QUAD_SIZE; j++) {
newstencil[j] = data->stencilVals[j];
+ if (data->use_shader_stencil_refs)
+ refs[j] = data->shader_stencil_refs[j];
+ else
+ refs[j] = ref;
}
switch (op) {
@@ -367,7 +407,7 @@ apply_stencil_op(struct depth_data *data,
case PIPE_STENCIL_OP_REPLACE:
for (j = 0; j < QUAD_SIZE; j++) {
if (mask & (1 << j)) {
- newstencil[j] = ref;
+ newstencil[j] = refs[j];
}
}
break;
@@ -688,8 +728,10 @@ depth_test_quads_fallback(struct quad_stage *qs,
unsigned i, pass = 0;
const struct sp_fragment_shader *fs = qs->softpipe->fs;
boolean interp_depth = !fs->info.writes_z;
+ boolean shader_stencil_ref = fs->info.writes_stencil;
struct depth_data data;
+ data.use_shader_stencil_refs = FALSE;
if (qs->softpipe->depth_stencil->alpha.enabled) {
nr = alpha_test_quads(qs, quads, nr);
@@ -716,6 +758,9 @@ depth_test_quads_fallback(struct quad_stage *qs,
}
if (qs->softpipe->depth_stencil->stencil[0].enabled) {
+ if (shader_stencil_ref)
+ convert_quad_stencil(&data, quads[i]);
+
depth_stencil_test_quad(qs, &data, quads[i]);
write_depth_stencil_values(&data, quads[i]);
}
diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c
index 43b8e88e334..2cfd02a22c6 100644
--- a/src/gallium/drivers/softpipe/sp_quad_pipe.c
+++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c
@@ -47,7 +47,8 @@ sp_build_quad_pipeline(struct softpipe_context *sp)
sp->framebuffer.zsbuf &&
!sp->depth_stencil->alpha.enabled &&
!sp->fs->info.uses_kill &&
- !sp->fs->info.writes_z;
+ !sp->fs->info.writes_z &&
+ !sp->fs->info.writes_stencil;
sp->quad.first = sp->quad.blend;
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 25a0a622179..edc2a6dacf2 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -113,8 +113,12 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
return 1;
case PIPE_CAP_STREAM_OUTPUT:
return 1;
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ return 1;
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
return 0;
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ return 1;
default:
return 0;
}
@@ -206,13 +210,6 @@ softpipe_is_format_supported( struct pipe_screen *screen,
if (format_desc->block.width != 1 ||
format_desc->block.height != 1)
return FALSE;
-
- /*
- * TODO: Unfortunately we cannot render into anything more than 32 bits
- * because we encode color clear values into a 32bit word.
- */
- if (format_desc->block.bits > 32)
- return FALSE;
}
if (bind & PIPE_BIND_DEPTH_STENCIL) {
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 088e48f81fe..2eac4c7a82b 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -44,6 +44,9 @@
#include "sp_tex_tile_cache.h"
+/** Set to one to help debug texture sampling */
+#define DEBUG_TEX 0
+
/*
* Return fractional part of 'f'. Used for computing interpolation weights.
@@ -774,6 +777,18 @@ pot_level_size(unsigned base_pot, unsigned level)
}
+static void
+print_sample(const char *function, float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ debug_printf("%s %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
+ function,
+ rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
+ rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
+ rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
+ rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
+}
+
+
/* Some image-filter fastpaths:
*/
static INLINE void
@@ -832,6 +847,10 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
tx[2][c], tx[3][c]);
}
}
+
+ if (DEBUG_TEX) {
+ print_sample(__FUNCTION__, rgba);
+ }
}
@@ -872,6 +891,10 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
rgba[c][j] = out[c];
}
}
+
+ if (DEBUG_TEX) {
+ print_sample(__FUNCTION__, rgba);
+ }
}
@@ -921,6 +944,10 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
rgba[c][j] = out[c];
}
}
+
+ if (DEBUG_TEX) {
+ print_sample(__FUNCTION__, rgba);
+ }
}
@@ -957,6 +984,10 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
rgba[c][j] = out[c];
}
}
+
+ if (DEBUG_TEX) {
+ print_sample(__FUNCTION__, rgba);
+ }
}
@@ -997,6 +1028,10 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
rgba[c][j] = out[c];
}
}
+
+ if (DEBUG_TEX) {
+ print_sample(__FUNCTION__, rgba);
+ }
}
@@ -1045,6 +1080,10 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
rgba[c][j] = out[c];
}
}
+
+ if (DEBUG_TEX) {
+ print_sample(__FUNCTION__, rgba);
+ }
}
@@ -1357,6 +1396,10 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
}
}
}
+
+ if (DEBUG_TEX) {
+ print_sample(__FUNCTION__, rgba);
+ }
}
@@ -1402,13 +1445,9 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
}
-#if 0
- printf("RGBA %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
- rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
- rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
- rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
- rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
-#endif
+ if (DEBUG_TEX) {
+ print_sample(__FUNCTION__, rgba);
+ }
}
@@ -1510,6 +1549,10 @@ mip_filter_linear_2d_linear_repeat_POT(
}
}
}
+
+ if (DEBUG_TEX) {
+ print_sample(__FUNCTION__, rgba);
+ }
}
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h
index 031c7c1ea5c..4151a47c323 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.h
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.h
@@ -86,7 +86,7 @@ struct softpipe_tile_cache
struct softpipe_cached_tile *entries[NUM_ENTRIES];
uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32];
float clear_color[4]; /**< for color bufs */
- uint clear_val; /**< for z+stencil, or packed color clear value */
+ uint clear_val; /**< for z+stencil */
boolean depth_stencil; /**< Is the surface a depth/stencil format? */
struct softpipe_cached_tile *tile; /**< scratch tile for clears */