summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2011-04-14 19:36:28 -0700
committer: Eric Anholt <[email protected]> 2011-04-17 10:26:09 -0700
commit: 59c6b775a6aacfe03c84dae62c2fd45d4af9d70b (patch)
tree: 5391952408a854713bd069653e071061affd6aca /src
parent: 14eedf3028422e98ac05713a3e3c37e6c9b4cf37 (diff)
i965/fs: Add gen6 register spilling support.
Most of this is code movement to get the scratch space allocated in a shared location. Other than that, the only real changes are that the old oword block messages now operate on oword-aligned areas (with new messages for unaligned access, which we don't do), and that the caching control is in the SFID part of the descriptor instead of message control. Fixes glsl-fs-convolution-1.
Diffstat (limited to 'src')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu_emit.c | 45
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm.c | 15
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_state.c | 20
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_wm_state.c | 7
5 files changed, 58 insertions, 31 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 57313a59c08..2d654e71432 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -496,6 +496,8 @@ static void brw_set_dp_write_message( struct brw_context *brw,
insn->bits3.dp_render_cache.response_length = response_length;
insn->bits3.dp_render_cache.msg_length = msg_length;
insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
+
+ /* We always use the render cache for write messages */
insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
/* XXX really need below? */
insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
@@ -539,6 +541,13 @@ brw_set_dp_read_message(struct brw_context *brw,
brw_set_src1(insn, brw_imm_d(0));
if (intel->gen >= 6) {
+ uint32_t target_function;
+
+ if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
+ target_function = BRW_MESSAGE_TARGET_DATAPORT_READ; /* data cache */
+ else
+ target_function = BRW_MESSAGE_TARGET_DATAPORT_WRITE; /* render cache */
+
insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
insn->bits3.dp_render_cache.msg_control = msg_control;
insn->bits3.dp_render_cache.pixel_scoreboard_clear = 0;
@@ -548,9 +557,9 @@ brw_set_dp_read_message(struct brw_context *brw,
insn->bits3.dp_render_cache.response_length = response_length;
insn->bits3.dp_render_cache.msg_length = msg_length;
insn->bits3.dp_render_cache.end_of_thread = 0;
- insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_READ;
+ insn->header.destreg__conditionalmod = target_function;
/* XXX really need below? */
- insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
+ insn->bits2.send_gen5.sfid = target_function;
insn->bits2.send_gen5.end_of_thread = 0;
} else if (intel->gen == 5) {
insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
@@ -1486,9 +1495,12 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
GLuint offset)
{
struct intel_context *intel = &p->brw->intel;
- uint32_t msg_control;
+ uint32_t msg_control, msg_type;
int mlen;
+ if (intel->gen >= 6)
+ offset /= 16;
+
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
if (num_regs == 1) {
@@ -1554,13 +1566,22 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
}
brw_set_dest(p, insn, dest);
- brw_set_src0(insn, brw_null_reg());
+ if (intel->gen >= 6) {
+ brw_set_src0(insn, mrf);
+ } else {
+ brw_set_src0(insn, brw_null_reg());
+ }
+
+ if (intel->gen >= 6)
+ msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
+ else
+ msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
brw_set_dp_write_message(p->brw,
insn,
255, /* binding table index (255=stateless) */
msg_control,
- BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
+ msg_type,
mlen,
GL_TRUE, /* header_present */
0, /* pixel scoreboard */
@@ -1585,9 +1606,13 @@ brw_oword_block_read_scratch(struct brw_compile *p,
int num_regs,
GLuint offset)
{
+ struct intel_context *intel = &p->brw->intel;
uint32_t msg_control;
int rlen;
+ if (intel->gen >= 6)
+ offset /= 16;
+
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
dest = retype(dest, BRW_REGISTER_TYPE_UW);
@@ -1624,14 +1649,18 @@ brw_oword_block_read_scratch(struct brw_compile *p,
insn->header.destreg__conditionalmod = mrf.nr;
brw_set_dest(p, insn, dest); /* UW? */
- brw_set_src0(insn, brw_null_reg());
+ if (intel->gen >= 6) {
+ brw_set_src0(insn, mrf);
+ } else {
+ brw_set_src0(insn, brw_null_reg());
+ }
brw_set_dp_read_message(p->brw,
insn,
255, /* binding table index (255=stateless) */
msg_control,
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
- 1, /* target cache (render/scratch) */
+ BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
1, /* msg_length */
rlen);
}
@@ -1839,7 +1868,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
bind_table_index,
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
msg_type,
- 0, /* source cache = data cache */
+ BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2, /* msg_length */
1); /* response_length */
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 479a91436a7..67f29ce1816 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -228,8 +228,6 @@ fs_visitor::assign_regs()
if (reg == -1) {
fail("no register to spill\n");
- } else if (intel->gen >= 6) {
- fail("no spilling support on gen6 yet\n");
} else {
spill_reg(reg);
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 2dd28fd1c58..ab731a807a7 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -185,6 +185,7 @@ static void do_wm_prog( struct brw_context *brw,
struct brw_fragment_program *fp,
struct brw_wm_prog_key *key)
{
+ struct intel_context *intel = &brw->intel;
struct brw_wm_compile *c;
const GLuint *program;
GLuint program_size;
@@ -238,12 +239,26 @@ static void do_wm_prog( struct brw_context *brw,
/* Scratch space is used for register spilling */
if (c->last_scratch) {
+ uint32_t total_scratch;
+
/* Per-thread scratch space is power-of-two sized. */
for (c->prog_data.total_scratch = 1024;
c->prog_data.total_scratch <= c->last_scratch;
c->prog_data.total_scratch *= 2) {
/* empty */
}
+ total_scratch = c->prog_data.total_scratch * brw->wm_max_threads;
+
+ if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) {
+ drm_intel_bo_unreference(brw->wm.scratch_bo);
+ brw->wm.scratch_bo = NULL;
+ }
+ if (brw->wm.scratch_bo == NULL) {
+ brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
+ "wm scratch",
+ total_scratch,
+ 4096);
+ }
}
else {
c->prog_data.total_scratch = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 5b5afc4626b..be4b260a5ff 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -278,30 +278,10 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
static void upload_wm_unit( struct brw_context *brw )
{
- struct intel_context *intel = &brw->intel;
struct brw_wm_unit_key key;
drm_intel_bo *reloc_bufs[3];
wm_unit_populate_key(brw, &key);
- /* Allocate the necessary scratch space if we haven't already. Don't
- * bother reducing the allocation later, since we use scratch so
- * rarely.
- */
- if (key.total_scratch) {
- GLuint total = key.total_scratch * brw->wm_max_threads;
-
- if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
- drm_intel_bo_unreference(brw->wm.scratch_bo);
- brw->wm.scratch_bo = NULL;
- }
- if (brw->wm.scratch_bo == NULL) {
- brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
- "wm scratch",
- total,
- 4096);
- }
- }
-
reloc_bufs[0] = brw->wm.prog_bo;
reloc_bufs[1] = brw->wm.scratch_bo;
reloc_bufs[2] = brw->wm.sampler_bo;
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index f4f04750aeb..8215cb15a9c 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -184,7 +184,12 @@ upload_wm_state(struct brw_context *brw)
OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
OUT_BATCH(dw2);
- OUT_BATCH(0); /* scratch space base offset */
+ if (brw->wm.prog_data->total_scratch) {
+ OUT_RELOC(brw->wm.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ ffs(brw->wm.prog_data->total_scratch) - 11);
+ } else {
+ OUT_BATCH(0);
+ }
OUT_BATCH(dw4);
OUT_BATCH(dw5);
OUT_BATCH(dw6);