summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
authorAbdiel Janulgue <[email protected]>2015-04-15 13:04:45 +0300
committerAbdiel Janulgue <[email protected]>2015-07-18 16:16:54 +0300
commit190756482e62cb57e2bc8c798181e5f0171726fb (patch)
treed7c8cb2ea1e5740f4c527da792c94cb141a0b290 /src/mesa
parent090529af1828817344e0850ef27eebd1f096eb5f (diff)
i965: Enable hardware-generated binding tables on render path.
This patch implements the binding table enable command, which is also used to allocate a binding table pool into which hardware-generated binding table entries are flushed. Each binding table offset in the binding table pool is unique to each shader stage that is enabled within a batch. Also insert the required brw_tracked_state objects to enable hw-generated binding tables in the normal render path. v2: - Use MOCS in binding table pool alloc for GEN8 - Fix spurious offset when allocating binding table pool entry and start from zero instead. v3: - Include GEN8 fix for spurious offset above. v4: - Fixup wrong packet length in enable/disable hw-binding table for GEN8 (Ville). - Don't invoke HW-binding table disable command when we don't have a resource streamer (Chris). v5: - Reorder the state cache invalidate flush so it happens in-between enabling hw-generated binding tables and the previous sw-binding table GPU state (Chris). v6: - Do the same fix in v5 for gen7_disable_hw_binding_tables(). - Adhere to coding guidelines and make comments more informative. Cc: [email protected] Cc: [email protected] Cc: [email protected] Reviewed-by: Kenneth Graunke <[email protected]> Signed-off-by: Abdiel Janulgue <[email protected]>
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/drivers/dri/i965/brw_binding_tables.c100
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h6
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h6
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c4
-rw-r--r--src/mesa/drivers/dri/i965/gen7_disable.c4
-rw-r--r--src/mesa/drivers/dri/i965/gen8_disable.c4
-rw-r--r--src/mesa/drivers/dri/i965/intel_batchbuffer.c4
8 files changed, 128 insertions, 4 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c
index 98ff0ddcd58..6769f0cd1ab 100644
--- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
+++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
@@ -170,6 +170,106 @@ const struct brw_tracked_state brw_gs_binding_table = {
.emit = brw_gs_upload_binding_table,
};
+/**
+ * Disable hardware binding table support, falling back to the
+ * older software-generated binding table mechanism.
+ *
+ * Does nothing when brw->use_resource_streamer is false. Otherwise emits a
+ * state cache invalidate followed by a 3DSTATE_BINDING_TABLE_POOL_ALLOC
+ * packet whose payload does not set BRW_HW_BINDING_TABLE_ENABLE and does
+ * not reference any pool buffer.
+ */
+void
+gen7_disable_hw_binding_tables(struct brw_context *brw)
+{
+ if (!brw->use_resource_streamer)
+ return;
+ /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
+ * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
+ *
+ * "When switching between HW and SW binding table generation, SW must
+ * issue a state cache invalidate."
+ *
+ * The invalidate is emitted ahead of the pool-alloc packet below.
+ */
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+
+ int pkt_len = brw->gen >= 8 ? 4 : 3; /* total dwords; Gen8+ uses one more */
+
+ BEGIN_BATCH(pkt_len);
+ OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
+ if (brw->gen >= 8) {
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ } else {
+ OUT_BATCH(HSW_BT_POOL_ALLOC_MUST_BE_ONE);
+ OUT_BATCH(0);
+ }
+ ADVANCE_BATCH();
+}
+
+/**
+ * Enable hardware binding tables and set up the binding table pool.
+ *
+ * Does nothing when brw->use_resource_streamer is false. On first use the
+ * pool buffer object is allocated and cached in brw->hw_bt_pool.bo; every
+ * call then emits a state cache invalidate followed by a
+ * 3DSTATE_BINDING_TABLE_POOL_ALLOC packet that references that buffer with
+ * BRW_HW_BINDING_TABLE_ENABLE set.
+ */
+void
+gen7_enable_hw_binding_tables(struct brw_context *brw)
+{
+ if (!brw->use_resource_streamer)
+ return;
+
+ if (!brw->hw_bt_pool.bo) {
+ /* We use a single re-usable buffer object for the lifetime of the
+ * context and size it to maximum allowed binding tables that can be
+ * programmed per batch:
+ *
+ * From the Haswell PRM, Volume 7: 3D Media GPGPU,
+ * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
+ * "A maximum of 16,383 Binding tables are allowed in any batch buffer"
+ *
+ * NOTE(review): the factor of 4 in max_size is not explained here;
+ * presumably a per-table size in bytes -- confirm against the PRM.
+ * NOTE(review): the drm_intel_bo_alloc() result is used below without
+ * a NULL check -- confirm allocation failure is handled elsewhere.
+ */
+ static const int max_size = 16383 * 4;
+ brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
+ max_size, 64);
+ brw->hw_bt_pool.next_offset = 0;
+ }
+
+ /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
+ * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
+ *
+ * "When switching between HW and SW binding table generation, SW must
+ * issue a state cache invalidate."
+ *
+ * The invalidate is emitted ahead of the pool-alloc packet below.
+ */
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+
+ int pkt_len = brw->gen >= 8 ? 4 : 3; /* total dwords; Gen8+ uses one more */
+ uint32_t dw1 = BRW_HW_BINDING_TABLE_ENABLE; /* passed as the final argument
+ * of the first relocation below */
+ if (brw->is_haswell) {
+ dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_POOL_MOCS) |
+ HSW_BT_POOL_ALLOC_MUST_BE_ONE;
+ } else if (brw->gen >= 8) {
+ dw1 |= BDW_MOCS_WB;
+ }
+
+ BEGIN_BATCH(pkt_len);
+ OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
+ if (brw->gen >= 8) {
+ OUT_RELOC64(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
+ OUT_BATCH(brw->hw_bt_pool.bo->size); /* Gen8+: size as a plain dword */
+ } else {
+ OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
+ OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
+ brw->hw_bt_pool.bo->size); /* presumably the pool's upper bound
+ * address -- TODO confirm */
+ }
+ ADVANCE_BATCH();
+}
+
+void
+gen7_reset_hw_bt_pool_offsets(struct brw_context *brw)
+{
+ brw->hw_bt_pool.next_offset = 0; /* rewind the pool offset to zero; the
+ * buffer object itself is left untouched */
+}
+
+const struct brw_tracked_state gen7_hw_binding_tables = {
+ .dirty = {
+ .mesa = 0, /* no Mesa-level dirty bits selected */
+ .brw = BRW_NEW_BATCH, /* presumably re-run whenever a new batch
+ * starts -- confirm against the state
+ * upload code */
+ },
+ .emit = gen7_enable_hw_binding_tables /* emits the HW binding table
+ * enable packet */
+};
+
/** @} */
/**
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 05cb53b3711..efcd91aad84 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -941,6 +941,10 @@ intelDestroyContext(__DRIcontext * driContextPriv)
if (brw->wm.base.scratch_bo)
drm_intel_bo_unreference(brw->wm.base.scratch_bo);
+ gen7_reset_hw_bt_pool_offsets(brw);
+ drm_intel_bo_unreference(brw->hw_bt_pool.bo);
+ brw->hw_bt_pool.bo = NULL;
+
drm_intel_gem_context_destroy(brw->hw_ctx);
if (ctx->swrast_context) {
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index a9f1f61b268..8bbeb34075c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1398,6 +1398,12 @@ struct brw_context
struct brw_cs_prog_data *prog_data;
} cs;
+ /* RS hardware binding table */
+ struct {
+ drm_intel_bo *bo;
+ uint32_t next_offset;
+ } hw_bt_pool;
+
struct {
uint32_t state_offset;
uint32_t blend_state_offset;
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 987672f8815..f8ef98f2db9 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -132,6 +132,7 @@ extern const struct brw_tracked_state gen7_sol_state;
extern const struct brw_tracked_state gen7_urb;
extern const struct brw_tracked_state gen7_vs_state;
extern const struct brw_tracked_state gen7_wm_state;
+extern const struct brw_tracked_state gen7_hw_binding_tables;
extern const struct brw_tracked_state haswell_cut_index;
extern const struct brw_tracked_state gen8_blend_state;
extern const struct brw_tracked_state gen8_disable_stages;
@@ -372,6 +373,11 @@ gen7_upload_constant_state(struct brw_context *brw,
const struct brw_stage_state *stage_state,
bool active, unsigned opcode);
+void gen7_rs_control(struct brw_context *brw, int enable);
+void gen7_enable_hw_binding_tables(struct brw_context *brw);
+void gen7_disable_hw_binding_tables(struct brw_context *brw);
+void gen7_reset_hw_bt_pool_offsets(struct brw_context *brw);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 7662c3b580c..6096b4946a0 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -192,6 +192,8 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
&gen6_color_calc_state, /* must do before cc unit */
&gen6_depth_stencil_state, /* must do before cc unit */
+ &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
+
&gen6_vs_push_constants, /* Before vs_state */
&gen6_gs_push_constants, /* Before gs_state */
&gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
@@ -268,6 +270,8 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
&gen8_blend_state,
&gen6_color_calc_state,
+ &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
+
&gen6_vs_push_constants, /* Before vs_state */
&gen6_gs_push_constants, /* Before gs_state */
&gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
diff --git a/src/mesa/drivers/dri/i965/gen7_disable.c b/src/mesa/drivers/dri/i965/gen7_disable.c
index 2c43cd77f07..bb509696d72 100644
--- a/src/mesa/drivers/dri/i965/gen7_disable.c
+++ b/src/mesa/drivers/dri/i965/gen7_disable.c
@@ -52,7 +52,7 @@ disable_stages(struct brw_context *brw)
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2));
- OUT_BATCH(0);
+ OUT_BATCH(brw->hw_bt_pool.next_offset);
ADVANCE_BATCH();
/* Disable the TE */
@@ -85,7 +85,7 @@ disable_stages(struct brw_context *brw)
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2));
- OUT_BATCH(0);
+ OUT_BATCH(brw->hw_bt_pool.next_offset);
ADVANCE_BATCH();
}
diff --git a/src/mesa/drivers/dri/i965/gen8_disable.c b/src/mesa/drivers/dri/i965/gen8_disable.c
index da0d4a5fe7a..32508e377c9 100644
--- a/src/mesa/drivers/dri/i965/gen8_disable.c
+++ b/src/mesa/drivers/dri/i965/gen8_disable.c
@@ -66,7 +66,7 @@ disable_stages(struct brw_context *brw)
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2));
- OUT_BATCH(0);
+ OUT_BATCH(brw->hw_bt_pool.next_offset);
ADVANCE_BATCH();
/* Disable the TE */
@@ -101,7 +101,7 @@ disable_stages(struct brw_context *brw)
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2));
- OUT_BATCH(0);
+ OUT_BATCH(brw->hw_bt_pool.next_offset);
ADVANCE_BATCH();
BEGIN_BATCH(2);
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index d40e67133e2..85f20a05729 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -33,6 +33,7 @@
#include "intel_fbo.h"
#include "brw_context.h"
#include "brw_defines.h"
+#include "brw_state.h"
#include <xf86drm.h>
#include <i915_drm.h>
@@ -391,6 +392,9 @@ _intel_batchbuffer_flush(struct brw_context *brw,
drm_intel_bo_wait_rendering(brw->batch.bo);
}
+ if (brw->use_resource_streamer)
+ gen7_reset_hw_bt_pool_offsets(brw);
+
/* Start a new batch buffer. */
brw_new_batch(brw);