summary refs log tree commit diff stats
path: root/src/mesa/drivers/dri
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r-- src/mesa/drivers/dri/i965/Makefile.sources 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.c 14
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h 14
-rw-r--r-- src/mesa/drivers/dri/i965/brw_draw.c 38
-rw-r--r-- src/mesa/drivers/dri/i965/hsw_sol.c 263
5 files changed, 318 insertions, 12 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 8c60954ee64..d35775ea3ad 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -228,6 +228,7 @@ i965_FILES = \
gen8_vs_state.c \
gen8_wm_depth_stencil.c \
hsw_queryobj.c \
+ hsw_sol.c \
intel_batchbuffer.c \
intel_batchbuffer.h \
intel_blit.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 1380d4138cf..26514a0c7b6 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -372,13 +372,18 @@ brw_init_driver_functions(struct brw_context *brw,
functions->NewTransformFeedback = brw_new_transform_feedback;
functions->DeleteTransformFeedback = brw_delete_transform_feedback;
- functions->GetTransformFeedbackVertexCount =
- brw_get_transform_feedback_vertex_count;
- if (brw->gen >= 7) {
+ if (brw->intelScreen->has_mi_math_and_lrr) {
+ functions->BeginTransformFeedback = hsw_begin_transform_feedback;
+ functions->EndTransformFeedback = hsw_end_transform_feedback;
+ functions->PauseTransformFeedback = hsw_pause_transform_feedback;
+ functions->ResumeTransformFeedback = hsw_resume_transform_feedback;
+ } else if (brw->gen >= 7) {
functions->BeginTransformFeedback = gen7_begin_transform_feedback;
functions->EndTransformFeedback = gen7_end_transform_feedback;
functions->PauseTransformFeedback = gen7_pause_transform_feedback;
functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
+ functions->GetTransformFeedbackVertexCount =
+ brw_get_transform_feedback_vertex_count;
} else {
functions->BeginTransformFeedback = brw_begin_transform_feedback;
functions->EndTransformFeedback = brw_end_transform_feedback;
@@ -494,7 +499,8 @@ brw_initialize_context_constants(struct brw_context *brw)
ctx->Const.MaxTransformFeedbackSeparateComponents =
BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
- ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;
+ ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
+ !brw->intelScreen->has_mi_math_and_lrr;
int max_samples;
const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index b620f14b39f..035cbe9d325 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1656,6 +1656,20 @@ void
gen7_resume_transform_feedback(struct gl_context *ctx,
struct gl_transform_feedback_object *obj);
+/* hsw_sol.c */
+void
+hsw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
+ struct gl_transform_feedback_object *obj);
+void
+hsw_end_transform_feedback(struct gl_context *ctx,
+ struct gl_transform_feedback_object *obj);
+void
+hsw_pause_transform_feedback(struct gl_context *ctx,
+ struct gl_transform_feedback_object *obj);
+void
+hsw_resume_transform_feedback(struct gl_context *ctx,
+ struct gl_transform_feedback_object *obj);
+
/* brw_blorp_blit.cpp */
GLbitfield
brw_blorp_framebuffer(struct brw_context *brw,
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index afa8a4e9eae..9d034cfdb33 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -153,7 +153,9 @@ trim(GLenum prim, GLuint length)
static void
brw_emit_prim(struct brw_context *brw,
const struct _mesa_prim *prim,
- uint32_t hw_prim)
+ uint32_t hw_prim,
+ struct brw_transform_feedback_object *xfb_obj,
+ unsigned stream)
{
int verts_per_instance;
int vertex_access_type;
@@ -185,7 +187,7 @@ brw_emit_prim(struct brw_context *brw,
verts_per_instance = prim->count;
/* If nothing to emit, just return. */
- if (verts_per_instance == 0 && !prim->is_indirect)
+ if (verts_per_instance == 0 && !prim->is_indirect && !xfb_obj)
return;
/* If we're set to always flush, do it before and after the primitive emit.
@@ -197,7 +199,25 @@ brw_emit_prim(struct brw_context *brw,
brw_emit_mi_flush(brw);
/* If indirect, emit a bunch of loads from the indirect BO. */
- if (prim->is_indirect) {
+ if (xfb_obj) {
+ indirect_flag = GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE;
+
+ brw_load_register_mem(brw, GEN7_3DPRIM_VERTEX_COUNT,
+ xfb_obj->prim_count_bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ stream * sizeof(uint32_t));
+ BEGIN_BATCH(9);
+ OUT_BATCH(MI_LOAD_REGISTER_IMM | (9 - 2));
+ OUT_BATCH(GEN7_3DPRIM_INSTANCE_COUNT);
+ OUT_BATCH(prim->num_instances);
+ OUT_BATCH(GEN7_3DPRIM_START_VERTEX);
+ OUT_BATCH(0);
+ OUT_BATCH(GEN7_3DPRIM_BASE_VERTEX);
+ OUT_BATCH(0);
+ OUT_BATCH(GEN7_3DPRIM_START_INSTANCE);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ } else if (prim->is_indirect) {
struct gl_buffer_object *indirect_buffer = brw->ctx.DrawIndirectBuffer;
drm_intel_bo *bo = intel_bufferobj_buffer(brw,
intel_buffer_object(indirect_buffer),
@@ -382,6 +402,8 @@ brw_try_draw_prims(struct gl_context *ctx,
const struct _mesa_index_buffer *ib,
GLuint min_index,
GLuint max_index,
+ struct brw_transform_feedback_object *xfb_obj,
+ unsigned stream,
struct gl_buffer_object *indirect)
{
struct brw_context *brw = brw_context(ctx);
@@ -531,7 +553,7 @@ retry:
brw_upload_render_state(brw);
}
- brw_emit_prim(brw, &prims[i], brw->primitive);
+ brw_emit_prim(brw, &prims[i], brw->primitive, xfb_obj, stream);
brw->no_batch_wrap = false;
@@ -573,14 +595,14 @@ brw_draw_prims(struct gl_context *ctx,
GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index,
- struct gl_transform_feedback_object *unused_tfb_object,
+ struct gl_transform_feedback_object *gl_xfb_obj,
unsigned stream,
struct gl_buffer_object *indirect)
{
struct brw_context *brw = brw_context(ctx);
const struct gl_client_array **arrays = ctx->Array._DrawArrays;
-
- assert(unused_tfb_object == NULL);
+ struct brw_transform_feedback_object *xfb_obj =
+ (struct brw_transform_feedback_object *) gl_xfb_obj;
if (!brw_check_conditional_render(brw))
return;
@@ -619,7 +641,7 @@ brw_draw_prims(struct gl_context *ctx,
* to it.
*/
brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, min_index, max_index,
- indirect);
+ xfb_obj, stream, indirect);
}
void
diff --git a/src/mesa/drivers/dri/i965/hsw_sol.c b/src/mesa/drivers/dri/i965/hsw_sol.c
new file mode 100644
index 00000000000..ef8fcf48346
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/hsw_sol.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * An implementation of the transform feedback driver hooks for Haswell
+ * and later hardware. This uses MI_MATH to compute the number of vertices
+ * written (for use by DrawTransformFeedback()) without any CPU<->GPU
+ * synchronization which could stall.
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+#include "intel_buffer_objects.h"
+#include "main/transformfeedback.h"
+
+/**
+ * Layout of obj->prim_count_bo, one entry per stream in each region:
+ *
+ * [4x 32-bit values]: Final Number of Vertices Written (at offset 0)
+ * [4x 32-bit values]: Tally of Primitives Written So Far (at TALLY_OFFSET)
+ * [4x 64-bit values]: Starting SO_NUM_PRIMS_WRITTEN Snapshots (at START_OFFSET)
+ *
+ * The first set of values is used by DrawTransformFeedback(), which
+ * copies one of them into the 3DPRIM_VERTEX_COUNT register and performs
+ * an indirect draw. The other values are just temporary storage.
+ */
+
+#define TALLY_OFFSET (BRW_MAX_XFB_STREAMS * sizeof(uint32_t)) /* skips the finals */
+#define START_OFFSET (TALLY_OFFSET * 2) /* skips the finals and the tallies */
+
+/**
+ * Snapshot the SO_NUM_PRIMS_WRITTEN counter of every stream (uint64_t each)
+ * into prim_count_bo, packed consecutively starting at START_OFFSET.
+ */
+static void
+save_prim_start_values(struct brw_context *brw,
+ struct brw_transform_feedback_object *obj)
+{
+ /* Flush any drawing so that the counters have the right values. */
+ brw_emit_mi_flush(brw);
+
+ /* Emit MI_STORE_REGISTER_MEM commands to write the values, by stream. */
+ for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
+ brw_store_register_mem64(brw, obj->prim_count_bo,
+ GEN7_SO_NUM_PRIMS_WRITTEN(i),
+ START_OFFSET + i * sizeof(uint64_t));
+ }
+}
+
+/**
+ * For each stream, compute the number of primitives written during our
+ * most recent transform feedback activity (the current SO_NUM_PRIMS_WRITTEN
+ * value minus the stashed "start" value), and add it to our running tally.
+ *
+ * If \p finalize is true, also convert the sum to a vertex count (x2 for
+ * GL_LINES, x3 for GL_TRIANGLES, unchanged otherwise) and store only that,
+ * to the "final" location. The math is emitted as MI_MATH on HSW_CS_GPR 0-2.
+ *
+ * Otherwise, just overwrite the old tally with the new one.
+ */
+static void
+tally_prims_written(struct brw_context *brw,
+ struct brw_transform_feedback_object *obj,
+ bool finalize)
+{
+ /* Flush any drawing so that the counters have the right values. */
+ brw_emit_mi_flush(brw);
+
+ for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
+ /* GPR0 = Tally (32-bit in memory, so zero the dword at GPR0 + 4 first) */
+ brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0);
+ brw_load_register_mem(brw, HSW_CS_GPR(0), obj->prim_count_bo,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ TALLY_OFFSET + i * sizeof(uint32_t));
+ /* GPR1 = Start Snapshot (64-bit, from this stream's START_OFFSET slot) */
+ brw_load_register_mem64(brw, HSW_CS_GPR(1), obj->prim_count_bo,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ START_OFFSET + i * sizeof(uint64_t));
+ /* GPR2 = Ending Snapshot (the stream's counter register as of now) */
+ brw_load_register_reg64(brw, GEN7_SO_NUM_PRIMS_WRITTEN(i), HSW_CS_GPR(2));
+
+ BEGIN_BATCH(9);
+ OUT_BATCH(HSW_MI_MATH | (9 - 2));
+ /* GPR1 = GPR2 (End) - GPR1 (Start) */
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R2));
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
+ OUT_BATCH(MI_MATH_ALU0(SUB));
+ OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU));
+ /* GPR0 = GPR0 (Tally) + GPR1 (Diff) */
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
+ OUT_BATCH(MI_MATH_ALU0(ADD));
+ OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
+ ADVANCE_BATCH();
+
+ if (!finalize) {
+ /* Write back the new tally (low 32 bits of GPR0) for later calls */
+ brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0),
+ TALLY_OFFSET + i * sizeof(uint32_t));
+ } else {
+ /* Convert prims to vertices; any other mode leaves R0 unchanged. */
+ if (obj->primitive_mode == GL_LINES) {
+ /* Double R0 (R0 = R0 + R0) */
+ BEGIN_BATCH(5);
+ OUT_BATCH(HSW_MI_MATH | (5 - 2));
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0));
+ OUT_BATCH(MI_MATH_ALU0(ADD));
+ OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
+ ADVANCE_BATCH();
+ } else if (obj->primitive_mode == GL_TRIANGLES) {
+ /* Triple R0 (R1 = R0 + R0, R0 = R0 + R1) */
+ BEGIN_BATCH(9);
+ OUT_BATCH(HSW_MI_MATH | (9 - 2));
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0));
+ OUT_BATCH(MI_MATH_ALU0(ADD));
+ OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU));
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
+ OUT_BATCH(MI_MATH_ALU0(ADD));
+ OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
+ ADVANCE_BATCH();
+ }
+ /* Store the vertex count to this stream's "final" slot (offset i * 4) */
+ brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0),
+ i * sizeof(uint32_t));
+ }
+ }
+}
+
+/**
+ * BeginTransformFeedback() driver hook: reset offsets/tallies, snapshot counters.
+ */
+void
+hsw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
+ struct gl_transform_feedback_object *obj)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_transform_feedback_object *brw_obj =
+ (struct brw_transform_feedback_object *) obj;
+
+ brw_obj->primitive_mode = mode; /* read back when the tally is finalized */
+
+ /* Reset the SO buffer offsets to 0: Gen8+ only sets a flag here (its
+  * consumer is not in this file); older parts load zeros immediately. */
+ if (brw->gen >= 8) {
+ brw_obj->zero_offsets = true;
+ } else {
+ BEGIN_BATCH(1 + 2 * BRW_MAX_XFB_STREAMS);
+ OUT_BATCH(MI_LOAD_REGISTER_IMM | (1 + 2 * BRW_MAX_XFB_STREAMS - 2));
+ for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
+ OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
+ OUT_BATCH(0);
+ }
+ ADVANCE_BATCH();
+ }
+
+ /* Zero out the initial tallies (two 64-bit stores clear 16 bytes) */
+ brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET, 0ull);
+ brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET + 8, 0ull);
+
+ /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
+ save_prim_start_values(brw, brw_obj);
+}
+
+/**
+ * PauseTransformFeedback() driver hook: save offsets (Haswell), update tally.
+ */
+void
+hsw_pause_transform_feedback(struct gl_context *ctx,
+ struct gl_transform_feedback_object *obj)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_transform_feedback_object *brw_obj =
+ (struct brw_transform_feedback_object *) obj;
+
+ if (brw->is_haswell) {
+ /* Save the SOL buffer offset registers to offset_bo, one dword each. */
+ for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
+ OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
+ OUT_RELOC(brw_obj->offset_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ i * sizeof(uint32_t));
+ ADVANCE_BATCH();
+ }
+ }
+
+ /* Add any primitives written to our tally (not final: stays in prims) */
+ tally_prims_written(brw, brw_obj, false);
+}
+
+/**
+ * ResumeTransformFeedback() driver hook: reload offsets (Haswell), re-snapshot.
+ */
+void
+hsw_resume_transform_feedback(struct gl_context *ctx,
+ struct gl_transform_feedback_object *obj)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_transform_feedback_object *brw_obj =
+ (struct brw_transform_feedback_object *) obj;
+
+ if (brw->is_haswell) {
+ /* Reload the SOL buffer offset registers from their offset_bo slots. */
+ for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
+ OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
+ OUT_RELOC(brw_obj->offset_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ i * sizeof(uint32_t));
+ ADVANCE_BATCH();
+ }
+ }
+
+ /* Re-snapshot SO_NUM_PRIMS_WRITTEN so the next tally starts from here. */
+ save_prim_start_values(brw, brw_obj);
+}
+
+/**
+ * EndTransformFeedback() driver hook: emits the final per-stream vertex counts.
+ */
+void
+hsw_end_transform_feedback(struct gl_context *ctx,
+ struct gl_transform_feedback_object *obj)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_transform_feedback_object *brw_obj =
+ (struct brw_transform_feedback_object *) obj;
+
+ /* Add any primitives written to our tally, convert it from the number
+ * of primitives written to the number of vertices written, and store
+ * it in the "final" location in the buffer which DrawTransformFeedback()
+ * will use as the vertex count (finalize = true selects this path).
+ */
+ tally_prims_written(brw, brw_obj, true);
+}