summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenneth Graunke <[email protected]>2017-05-10 12:41:39 -0700
committerKenneth Graunke <[email protected]>2017-05-11 16:52:59 -0700
commitf790d6e0b48ca35291f50aeb7462130723d62870 (patch)
tree34cacdd5f277144950539ebd756351996784a7f1
parent4933c3d16e8b652c388d04e498b4f9016507fbf3 (diff)
i965: Port Gen4-5 VS_STATE to genxml.
It's actually not that much code. Reviewed-by: Rafael Antognolli <[email protected]>
-rw-r--r--src/intel/genxml/gen4.xml2
-rw-r--r--src/intel/genxml/gen45.xml2
-rw-r--r--src/intel/genxml/gen5.xml2
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources1
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_structs.h35
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_state.c192
-rw-r--r--src/mesa/drivers/dri/i965/genX_state_upload.c74
8 files changed, 71 insertions, 238 deletions
diff --git a/src/intel/genxml/gen4.xml b/src/intel/genxml/gen4.xml
index d3a2f9264e1..4748e0b0d7a 100644
--- a/src/intel/genxml/gen4.xml
+++ b/src/intel/genxml/gen4.xml
@@ -753,7 +753,7 @@
<field name="Sampler State Offset" start="165" end="191" type="address"/>
<field name="Sampler Count" start="160" end="162" type="uint"/>
<field name="Vertex Cache Disable" start="193" end="193" type="bool"/>
- <field name="Function Enable" start="192" end="192" type="bool"/>
+ <field name="Enable" start="192" end="192" type="bool"/>
</struct>
<struct name="WM_STATE" length="8">
diff --git a/src/intel/genxml/gen45.xml b/src/intel/genxml/gen45.xml
index 547e2785444..04251e0ada0 100644
--- a/src/intel/genxml/gen45.xml
+++ b/src/intel/genxml/gen45.xml
@@ -704,7 +704,7 @@
<field name="Sampler State Offset" start="165" end="191" type="address"/>
<field name="Sampler Count" start="160" end="162" type="uint"/>
<field name="Vertex Cache Disable" start="193" end="193" type="bool"/>
- <field name="Function Enable" start="192" end="192" type="bool"/>
+ <field name="Enable" start="192" end="192" type="bool"/>
</struct>
<struct name="WM_STATE" length="8">
diff --git a/src/intel/genxml/gen5.xml b/src/intel/genxml/gen5.xml
index 933a434487e..e834bdf5d45 100644
--- a/src/intel/genxml/gen5.xml
+++ b/src/intel/genxml/gen5.xml
@@ -860,7 +860,7 @@
<field name="Sampler State Offset" start="165" end="191" type="address"/>
<field name="Sampler Count" start="160" end="162" type="uint"/>
<field name="Vertex Cache Disable" start="193" end="193" type="bool"/>
- <field name="Function Enable" start="192" end="192" type="bool"/>
+ <field name="Enable" start="192" end="192" type="bool"/>
</struct>
<struct name="WM_STATE" length="11">
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 9e567cbc908..349777cf24a 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -70,7 +70,6 @@ i965_FILES = \
brw_util.h \
brw_vs.c \
brw_vs.h \
- brw_vs_state.c \
brw_vs_surface_state.c \
brw_wm.c \
brw_wm.h \
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 4727e2abce3..a50ec4d565c 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -79,7 +79,6 @@ extern const struct brw_tracked_state brw_tes_image_surfaces;
extern const struct brw_tracked_state brw_gs_ubo_surfaces;
extern const struct brw_tracked_state brw_gs_abo_surfaces;
extern const struct brw_tracked_state brw_gs_image_surfaces;
-extern const struct brw_tracked_state brw_vs_unit;
extern const struct brw_tracked_state brw_renderbuffer_surfaces;
extern const struct brw_tracked_state brw_renderbuffer_read_surfaces;
extern const struct brw_tracked_state brw_texture_surfaces;
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index 55338c0e243..f5774138e22 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -483,41 +483,6 @@ struct brw_gs_unit_state
};
-struct brw_vs_unit_state
-{
- struct thread0 thread0;
- struct thread1 thread1;
- struct thread2 thread2;
- struct thread3 thread3;
-
- struct
- {
- unsigned pad0:10;
- unsigned stats_enable:1;
- unsigned nr_urb_entries:7;
- unsigned pad1:1;
- unsigned urb_entry_allocation_size:5;
- unsigned pad2:1;
- unsigned max_threads:6;
- unsigned pad3:1;
- } thread4;
-
- struct
- {
- unsigned sampler_count:3;
- unsigned pad0:2;
- unsigned sampler_state_pointer:27;
- } vs5;
-
- struct
- {
- unsigned vs_enable:1;
- unsigned vert_cache_disable:1;
- unsigned pad0:30;
- } vs6;
-};
-
-
struct brw_wm_unit_state
{
struct thread0 thread0;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
deleted file mode 100644
index fafe305f4f7..00000000000
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <[email protected]>
- */
-
-
-
-#include "intel_batchbuffer.h"
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "main/macros.h"
-
-static void
-brw_upload_vs_unit(struct brw_context *brw)
-{
- const struct gen_device_info *devinfo = &brw->screen->devinfo;
- struct brw_stage_state *stage_state = &brw->vs.base;
- const struct brw_stage_prog_data *prog_data = stage_state->prog_data;
- const struct brw_vue_prog_data *vue_prog_data =
- brw_vue_prog_data(stage_state->prog_data);
-
- struct brw_vs_unit_state *vs;
-
- vs = brw_state_batch(brw, sizeof(*vs), 32, &stage_state->state_offset);
- memset(vs, 0, sizeof(*vs));
-
- /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_VS_PROG_DATA */
- vs->thread0.grf_reg_count = ALIGN(vue_prog_data->total_grf, 16) / 16 - 1;
- vs->thread0.kernel_start_pointer =
- brw_program_reloc(brw,
- stage_state->state_offset +
- offsetof(struct brw_vs_unit_state, thread0),
- stage_state->prog_offset +
- (vs->thread0.grf_reg_count << 1)) >> 6;
-
- if (prog_data->use_alt_mode)
- vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
- else
- vs->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
-
- /* Choosing multiple program flow means that we may get 2-vertex threads,
- * which will have the channel mask for dwords 4-7 enabled in the thread,
- * and those dwords will be written to the second URB handle when we
- * brw_urb_WRITE() results.
- */
- /* Force single program flow on Ironlake. We cannot reliably get
- * all applications working without it. See:
- * https://bugs.freedesktop.org/show_bug.cgi?id=29172
- *
- * The most notable and reliably failing application is the Humus
- * demo "CelShading"
- */
- vs->thread1.single_program_flow = (brw->gen == 5);
-
- vs->thread1.binding_table_entry_count =
- prog_data->binding_table.size_bytes / 4;
-
- if (prog_data->total_scratch != 0) {
- vs->thread2.scratch_space_base_pointer =
- stage_state->scratch_bo->offset64 >> 10; /* reloc */
- vs->thread2.per_thread_scratch_space =
- ffs(stage_state->per_thread_scratch) - 11;
- } else {
- vs->thread2.scratch_space_base_pointer = 0;
- vs->thread2.per_thread_scratch_space = 0;
- }
-
- vs->thread3.urb_entry_read_length = vue_prog_data->urb_read_length;
- vs->thread3.const_urb_entry_read_length = prog_data->curb_read_length;
- vs->thread3.dispatch_grf_start_reg = prog_data->dispatch_grf_start_reg;
- vs->thread3.urb_entry_read_offset = 0;
-
- /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
- vs->thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2;
-
- /* BRW_NEW_URB_FENCE */
- if (brw->gen == 5) {
- switch (brw->urb.nr_vs_entries) {
- case 8:
- case 12:
- case 16:
- case 32:
- case 64:
- case 96:
- case 128:
- case 168:
- case 192:
- case 224:
- case 256:
- vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries >> 2;
- break;
- default:
- unreachable("not reached");
- }
- } else {
- switch (brw->urb.nr_vs_entries) {
- case 8:
- case 12:
- case 16:
- case 32:
- break;
- case 64:
- assert(brw->is_g4x);
- break;
- default:
- unreachable("not reached");
- }
- vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries;
- }
-
- vs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
-
- vs->thread4.max_threads = CLAMP(brw->urb.nr_vs_entries / 2,
- 1, devinfo->max_vs_threads) - 1;
-
- if (brw->gen == 5)
- vs->vs5.sampler_count = 0; /* hardware requirement */
- else {
- vs->vs5.sampler_count = (stage_state->sampler_count + 3) / 4;
- }
-
- /* Vertex program always enabled:
- */
- vs->vs6.vs_enable = 1;
-
- /* Set the sampler state pointer, and its reloc
- */
- if (stage_state->sampler_count) {
- /* BRW_NEW_SAMPLER_STATE_TABLE - reloc */
- vs->vs5.sampler_state_pointer =
- (brw->batch.bo->offset64 + stage_state->sampler_offset) >> 5;
- brw_emit_reloc(&brw->batch,
- stage_state->state_offset +
- offsetof(struct brw_vs_unit_state, vs5),
- brw->batch.bo,
- (stage_state->sampler_offset | vs->vs5.sampler_count),
- I915_GEM_DOMAIN_INSTRUCTION, 0);
- }
-
- /* Emit scratch space relocation */
- if (prog_data->total_scratch != 0) {
- brw_emit_reloc(&brw->batch,
- stage_state->state_offset +
- offsetof(struct brw_vs_unit_state, thread2),
- stage_state->scratch_bo,
- vs->thread2.per_thread_scratch_space,
- I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
- }
-
- brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
-}
-
-const struct brw_tracked_state brw_vs_unit = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_PROGRAM_CACHE |
- BRW_NEW_PUSH_CONSTANT_ALLOCATION |
- BRW_NEW_SAMPLER_STATE_TABLE |
- BRW_NEW_URB_FENCE |
- BRW_NEW_VS_PROG_DATA,
- },
- .emit = brw_upload_vs_unit,
-};
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index fccc66864a7..b949d5b448d 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -133,6 +133,17 @@ instruction_bo(struct brw_bo *bo, uint32_t offset)
}
static inline struct brw_address
+instruction_ro_bo(struct brw_bo *bo, uint32_t offset)
+{
+ return (struct brw_address) {
+ .bo = bo,
+ .offset = offset,
+ .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
+ .write_domain = 0,
+ };
+}
+
+static inline struct brw_address
vertex_bo(struct brw_bo *bo, uint32_t offset)
{
return (struct brw_address) {
@@ -1693,8 +1704,22 @@ static const struct brw_tracked_state genX(wm_state) = {
/* ---------------------------------------------------------------------- */
+#if GEN_GEN == 4
+static inline struct brw_address
+KSP(struct brw_context *brw, uint32_t offset)
+{
+ return instruction_bo(brw->cache.bo, offset);
+}
+#else
+static inline uint32_t
+KSP(struct brw_context *brw, uint32_t offset)
+{
+ return offset;
+}
+#endif
+
#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
- pkt.KernelStartPointer = stage_state->prog_offset; \
+ pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset); \
pkt.SamplerCount = \
DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \
pkt.BindingTableEntryCount = \
@@ -1716,12 +1741,12 @@ static const struct brw_tracked_state genX(wm_state) = {
pkt.StatisticsEnable = true; \
pkt.Enable = true;
-#if GEN_GEN >= 6
static void
genX(upload_vs_state)(struct brw_context *brw)
{
+ UNUSED struct gl_context *ctx = &brw->ctx;
const struct gen_device_info *devinfo = &brw->screen->devinfo;
- const struct brw_stage_state *stage_state = &brw->vs.base;
+ struct brw_stage_state *stage_state = &brw->vs.base;
/* BRW_NEW_VS_PROG_DATA */
const struct brw_vue_prog_data *vue_prog_data =
@@ -1755,11 +1780,44 @@ genX(upload_vs_state)(struct brw_context *brw)
if (GEN_GEN == 7 && devinfo->is_ivybridge)
gen7_emit_vs_workaround_flush(brw);
+#if GEN_GEN >= 6
brw_batch_emit(brw, GENX(3DSTATE_VS), vs) {
+#else
+ ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
+ brw_state_emit(brw, GENX(VS_STATE), 32, &stage_state->state_offset, vs) {
+#endif
INIT_THREAD_DISPATCH_FIELDS(vs, Vertex);
vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
+#if GEN_GEN < 6
+ vs.GRFRegisterCount = DIV_ROUND_UP(vue_prog_data->total_grf, 16) - 1;
+ vs.ConstantURBEntryReadLength = stage_prog_data->curb_read_length;
+ vs.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2;
+
+ vs.NumberofURBEntries = brw->urb.nr_vs_entries >> (GEN_GEN == 5 ? 2 : 0);
+ vs.URBEntryAllocationSize = brw->urb.vsize - 1;
+
+ vs.MaximumNumberofThreads =
+ CLAMP(brw->urb.nr_vs_entries / 2, 1, devinfo->max_vs_threads) - 1;
+
+ vs.StatisticsEnable = false;
+ vs.SamplerStateOffset =
+ instruction_ro_bo(brw->batch.bo, stage_state->sampler_offset);
+#endif
+
+#if GEN_GEN == 5
+ /* Force single program flow on Ironlake. We cannot reliably get
+ * all applications working without it. See:
+ * https://bugs.freedesktop.org/show_bug.cgi?id=29172
+ *
+ * The most notable and reliably failing application is the Humus
+ * demo "CelShading"
+ */
+ vs.SingleProgramFlow = true;
+ vs.SamplerCount = 0; /* hardware requirement */
+#endif
+
#if GEN_GEN >= 8
vs.SIMD8DispatchEnable =
vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8;
@@ -1801,11 +1859,15 @@ static const struct brw_tracked_state genX(vs_state) = {
BRW_NEW_BLORP |
BRW_NEW_CONTEXT |
BRW_NEW_VS_PROG_DATA |
- (GEN_GEN == 6 ? BRW_NEW_VERTEX_PROGRAM : 0),
+ (GEN_GEN == 6 ? BRW_NEW_VERTEX_PROGRAM : 0) |
+ (GEN_GEN <= 5 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
+ BRW_NEW_PROGRAM_CACHE |
+ BRW_NEW_SAMPLER_STATE_TABLE |
+ BRW_NEW_URB_FENCE
+ : 0),
},
.emit = genX(upload_vs_state),
};
-#endif
/* ---------------------------------------------------------------------- */
@@ -3884,7 +3946,7 @@ genX(init_atoms)(struct brw_context *brw)
&brw_wm_unit,
&brw_sf_vp,
&brw_sf_unit,
- &brw_vs_unit, /* always required, enabled or not */
+ &genX(vs_state), /* always required, enabled or not */
&brw_clip_unit,
&brw_gs_unit,