aboutsummaryrefslogtreecommitdiffstats
path: root/src/broadcom/compiler/v3d_compiler.h
diff options
context:
space:
mode:
author Eric Anholt <[email protected]> 2018-01-08 11:55:31 -0800
committer Eric Anholt <[email protected]> 2018-01-12 21:55:30 -0800
commit 90269ba353333be13e54549ecff3adb8803661db (patch)
tree 228c677b3b355ea9e0dee929e6ed8d0afb85dd0c /src/broadcom/compiler/v3d_compiler.h
parent 86a12b4d5a49c68f4613513d2846c5eb8e56a677 (diff)
broadcom/vc5: Use THRSW to enable multi-threaded shaders.
This is a major performance boost on all of V3D, but is required on V3D 4.x where shaders are always either 2- or 4-threaded.
Diffstat (limited to 'src/broadcom/compiler/v3d_compiler.h')
-rw-r--r-- src/broadcom/compiler/v3d_compiler.h 22
1 files changed, 16 insertions, 6 deletions
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index e17a108233f..cb3614edcb6 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -134,6 +134,7 @@ struct qinst {
struct qreg src[3];
bool cond_is_exec_mask;
bool has_implicit_uniform;
+ bool is_last_thrsw;
/* After vir_to_qpu.c: If instr reads a uniform, which uniform from
* the uncompiled stream it is.
@@ -522,12 +523,16 @@ struct v3d_compile {
uint32_t program_id;
uint32_t variant_id;
- /* Set to compile program in threaded FS mode, where SIG_THREAD_SWITCH
- * is used to hide texturing latency at the cost of limiting ourselves
- * to the bottom half of physical reg space.
+ /* Set to compile program in 1x, 2x, or 4x threaded mode, where
+ * SIG_THREAD_SWITCH is used to hide texturing latency at the cost of
+ * limiting ourselves to part of the physical reg space.
+ *
+ * On V3D 3.x, 2x or 4x divide the physical reg space by 2x or 4x. On
+ * V3D 4.x, all shaders are 2x threaded, and 4x only divides the
+ * physical reg space in half.
*/
- bool fs_threaded;
-
+ uint8_t threads;
+ struct qinst *last_thrsw;
bool last_thrsw_at_top_level;
bool failed;
@@ -547,7 +552,12 @@ struct v3d_prog_data {
uint32_t ubo_size;
uint8_t num_inputs;
+ uint8_t threads;
+ /* For threads > 1, whether the program should be dispatched in the
+ * after-final-THRSW state.
+ */
+ bool single_seg;
};
struct v3d_vs_prog_data {
@@ -674,7 +684,7 @@ void v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c);
void v3d_nir_lower_txf_ms(nir_shader *s, struct v3d_compile *c);
void vir_lower_uniforms(struct v3d_compile *c);
-void v3d_vir_to_qpu(struct v3d_compile *c);
+void v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers);
uint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c);
void qpu_validate(struct v3d_compile *c);
struct qpu_reg *v3d_register_allocate(struct v3d_compile *c);