diff options
author | Eric Anholt <[email protected]> | 2018-01-08 11:55:31 -0800 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2018-01-12 21:55:30 -0800 |
commit | 90269ba353333be13e54549ecff3adb8803661db (patch) | |
tree | 228c677b3b355ea9e0dee929e6ed8d0afb85dd0c /src/broadcom/compiler/v3d_compiler.h | |
parent | 86a12b4d5a49c68f4613513d2846c5eb8e56a677 (diff) |
broadcom/vc5: Use THRSW to enable multi-threaded shaders.
This is a major performance boost on all of V3D, but is required on V3D
4.x where shaders are always either 2- or 4-threaded.
Diffstat (limited to 'src/broadcom/compiler/v3d_compiler.h')
-rw-r--r-- | src/broadcom/compiler/v3d_compiler.h | 22 |
1 files changed, 16 insertions, 6 deletions
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index e17a108233f..cb3614edcb6 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -134,6 +134,7 @@ struct qinst { struct qreg src[3]; bool cond_is_exec_mask; bool has_implicit_uniform; + bool is_last_thrsw; /* After vir_to_qpu.c: If instr reads a uniform, which uniform from * the uncompiled stream it is. @@ -522,12 +523,16 @@ struct v3d_compile { uint32_t program_id; uint32_t variant_id; - /* Set to compile program in threaded FS mode, where SIG_THREAD_SWITCH - * is used to hide texturing latency at the cost of limiting ourselves - * to the bottom half of physical reg space. + /* Set to compile program in in 1x, 2x, or 4x threaded mode, where + * SIG_THREAD_SWITCH is used to hide texturing latency at the cost of + * limiting ourselves to the part of the physical reg space. + * + * On V3D 3.x, 2x or 4x divide the physical reg space by 2x or 4x. On + * V3D 4.x, all shaders are 2x threaded, and 4x only divides the + * physical reg space in half. */ - bool fs_threaded; - + uint8_t threads; + struct qinst *last_thrsw; bool last_thrsw_at_top_level; bool failed; @@ -547,7 +552,12 @@ struct v3d_prog_data { uint32_t ubo_size; uint8_t num_inputs; + uint8_t threads; + /* For threads > 1, whether the program should be dispatched in the + * after-final-THRSW state. + */ + bool single_seg; }; struct v3d_vs_prog_data { @@ -674,7 +684,7 @@ void v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c); void v3d_nir_lower_txf_ms(nir_shader *s, struct v3d_compile *c); void vir_lower_uniforms(struct v3d_compile *c); -void v3d_vir_to_qpu(struct v3d_compile *c); +void v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers); uint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c); void qpu_validate(struct v3d_compile *c); struct qpu_reg *v3d_register_allocate(struct v3d_compile *c); |