diff options
author | Rhys Perry <[email protected]> | 2019-02-05 15:56:24 +0000 |
---|---|---|
committer | Rhys Perry <[email protected]> | 2019-10-21 18:49:46 +0000 |
commit | 8b98d0954e6168484479cf51d56bface448d00d5 (patch) | |
tree | e53969337c5a608d2f241aec0ce5c6b1464c4c3a /src/compiler/nir/nir.h | |
parent | f729ecefef1542314e1f7660e8f00e9e67e33a84 (diff) |
nir/lower_idiv: add new llvm-based path
v2: make variable names snake_case
v2: minor cleanups in emit_udiv()
v2: fix Panfrost build failure
v3: use an enum instead of a boolean flag in nir_lower_idiv()'s signature
v4: remove nir_op_urcp
v5: drop nv50 path
v5: rebase
v6: add back nv50 path
v6: add comment for nir_lower_idiv_path enum
v7: rename _nv50/_llvm to _fast/_precise
v8: fix etnaviv build failure
Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Diffstat (limited to 'src/compiler/nir/nir.h')
-rw-r--r-- | src/compiler/nir/nir.h | 14 |
1 files changed, 13 insertions, 1 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index d002102cad8..6b437040dcc 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3917,7 +3917,19 @@ enum nir_lower_non_uniform_access_type {
 bool nir_lower_non_uniform_access(nir_shader *shader,
                                   enum nir_lower_non_uniform_access_type);
 
-bool nir_lower_idiv(nir_shader *shader);
+enum nir_lower_idiv_path {
+   /* This path is based on NV50LegalizeSSA::handleDIV(). It is the faster of
+    * the two but it is not exact in some cases (for example, 1091317713u /
+    * 1034u gives 5209173 instead of 1055432) */
+   nir_lower_idiv_fast,
+   /* This path is based on AMDGPUTargetLowering::LowerUDIVREM() and
+    * AMDGPUTargetLowering::LowerSDIVREM(). It requires more instructions than
+    * the nv50 path and many of them are integer multiplications, so it is
+    * probably slower. It should always return the correct result, though. */
+   nir_lower_idiv_precise,
+};
+
+bool nir_lower_idiv(nir_shader *shader, enum nir_lower_idiv_path path);
 
 bool nir_lower_input_attachments(nir_shader *shader,
                                  bool use_fragcoord_sysval);