diff options
author | Marek Olšák <[email protected]> | 2019-05-14 22:16:20 -0400 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2019-05-15 20:54:10 -0400 |
commit | ccfcb9d818b40564001b3cf2516367526de26c1d (patch) | |
tree | 635e075d82a6793001a8982866684e36be61d4d8 /src/amd | |
parent | e5cc363f43ba3e4b0800dc1e4fae1395f65a1275 (diff) |
ac: rename SI-CIK-VI to GFX6-GFX7-GFX8
Acked-by: Dave Airlie <[email protected]>
We already use GFX9 and I don't want us to have confusing naming
in the driver. GFXn naming is better from the driver perspective,
because it's the real version of the gfx portion of the hw. Also,
CIK means Bonaire-Kaveri-Kabini, it doesn't mean CI.
It shouldn't confuse our SDMA, UVD, VCE etc. code much. Those have
nothing to do with GFXn and they have their own version numbers.
Diffstat (limited to 'src/amd')
-rw-r--r-- | src/amd/common/ac_debug.c | 2 | ||||
-rw-r--r-- | src/amd/common/ac_gpu_info.c | 24 | ||||
-rw-r--r-- | src/amd/common/ac_gpu_info.h | 2 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_build.c | 26 | ||||
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.c | 34 | ||||
-rw-r--r-- | src/amd/common/ac_shader_abi.h | 2 | ||||
-rw-r--r-- | src/amd/common/ac_shader_util.c | 6 | ||||
-rw-r--r-- | src/amd/common/ac_surface.c | 16 | ||||
-rw-r--r-- | src/amd/common/ac_surface.h | 2 | ||||
-rw-r--r-- | src/amd/common/amd_family.h | 6 | ||||
-rw-r--r-- | src/amd/vulkan/radv_cmd_buffer.c | 34 | ||||
-rw-r--r-- | src/amd/vulkan/radv_debug.c | 2 | ||||
-rw-r--r-- | src/amd/vulkan/radv_device.c | 84 | ||||
-rw-r--r-- | src/amd/vulkan/radv_extensions.py | 12 | ||||
-rw-r--r-- | src/amd/vulkan/radv_formats.c | 2 | ||||
-rw-r--r-- | src/amd/vulkan/radv_image.c | 18 | ||||
-rw-r--r-- | src/amd/vulkan/radv_nir_to_llvm.c | 16 | ||||
-rw-r--r-- | src/amd/vulkan/radv_pipeline.c | 34 | ||||
-rw-r--r-- | src/amd/vulkan/radv_private.h | 2 | ||||
-rw-r--r-- | src/amd/vulkan/radv_shader.c | 4 | ||||
-rw-r--r-- | src/amd/vulkan/si_cmd_buffer.c | 66 | ||||
-rw-r--r-- | src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 2 | ||||
-rw-r--r-- | src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c | 2 |
23 files changed, 199 insertions, 199 deletions
diff --git a/src/amd/common/ac_debug.c b/src/amd/common/ac_debug.c index e5463b66616..187e9d6ba66 100644 --- a/src/amd/common/ac_debug.c +++ b/src/amd/common/ac_debug.c @@ -268,7 +268,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib, print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16); break; case PKT3_SURFACE_SYNC: - if (ib->chip_class >= CIK) { + if (ib->chip_class >= GFX7) { ac_dump_reg(f, ib->chip_class, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0); ac_dump_reg(f, ib->chip_class, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0); ac_dump_reg(f, ib->chip_class, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0); diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index e46424dd885..db7f9e47ce1 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -78,7 +78,7 @@ static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info) case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16: return 16; default: - fprintf(stderr, "Invalid CIK pipe configuration, assuming P2\n"); + fprintf(stderr, "Invalid GFX7 pipe configuration, assuming P2\n"); assert(!"this should never occur"); return 2; } @@ -323,11 +323,11 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, if (info->family >= CHIP_VEGA10) info->chip_class = GFX9; else if (info->family >= CHIP_TONGA) - info->chip_class = VI; + info->chip_class = GFX8; else if (info->family >= CHIP_BONAIRE) - info->chip_class = CIK; + info->chip_class = GFX7; else if (info->family >= CHIP_TAHITI) - info->chip_class = SI; + info->chip_class = GFX6; else { fprintf(stderr, "amdgpu: Unknown family.\n"); return false; @@ -382,18 +382,18 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, info->has_gpu_reset_counter_query = false; info->has_eqaa_surface_allocator = true; info->has_format_bc1_through_bc7 = true; - /* DRM 3.1.0 doesn't flush TC for VI correctly. */ - info->kernel_flushes_tc_l2_after_ib = info->chip_class != VI || + /* DRM 3.1.0 doesn't flush TC for GFX8 correctly. */ + info->kernel_flushes_tc_l2_after_ib = info->chip_class != GFX8 || info->drm_minor >= 2; info->has_indirect_compute_dispatch = true; - /* SI doesn't support unaligned loads. */ - info->has_unaligned_shader_loads = info->chip_class != SI; - /* Disable sparse mappings on SI due to VM faults in CP DMA. Enable them once + /* GFX6 doesn't support unaligned loads. */ + info->has_unaligned_shader_loads = info->chip_class != GFX6; + /* Disable sparse mappings on GFX6 due to VM faults in CP DMA. Enable them once * these faults are mitigated in software. * Disable sparse mappings on GFX9 due to hangs. */ info->has_sparse_vm_mappings = - info->chip_class >= CIK && info->chip_class <= VI && + info->chip_class >= GFX7 && info->chip_class <= GFX8 && info->drm_minor >= 13; info->has_2d_tiling = true; info->has_read_registers_query = true; @@ -446,7 +446,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, info->pte_fragment_size = alignment_info.size_local; info->gart_page_size = alignment_info.size_remote; - if (info->chip_class == SI) + if (info->chip_class == GFX6) info->gfx_ib_pad_with_type2 = TRUE; unsigned ib_align = 0; @@ -791,7 +791,7 @@ ac_get_harvested_configs(struct radeon_info *info, assert(rb_per_pkr == 1 || rb_per_pkr == 2); - if (info->chip_class >= CIK) { + if (info->chip_class >= GFX7) { unsigned raster_config_1 = *cik_raster_config_1_p; if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || (!se_mask[2] && !se_mask[3]))) { diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 946c2df82d0..11fb77eee87 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -183,7 +183,7 @@ static inline unsigned ac_get_max_simd_waves(enum radeon_family family) static inline uint32_t ac_get_num_physical_sgprs(enum chip_class chip_class) { - return chip_class >= VI ? 800 : 512; + return chip_class >= GFX8 ? 800 : 512; } #ifdef __cplusplus diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 58dcd2e863d..3ad9bb34805 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -826,14 +826,14 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx, * helper invocation which happens to fall on a different * layer due to extrapolation." * - * VI and earlier attempt to implement this in hardware by + * GFX8 and earlier attempt to implement this in hardware by * clamping the value of coords[2] = (8 * layer) + face. * Unfortunately, this means that the we end up with the wrong * face when clamping occurs. * * Clamp the layer earlier to work around the issue. */ - if (ctx->chip_class <= VI) { + if (ctx->chip_class <= GFX8) { LLVMValueRef ge0; ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, ""); tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, ""); @@ -1392,7 +1392,7 @@ ac_build_buffer_load(struct ac_llvm_context *ctx, offset = LLVMBuildAdd(ctx->builder, offset, soffset, ""); if (allow_smem && !slc && - (!glc || (HAVE_LLVM >= 0x0800 && ctx->chip_class >= VI))) { + (!glc || (HAVE_LLVM >= 0x0800 && ctx->chip_class >= GFX8))) { assert(vindex == NULL); LLVMValueRef result[8]; @@ -1783,7 +1783,7 @@ ac_build_opencoded_load_format(struct ac_llvm_context *ctx, } int log_recombine = 0; - if (ctx->chip_class == SI && !known_aligned) { + if (ctx->chip_class == GFX6 && !known_aligned) { /* Avoid alignment restrictions by loading one byte at a time. */ load_num_channels <<= load_log_size; log_recombine = load_log_size; @@ -1819,7 +1819,7 @@ ac_build_opencoded_load_format(struct ac_llvm_context *ctx, } if (log_recombine > 0) { - /* Recombine bytes if necessary (SI only) */ + /* Recombine bytes if necessary (GFX6 only) */ LLVMTypeRef dst_type = log_recombine == 2 ? ctx->i32 : ctx->i16; for (unsigned src = 0, dst = 0; src < load_num_channels; ++dst) { @@ -2212,7 +2212,7 @@ ac_get_thread_id(struct ac_llvm_context *ctx) } /* - * SI implements derivatives using the local data store (LDS) + * AMD GCN implements derivatives using the local data store (LDS) * All writes to the LDS happen in all executing threads at * the same time. TID is the Thread ID for the current * thread and is a value between 0 and 63, representing @@ -3304,7 +3304,7 @@ void ac_init_exec_full_mask(struct ac_llvm_context *ctx) void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx) { - unsigned lds_size = ctx->chip_class >= CIK ? 65536 : 32768; + unsigned lds_size = ctx->chip_class >= GFX7 ? 65536 : 32768; ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32_0, LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_ADDR_SPACE_LDS), "lds"); @@ -4034,7 +4034,7 @@ ac_build_alu_op(struct ac_llvm_context *ctx, LLVMValueRef lhs, LLVMValueRef rhs, * \param maxprefix specifies that the result only needs to be correct for a * prefix of this many threads * - * TODO: add inclusive and excluse scan functions for SI chip class. + * TODO: add inclusive and excluse scan functions for GFX6. */ static LLVMValueRef ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValueRef identity, @@ -4142,28 +4142,28 @@ ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsign result = ac_build_alu_op(ctx, result, swap, op); if (cluster_size == 4) return ac_build_wwm(ctx, result); - if (ctx->chip_class >= VI) + if (ctx->chip_class >= GFX8) swap = ac_build_dpp(ctx, identity, result, dpp_row_half_mirror, 0xf, 0xf, false); else swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x04)); result = ac_build_alu_op(ctx, result, swap, op); if (cluster_size == 8) return ac_build_wwm(ctx, result); - if (ctx->chip_class >= VI) + if (ctx->chip_class >= GFX8) swap = ac_build_dpp(ctx, identity, result, dpp_row_mirror, 0xf, 0xf, false); else swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x08)); result = ac_build_alu_op(ctx, result, swap, op); if (cluster_size == 16) return ac_build_wwm(ctx, result); - if (ctx->chip_class >= VI && cluster_size != 32) + if (ctx->chip_class >= GFX8 && cluster_size != 32) swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false); else swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x10)); result = ac_build_alu_op(ctx, result, swap, op); if (cluster_size == 32) return ac_build_wwm(ctx, result); - if (ctx->chip_class >= VI) { + if (ctx->chip_class >= GFX8) { swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false); result = ac_build_alu_op(ctx, result, swap, op); result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0)); @@ -4350,7 +4350,7 @@ ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3) { unsigned mask = dpp_quad_perm(lane0, lane1, lane2, lane3); - if (ctx->chip_class >= VI) { + if (ctx->chip_class >= GFX8) { return ac_build_dpp(ctx, src, src, mask, 0xf, 0xf, false); } else { return ac_build_ds_swizzle(ctx, src, (1 << 15) | mask); diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 682645e9b1f..53c4ff7d383 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -112,7 +112,7 @@ get_ac_image_dim(const struct ac_llvm_context *ctx, enum glsl_sampler_dim sdim, enum ac_image_dim dim = get_ac_sampler_dim(ctx, sdim, is_array); if (dim == ac_image_cube || - (ctx->chip_class <= VI && dim == ac_image_3d)) + (ctx->chip_class <= GFX8 && dim == ac_image_3d)) dim = ac_image_2darray; return dim; @@ -371,7 +371,7 @@ static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx, src0 = ac_to_float(ctx, src0); result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, ""); - if (ctx->chip_class >= VI) { + if (ctx->chip_class >= GFX8) { LLVMValueRef args[2]; /* Check if the result is a denormal - and flush to 0 if so. */ args[0] = result; @@ -382,10 +382,10 @@ static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx, /* need to convert back up to f32 */ result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, ""); - if (ctx->chip_class >= VI) + if (ctx->chip_class >= GFX8) result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, ""); else { - /* for SI/CIK */ + /* for GFX6-GFX7 */ /* 0x38800000 is smallest half float value (2^-14) in 32-bit float, * so compare the result and flush to 0 if it's smaller. */ @@ -1169,9 +1169,9 @@ get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_ele LLVMBuildExtractElement(ctx->ac.builder, descriptor, LLVMConstInt(ctx->ac.i32, 2, false), ""); - /* VI only */ - if (ctx->ac.chip_class == VI && in_elements) { - /* On VI, the descriptor contains the size in bytes, + /* GFX8 only */ + if (ctx->ac.chip_class == GFX8 && in_elements) { + /* On GFX8, the descriptor contains the size in bytes, * but TXQ must return the size in elements. * The stride is always non-zero for resources using TXQ. */ @@ -1376,7 +1376,7 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, break; } - if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= VI) { + if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= GFX8) { nir_deref_instr *texture_deref_instr = get_tex_texture_deref(instr); nir_variable *var = nir_deref_instr_get_variable(texture_deref_instr); const struct glsl_type *type = glsl_without_array(var->type); @@ -1535,11 +1535,11 @@ static unsigned get_cache_policy(struct ac_nir_context *ctx, { unsigned cache_policy = 0; - /* SI has a TC L1 bug causing corruption of 8bit/16bit stores. All + /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All * store opcodes not aligned to a dword are affected. The only way to * get unaligned stores is through shader images. */ - if (((may_store_unaligned && ctx->ac.chip_class == SI) || + if (((may_store_unaligned && ctx->ac.chip_class == GFX6) || /* If this is write-only, don't keep data in L1 to prevent * evicting L1 cache lines that may be needed by other * instructions. @@ -2773,11 +2773,11 @@ static void emit_membar(struct ac_llvm_context *ac, void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage) { - /* SI only (thanks to a hw bug workaround): + /* GFX6 only (thanks to a hw bug workaround): * The real barrier instruction isn’t needed, because an entire patch * always fits into a single wave. */ - if (ac->chip_class == SI && stage == MESA_SHADER_TESS_CTRL) { + if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) { ac_build_waitcnt(ac, LGKM_CNT & VM_CNT); return; } @@ -3557,13 +3557,13 @@ static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. * - * SI-CI: + * GFX6-GFX7: * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic * filtering manually. The driver sets img7 to a mask clearing * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do: * s_and_b32 samp0, samp0, img7 * - * VI: + * GFX8: * The ANISO_OVERRIDE sampler field enables this fix in TA. */ static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, @@ -3572,7 +3572,7 @@ static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef img7, samp0; - if (ctx->ac.chip_class >= VI) + if (ctx->ac.chip_class >= GFX8) return samp; img7 = LLVMBuildExtractElement(builder, res, @@ -3756,7 +3756,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) * It's unnecessary if the original texture format was * Z32_FLOAT, but we don't know that here. */ - if (args.compare && ctx->ac.chip_class >= VI && ctx->abi->clamp_shadow_reference) + if (args.compare && ctx->ac.chip_class >= GFX8 && ctx->abi->clamp_shadow_reference) args.compare = ac_build_clamp(&ctx->ac, ac_to_float(&ctx->ac, args.compare)); /* pack derivatives */ @@ -4396,7 +4396,7 @@ ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class) * by the reality that LLVM 5.0 doesn't have working VGPR indexing * on GFX9. */ - bool llvm_has_working_vgpr_indexing = chip_class <= VI; + bool llvm_has_working_vgpr_indexing = chip_class <= GFX8; /* TODO: Indirect indexing of GS inputs is unimplemented. * diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h index 8debb1ff986..2051f22d29b 100644 --- a/src/amd/common/ac_shader_abi.h +++ b/src/amd/common/ac_shader_abi.h @@ -196,7 +196,7 @@ struct ac_shader_abi { LLVMValueRef (*load_base_vertex)(struct ac_shader_abi *abi); - /* Whether to clamp the shadow reference value to [0,1]on VI. Radeonsi currently + /* Whether to clamp the shadow reference value to [0,1]on GFX8. Radeonsi currently * uses it due to promoting D16 to D32, but radv needs it off. */ bool clamp_shadow_reference; diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c index 531395f4f62..64152081737 100644 --- a/src/amd/common/ac_shader_util.c +++ b/src/amd/common/ac_shader_util.c @@ -104,7 +104,7 @@ ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class) return S_028A40_MODE(V_028A40_GS_SCENARIO_G) | S_028A40_CUT_MODE(cut_mode)| - S_028A40_ES_WRITE_OPTIMIZE(chip_class <= VI) | + S_028A40_ES_WRITE_OPTIMIZE(chip_class <= GFX8) | S_028A40_GS_WRITE_OPTIMIZE(1) | S_028A40_ONCHIP(chip_class >= GFX9 ? 1 : 0); } @@ -167,9 +167,9 @@ ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, } } - /* SI (except OLAND and HAINAN) has a bug that it only looks + /* GFX6 (except OLAND and HAINAN) has a bug that it only looks * at the X writemask component. */ - if (ctx->chip_class == SI && + if (ctx->chip_class == GFX6 && ctx->family != CHIP_OLAND && ctx->family != CHIP_HAINAN) mask |= 0x1; diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index f9dd4f5d77d..a9433b9696c 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -452,7 +452,7 @@ static void gfx6_set_micro_tile_mode(struct radeon_surf *surf, { uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]]; - if (info->chip_class >= CIK) + if (info->chip_class >= GFX7) surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode); else surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode); @@ -526,8 +526,8 @@ static int gfx6_surface_settings(ADDR_HANDLE addrlib, } /* Compute tile swizzle. */ - /* TODO: fix tile swizzle with mipmapping for SI */ - if ((info->chip_class >= CIK || config->info.levels == 1) && + /* TODO: fix tile swizzle with mipmapping for GFX6 */ + if ((info->chip_class >= GFX7 || config->info.levels == 1) && config->info.surf_index && surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D && !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) && @@ -567,7 +567,7 @@ void ac_compute_cmask(const struct radeon_info *info, if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) return; - assert(info->chip_class <= VI); + assert(info->chip_class <= GFX8); switch (num_pipes) { case 2: @@ -732,7 +732,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, * driver team). */ AddrSurfInfoIn.flags.dccCompatible = - info->chip_class >= VI && + info->chip_class >= GFX8 && !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && !(surf->flags & RADEON_SURF_DISABLE_DCC) && !compressed && @@ -742,7 +742,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0; AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER); - /* On CI/VI, the DB uses the same pitch and tile mode (except tilesplit) + /* On GFX7-GFX8, the DB uses the same pitch and tile mode (except tilesplit) * for Z and stencil. This can cause a number of problems which we work * around here: * @@ -799,7 +799,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1); - if (info->chip_class == SI) { + if (info->chip_class == GFX6) { if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) { if (surf->bpe == 2) AddrSurfInfoIn.tileIndex = 11; /* 16bpp */ @@ -816,7 +816,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */ } } else { - /* CIK - VI */ + /* GFX7 - GFX8 */ if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */ else diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h index 10d25e23d32..08aac94d3a9 100644 --- a/src/amd/common/ac_surface.h +++ b/src/amd/common/ac_surface.h @@ -221,7 +221,7 @@ struct radeon_surf { uint32_t cmask_alignment; union { - /* R600-VI return values. + /* Return values for GFX8 and older. * * Some of them can be set by the caller if certain parameters are * desirable. The allocator will try to obey them. diff --git a/src/amd/common/amd_family.h b/src/amd/common/amd_family.h index 82eff1a492f..ee30dc74b93 100644 --- a/src/amd/common/amd_family.h +++ b/src/amd/common/amd_family.h @@ -110,9 +110,9 @@ enum chip_class { R700, EVERGREEN, CAYMAN, - SI, /* GFX6 */ - CIK, /* GFX7 */ - VI, /* GFX8 */ + GFX6, + GFX7, + GFX8, GFX9, }; diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 6d1f3fc7d5a..ec1fcf4fd64 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -215,7 +215,7 @@ radv_bind_streamout_state(struct radv_cmd_buffer *cmd_buffer, bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer) { return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE && - cmd_buffer->device->physical_device->rad_info.chip_class >= CIK; + cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7; } enum ring_type radv_queue_family_to_ring(int f) { @@ -1041,7 +1041,7 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout layout) { - bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= VI; + bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8; struct radv_color_buffer_info *cb = &att->cb; uint32_t cb_color_info = cb->cb_color_info; @@ -1629,8 +1629,8 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) S_028208_BR_X(framebuffer->width) | S_028208_BR_Y(framebuffer->height)); - if (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) { - uint8_t watermark = 4; /* Default value for VI. */ + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8) { + uint8_t watermark = 4; /* Default value for GFX8. */ /* For optimal DCC performance. */ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { @@ -1691,7 +1691,7 @@ void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer) uint32_t db_count_control; if(!cmd_buffer->state.active_occlusion_queries) { - if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) && pipeline->graphics.disable_out_of_order_rast_for_occlusion && has_perfect_queries) { @@ -1710,7 +1710,7 @@ void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer) const struct radv_subpass *subpass = cmd_buffer->state.subpass; uint32_t sample_rate = subpass ? util_logbase2(subpass->max_sample_count) : 0; - if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { db_count_control = S_028004_PERFECT_ZPASS_COUNTS(has_perfect_queries) | S_028004_SAMPLE_RATE(sample_rate) | @@ -2019,7 +2019,7 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, va += offset + buffer->offset; desc[0] = va; desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride); - if (cmd_buffer->device->physical_device->rad_info.chip_class <= CIK && stride) + if (cmd_buffer->device->physical_device->rad_info.chip_class <= GFX7 && stride) desc[2] = (buffer->size - offset - velems->format_size[i]) / stride + 1; else desc[2] = buffer->size - offset; @@ -2106,7 +2106,7 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer) /* Set the descriptor. * - * On VI, the format must be non-INVALID, otherwise + * On GFX8, the format must be non-INVALID, otherwise * the buffer will be considered not bound and store * instructions will be no-ops. */ @@ -2211,7 +2211,7 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, radeon_set_uconfig_reg_idx(cs, R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param); - } else if (info->chip_class >= CIK) { + } else if (info->chip_class >= GFX7) { radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); @@ -2948,7 +2948,7 @@ VkResult radv_EndCommandBuffer( RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) { - if (cmd_buffer->device->physical_device->rad_info.chip_class == SI) + if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6) cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; si_emit_cache_flush(cmd_buffer); } @@ -3824,11 +3824,11 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_INDEX_BUFFER) radv_emit_index_buffer(cmd_buffer); } else { - /* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE, + /* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE, * so the state must be re-emitted before the next indexed * draw. */ - if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { cmd_buffer->state.last_index_type = -1; cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER; } @@ -3849,7 +3849,7 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer, struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info; bool has_prefetch = - cmd_buffer->device->physical_device->rad_info.chip_class >= CIK; + cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7; bool pipeline_is_dirty = (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) && cmd_buffer->state.pipeline != cmd_buffer->state.emitted_pipeline; @@ -3859,7 +3859,7 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer, cmd_buffer->cs, 4096); if (likely(!info->indirect)) { - /* SI-CI treat instance_count==0 as instance_count==1. There is + /* GFX6-GFX7 treat instance_count==0 as instance_count==1. There is * no workaround for indirect draws, but we can at least skip * direct draws. */ @@ -4237,7 +4237,7 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer, { struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; bool has_prefetch = - cmd_buffer->device->physical_device->rad_info.chip_class >= CIK; + cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7; bool pipeline_is_dirty = pipeline && pipeline != cmd_buffer->state.emitted_compute_pipeline; @@ -5043,7 +5043,7 @@ static void radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer) unsigned reg_strmout_cntl; /* The register is at different places on different ASICs. */ - if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0); } else { @@ -5084,7 +5084,7 @@ void radv_CmdBeginTransformFeedbackEXT( if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) counter_buffer_idx = -1; - /* SI binds streamout buffers as shader resources. + /* AMD GCN binds streamout buffers as shader resources. * VGT only counts primitives and tells the shader through * SGPRs what to do. */ diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c index 4854b094ba7..432e65b1475 100644 --- a/src/amd/vulkan/radv_debug.c +++ b/src/amd/vulkan/radv_debug.c @@ -131,7 +131,7 @@ radv_dump_debug_registers(struct radv_device *device, FILE *f) radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3); radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG); radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG); - if (info->chip_class <= VI) { + if (info->chip_class <= GFX8) { radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS); radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2); radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3); diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 4b64f5101ed..c0e317a97e5 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -220,11 +220,11 @@ radv_handle_env_var_force_family(struct radv_physical_device *device) if (i >= CHIP_VEGA10) device->rad_info.chip_class = GFX9; else if (i >= CHIP_TONGA) - device->rad_info.chip_class = VI; + device->rad_info.chip_class = GFX8; else if (i >= CHIP_BONAIRE) - device->rad_info.chip_class = CIK; + device->rad_info.chip_class = GFX7; else - device->rad_info.chip_class = SI; + device->rad_info.chip_class = GFX6; return; } @@ -332,7 +332,7 @@ radv_physical_device_init(struct radv_physical_device *device, disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2); device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags); - if (device->rad_info.chip_class < VI || + if (device->rad_info.chip_class < GFX8 || device->rad_info.chip_class > GFX9) fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n"); @@ -349,18 +349,18 @@ radv_physical_device_init(struct radv_physical_device *device, } /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs - * on SI. + * on GFX6. */ - device->has_clear_state = device->rad_info.chip_class >= CIK; + device->has_clear_state = device->rad_info.chip_class >= GFX7; - device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI; + device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= GFX8; /* Vega10/Raven need a special workaround for a hardware bug. */ device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 || device->rad_info.family == CHIP_RAVEN; /* Out-of-order primitive rasterization. */ - device->has_out_of_order_rast = device->rad_info.chip_class >= VI && + device->has_out_of_order_rast = device->rad_info.chip_class >= GFX8 && device->rad_info.max_se >= 2; device->out_of_order_rast_allowed = device->has_out_of_order_rast && !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER); @@ -368,9 +368,9 @@ radv_physical_device_init(struct radv_physical_device *device, device->dcc_msaa_allowed = (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA); - /* TODO: Figure out how to use LOAD_CONTEXT_REG on SI/CIK. */ + /* TODO: Figure out how to use LOAD_CONTEXT_REG on GFX6-GFX7. */ device->has_load_ctx_reg_pkt = device->rad_info.chip_class >= GFX9 || - (device->rad_info.chip_class >= VI && + (device->rad_info.chip_class >= GFX8 && device->rad_info.me_fw_feature >= 41); radv_physical_device_init_mem_types(device); @@ -769,7 +769,7 @@ void radv_GetPhysicalDeviceFeatures( .shaderTessellationAndGeometryPointSize = true, .shaderImageGatherExtended = true, .shaderStorageImageExtendedFormats = true, - .shaderStorageImageMultisample = pdevice->rad_info.chip_class >= VI, + .shaderStorageImageMultisample = pdevice->rad_info.chip_class >= GFX8, .shaderUniformBufferArrayDynamicIndexing = true, .shaderSampledImageArrayDynamicIndexing = true, .shaderStorageBufferArrayDynamicIndexing = true, @@ -822,7 +822,7 @@ void radv_GetPhysicalDeviceFeatures2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: { VkPhysicalDevice16BitStorageFeatures *features = (VkPhysicalDevice16BitStorageFeatures*)ext; - bool enabled = pdevice->rad_info.chip_class >= VI; + bool enabled = pdevice->rad_info.chip_class >= GFX8; features->storageBuffer16BitAccess = enabled; features->uniformAndStorageBuffer16BitAccess = enabled; features->storagePushConstant16 = enabled; @@ -884,7 +884,7 @@ void radv_GetPhysicalDeviceFeatures2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT: { VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *features = (VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *)ext; - features->scalarBlockLayout = pdevice->rad_info.chip_class >= CIK; + features->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7; break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: { @@ -916,7 +916,7 @@ void radv_GetPhysicalDeviceFeatures2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: { VkPhysicalDevice8BitStorageFeaturesKHR *features = (VkPhysicalDevice8BitStorageFeaturesKHR*)ext; - bool enabled = pdevice->rad_info.chip_class >= VI; + bool enabled = pdevice->rad_info.chip_class >= GFX8; features->storageBuffer8BitAccess = enabled; features->uniformAndStorageBuffer8BitAccess = enabled; features->storagePushConstant8 = enabled; @@ -925,7 +925,7 @@ void radv_GetPhysicalDeviceFeatures2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: { VkPhysicalDeviceFloat16Int8FeaturesKHR *features = (VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext; - features->shaderFloat16 = pdevice->rad_info.chip_class >= VI && HAVE_LLVM >= 0x0800; + features->shaderFloat16 = pdevice->rad_info.chip_class >= GFX8 && HAVE_LLVM >= 0x0800; features->shaderInt8 = true; break; } @@ -1087,7 +1087,7 @@ void radv_GetPhysicalDeviceProperties( .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, .sampledImageDepthSampleCounts = sample_counts, .sampledImageStencilSampleCounts = sample_counts, - .storageImageSampleCounts = pdevice->rad_info.chip_class >= VI ? sample_counts : VK_SAMPLE_COUNT_1_BIT, + .storageImageSampleCounts = pdevice->rad_info.chip_class >= GFX8 ? sample_counts : VK_SAMPLE_COUNT_1_BIT, .maxSampleMaskWords = 1, .timestampComputeAndGraphics = true, .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq, @@ -1176,7 +1176,7 @@ void radv_GetPhysicalDeviceProperties2( VK_SUBGROUP_FEATURE_BALLOT_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT; - if (pdevice->rad_info.chip_class >= VI) { + if (pdevice->rad_info.chip_class >= GFX8) { properties->supportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_BIT | @@ -1228,12 +1228,12 @@ void radv_GetPhysicalDeviceProperties2( properties->sgprsPerSimd = ac_get_num_physical_sgprs(pdevice->rad_info.chip_class); properties->minSgprAllocation = - pdevice->rad_info.chip_class >= VI ? 16 : 8; + pdevice->rad_info.chip_class >= GFX8 ? 16 : 8; properties->maxSgprAllocation = pdevice->rad_info.family == CHIP_TONGA || pdevice->rad_info.family == CHIP_ICELAND ? 96 : 104; properties->sgprAllocationGranularity = - pdevice->rad_info.chip_class >= VI ? 16 : 8; + pdevice->rad_info.chip_class >= GFX8 ? 16 : 8; /* VGPR. */ properties->vgprsPerSimd = RADV_NUM_PHYSICAL_VGPRS; @@ -1868,7 +1868,7 @@ VkResult radv_CreateDevice( device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1); - if (device->physical_device->rad_info.chip_class >= CIK) { + if (device->physical_device->rad_info.chip_class >= GFX7) { /* If the KMD allows it (there is a KMD hw register for it), * allow launching waves out-of-order. */ @@ -1880,7 +1880,7 @@ VkResult radv_CreateDevice( device->tess_offchip_block_dw_size = device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192; device->has_distributed_tess = - device->physical_device->rad_info.chip_class >= VI && + device->physical_device->rad_info.chip_class >= GFX8 && device->physical_device->rad_info.max_se >= 2; if (getenv("RADV_TRACE_FILE")) { @@ -1923,7 +1923,7 @@ VkResult radv_CreateDevice( device->ws->cs_finalize(device->empty_cs[family]); } - if (device->physical_device->rad_info.chip_class >= CIK) + if (device->physical_device->rad_info.chip_class >= GFX7) cik_create_gfx_config(device); VkPipelineCacheCreateInfo ci; @@ -2223,7 +2223,7 @@ fill_geom_tess_rings(struct radv_queue *queue, static unsigned radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p) { - bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK && + bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 && device->physical_device->rad_info.family != CHIP_CARRIZO && device->physical_device->rad_info.family != CHIP_STONEY; unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64; @@ -2234,7 +2234,7 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff /* * Per RadeonSI: * This must be one less than the maximum number due to a hw limitation. - * Various hardware bugs in SI, CIK, and GFX9 need this. + * Various hardware bugs need thGFX7 * * Per AMDVLK: * Vega10 should limit max_offchip_buffers to 508 (4 * 127). @@ -2244,8 +2244,8 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff * Follow AMDVLK here. */ if (device->physical_device->rad_info.family == CHIP_VEGA10 || - device->physical_device->rad_info.chip_class == CIK || - device->physical_device->rad_info.chip_class == SI) + device->physical_device->rad_info.chip_class == GFX7 || + device->physical_device->rad_info.chip_class == GFX6) --max_offchip_buffers_per_se; max_offchip_buffers = max_offchip_buffers_per_se * @@ -2263,11 +2263,11 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff } switch (device->physical_device->rad_info.chip_class) { - case SI: + case GFX6: max_offchip_buffers = MIN2(max_offchip_buffers, 126); break; - case CIK: - case VI: + case GFX7: + case GFX8: case GFX9: default: max_offchip_buffers = MIN2(max_offchip_buffers, 508); @@ -2275,8 +2275,8 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff } *max_offchip_buffers_p = max_offchip_buffers; - if (device->physical_device->rad_info.chip_class >= CIK) { - if (device->physical_device->rad_info.chip_class >= VI) + if (device->physical_device->rad_info.chip_class >= GFX7) { + if (device->physical_device->rad_info.chip_class >= GFX8) --max_offchip_buffers; hs_offchip_param = S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) | @@ -2304,7 +2304,7 @@ radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs, if (gsvs_ring_bo) radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo); - if (queue->device->physical_device->rad_info.chip_class >= CIK) { + if (queue->device->physical_device->rad_info.chip_class >= GFX7) { radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2); radeon_emit(cs, esgs_ring_size >> 8); radeon_emit(cs, gsvs_ring_size >> 8); @@ -2329,7 +2329,7 @@ radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs, radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo); - if (queue->device->physical_device->rad_info.chip_class >= CIK) { + if (queue->device->physical_device->rad_info.chip_class >= GFX7) { radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size / 4)); radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, @@ -2649,7 +2649,7 @@ radv_get_preamble_cs(struct radv_queue *queue, queue->device->physical_device->rad_info.chip_class, NULL, 0, queue->queue_family_index == RING_COMPUTE && - queue->device->physical_device->rad_info.chip_class >= CIK, + queue->device->physical_device->rad_info.chip_class >= GFX7, (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) | RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SMEM_L1 | @@ -2661,7 +2661,7 @@ radv_get_preamble_cs(struct radv_queue *queue, queue->device->physical_device->rad_info.chip_class, NULL, 0, queue->queue_family_index == RING_COMPUTE && - queue->device->physical_device->rad_info.chip_class >= CIK, + queue->device->physical_device->rad_info.chip_class >= GFX7, RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SMEM_L1 | RADV_CMD_FLAG_INV_VMEM_L1 | @@ -4274,13 +4274,13 @@ radv_initialise_color_surface(struct radv_device *device, cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index); if (radv_image_has_fmask(iview->image)) { - if (device->physical_device->rad_info.chip_class >= CIK) + if (device->physical_device->rad_info.chip_class >= GFX7) cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1); cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index); cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max); } else { /* This must be set for fast clear to work without FMASK. */ - if (device->physical_device->rad_info.chip_class >= CIK) + if (device->physical_device->rad_info.chip_class >= GFX7) cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max); @@ -4360,7 +4360,7 @@ radv_initialise_color_surface(struct radv_device *device, S_028C70_ENDIAN(endian); if (radv_image_has_fmask(iview->image)) { cb->cb_color_info |= S_028C70_COMPRESSION(1); - if (device->physical_device->rad_info.chip_class == SI) { + if (device->physical_device->rad_info.chip_class == GFX6) { unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height); cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); } @@ -4377,7 +4377,7 @@ radv_initialise_color_surface(struct radv_device *device, /* This must be set for fast clear to work without FMASK. */ if (!radv_image_has_fmask(iview->image) && - device->physical_device->rad_info.chip_class == SI) { + device->physical_device->rad_info.chip_class == GFX6) { unsigned bankh = util_logbase2(surf->u.legacy.bankh); cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); } @@ -4548,7 +4548,7 @@ radv_initialise_ds_surface(struct radv_device *device, if (iview->image->info.samples > 1) ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples)); - if (device->physical_device->rad_info.chip_class >= CIK) { + if (device->physical_device->rad_info.chip_class >= GFX7) { struct radeon_info *info = &device->physical_device->rad_info; unsigned tiling_index = surf->u.legacy.tiling_index[level]; unsigned stencil_index = surf->u.legacy.stencil_tiling_index[level]; @@ -4807,7 +4807,7 @@ radv_init_sampler(struct radv_device *device, { uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo); uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso); - bool is_vi = (device->physical_device->rad_info.chip_class >= VI); + bool is_vi = (device->physical_device->rad_info.chip_class >= GFX8); unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND; const struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction = @@ -4835,7 +4835,7 @@ radv_init_sampler(struct radv_device *device, S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) | S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) | S_008F38_MIP_POINT_PRECLAMP(0) | - S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) | + S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) | S_008F38_FILTER_PREC_FIX(1) | S_008F38_ANISO_OVERRIDE(is_vi)); sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) | diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py index 576a21f4ca5..0b5af56a435 100644 --- a/src/amd/vulkan/radv_extensions.py +++ b/src/amd/vulkan/radv_extensions.py @@ -96,7 +96,7 @@ EXTENSIONS = [ Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'), Extension('VK_KHR_multiview', 1, True), Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'), - Extension('VK_KHR_8bit_storage', 1, 'device->rad_info.chip_class >= VI'), + Extension('VK_KHR_8bit_storage', 1, 'device->rad_info.chip_class >= GFX8'), Extension('VK_EXT_direct_mode_display', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'), Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'), Extension('VK_EXT_buffer_device_address', 1, True), @@ -119,8 +119,8 @@ EXTENSIONS = [ Extension('VK_EXT_memory_priority', 1, True), Extension('VK_EXT_pci_bus_info', 2, True), Extension('VK_EXT_pipeline_creation_feedback', 1, True), - Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'), - Extension('VK_EXT_scalar_block_layout', 1, 'device->rad_info.chip_class >= CIK'), + Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= GFX7'), + Extension('VK_EXT_scalar_block_layout', 1, 'device->rad_info.chip_class >= GFX7'), Extension('VK_EXT_shader_viewport_index_layer', 1, True), Extension('VK_EXT_shader_stencil_export', 1, True), Extension('VK_EXT_transform_feedback', 1, True), @@ -128,15 +128,15 @@ EXTENSIONS = [ Extension('VK_EXT_ycbcr_image_arrays', 1, True), Extension('VK_AMD_draw_indirect_count', 1, True), Extension('VK_AMD_gcn_shader', 1, True), - Extension('VK_AMD_gpu_shader_half_float', 1, 'device->rad_info.chip_class >= VI && HAVE_LLVM >= 0x0800'), - Extension('VK_AMD_gpu_shader_int16', 1, 'device->rad_info.chip_class >= VI'), + Extension('VK_AMD_gpu_shader_half_float', 1, 'device->rad_info.chip_class >= GFX8 && HAVE_LLVM >= 0x0800'), + Extension('VK_AMD_gpu_shader_int16', 1, 'device->rad_info.chip_class >= GFX8'), Extension('VK_AMD_rasterization_order', 1, 'device->has_out_of_order_rast'), Extension('VK_AMD_shader_core_properties', 1, True), Extension('VK_AMD_shader_info', 1, True), Extension('VK_AMD_shader_trinary_minmax', 1, True), Extension('VK_GOOGLE_decorate_string', 1, True), Extension('VK_GOOGLE_hlsl_functionality1', 1, True), - Extension('VK_NV_compute_shader_derivatives', 1, 'device->rad_info.chip_class >= VI'), + Extension('VK_NV_compute_shader_derivatives', 1, 'device->rad_info.chip_class >= GFX8'), ] class VkVersion: diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c index 9883002fa42..d7b560082f6 100644 --- a/src/amd/vulkan/radv_formats.c +++ b/src/amd/vulkan/radv_formats.c @@ -761,7 +761,7 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical case VK_FORMAT_A2B10G10R10_SSCALED_PACK32: case VK_FORMAT_A2R10G10B10_SINT_PACK32: case VK_FORMAT_A2B10G10R10_SINT_PACK32: - if (physical_device->rad_info.chip_class <= VI && + if (physical_device->rad_info.chip_class <= GFX8 && physical_device->rad_info.family != CHIP_STONEY) { buffer &= ~(VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT); diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 3ffb4e95749..161997ae196 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -47,7 +47,7 @@ radv_choose_tiling(struct radv_device *device, if (!vk_format_is_compressed(pCreateInfo->format) && !vk_format_is_depth_or_stencil(pCreateInfo->format) - && device->physical_device->rad_info.chip_class <= VI) { + && device->physical_device->rad_info.chip_class <= GFX8) { /* this causes hangs in some VK CTS tests on GFX9. */ /* Textures with a very small height are recommended to be linear. */ if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D || @@ -69,7 +69,7 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo) { /* TC-compat HTILE is only available for GFX8+. */ - if (device->physical_device->rad_info.chip_class < VI) + if (device->physical_device->rad_info.chip_class < GFX8) return false; if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) || @@ -130,7 +130,7 @@ radv_use_dcc_for_image(struct radv_device *device, bool blendable; /* DCC (Delta Color Compression) is only available for GFX8+. */ - if (device->physical_device->rad_info.chip_class < VI) + if (device->physical_device->rad_info.chip_class < GFX8) return false; if (device->instance->debug_flags & RADV_DEBUG_NO_DCC) @@ -328,7 +328,7 @@ radv_make_buffer_descriptor(struct radv_device *device, state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride); - if (device->physical_device->rad_info.chip_class != VI && stride) { + if (device->physical_device->rad_info.chip_class != GFX8 && stride) { range /= stride; } @@ -370,12 +370,12 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, state[1] &= C_008F14_BASE_ADDRESS_HI; state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40); - if (chip_class >= VI) { + if (chip_class >= GFX8) { state[6] &= C_008F28_COMPRESSION_EN; state[7] = 0; if (!is_storage_image && radv_dcc_enabled(image, first_level)) { meta_va = gpu_address + image->dcc_offset; - if (chip_class <= VI) + if (chip_class <= GFX8) meta_va += base_level_info->dcc_offset; } else if (!is_storage_image && radv_image_is_tc_compat_htile(image)) { @@ -417,7 +417,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, S_008F24_META_RB_ALIGNED(meta.rb_aligned); } } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ unsigned pitch = base_level_info->nblk_x * block_width; unsigned index = si_tile_mode_index(plane, base_level, is_stencil); @@ -596,7 +596,7 @@ si_make_texture_descriptor(struct radv_device *device, /* The last dword is unused by hw. The shader uses it to clear * bits in the first dword of sampler state. */ - if (device->physical_device->rad_info.chip_class <= CIK && image->info.samples <= 1) { + if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) { if (first_level == last_level) state[7] = C_008F30_MAX_ANISO_RATIO; else @@ -725,7 +725,7 @@ radv_query_opaque_metadata(struct radv_device *device, memcpy(&md->metadata[2], desc, sizeof(desc)); /* Dwords [10:..] contain the mipmap level offsets. */ - if (device->physical_device->rad_info.chip_class <= VI) { + if (device->physical_device->rad_info.chip_class <= GFX8) { for (i = 0; i <= image->info.levels - 1; i++) md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8; md->size_metadata = (11 + image->info.levels - 1) * 4; diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index e8be058d3f7..341f6388f32 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -262,7 +262,7 @@ get_tcs_num_patches(struct radv_shader_context *ctx) * * Test: dEQP-VK.tessellation.shader_input_output.barrier */ - if (ctx->options->chip_class >= CIK && ctx->options->family != CHIP_STONEY) + if (ctx->options->chip_class >= GFX7 && ctx->options->family != CHIP_STONEY) hardware_lds_size = 65536; num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size)); @@ -273,8 +273,8 @@ get_tcs_num_patches(struct radv_shader_context *ctx) */ num_patches = MIN2(num_patches, 40); - /* SI bug workaround - limit LS-HS threadgroups to only one wave. */ - if (ctx->options->chip_class == SI) { + /* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */ + if (ctx->options->chip_class == GFX6) { unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp); num_patches = MIN2(num_patches, one_wave); } @@ -3276,7 +3276,7 @@ write_tess_factors(struct radv_shader_context *ctx) LLVMConstInt(ctx->ac.i32, 4 * stride, false), ""); unsigned tf_offset = 0; - if (ctx->options->chip_class <= VI) { + if (ctx->options->chip_class <= GFX8) { ac_nir_build_if(&inner_if_ctx, ctx, LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, rel_patch_id, ctx->ac.i32_0, "")); @@ -3518,7 +3518,7 @@ ac_nir_eliminate_const_vs_outputs(struct radv_shader_context *ctx) static void ac_setup_rings(struct radv_shader_context *ctx) { - if (ctx->options->chip_class <= VI && + if (ctx->options->chip_class <= GFX8 && (ctx->stage == MESA_SHADER_GEOMETRY || ctx->options->key.vs.as_es || ctx->options->key.tes.as_es)) { unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS @@ -3568,7 +3568,7 @@ ac_setup_rings(struct radv_shader_context *ctx) stride = 4 * num_components * ctx->gs_max_out_vertices; - /* Limit on the stride field for <= CIK. */ + /* Limit on the stride field for <= GFX7. */ assert(stride < (1 << 14)); ring = LLVMBuildBitCast(ctx->ac.builder, @@ -3616,7 +3616,7 @@ radv_nir_get_max_workgroup_size(enum chip_class chip_class, { switch (nir->info.stage) { case MESA_SHADER_TESS_CTRL: - return chip_class >= CIK ? 128 : 64; + return chip_class >= GFX7 ? 128 : 64; case MESA_SHADER_GEOMETRY: return chip_class >= GFX9 ? 128 : 64; case MESA_SHADER_COMPUTE: @@ -3961,7 +3961,7 @@ static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, * - Floating-point output modifiers would be ignored by the hw. * - Some opcodes don't support denormals, such as v_mad_f32. We would * have to stop using those. - * - SI & CI would be very slow. + * - GFX6 & GFX7 would be very slow. */ config->float_mode |= V_00B028_FP_64_DENORMS; } diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index f25a5f55bf5..c89a6f139ba 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1558,11 +1558,11 @@ calculate_gs_ring_sizes(struct radv_pipeline *pipeline, const struct radv_gs_sta unsigned num_se = device->physical_device->rad_info.max_se; unsigned wave_size = 64; unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */ - /* On SI-CI, the value comes from VGT_GS_VERTEX_REUSE = 16. - * On VI+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2). + /* On GFX6-GFX7, the value comes from VGT_GS_VERTEX_REUSE = 16. + * On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2). */ unsigned gs_vertex_reuse = - (device->physical_device->rad_info.chip_class >= VI ? 32 : 16) * num_se; + (device->physical_device->rad_info.chip_class >= GFX8 ? 32 : 16) * num_se; unsigned alignment = 256 * num_se; /* The maximum size is 63.999 MB per SE. */ unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se; @@ -1581,7 +1581,7 @@ calculate_gs_ring_sizes(struct radv_pipeline *pipeline, const struct radv_gs_sta esgs_ring_size = align(esgs_ring_size, alignment); gsvs_ring_size = align(gsvs_ring_size, alignment); - if (pipeline->device->physical_device->rad_info.chip_class <= VI) + if (pipeline->device->physical_device->rad_info.chip_class <= GFX8) pipeline->graphics.esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size); pipeline->graphics.gsvs_ring_size = MIN2(gsvs_ring_size, max_size); @@ -1643,7 +1643,7 @@ calculate_tess_state(struct radv_pipeline *pipeline, lds_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.lds_size; - if (pipeline->device->physical_device->rad_info.chip_class >= CIK) { + if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) { assert(lds_size <= 65536); lds_size = align(lds_size, 512) / 512; } else { @@ -1904,7 +1904,7 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline, key.vertex_attribute_offsets[location] = desc->offset; key.vertex_attribute_strides[location] = radv_get_attrib_stride(input_state, desc->binding); - if (pipeline->device->physical_device->rad_info.chip_class <= VI && + if (pipeline->device->physical_device->rad_info.chip_class <= GFX8 && pipeline->device->physical_device->rad_info.family != CHIP_STONEY) { VkFormat format = input_state->pVertexAttributeDescriptions[i].format; uint64_t adjust; @@ -1962,7 +1962,7 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline, } key.col_format = blend->spi_shader_col_format; - if (pipeline->device->physical_device->rad_info.chip_class < VI) + if (pipeline->device->physical_device->rad_info.chip_class < GFX8) radv_pipeline_compute_get_int_clamp(pCreateInfo, &key.is_int8, &key.is_int10); return key; @@ -2918,7 +2918,7 @@ radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *ctx_cs, * if no sample lies on the pixel boundary (-8 sample offset). It's * currently always TRUE because the driver doesn't support 16 samples. */ - bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= CIK; + bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= GFX7; radeon_set_context_reg(ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion)); @@ -3003,7 +3003,7 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, cull_dist_mask << 8 | clip_dist_mask); - if (pipeline->device->physical_device->rad_info.chip_class <= VI) + if (pipeline->device->physical_device->rad_info.chip_class <= GFX8) radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF, outinfo->writes_viewport_index); } @@ -3036,7 +3036,7 @@ radv_pipeline_generate_hw_ls(struct radeon_cmdbuf *cs, radeon_emit(cs, S_00B524_MEM_BASE(va >> 40)); rsrc2 |= S_00B52C_LDS_SIZE(tess->lds_size); - if (pipeline->device->physical_device->rad_info.chip_class == CIK && + if (pipeline->device->physical_device->rad_info.chip_class == GFX7 && pipeline->device->physical_device->rad_info.family != CHIP_HAWAII) radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2); @@ -3118,7 +3118,7 @@ radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs, radeon_set_context_reg(ctx_cs, R_028B6C_VGT_TF_PARAM, tess->tf_param); - if (pipeline->device->physical_device->rad_info.chip_class >= CIK) + if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) radeon_set_context_reg_idx(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, 2, tess->ls_hs_config); else @@ -3490,7 +3490,7 @@ radv_pipeline_generate_pm4(struct radv_pipeline *pipeline, radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, radv_compute_vgt_shader_stages_en(pipeline)); - if (pipeline->device->physical_device->rad_info.chip_class >= CIK) { + if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) { radeon_set_uconfig_reg_idx(cs, R_030908_VGT_PRIMITIVE_TYPE, 1, prim); } else { radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim); @@ -3522,12 +3522,12 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline, /* GS requirement. */ ia_multi_vgt_param.partial_es_wave = false; - if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= VI) + if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= GFX8) if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pipeline->device->gs_table_depth - 3) ia_multi_vgt_param.partial_es_wave = true; ia_multi_vgt_param.wd_switch_on_eop = false; - if (device->physical_device->rad_info.chip_class >= CIK) { + if (device->physical_device->rad_info.chip_class >= GFX7) { /* WD_SWITCH_ON_EOP has no effect on GPUs with less than * 4 shader engines. Set 1 to pass the assertion below. * The other cases are hardware requirements. */ @@ -3567,7 +3567,7 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline, /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */ if (device->has_distributed_tess) { if (radv_pipeline_has_gs(pipeline)) { - if (device->physical_device->rad_info.chip_class <= VI) + if (device->physical_device->rad_info.chip_class <= GFX8) ia_multi_vgt_param.partial_es_wave = true; } else { ia_multi_vgt_param.partial_vs_wave = true; @@ -3609,7 +3609,7 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline, ia_multi_vgt_param.base = S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) | /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */ - S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == VI ? 2 : 0) | + S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == GFX8 ? 2 : 0) | S_030960_EN_INST_OPT_BASIC(device->physical_device->rad_info.chip_class >= GFX9) | S_030960_EN_INST_OPT_ADV(device->physical_device->rad_info.chip_class >= GFX9); @@ -3885,7 +3885,7 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline) compute_resource_limits = S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0); - if (device->physical_device->rad_info.chip_class >= CIK) { + if (device->physical_device->rad_info.chip_class >= GFX7) { unsigned num_cu_per_se = device->physical_device->rad_info.num_good_compute_units / device->physical_device->rad_info.max_se; diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index aa25e8f9805..a88c0f31ad3 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -703,7 +703,7 @@ struct radv_device { float sample_locations_8x[8][2]; float sample_locations_16x[16][2]; - /* CIK and later */ + /* GFX7 and later */ uint32_t gfx_init_size_dw; struct radeon_winsys_bo *gfx_init; diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 17d6c5bc33a..dfa50155c06 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -773,7 +773,7 @@ generate_shader_stats(struct radv_device *device, struct _mesa_string_buffer *buf) { enum chip_class chip_class = device->physical_device->rad_info.chip_class; - unsigned lds_increment = chip_class >= CIK ? 512 : 256; + unsigned lds_increment = chip_class >= GFX7 ? 512 : 256; struct ac_shader_config *conf; unsigned max_simd_waves; unsigned lds_per_wave = 0; @@ -875,7 +875,7 @@ radv_GetShaderInfoAMD(VkDevice _device, if (!pInfo) { *pInfoSize = sizeof(VkShaderStatisticsInfoAMD); } else { - unsigned lds_multiplier = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256; + unsigned lds_multiplier = device->physical_device->rad_info.chip_class >= GFX7 ? 512 : 256; struct ac_shader_config *conf = &variant->config; VkShaderStatisticsInfoAMD statistics = {}; diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index e73c13762e5..0f4bdadc3d2 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -25,7 +25,7 @@ * IN THE SOFTWARE. */ -/* command buffer handling for SI */ +/* command buffer handling for AMD GCN */ #include "radv_private.h" #include "radv_shader.h" @@ -51,8 +51,8 @@ si_write_harvested_raster_configs(struct radv_physical_device *physical_device, raster_config_se); for (se = 0; se < num_se; se++) { - /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ - if (physical_device->rad_info.chip_class < CIK) + /* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */ + if (physical_device->rad_info.chip_class < GFX7) radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX, S_00802C_SE_INDEX(se) | S_00802C_SH_BROADCAST_WRITES(1) | @@ -64,8 +64,8 @@ si_write_harvested_raster_configs(struct radv_physical_device *physical_device, radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]); } - /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ - if (physical_device->rad_info.chip_class < CIK) + /* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */ + if (physical_device->rad_info.chip_class < GFX7) radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX, S_00802C_SE_BROADCAST_WRITES(1) | S_00802C_SH_BROADCAST_WRITES(1) | @@ -75,7 +75,7 @@ si_write_harvested_raster_configs(struct radv_physical_device *physical_device, S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1)); - if (physical_device->rad_info.chip_class >= CIK) + if (physical_device->rad_info.chip_class >= GFX7) radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); } @@ -93,7 +93,7 @@ si_emit_compute(struct radv_physical_device *physical_device, radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff)); radeon_emit(cs, S_00B85C_SH0_CU_EN(0xffff) | S_00B85C_SH1_CU_EN(0xffff)); - if (physical_device->rad_info.chip_class >= CIK) { + if (physical_device->rad_info.chip_class >= GFX7) { /* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */ radeon_set_sh_reg_seq(cs, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2); @@ -108,7 +108,7 @@ si_emit_compute(struct radv_physical_device *physical_device, * kernel if we want to use something other than the default value, * which is now 0x22f. */ - if (physical_device->rad_info.chip_class <= SI) { + if (physical_device->rad_info.chip_class <= GFX6) { /* XXX: This should be: * (number of compute units) * 4 * (waves per simd) - 1 */ @@ -142,7 +142,7 @@ si_set_raster_config(struct radv_physical_device *physical_device, if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config); - if (physical_device->rad_info.chip_class >= CIK) + if (physical_device->rad_info.chip_class >= GFX7) radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); } else { @@ -158,9 +158,9 @@ si_emit_graphics(struct radv_physical_device *physical_device, { int i; - /* Only SI can disable CLEAR_STATE for now. */ + /* Only GFX6 can disable CLEAR_STATE for now. */ assert(physical_device->has_clear_state || - physical_device->rad_info.chip_class == SI); + physical_device->rad_info.chip_class == GFX6); radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); radeon_emit(cs, CONTEXT_CONTROL_LOAD_ENABLE(1)); @@ -171,7 +171,7 @@ si_emit_graphics(struct radv_physical_device *physical_device, radeon_emit(cs, 0); } - if (physical_device->rad_info.chip_class <= VI) + if (physical_device->rad_info.chip_class <= GFX8) si_set_raster_config(physical_device, cs); radeon_set_context_reg(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); @@ -179,7 +179,7 @@ si_emit_graphics(struct radv_physical_device *physical_device, radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); /* FIXME calculate these values somehow ??? */ - if (physical_device->rad_info.chip_class <= VI) { + if (physical_device->rad_info.chip_class <= GFX8) { radeon_set_context_reg(cs, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); radeon_set_context_reg(cs, R_028A58_VGT_ES_PER_GS, 0x40); } @@ -193,7 +193,7 @@ si_emit_graphics(struct radv_physical_device *physical_device, radeon_set_context_reg(cs, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); if (!physical_device->has_clear_state) radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0); - if (physical_device->rad_info.chip_class < CIK) + if (physical_device->rad_info.chip_class < GFX7) radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1)); @@ -206,7 +206,7 @@ si_emit_graphics(struct radv_physical_device *physical_device, /* CLEAR_STATE doesn't clear these correctly on certain generations. * I don't know why. Deduced by trial and error. */ - if (physical_device->rad_info.chip_class <= CIK) { + if (physical_device->rad_info.chip_class <= GFX7) { radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); @@ -229,7 +229,7 @@ si_emit_graphics(struct radv_physical_device *physical_device, if (!physical_device->has_clear_state) { radeon_set_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); - /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */ + /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on GFX6 */ radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0); radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); @@ -256,7 +256,7 @@ si_emit_graphics(struct radv_physical_device *physical_device, radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0); } - if (physical_device->rad_info.chip_class >= CIK) { + if (physical_device->rad_info.chip_class >= GFX7) { if (physical_device->rad_info.chip_class >= GFX9) { radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F)); @@ -303,7 +303,7 @@ si_emit_graphics(struct radv_physical_device *physical_device, S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F)); } - if (physical_device->rad_info.chip_class >= VI) { + if (physical_device->rad_info.chip_class >= GFX8) { uint32_t vgt_tess_distribution; vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | @@ -586,7 +586,7 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, ia_switch_on_eoi = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.ia_switch_on_eoi; partial_vs_wave = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.partial_vs_wave; - if (chip_class >= CIK) { + if (chip_class >= GFX7) { wd_switch_on_eop = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.wd_switch_on_eop; /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0. @@ -601,19 +601,19 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, * Assume indirect draws always use small instances. * This is needed for good VS wave utilization. */ - if (chip_class <= VI && + if (chip_class <= GFX8 && info->max_se == 4 && multi_instances_smaller_than_primgroup) wd_switch_on_eop = true; - /* Required on CIK and later. */ + /* Required on GFX7 and later. */ if (info->max_se > 2 && !wd_switch_on_eop) ia_switch_on_eoi = true; - /* Required by Hawaii and, for some special cases, by VI. */ + /* Required by Hawaii and, for some special cases, by GFX8. */ if (ia_switch_on_eoi && (family == CHIP_HAWAII || - (chip_class == VI && + (chip_class == GFX8 && /* max primgroup in wave is always 2 - leave this for documentation */ (radv_pipeline_has_gs(cmd_buffer->state.pipeline) || max_primgroup_in_wave != 2)))) partial_vs_wave = true; @@ -633,7 +633,7 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, assert(wd_switch_on_eop || !ia_switch_on_eop); } /* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */ - if (chip_class <= VI && ia_switch_on_eoi) + if (chip_class <= GFX8 && ia_switch_on_eoi) partial_es_wave = true; if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) { @@ -658,7 +658,7 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) | S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) | - S_028AA8_WD_SWITCH_ON_EOP(chip_class >= CIK ? wd_switch_on_eop : 0); + S_028AA8_WD_SWITCH_ON_EOP(chip_class >= GFX7 ? wd_switch_on_eop : 0); } @@ -704,8 +704,8 @@ void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, if (!is_gfx8_mec) radeon_emit(cs, 0); /* unused */ } else { - if (chip_class == CIK || - chip_class == VI) { + if (chip_class == GFX7 || + chip_class == GFX8) { /* Two EOP events are required to make all engines go idle * (and optional cache flushes executed) before the timestamp * is written. @@ -788,7 +788,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1) cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1); - if (chip_class <= VI) { + if (chip_class <= GFX8) { if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) { cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) | @@ -801,7 +801,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, S_0085F0_CB7_DEST_BASE_ENA(1); /* Necessary for DCC */ - if (chip_class >= VI) { + if (chip_class >= GFX8) { si_cs_emit_write_event_eop(cs, chip_class, is_mec, @@ -911,12 +911,12 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, } if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) || - (chip_class <= CIK && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) { + (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) { si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9, cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) | - S_0301F0_TC_WB_ACTION_ENA(chip_class >= VI)); + S_0301F0_TC_WB_ACTION_ENA(chip_class >= GFX8)); cp_coher_cntl = 0; } else { if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) { @@ -1099,7 +1099,7 @@ static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, else if (flags & CP_DMA_USE_L2) header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2); - if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, cmd_buffer->state.predicating)); radeon_emit(cs, header); radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */ @@ -1281,7 +1281,7 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer) { - if (cmd_buffer->device->physical_device->rad_info.chip_class < CIK) + if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX7) return; if (!cmd_buffer->state.dma_is_busy) diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index 70f81119c02..0c521917027 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -1037,7 +1037,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, uint32_t pad_word = 0xffff1000U; bool emit_signal_sem = sem_info->cs_emit_signal; - if (radv_amdgpu_winsys(ws)->info.chip_class == SI) + if (radv_amdgpu_winsys(ws)->info.chip_class == GFX6) pad_word = 0x80000000; assert(cs_count); diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c index 35a585a5693..649a7698069 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c @@ -58,7 +58,7 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd) ws->info.num_sdma_rings = MIN2(ws->info.num_sdma_rings, MAX_RINGS_PER_TYPE); ws->info.num_compute_rings = MIN2(ws->info.num_compute_rings, MAX_RINGS_PER_TYPE); - ws->use_ib_bos = ws->info.chip_class >= CIK; + ws->use_ib_bos = ws->info.chip_class >= GFX7; return true; } |