diff options
author | Marek Olšák <[email protected]> | 2019-05-14 22:16:20 -0400 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2019-05-15 20:54:10 -0400 |
commit | ccfcb9d818b40564001b3cf2516367526de26c1d (patch) | |
tree | 635e075d82a6793001a8982866684e36be61d4d8 /src/gallium/drivers | |
parent | e5cc363f43ba3e4b0800dc1e4fae1395f65a1275 (diff) |
ac: rename SI-CIK-VI to GFX6-GFX7-GFX8
Acked-by: Dave Airlie <[email protected]>
We already use GFX9 and I don't want us to have confusing naming
in the driver. GFXn naming is better from the driver perspective,
because it's the real version of the gfx portion of the hw. Also,
CIK means Bonaire-Kaveri-Kabini, it doesn't mean CI.
It shouldn't confuse our SDMA, UVD, VCE etc. code much. Those have
nothing to do with GFXn and they have their own version numbers.
Diffstat (limited to 'src/gallium/drivers')
30 files changed, 285 insertions, 285 deletions
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 27565e0aa0c..497da0c3dfa 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -366,7 +366,7 @@ static void r600_reallocate_texture_inplace(struct r600_common_context *rctx, templ.bind |= new_bind_flag; /* r600g doesn't react to dirty_tex_descriptor_counter */ - if (rctx->chip_class < SI) + if (rctx->chip_class < GFX6) return; if (rtex->resource.b.is_shared) @@ -1264,7 +1264,7 @@ static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen, const struct pipe_box *box) { /* r600g doesn't react to dirty_tex_descriptor_counter */ - return rscreen->chip_class >= SI && + return rscreen->chip_class >= GFX6 && !rtex->resource.b.is_shared && !(transfer_usage & PIPE_TRANSFER_READ) && rtex->resource.b.b.last_level == 0 && diff --git a/src/gallium/drivers/r600/r600d_common.h b/src/gallium/drivers/r600/r600d_common.h index b06f90f8edd..979f26bc7da 100644 --- a/src/gallium/drivers/r600/r600d_common.h +++ b/src/gallium/drivers/r600/r600d_common.h @@ -85,7 +85,7 @@ #define SURFACE_BASE_UPDATE_COLOR_NUM(x) (((1 << x) - 1) << 1) #define SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x)) #define PKT3_SET_SH_REG 0x76 /* SI and later */ -#define PKT3_SET_UCONFIG_REG 0x79 /* CIK and later */ +#define PKT3_SET_UCONFIG_REG 0x79 /* GFX7 and later */ #define EVENT_TYPE_SAMPLE_STREAMOUTSTATS1 0x1 /* EG and later */ #define EVENT_TYPE_SAMPLE_STREAMOUTSTATS2 0x2 /* EG and later */ diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index da9b25a442d..2728541dd29 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -180,12 +180,12 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, copy_width <= (1 << 14) && copy_height <= (1 << 14) && copy_depth <= (1 << 11) && - /* HW limitation - CIK: */ - (sctx->chip_class != CIK || + /* HW limitation - GFX7: */ + (sctx->chip_class != GFX7 || (copy_width < (1 << 14) && copy_height < (1 << 14) && copy_depth < (1 << 11))) && - /* HW limitation - some CIK parts: */ + /* HW limitation - some GFX7 parts: */ ((sctx->family != CHIP_BONAIRE && sctx->family != CHIP_KAVERI) || (srcx + copy_width != (1 << 14) && @@ -207,7 +207,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, radeon_emit(cs, dstx | (dsty << 16)); radeon_emit(cs, dstz | ((dst_pitch - 1) << 16)); radeon_emit(cs, dst_slice_pitch - 1); - if (sctx->chip_class == CIK) { + if (sctx->chip_class == GFX7) { radeon_emit(cs, copy_width | (copy_height << 16)); radeon_emit(cs, copy_depth); } else { @@ -264,7 +264,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, bpp == 16) return false; - if (sctx->chip_class == CIK && + if (sctx->chip_class == GFX7 && (copy_width_aligned == (1 << 14) || copy_height == (1 << 14) || copy_depth == (1 << 11))) @@ -371,7 +371,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, radeon_emit(cs, linear_x | (linear_y << 16)); radeon_emit(cs, linear_z | ((linear_pitch - 1) << 16)); radeon_emit(cs, linear_slice_pitch - 1); - if (sctx->chip_class == CIK) { + if (sctx->chip_class == GFX7) { radeon_emit(cs, copy_width_aligned | (copy_height << 16)); radeon_emit(cs, copy_depth); } else { @@ -394,9 +394,9 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, dsty % 8 == 0 && srcx % 8 == 0 && srcy % 8 == 0 && - /* this can either be equal, or display->rotated (VI+ only) */ + /* this can either be equal, or display->rotated (GFX8+ only) */ (src_micro_mode == dst_micro_mode || - (sctx->chip_class >= VI && + (sctx->chip_class >= GFX8 && src_micro_mode == V_009910_ADDR_SURF_DISPLAY_MICRO_TILING && dst_micro_mode == V_009910_ADDR_SURF_ROTATED_MICRO_TILING))) { assert(src_pitch % 8 == 0); @@ -434,12 +434,12 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, copy_depth <= (1 << 11) && copy_width_aligned % 8 == 0 && copy_height_aligned % 8 == 0 && - /* HW limitation - CIK: */ - (sctx->chip_class != CIK || + /* HW limitation - GFX7: */ + (sctx->chip_class != GFX7 || (copy_width_aligned < (1 << 14) && copy_height_aligned < (1 << 14) && copy_depth < (1 << 11))) && - /* HW limitation - some CIK parts: */ + /* HW limitation - some GFX7 parts: */ ((sctx->family != CHIP_BONAIRE && sctx->family != CHIP_KAVERI && sctx->family != CHIP_KABINI && @@ -465,7 +465,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, radeon_emit(cs, dstz | (dst_pitch_tile_max << 16)); radeon_emit(cs, dst_slice_tile_max); radeon_emit(cs, encode_tile_info(sctx, sdst, dst_level, false)); - if (sctx->chip_class == CIK) { + if (sctx->chip_class == GFX7) { radeon_emit(cs, copy_width_aligned | (copy_height_aligned << 16)); radeon_emit(cs, copy_depth); @@ -502,7 +502,7 @@ static void cik_sdma_copy(struct pipe_context *ctx, return; } - if ((sctx->chip_class == CIK || sctx->chip_class == VI) && + if ((sctx->chip_class == GFX7 || sctx->chip_class == GFX8) && cik_sdma_copy_texture(sctx, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box)) return; diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 9d3d7d3d27a..5806342cca9 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -1151,7 +1151,7 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx, goto resolve_to_temp; /* This can happen with mipmapping. */ - if (sctx->chip_class == VI && + if (sctx->chip_class == GFX8 && !dst->surface.u.legacy.level[info->dst.level].dcc_fast_clear_size) goto resolve_to_temp; diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index d294f236914..d0094031a95 100644 --- a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -313,7 +313,7 @@ static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, assert(!"unexpected micro mode"); return; } - } else if (sscreen->info.chip_class >= CIK) { + } else if (sscreen->info.chip_class >= GFX7) { /* These magic numbers were copied from addrlib. It doesn't use * any definitions for them either. They are all 2D_TILED_THIN1 * modes with different bpp and micro tile mode. @@ -332,7 +332,7 @@ static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, assert(!"unexpected micro mode"); return; } - } else { /* SI */ + } else { /* GFX6 */ switch (tex->last_msaa_resolve_target_micro_mode) { case RADEON_MICRO_MODE_DISPLAY: switch (tex->surface.bpe) { @@ -434,7 +434,7 @@ static void si_do_fast_color_clear(struct si_context *sctx, !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) continue; - if (sctx->chip_class <= VI && + if (sctx->chip_class <= GFX8 && tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D && !sctx->screen->info.htile_cmask_support_1d_tiling) continue; @@ -474,7 +474,7 @@ static void si_do_fast_color_clear(struct si_context *sctx, continue; /* This can happen with mipmapping or MSAA. */ - if (sctx->chip_class == VI && + if (sctx->chip_class == GFX8 && !tex->surface.u.legacy.level[level].dcc_fast_clear_size) continue; diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index f1a433b72df..46a0ba76ed5 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -327,7 +327,7 @@ static void si_initialize_compute(struct si_context *sctx) radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff)); radeon_emit(cs, S_00B85C_SH0_CU_EN(0xffff) | S_00B85C_SH1_CU_EN(0xffff)); - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { /* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */ radeon_set_sh_reg_seq(cs, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2); @@ -342,7 +342,7 @@ static void si_initialize_compute(struct si_context *sctx) * kernel if we want to use something other than the default value, * which is now 0x22f. */ - if (sctx->chip_class <= SI) { + if (sctx->chip_class <= GFX6) { /* XXX: This should be: * (number of compute units) * 4 * (waves per simd) - 1 */ @@ -353,7 +353,7 @@ static void si_initialize_compute(struct si_context *sctx) /* Set the pointer to border colors. */ bc_va = sctx->border_color_buffer->gpu_address; - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { radeon_set_uconfig_reg_seq(cs, R_030E00_TA_CS_BC_BASE_ADDR, 2); radeon_emit(cs, bc_va >> 8); /* R_030E00_TA_CS_BC_BASE_ADDR */ radeon_emit(cs, S_030E04_ADDRESS(bc_va >> 40)); /* R_030E04_TA_CS_BC_BASE_ADDR_HI */ @@ -434,12 +434,12 @@ static bool si_switch_compute_shader(struct si_context *sctx, } lds_blocks = config->lds_size; - /* XXX: We are over allocating LDS. For SI, the shader reports + /* XXX: We are over allocating LDS. For GFX6, the shader reports * LDS in blocks of 256 bytes, so if there are 4 bytes lds * allocated in the shader and 4 bytes allocated by the state * tracker, then we will set LDS_SIZE to 512 bytes rather than 256. */ - if (sctx->chip_class <= SI) { + if (sctx->chip_class <= GFX6) { lds_blocks += align(program->local_size, 256) >> 8; } else { lds_blocks += align(program->local_size, 512) >> 9; @@ -474,7 +474,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, * command. However, that would add more complexity and we're likely * to get a shader state change in that case anyway. */ - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { cik_prefetch_TC_L2_async(sctx, &program->shader.bo->b.b, 0, program->shader.bo->b.b.width0); } @@ -539,7 +539,7 @@ static void setup_scratch_rsrc_user_sgprs(struct si_context *sctx, } else { scratch_dword3 |= S_008F0C_ELEMENT_SIZE(max_private_element_size); - if (sctx->chip_class < VI) { + if (sctx->chip_class < GFX8) { /* BUF_DATA_FORMAT is ignored, but it cannot be * BUF_DATA_FORMAT_INVALID. */ scratch_dword3 |= @@ -764,7 +764,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, unsigned compute_resource_limits = S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0); - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { unsigned num_cu_per_se = sscreen->info.num_good_compute_units / sscreen->info.max_se; @@ -777,7 +777,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, compute_resource_limits |= S_00B854_WAVES_PER_SH(sctx->cs_max_waves_per_sh); } else { - /* SI */ + /* GFX6 */ if (sctx->cs_max_waves_per_sh) { unsigned limit_div16 = DIV_ROUND_UP(sctx->cs_max_waves_per_sh, 16); compute_resource_limits |= S_00B854_WAVES_PER_SH_SI(limit_div16); @@ -792,7 +792,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, S_00B800_FORCE_START_AT_000(1) | /* If the KMD allows it (there is a KMD hw register for it), * allow launching waves out-of-order. (same as Vulkan) */ - S_00B800_ORDER_MODE(sctx->chip_class >= CIK); + S_00B800_ORDER_MODE(sctx->chip_class >= GFX7); const uint *last_block = info->last_block; bool partial_block_en = last_block[0] || last_block[1] || last_block[2]; @@ -861,10 +861,10 @@ static void si_launch_grid( * compute isn't used, i.e. only one compute job can run at a time. * If async compute is possible, the threadgroup size must be limited * to 256 threads on all queues to avoid the bug. - * Only SI and certain CIK chips are affected. + * Only GFX6 and certain GFX7 chips are affected. */ bool cs_regalloc_hang = - (sctx->chip_class == SI || + (sctx->chip_class == GFX6 || sctx->family == CHIP_BONAIRE || sctx->family == CHIP_KABINI) && info->block[0] * info->block[1] * info->block[2] > 256; @@ -894,7 +894,7 @@ static void si_launch_grid( si_context_add_resource_size(sctx, info->indirect); /* Indirect buffers use TC L2 on GFX9, but not older hw. */ - if (sctx->chip_class <= VI && + if (sctx->chip_class <= GFX8 && si_resource(info->indirect)->TC_L2_dirty) { sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; si_resource(info->indirect)->TC_L2_dirty = false; diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index fb0d8d2f1b6..1cfdc9b62c6 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -36,7 +36,7 @@ static enum si_cache_policy get_cache_policy(struct si_context *sctx, { if ((sctx->chip_class >= GFX9 && (coher == SI_COHERENCY_CB_META || coher == SI_COHERENCY_CP)) || - (sctx->chip_class >= CIK && coher == SI_COHERENCY_SHADER)) + (sctx->chip_class >= GFX7 && coher == SI_COHERENCY_SHADER)) return size <= 256 * 1024 ? L2_LRU : L2_STREAM; return L2_BYPASS; @@ -254,7 +254,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, (!force_cpdma && clear_value_size == 4 && offset % 4 == 0 && - (size > 32*1024 || sctx->chip_class <= VI))) { + (size > 32*1024 || sctx->chip_class <= GFX8))) { si_compute_do_clear_or_copy(sctx, dst, offset, NULL, 0, aligned_size, clear_value, clear_value_size, coher); @@ -418,7 +418,7 @@ void si_compute_copy_image(struct si_context *sctx, ctx->launch_grid(ctx, &info); sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | - (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) | + (sctx->chip_class <= GFX8 ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) | si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM); ctx->bind_compute_state(ctx, saved_cs); ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, saved_image); @@ -597,7 +597,7 @@ void si_compute_clear_render_target(struct pipe_context *ctx, ctx->launch_grid(ctx, &info); sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | - (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) | + (sctx->chip_class <= GFX8 ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) | si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM); ctx->bind_compute_state(ctx, saved_cs); ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &saved_image); diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 404117d1813..f5c54ca0d52 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -61,7 +61,7 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, uint32_t header = 0, command = 0; assert(size <= cp_dma_max_byte_count(sctx)); - assert(sctx->chip_class != SI || cache_policy == L2_BYPASS); + assert(sctx->chip_class != GFX6 || cache_policy == L2_BYPASS); if (sctx->chip_class >= GFX9) command |= S_414_BYTE_COUNT_GFX9(size); @@ -90,7 +90,7 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, /* GDS increments the address, not CP. */ command |= S_414_DAS(V_414_REGISTER) | S_414_DAIC(V_414_NO_INCREMENT); - } else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS) { + } else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) { header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2) | S_500_DST_CACHE_POLICY(cache_policy == L2_STREAM); } @@ -102,12 +102,12 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, /* Both of these are required for GDS. It does increment the address. */ command |= S_414_SAS(V_414_REGISTER) | S_414_SAIC(V_414_NO_INCREMENT); - } else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS) { + } else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) { header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) | S_500_SRC_CACHE_POLICY(cache_policy == L2_STREAM); } - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0)); radeon_emit(cs, header); radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */ @@ -412,7 +412,7 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf, uint64_t offset, unsigned size) { - assert(sctx->chip_class >= CIK); + assert(sctx->chip_class >= GFX7); si_cp_dma_copy_buffer(sctx, buf, buf, offset, offset, size, SI_CPDMA_SKIP_ALL, SI_COHERENCY_SHADER, L2_LRU); @@ -491,7 +491,7 @@ void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only) } } } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ /* Choose the right spot for the VBO prefetch. */ if (sctx->tes_shader.cso) { if (mask & SI_PREFETCH_LS) @@ -591,7 +591,7 @@ void si_cp_write_data(struct si_context *sctx, struct si_resource *buf, assert(offset % 4 == 0); assert(size % 4 == 0); - if (sctx->chip_class == SI && dst_sel == V_370_MEM) + if (sctx->chip_class == GFX6 && dst_sel == V_370_MEM) dst_sel = V_370_MEM_GRBM; radeon_add_to_buffer_list(sctx, cs, buf, diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 9a4494a98fe..bd85fc49387 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -314,7 +314,7 @@ static void si_dump_debug_registers(struct si_context *sctx, FILE *f) si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3); si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG); si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG); - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS); si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2); si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index f795c33cf26..5b812149754 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -347,7 +347,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, base_level_info->mode == RADEON_SURF_MODE_2D) state[0] |= tex->surface.tile_swizzle; - if (sscreen->info.chip_class >= VI) { + if (sscreen->info.chip_class >= GFX8) { state[6] &= C_008F28_COMPRESSION_EN; state[7] = 0; @@ -355,7 +355,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, meta_va = (!tex->dcc_separate_buffer ? tex->buffer.gpu_address : 0) + tex->dcc_offset; - if (sscreen->info.chip_class == VI) { + if (sscreen->info.chip_class == GFX8) { meta_va += base_level_info->dcc_offset; assert(base_level_info->mode == RADEON_SURF_MODE_2D); } @@ -399,7 +399,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, S_008F24_META_RB_ALIGNED(meta.rb_aligned); } } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ unsigned pitch = base_level_info->nblk_x * block_width; unsigned index = si_tile_mode_index(tex, base_level, is_stencil); @@ -1141,7 +1141,7 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) uint64_t va = buf->gpu_address + offset; int64_t num_records = (int64_t)buf->b.b.width0 - offset; - if (sctx->chip_class != VI && vb->stride) { + if (sctx->chip_class != GFX8 && vb->stride) { /* Round up by rounding down and adding 1 */ num_records = (num_records - velems->format_size[i]) / vb->stride + 1; @@ -1210,9 +1210,9 @@ static void si_set_constant_buffer(struct si_context *sctx, assert(slot < descs->num_elements); pipe_resource_reference(&buffers->buffers[slot], NULL); - /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy + /* GFX7 cannot unbind a constant buffer (S_BUFFER_LOAD is buggy * with a NULL buffer). We need to use a dummy buffer instead. */ - if (sctx->chip_class == CIK && + if (sctx->chip_class == GFX7 && (!input || (!input->buffer && !input->user_buffer))) input = &sctx->null_const_buf; @@ -1467,7 +1467,7 @@ void si_set_ring_buffer(struct si_context *sctx, uint slot, break; } - if (sctx->chip_class >= VI && stride) + if (sctx->chip_class >= GFX8 && stride) num_records *= stride; /* Set the descriptor. */ diff --git a/src/gallium/drivers/radeonsi/si_dma_cs.c b/src/gallium/drivers/radeonsi/si_dma_cs.c index bba1bd95826..8f2e15833b6 100644 --- a/src/gallium/drivers/radeonsi/si_dma_cs.c +++ b/src/gallium/drivers/radeonsi/si_dma_cs.c @@ -30,7 +30,7 @@ static void si_dma_emit_wait_idle(struct si_context *sctx) struct radeon_cmdbuf *cs = sctx->dma_cs; /* NOP waits for idle. */ - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) radeon_emit(cs, 0x00000000); /* NOP */ else radeon_emit(cs, 0xf0000000); /* NOP */ @@ -42,7 +42,7 @@ void si_dma_emit_timestamp(struct si_context *sctx, struct si_resource *dst, struct radeon_cmdbuf *cs = sctx->dma_cs; uint64_t va = dst->gpu_address + offset; - if (sctx->chip_class == SI) { + if (sctx->chip_class == GFX6) { unreachable("SI DMA doesn't support the timestamp packet."); return; } @@ -87,7 +87,7 @@ void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, offset += sdst->gpu_address; - if (sctx->chip_class == SI) { + if (sctx->chip_class == GFX6) { /* the same maximum size as for copying */ ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE); si_need_dma_space(sctx, ncopy * 4, sdst, NULL); @@ -105,7 +105,7 @@ void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, return; } - /* The following code is for CI, VI, Vega/Raven, etc. */ + /* The following code is for Sea Islands and later. */ /* the same maximum size as for copying */ ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE); si_need_dma_space(sctx, ncopy * 5, sdst, NULL); diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c index ffda98d2834..b3212c1db35 100644 --- a/src/gallium/drivers/radeonsi/si_fence.c +++ b/src/gallium/drivers/radeonsi/si_fence.c @@ -115,8 +115,8 @@ void si_cp_release_mem(struct si_context *ctx, radeon_emit(cs, 0); /* immediate data hi */ radeon_emit(cs, 0); /* unused */ } else { - if (ctx->chip_class == CIK || - ctx->chip_class == VI) { + if (ctx->chip_class == GFX7 || + ctx->chip_class == GFX8) { struct si_resource *scratch = ctx->eop_bug_scratch; uint64_t va = scratch->gpu_address; @@ -153,8 +153,8 @@ unsigned si_cp_write_fence_dwords(struct si_screen *screen) { unsigned dwords = 6; - if (screen->info.chip_class == CIK || - screen->info.chip_class == VI) + if (screen->info.chip_class == GFX7 || + screen->info.chip_class == GFX8) dwords *= 2; return dwords; diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index d97aca1de23..71350661c2b 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -254,7 +254,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 32; case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: - return sscreen->info.chip_class <= VI ? + return sscreen->info.chip_class <= GFX8 ? PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600 : 0; /* Stream output. */ diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index d0d405c473f..c81718950a4 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -82,7 +82,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_INV_GLOBAL_L2; - } else if (ctx->chip_class == SI) { + } else if (ctx->chip_class == GFX6) { /* The kernel flushes L2 before shaders are finished. */ wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH; @@ -147,7 +147,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, /* Make sure CP DMA is idle at the end of IBs after L2 prefetches * because the kernel doesn't wait for it. */ - if (ctx->chip_class >= CIK) + if (ctx->chip_class >= GFX7) si_cp_dma_wait_for_idle(ctx); /* Wait for draw calls to finish if needed. */ @@ -407,7 +407,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_COL_FORMAT] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_CB_SHADER_MASK] = 0xffffffff; ctx->tracked_regs.reg_value[SI_TRACKED_VGT_TF_PARAM] = 0x00000000; - ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL] = 0x0000001e; /* From VI */ + ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL] = 0x0000001e; /* From GFX8 */ /* Set all saved registers state to saved. */ ctx->tracked_regs.reg_saved = 0xffffffffffffffff; diff --git a/src/gallium/drivers/radeonsi/si_gpu_load.c b/src/gallium/drivers/radeonsi/si_gpu_load.c index 481438f37bb..7c2e43b3fdd 100644 --- a/src/gallium/drivers/radeonsi/si_gpu_load.c +++ b/src/gallium/drivers/radeonsi/si_gpu_load.c @@ -102,7 +102,7 @@ static void si_update_mmio_counters(struct si_screen *sscreen, UPDATE_COUNTER(gui, GUI_ACTIVE); gui_busy = GUI_ACTIVE(value); - if (sscreen->info.chip_class == CIK || sscreen->info.chip_class == VI) { + if (sscreen->info.chip_class == GFX7 || sscreen->info.chip_class == GFX8) { /* SRBM_STATUS2 */ sscreen->ws->read_registers(sscreen->ws, SRBM_STATUS2, 1, &value); @@ -110,7 +110,7 @@ static void si_update_mmio_counters(struct si_screen *sscreen, sdma_busy = SDMA_BUSY(value); } - if (sscreen->info.chip_class >= VI) { + if (sscreen->info.chip_class >= GFX8) { /* CP_STAT */ sscreen->ws->read_registers(sscreen->ws, CP_STAT, 1, &value); diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index c15c444cc40..322950557e3 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -1284,11 +1284,11 @@ void si_init_perfcounters(struct si_screen *screen) unsigned i; switch (screen->info.chip_class) { - case CIK: + case GFX7: blocks = groups_CIK; num_blocks = ARRAY_SIZE(groups_CIK); break; - case VI: + case GFX8: blocks = groups_VI; num_blocks = ARRAY_SIZE(groups_VI); break; @@ -1296,13 +1296,13 @@ void si_init_perfcounters(struct si_screen *screen) blocks = groups_gfx9; num_blocks = ARRAY_SIZE(groups_gfx9); break; - case SI: + case GFX6: default: return; /* not implemented */ } if (screen->info.max_sh_per_se != 1) { - /* This should not happen on non-SI chips. */ + /* This should not happen on non-GFX6 chips. */ fprintf(stderr, "si_init_perfcounters: max_sh_per_se = %d not " "supported (inaccurate performance counters)\n", screen->info.max_sh_per_se); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 4d36fd46a9b..d9dae8363f0 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -115,7 +115,7 @@ static void si_init_compiler(struct si_screen *sscreen, /* Only create the less-optimizing version of the compiler on APUs * predating Ryzen (Raven). */ bool create_low_opt_compiler = !sscreen->info.has_dedicated_vram && - sscreen->info.chip_class <= VI; + sscreen->info.chip_class <= GFX8; enum ac_target_machine_options tm_options = (sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) | @@ -394,7 +394,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, if (!sctx) return NULL; - sctx->has_graphics = sscreen->info.chip_class == SI || + sctx->has_graphics = sscreen->info.chip_class == GFX6 || !(flags & PIPE_CONTEXT_COMPUTE_ONLY); if (flags & PIPE_CONTEXT_DEBUG) @@ -419,8 +419,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, } - if (sctx->chip_class == CIK || - sctx->chip_class == VI || + if (sctx->chip_class == GFX7 || + sctx->chip_class == GFX8 || sctx->chip_class == GFX9) { sctx->eop_bug_scratch = si_resource( pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT, @@ -536,7 +536,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, } /* Initialize SDMA functions. */ - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) cik_init_sdma_functions(sctx); else si_init_dma_functions(sctx); @@ -563,9 +563,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, V_370_MEM, V_370_ME, &sctx->wait_mem_number); } - /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads + /* GFX7 cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */ - if (sctx->chip_class == CIK) { + if (sctx->chip_class == GFX7) { sctx->null_const_buf.buffer = pipe_aligned_buffer_create(screen, SI_RESOURCE_FLAG_32BIT, @@ -638,7 +638,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, /* this must be last */ si_begin_new_gfx_cs(sctx); - if (sctx->chip_class == CIK) { + if (sctx->chip_class == GFX7) { /* Clear the NULL constant buffer, because loads should return zeros. * Note that this forces CP DMA to be used, because clover deadlocks * for some reason when the compute codepath is used. @@ -1017,11 +1017,11 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, si_init_perfcounters(sscreen); /* Determine tessellation ring info. */ - bool double_offchip_buffers = sscreen->info.chip_class >= CIK && + bool double_offchip_buffers = sscreen->info.chip_class >= GFX7 && sscreen->info.family != CHIP_CARRIZO && sscreen->info.family != CHIP_STONEY; /* This must be one less than the maximum number due to a hw limitation. - * Various hardware bugs in SI, CIK, and GFX9 need this. + * Various hardware bugs need this. */ unsigned max_offchip_buffers_per_se; @@ -1052,8 +1052,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->tess_offchip_ring_size = max_offchip_buffers * sscreen->tess_offchip_block_dw_size * 4; - if (sscreen->info.chip_class >= CIK) { - if (sscreen->info.chip_class >= VI) + if (sscreen->info.chip_class >= GFX7) { + if (sscreen->info.chip_class >= GFX8) --max_offchip_buffers; sscreen->vgt_hs_offchip_param = S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) | @@ -1065,28 +1065,28 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, } /* The mere presense of CLEAR_STATE in the IB causes random GPU hangs - * on SI. Some CLEAR_STATE cause asic hang on radeon kernel, etc. - * SPI_VS_OUT_CONFIG. So only enable CI CLEAR_STATE on amdgpu kernel.*/ - sscreen->has_clear_state = sscreen->info.chip_class >= CIK && + * on GFX6. Some CLEAR_STATE cause asic hang on radeon kernel, etc. + * SPI_VS_OUT_CONFIG. So only enable GFX7 CLEAR_STATE on amdgpu kernel.*/ + sscreen->has_clear_state = sscreen->info.chip_class >= GFX7 && sscreen->info.drm_major == 3; sscreen->has_distributed_tess = - sscreen->info.chip_class >= VI && + sscreen->info.chip_class >= GFX8 && sscreen->info.max_se >= 2; sscreen->has_draw_indirect_multi = (sscreen->info.family >= CHIP_POLARIS10) || - (sscreen->info.chip_class == VI && + (sscreen->info.chip_class == GFX8 && sscreen->info.pfp_fw_version >= 121 && sscreen->info.me_fw_version >= 87) || - (sscreen->info.chip_class == CIK && + (sscreen->info.chip_class == GFX7 && sscreen->info.pfp_fw_version >= 211 && sscreen->info.me_fw_version >= 173) || - (sscreen->info.chip_class == SI && + (sscreen->info.chip_class == GFX6 && sscreen->info.pfp_fw_version >= 79 && sscreen->info.me_fw_version >= 142); - sscreen->has_out_of_order_rast = sscreen->info.chip_class >= VI && + sscreen->has_out_of_order_rast = sscreen->info.chip_class >= GFX8 && sscreen->info.max_se >= 2 && !(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER)); sscreen->assume_no_z_fights = @@ -1137,7 +1137,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, * by the reality that LLVM 5.0 doesn't have working VGPR indexing * on GFX9. */ - sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class <= VI; + sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class <= GFX8; /* Some chips have RB+ registers, but don't support RB+. Those must * always disable it. @@ -1157,7 +1157,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->dcc_msaa_allowed = !(sscreen->debug_flags & DBG(NO_DCC_MSAA)); - sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI; + sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= GFX8; (void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain); sscreen->use_monolithic_shaders = @@ -1165,7 +1165,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 | SI_CONTEXT_INV_VMEM_L1; - if (sscreen->info.chip_class <= VI) { + if (sscreen->info.chip_class <= GFX8) { sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2; sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 695827c9dd7..1c98f41b5f3 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -72,7 +72,7 @@ /* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */ #define SI_CONTEXT_INV_GLOBAL_L2 (1 << 6) /* Write dirty L2 lines back to memory (shader and CP DMA stores), but don't - * invalidate L2. SI-CIK can't do it, so they will do complete invalidation. */ + * invalidate L2. GFX6-GFX7 can't do it, so they will do complete invalidation. */ #define SI_CONTEXT_WRITEBACK_GLOBAL_L2 (1 << 7) /* Writeback & invalidate the L2 metadata cache. It can only be coupled with * a CB or DB flush. */ @@ -366,7 +366,7 @@ struct si_surface { unsigned cb_color_view; unsigned cb_color_attrib; unsigned cb_color_attrib2; /* GFX9 and later */ - unsigned cb_dcc_control; /* VI and later */ + unsigned cb_dcc_control; /* GFX8 and later */ unsigned spi_shader_col_format:8; /* no blending, no alpha-to-coverage. */ unsigned spi_shader_col_format_alpha:8; /* alpha-to-coverage */ unsigned spi_shader_col_format_blend:8; /* blending without alpha. */ @@ -923,7 +923,7 @@ struct si_context { bool bo_list_add_all_compute_resources; /* other shader resources */ - struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */ + struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on GFX7 */ struct pipe_resource *esgs_ring; struct pipe_resource *gsvs_ring; struct pipe_resource *tess_rings; @@ -1572,7 +1572,7 @@ si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples, else if (shaders_read_metadata) sctx->flags |= SI_CONTEXT_INV_L2_METADATA; } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2; } } @@ -1594,7 +1594,7 @@ si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples, else if (shaders_read_metadata) sctx->flags |= SI_CONTEXT_INV_L2_METADATA; } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2; } } diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index 22c4a5b6e6e..0b7d53e745d 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -161,8 +161,8 @@ void si_pm4_upload_indirect_buffer(struct si_context *sctx, struct pipe_screen *screen = sctx->b.screen; unsigned aligned_ndw = align(state->ndw, 8); - /* only supported on CIK and later */ - if (sctx->chip_class < CIK) + /* only supported on GFX7 and later */ + if (sctx->chip_class < GFX7) return; assert(state->ndw); diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index d98bea2eeb3..cb42ebb92ce 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -1019,7 +1019,7 @@ static void si_emit_query_predication(struct si_context *ctx) /* Use the value written by compute shader as a workaround. Note that * the wait flag does not apply in this predication mode. * - * The shader outputs the result value to L2. Workarounds only affect VI + * The shader outputs the result value to L2. Workarounds only affect GFX8 * and later, where the CP reads data from L2, so we don't need an * additional flush. */ @@ -1608,11 +1608,11 @@ static void si_render_condition(struct pipe_context *ctx, if (query) { bool needs_workaround = false; - /* There was a firmware regression in VI which causes successive + /* There was a firmware regression in GFX8 which causes successive * SET_PREDICATION packets to give the wrong answer for * non-inverted stream overflow predication. */ - if (((sctx->chip_class == VI && sctx->screen->info.pfp_fw_feature < 49) || + if (((sctx->chip_class == GFX8 && sctx->screen->info.pfp_fw_feature < 49) || (sctx->chip_class == GFX9 && sctx->screen->info.pfp_fw_feature < 38)) && !condition && (squery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || @@ -1786,7 +1786,7 @@ static unsigned si_get_num_queries(struct si_screen *sscreen) { /* amdgpu */ if (sscreen->info.drm_major == 3) { - if (sscreen->info.chip_class >= VI) + if (sscreen->info.chip_class >= GFX8) return ARRAY_SIZE(si_driver_query_list); else return ARRAY_SIZE(si_driver_query_list) - 7; @@ -1794,7 +1794,7 @@ static unsigned si_get_num_queries(struct si_screen *sscreen) /* radeon */ if (sscreen->info.has_read_registers_query) { - if (sscreen->info.chip_class == CIK) + if (sscreen->info.chip_class == GFX7) return ARRAY_SIZE(si_driver_query_list) - 6; else return ARRAY_SIZE(si_driver_query_list) - 7; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index f6d882cf583..98c11e1c98d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -105,7 +105,7 @@ static bool llvm_type_is_64bit(struct si_shader_context *ctx, static bool is_merged_shader(struct si_shader_context *ctx) { - if (ctx->screen->info.chip_class <= VI) + if (ctx->screen->info.chip_class <= GFX8) return false; return ctx->shader->key.as_ls || @@ -3082,7 +3082,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, /* Store the dynamic HS control word. */ offset = 0; - if (ctx->screen->info.chip_class <= VI) { + if (ctx->screen->info.chip_class <= GFX8) { ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->i32, 0x80000000, 0), 1, ctx->i32_0, tf_base, @@ -3719,7 +3719,7 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi, addrs[4 * i + 0], ""); break; default: - fprintf(stderr, "Warning: SI unhandled fs output type:%d\n", + fprintf(stderr, "Warning: GFX6 unhandled fs output type:%d\n", semantic_name); } } @@ -4215,11 +4215,11 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, { struct si_shader_context *ctx = si_shader_context(bld_base); - /* SI only (thanks to a hw bug workaround): + /* GFX6 only (thanks to a hw bug workaround): * The real barrier instruction isn’t needed, because an entire patch * always fits into a single wave. */ - if (ctx->screen->info.chip_class == SI && + if (ctx->screen->info.chip_class == GFX6 && ctx->type == PIPE_SHADER_TESS_CTRL) { ac_build_waitcnt(&ctx->ac, LGKM_CNT & VM_CNT); return; @@ -4327,7 +4327,7 @@ static unsigned si_get_max_workgroup_size(const struct si_shader *shader) case PIPE_SHADER_TESS_CTRL: /* Return this so that LLVM doesn't remove s_barrier * instructions on chips where we use s_barrier. */ - return shader->selector->screen->info.chip_class >= CIK ? 128 : 64; + return shader->selector->screen->info.chip_class >= GFX7 ? 128 : 64; case PIPE_SHADER_GEOMETRY: return shader->selector->screen->info.chip_class >= GFX9 ? 128 : 64; @@ -4536,7 +4536,7 @@ static void create_function(struct si_shader_context *ctx) declare_vs_input_vgprs(ctx, &fninfo, &num_prolog_vgprs); break; - case PIPE_SHADER_TESS_CTRL: /* SI-CI-VI */ + case PIPE_SHADER_TESS_CTRL: /* GFX6-GFX8 */ declare_global_desc_pointers(ctx, &fninfo); declare_per_stage_desc_pointers(ctx, &fninfo, true); ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -4851,7 +4851,7 @@ static void preload_ring_buffers(struct si_shader_context *ctx) LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers); - if (ctx->screen->info.chip_class <= VI && + if (ctx->screen->info.chip_class <= GFX8 && (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY)) { unsigned ring = ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS @@ -4897,7 +4897,7 @@ static void preload_ring_buffers(struct si_shader_context *ctx) stride = 4 * num_components * sel->gs_max_out_vertices; - /* Limit on the stride field for <= CIK. */ + /* Limit on the stride field for <= GFX7. */ assert(stride < (1 << 14)); num_records = 64; @@ -5222,7 +5222,7 @@ static void si_calculate_max_simd_waves(struct si_shader *shader) struct si_screen *sscreen = shader->selector->screen; struct si_shader_config *conf = &shader->config; unsigned num_inputs = shader->selector->info.num_inputs; - unsigned lds_increment = sscreen->info.chip_class >= CIK ? 512 : 256; + unsigned lds_increment = sscreen->info.chip_class >= GFX7 ? 512 : 256; unsigned lds_per_wave = 0; unsigned max_simd_waves; @@ -5452,7 +5452,7 @@ static int si_compile_llvm(struct si_screen *sscreen, * - Floating-point output modifiers would be ignored by the hw. * - Some opcodes don't support denormals, such as v_mad_f32. We would * have to stop using those. - * - SI & CI would be very slow. + * - GFX6 & GFX7 would be very slow. */ conf->float_mode |= V_00B028_FP_64_DENORMS; @@ -6576,7 +6576,7 @@ static bool si_should_optimize_less(struct ac_llvm_compiler *compiler, /* Assume a slow CPU. */ assert(!sel->screen->info.has_dedicated_vram && - sel->screen->info.chip_class <= VI); + sel->screen->info.chip_class <= GFX8); /* For a crazy dEQP test containing 2597 memory opcodes, mostly * buffer stores. */ @@ -6831,7 +6831,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, if (sel->type == PIPE_SHADER_COMPUTE) { unsigned wave_size = 64; unsigned max_vgprs = 256; - unsigned max_sgprs = sscreen->info.chip_class >= VI ? 800 : 512; + unsigned max_sgprs = sscreen->info.chip_class >= GFX8 ? 800 : 512; unsigned max_sgprs_per_wave = 128; unsigned max_block_threads = si_get_max_workgroup_size(shader); unsigned min_waves_per_cu = DIV_ROUND_UP(max_block_threads, wave_size); @@ -7263,7 +7263,7 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, /* Create the function. */ si_create_function(ctx, "tcs_epilog", NULL, 0, &fninfo, - ctx->screen->info.chip_class >= CIK ? 128 : 64); + ctx->screen->info.chip_class >= GFX7 ? 128 : 64); ac_declare_lds_as_pointer(&ctx->ac); func = ctx->main_fn; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 82c521efcb7..16b78fbf43e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -247,7 +247,7 @@ enum { #define S_VS_STATE_LS_OUT_VERTEX_SIZE(x) (((unsigned)(x) & 0xFF) << 24) #define C_VS_STATE_LS_OUT_VERTEX_SIZE 0x00FFFFFF -/* SI-specific system values. */ +/* Driver-specific system values. */ enum { /* Values from set_tess_state. */ TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI = TGSI_SEMANTIC_COUNT, diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index 5e540fc5098..be0cb89f722 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -48,8 +48,8 @@ static LLVMValueRef get_buffer_size( LLVMBuildExtractElement(builder, descriptor, LLVMConstInt(ctx->i32, 2, 0), ""); - if (ctx->screen->info.chip_class == VI) { - /* On VI, the descriptor contains the size in bytes, + if (ctx->screen->info.chip_class == GFX8) { + /* On GFX8, the descriptor contains the size in bytes, * but TXQ must return the size in elements. * The stride is always non-zero for resources using TXQ. */ @@ -132,7 +132,7 @@ ac_image_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type t /* Match the resource type set in the descriptor. */ if (dim == ac_image_cube || - (screen->info.chip_class <= VI && dim == ac_image_3d)) + (screen->info.chip_class <= GFX8 && dim == ac_image_3d)) dim = ac_image_2darray; else if (target == TGSI_TEXTURE_2D && screen->info.chip_class >= GFX9) { /* When a single layer of a 3D texture is bound, the shader @@ -161,7 +161,7 @@ ac_image_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type t static LLVMValueRef force_dcc_off(struct si_shader_context *ctx, LLVMValueRef rsrc) { - if (ctx->screen->info.chip_class <= CIK) { + if (ctx->screen->info.chip_class <= GFX7) { return rsrc; } else { LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0); @@ -327,11 +327,11 @@ static unsigned get_cache_policy(struct si_shader_context *ctx, unsigned cache_policy = 0; if (!atomic && - /* SI has a TC L1 bug causing corruption of 8bit/16bit stores. + /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. * All store opcodes not aligned to a dword are affected. * The only way to get unaligned stores in radeonsi is through * shader images. */ - ((may_store_unaligned && ctx->screen->info.chip_class == SI) || + ((may_store_unaligned && ctx->screen->info.chip_class == GFX6) || /* If this is write-only, don't keep data in L1 to prevent * evicting L1 cache lines that may be needed by other * instructions. */ @@ -1099,13 +1099,13 @@ LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. * - * SI-CI: + * GFX6-GFX7: * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic * filtering manually. The driver sets img7 to a mask clearing * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do: * s_and_b32 samp0, samp0, img7 * - * VI: + * GFX8: * The ANISO_OVERRIDE sampler field enables this fix in TA. */ static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx, @@ -1113,7 +1113,7 @@ static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx, { LLVMValueRef img7, samp0; - if (ctx->screen->info.chip_class >= VI) + if (ctx->screen->info.chip_class >= GFX8) return samp; img7 = LLVMBuildExtractElement(ctx->ac.builder, res, @@ -1446,7 +1446,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, * so the depth comparison value isn't clamped for Z16 and * Z24 anymore. Do it manually here. */ - if (ctx->screen->info.chip_class >= VI) { + if (ctx->screen->info.chip_class >= GFX8) { LLVMValueRef upgraded; LLVMValueRef clamped; upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler, @@ -1530,7 +1530,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, } else if (tgsi_is_array_sampler(target) && opcode != TGSI_OPCODE_TXF && opcode != TGSI_OPCODE_TXF_LZ && - ctx->screen->info.chip_class <= VI) { + ctx->screen->info.chip_class <= GFX8) { unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2; args.coords[array_coord] = ac_build_round(&ctx->ac, args.coords[array_coord]); } @@ -1687,7 +1687,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, /* The hardware needs special lowering for Gather4 with integer formats. */ LLVMValueRef gather4_int_result_workaround = NULL; - if (ctx->screen->info.chip_class <= VI && + if (ctx->screen->info.chip_class <= GFX8 && opcode == TGSI_OPCODE_TG4) { assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 55965bc86a1..bc91e6f5148 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -103,12 +103,12 @@ static void si_emit_cb_render_state(struct si_context *sctx) radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK, cb_target_mask); - if (sctx->chip_class >= VI) { + if (sctx->chip_class >= GFX8) { /* DCC MSAA workaround for blending. * Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_- * COMBINER_DISABLE, but that would be more complicated. */ - bool oc_disable = (sctx->chip_class == VI || + bool oc_disable = (sctx->chip_class == GFX8 || sctx->chip_class == GFX9) && blend && blend->blend_enable_4bit & cb_target_mask && @@ -1391,7 +1391,7 @@ static void si_emit_db_render_state(struct si_context *sctx) !sctx->occlusion_queries_disabled) { bool perfect = sctx->num_perfect_occlusion_queries > 0; - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { unsigned log_sample_rate = sctx->framebuffer.log_samples; /* Stoney doesn't increment occlusion query counters @@ -1413,7 +1413,7 @@ static void si_emit_db_render_state(struct si_context *sctx) } } else { /* Disable occlusion queries. */ - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { db_count_control = 0; } else { db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1); @@ -1433,8 +1433,8 @@ static void si_emit_db_render_state(struct si_context *sctx) db_shader_control = sctx->ps_db_shader_control; - /* Bug workaround for smoothing (overrasterization) on SI. */ - if (sctx->chip_class == SI && sctx->smoothing_enabled) { + /* Bug workaround for smoothing (overrasterization) on GFX6. */ + if (sctx->chip_class == GFX6 && sctx->smoothing_enabled) { db_shader_control &= C_02880C_Z_ORDER; db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); } @@ -1589,7 +1589,7 @@ static uint32_t si_translate_dbformat(enum pipe_format format) case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_UINT: - return V_028040_Z_24; /* deprecated on SI */ + return V_028040_Z_24; /* deprecated on AMD GCN */ case PIPE_FORMAT_Z32_FLOAT: case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: return V_028040_Z_32_FLOAT; @@ -1623,9 +1623,9 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen, /* * Implemented as an 8_8_8_8 data format to fix texture * gathers in stencil sampling. This affects at least - * GL45-CTS.texture_cube_map_array.sampling on VI. + * GL45-CTS.texture_cube_map_array.sampling on GFX8. */ - if (sscreen->info.chip_class <= VI) + if (sscreen->info.chip_class <= GFX8) return V_008F14_IMG_DATA_FORMAT_8_8_8_8; if (format == PIPE_FORMAT_X24S8_UINT) @@ -2461,14 +2461,14 @@ static void si_initialize_color_surface(struct si_context *sctx, color_info |= S_028C70_COMPRESSION(1); unsigned fmask_bankh = util_logbase2(tex->surface.u.legacy.fmask.bankh); - if (sctx->chip_class == SI) { - /* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */ + if (sctx->chip_class == GFX6) { + /* due to a hw bug, FMASK_BANK_HEIGHT must be set on GFX6 too */ color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); } } } - if (sctx->chip_class >= VI) { + if (sctx->chip_class >= GFX8) { unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B; unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B; @@ -2492,7 +2492,7 @@ static void si_initialize_color_surface(struct si_context *sctx, } /* This must be set for fast clear to work without FMASK. */ - if (!tex->surface.fmask_size && sctx->chip_class == SI) { + if (!tex->surface.fmask_size && sctx->chip_class == GFX6) { unsigned bankh = util_logbase2(tex->surface.u.legacy.bankh); color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); } @@ -2576,7 +2576,7 @@ static void si_init_depth_surface(struct si_context *sctx, } if (tex->surface.has_stencil) { - /* Stencil buffer workaround ported from the SI-CI-VI code. + /* Stencil buffer workaround ported from the GFX6-GFX8 code. * See that for explanation. */ s_info |= S_02803C_ALLOW_EXPCLEAR(tex->buffer.b.b.nr_samples <= 1); @@ -2592,7 +2592,7 @@ static void si_init_depth_surface(struct si_context *sctx, S_028ABC_RB_ALIGNED(tex->surface.u.gfx9.htile.rb_aligned); } } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ struct legacy_surf_level *levelinfo = &tex->surface.u.legacy.level[level]; assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); @@ -2607,7 +2607,7 @@ static void si_init_depth_surface(struct si_context *sctx, s_info = S_028044_FORMAT(stencil_format); surf->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile); - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { struct radeon_info *info = &sctx->screen->info; unsigned index = tex->surface.u.legacy.tiling_index[level]; unsigned stencil_index = tex->surface.u.legacy.stencil_tiling_index[level]; @@ -2746,7 +2746,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, bool unbound = false; int i; - /* Reject zero-sized framebuffers due to a hw bug on SI that occurs + /* Reject zero-sized framebuffers due to a hw bug on GFX6 that occurs * when PA_SU_HARDWARE_SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0. * We could implement the full workaround here, but it's a useless case. */ @@ -2935,7 +2935,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, } /* For optimal DCC performance. */ - if (sctx->chip_class == VI) + if (sctx->chip_class == GFX8) sctx->framebuffer.dcc_overwrite_combiner_watermark = 4; else if (num_bpp64_colorbufs >= 5) sctx->framebuffer.dcc_overwrite_combiner_watermark = 8; @@ -3139,7 +3139,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) radeon_set_context_reg(cs, R_0287A0_CB_MRT0_EPITCH + i * 4, S_0287A0_EPITCH(tex->surface.u.gfx9.surf.epitch)); } else { - /* Compute mutable surface parameters (SI-CI-VI). */ + /* Compute mutable surface parameters (GFX6-GFX8). */ const struct legacy_surf_level *level_info = &tex->surface.u.legacy.level[cb->base.u.tex.level]; unsigned pitch_tile_max, slice_tile_max, tile_mode_index; @@ -3167,20 +3167,20 @@ static void si_emit_framebuffer_state(struct si_context *sctx) cb_color_slice = S_028C68_TILE_MAX(slice_tile_max); if (tex->surface.fmask_size) { - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->surface.u.legacy.fmask.pitch_in_pixels / 8 - 1); cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->surface.u.legacy.fmask.tiling_index); cb_color_fmask_slice = S_028C88_TILE_MAX(tex->surface.u.legacy.fmask.slice_tile_max); } else { /* This must be set for fast clear to work without FMASK. */ - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max); } radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, - sctx->chip_class >= VI ? 14 : 13); + sctx->chip_class >= GFX8 ? 14 : 13); radeon_emit(cs, cb_color_base); /* CB_COLOR0_BASE */ radeon_emit(cs, cb_color_pitch); /* CB_COLOR0_PITCH */ radeon_emit(cs, cb_color_slice); /* CB_COLOR0_SLICE */ @@ -3195,7 +3195,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) radeon_emit(cs, tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ radeon_emit(cs, tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ - if (sctx->chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */ + if (sctx->chip_class >= GFX8) /* R_028C94_CB_COLOR0_DCC_BASE */ radeon_emit(cs, cb_dcc_base); } } @@ -3328,7 +3328,7 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx) /* The exclusion bits can be set to improve rasterization efficiency * if no sample lies on the pixel boundary (-8 sample offset). */ - bool exclusion = sctx->chip_class >= CIK && + bool exclusion = sctx->chip_class >= GFX7 && (!rs->multisample_enable || nr_samples != 16); radeon_opt_set_context_reg(sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL, SI_TRACKED_PA_SU_PRIM_FILTER_CNTL, @@ -3606,11 +3606,11 @@ si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf, /* The NUM_RECORDS field has a different meaning depending on the chip, * instruction type, STRIDE, and SWIZZLE_ENABLE. * - * SI-CIK: + * GFX6-GFX7: * - If STRIDE == 0, it's in byte units. * - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN. * - * VI: + * GFX8: * - For SMEM and STRIDE == 0, it's in byte units. * - For SMEM and STRIDE != 0, it's in units of STRIDE. * - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units. @@ -3633,7 +3633,7 @@ si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf, * the first element is readable when IDXEN == 0. */ num_records = num_records ? MAX2(num_records, stride) : 0; - else if (screen->info.chip_class == VI) + else if (screen->info.chip_class == GFX8) num_records *= stride; state[4] = 0; @@ -3720,9 +3720,9 @@ si_make_texture_descriptor(struct si_screen *screen, /* * X24S8 is implemented as an 8_8_8_8 data format, to * fix texture gathers. This affects at least - * GL45-CTS.texture_cube_map_array.sampling on VI. + * GL45-CTS.texture_cube_map_array.sampling on GFX8. */ - if (screen->info.chip_class <= VI) + if (screen->info.chip_class <= GFX8) util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle); else util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); @@ -3816,7 +3816,7 @@ si_make_texture_descriptor(struct si_screen *screen, if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY || - (screen->info.chip_class <= VI && + (screen->info.chip_class <= GFX8 && res->target == PIPE_TEXTURE_3D))) { /* For the purpose of shader images, treat cube maps and 3D * textures as 2D arrays. For 3D textures, the address @@ -3887,7 +3887,7 @@ si_make_texture_descriptor(struct si_screen *screen, /* The last dword is unused by hw. The shader uses it to clear * bits in the first dword of sampler state. */ - if (screen->info.chip_class <= CIK && res->nr_samples <= 1) { + if (screen->info.chip_class <= GFX7 && res->nr_samples <= 1) { if (first_level == last_level) state[7] = C_008F30_MAX_ANISO_RATIO; else @@ -4094,7 +4094,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx, height = height0; depth = texture->depth0; - if (sctx->chip_class <= VI && force_level) { + if (sctx->chip_class <= GFX8 && force_level) { assert(force_level == first_level && force_level == last_level); base_level = force_level; @@ -4331,7 +4331,7 @@ static void *si_create_sampler_state(struct pipe_context *ctx, S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) | S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) | - S_008F30_COMPAT_MODE(sctx->chip_class >= VI)); + S_008F30_COMPAT_MODE(sctx->chip_class >= GFX8)); rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) | S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0)); @@ -4340,9 +4340,9 @@ static void *si_create_sampler_state(struct pipe_context *ctx, S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter, max_aniso)) | S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) | S_008F38_MIP_POINT_PRECLAMP(0) | - S_008F38_DISABLE_LSB_CEIL(sctx->chip_class <= VI) | + S_008F38_DISABLE_LSB_CEIL(sctx->chip_class <= GFX8) | S_008F38_FILTER_PREC_FIX(1) | - S_008F38_ANISO_OVERRIDE(sctx->chip_class >= VI)); + S_008F38_ANISO_OVERRIDE(sctx->chip_class >= GFX8)); rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color, false); /* Create sampler resource for integer textures. */ @@ -4537,9 +4537,9 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, /* The hardware always treats the 2-bit alpha channel as * unsigned, so a shader workaround is needed. The affected - * chips are VI and older except Stoney (GFX8.1). + * chips are GFX8 and older except Stoney (GFX8.1). */ - always_fix = sscreen->info.chip_class <= VI && + always_fix = sscreen->info.chip_class <= GFX8 && sscreen->info.family != CHIP_STONEY && channel->type == UTIL_FORMAT_TYPE_SIGNED; } else if (elements[i].src_format == PIPE_FORMAT_R11G11B10_FLOAT) { @@ -4585,7 +4585,7 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, * into account would complicate the fast path (where everything * is nicely aligned). */ - bool check_alignment = log_hw_load_size >= 1 && sscreen->info.chip_class == SI; + bool check_alignment = log_hw_load_size >= 1 && sscreen->info.chip_class == GFX6; bool opencode = sscreen->options.vs_fetch_always_opencode; if (check_alignment && @@ -4810,10 +4810,10 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) } if (flags & PIPE_BARRIER_INDEX_BUFFER) { - /* Indices are read through TC L2 since VI. + /* Indices are read through TC L2 since GFX8. * L1 isn't used. */ - if (sctx->screen->info.chip_class <= CIK) + if (sctx->screen->info.chip_class <= GFX7) sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } @@ -4824,12 +4824,12 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) sctx->framebuffer.uncompressed_cb_mask) { sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; - if (sctx->chip_class <= VI) + if (sctx->chip_class <= GFX8) sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } /* Indirect buffers use TC L2 on GFX9, but not older hw. */ - if (sctx->screen->info.chip_class <= VI && + if (sctx->screen->info.chip_class <= GFX8 && flags & PIPE_BARRIER_INDIRECT_BUFFER) sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } @@ -4917,7 +4917,7 @@ void si_init_screen_state_functions(struct si_screen *sscreen) static void si_set_grbm_gfx_index(struct si_context *sctx, struct si_pm4_state *pm4, unsigned value) { - unsigned reg = sctx->chip_class >= CIK ? R_030800_GRBM_GFX_INDEX : + unsigned reg = sctx->chip_class >= GFX7 ? R_030800_GRBM_GFX_INDEX : R_00802C_GRBM_GFX_INDEX; si_pm4_set_reg(pm4, reg, value); } @@ -4954,7 +4954,7 @@ si_write_harvested_raster_configs(struct si_context *sctx, } si_set_grbm_gfx_index(sctx, pm4, ~0); - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); } } @@ -4973,7 +4973,7 @@ static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *p */ si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config); - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); } else { @@ -4988,8 +4988,8 @@ static void si_init_config(struct si_context *sctx) bool has_clear_state = sscreen->has_clear_state; struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); - /* SI, radeon kernel disabled CLEAR_STATE. */ - assert(has_clear_state || sscreen->info.chip_class == SI || + /* GFX6, radeon kernel disabled CLEAR_STATE. */ + assert(has_clear_state || sscreen->info.chip_class == GFX6 || sscreen->info.drm_major != 3); if (!pm4) @@ -5006,7 +5006,7 @@ static void si_init_config(struct si_context *sctx) si_pm4_cmd_end(pm4, false); } - if (sctx->chip_class <= VI) + if (sctx->chip_class <= GFX8) si_set_raster_config(sctx, pm4); si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); @@ -5014,7 +5014,7 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); /* FIXME calculate these values somehow ??? */ - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); } @@ -5028,14 +5028,14 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); if (!has_clear_state) si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); - if (sctx->chip_class < CIK) + if (sctx->chip_class < GFX7) si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1)); /* CLEAR_STATE doesn't clear these correctly on certain generations. * I don't know why. Deduced by trial and error. */ - if (sctx->chip_class <= CIK) { + if (sctx->chip_class <= GFX7) { si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); @@ -5077,7 +5077,7 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); } - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { if (sctx->chip_class >= GFX9) { si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F)); @@ -5136,7 +5136,7 @@ static void si_init_config(struct si_context *sctx) S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F)); } - if (sctx->chip_class >= VI) { + if (sctx->chip_class >= GFX8) { unsigned vgt_tess_distribution; vgt_tess_distribution = @@ -5159,7 +5159,7 @@ static void si_init_config(struct si_context *sctx) } si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40)); } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 8e01e1b35e1..7bbe66d46ae 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -78,7 +78,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, struct si_shader_selector *tcs = sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso; unsigned tess_uses_primid = sctx->ia_multi_vgt_param_key.u.tess_uses_prim_id; - bool has_primid_instancing_bug = sctx->chip_class == SI && + bool has_primid_instancing_bug = sctx->chip_class == GFX6 && sctx->screen->info.max_se == 1; unsigned tes_sh_base = sctx->shader_pointers.sh_base[PIPE_SHADER_TESS_EVAL]; unsigned num_tcs_input_cp = info->vertices_per_patch; @@ -152,7 +152,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, /* Make sure that the data fits in LDS. This assumes the shaders only * use LDS for the inputs and outputs. * - * While CIK can use 64K per threadgroup, there is a hang on Stoney + * While GFX7 can use 64K per threadgroup, there is a hang on Stoney * with 2 CUs if we use more than 32K. The closed Vulkan driver also * uses 32K at most on all GCN chips. */ @@ -185,8 +185,8 @@ static void si_emit_derived_tess_state(struct si_context *sctx, if (temp_verts_per_tg > 64 && temp_verts_per_tg % 64 < 48) *num_patches = (temp_verts_per_tg & ~63) / max_verts_per_patch; - if (sctx->chip_class == SI) { - /* SI bug workaround, related to power management. Limit LS-HS + if (sctx->chip_class == GFX6) { + /* GFX6 bug workaround, related to power management. Limit LS-HS * threadgroups to only one wave. */ unsigned one_wave = 64 / max_verts_per_patch; @@ -200,7 +200,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, * The intended solution is to restrict threadgroups to * a single instance by setting SWITCH_ON_EOI, which * should cause IA to split instances up. However, this - * doesn't work correctly on SI when there is no other + * doesn't work correctly on GFX6 when there is no other * SE to switch to. */ if (has_primid_instancing_bug && tess_uses_primid) @@ -238,7 +238,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, /* Compute the LDS size. */ lds_size = output_patch0_offset + output_patch_size * *num_patches; - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { assert(lds_size <= 65536); lds_size = align(lds_size, 512) / 512; } else { @@ -272,7 +272,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, /* Due to a hw bug, RSRC2_LS must be written twice with another * LS register written in between. */ - if (sctx->chip_class == CIK && sctx->family != CHIP_HAWAII) + if (sctx->chip_class == GFX7 && sctx->family != CHIP_HAWAII) radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2); radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2); radeon_emit(cs, ls_current->config.rsrc1); @@ -297,7 +297,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp); if (sctx->last_ls_hs_config != ls_hs_config) { - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config); } else { @@ -349,10 +349,10 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, key->u.uses_gs) partial_vs_wave = true; - /* Needed for 028B6C_DISTRIBUTION_MODE != 0. (implies >= VI) */ + /* Needed for 028B6C_DISTRIBUTION_MODE != 0. (implies >= GFX8) */ if (sscreen->has_distributed_tess) { if (key->u.uses_gs) { - if (sscreen->info.chip_class == VI) + if (sscreen->info.chip_class == GFX8) partial_es_wave = true; } else { partial_vs_wave = true; @@ -367,7 +367,7 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, wd_switch_on_eop = true; } - if (sscreen->info.chip_class >= CIK) { + if (sscreen->info.chip_class >= GFX7) { /* WD_SWITCH_ON_EOP has no effect on GPUs with less than * 4 shader engines. Set 1 to pass the assertion below. * The other cases are hardware requirements. @@ -400,12 +400,12 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, * Assume indirect draws always use small instances. * This is needed for good VS wave utilization. */ - if (sscreen->info.chip_class <= VI && + if (sscreen->info.chip_class <= GFX8 && sscreen->info.max_se == 4 && key->u.multi_instances_smaller_than_primgroup) wd_switch_on_eop = true; - /* Required on CIK and later. */ + /* Required on GFX7 and later. */ if (sscreen->info.max_se == 4 && !wd_switch_on_eop) ia_switch_on_eoi = true; @@ -421,10 +421,10 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, sscreen->info.family == CHIP_VEGAM)) partial_vs_wave = true; - /* Required by Hawaii and, for some special cases, by VI. */ + /* Required by Hawaii and, for some special cases, by GFX8. */ if (ia_switch_on_eoi && (sscreen->info.family == CHIP_HAWAII || - (sscreen->info.chip_class == VI && + (sscreen->info.chip_class == GFX8 && (key->u.uses_gs || max_primgroup_in_wave != 2)))) partial_vs_wave = true; @@ -444,16 +444,16 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, } /* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */ - if (sscreen->info.chip_class <= VI && ia_switch_on_eoi) + if (sscreen->info.chip_class <= GFX8 && ia_switch_on_eoi) partial_es_wave = true; return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) | S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) | - S_028AA8_WD_SWITCH_ON_EOP(sscreen->info.chip_class >= CIK ? wd_switch_on_eop : 0) | + S_028AA8_WD_SWITCH_ON_EOP(sscreen->info.chip_class >= GFX7 ? wd_switch_on_eop : 0) | /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */ - S_028AA8_MAX_PRIMGRP_IN_WAVE(sscreen->info.chip_class == VI ? + S_028AA8_MAX_PRIMGRP_IN_WAVE(sscreen->info.chip_class == GFX8 ? max_primgroup_in_wave : 0) | S_030960_EN_INST_OPT_BASIC(sscreen->info.chip_class >= GFX9) | S_030960_EN_INST_OPT_ADV(sscreen->info.chip_class >= GFX9); @@ -519,7 +519,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, if (sctx->gs_shader.cso) { /* GS requirement. */ - if (sctx->chip_class <= VI && + if (sctx->chip_class <= GFX8 && SI_GS_PER_ES / primgroup_size >= sctx->screen->gs_table_depth - 3) ia_multi_vgt_param |= S_028AA8_PARTIAL_ES_WAVE_ON(1); @@ -625,7 +625,7 @@ static void si_emit_draw_registers(struct si_context *sctx, radeon_set_uconfig_reg_idx(cs, sctx->screen, R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param); - else if (sctx->chip_class >= CIK) + else if (sctx->chip_class >= GFX7) radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); else radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param); @@ -633,7 +633,7 @@ static void si_emit_draw_registers(struct si_context *sctx, sctx->last_multi_vgt_param = ia_multi_vgt_param; } if (prim != sctx->last_prim) { - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) radeon_set_uconfig_reg_idx(cs, sctx->screen, R_030908_VGT_PRIMITIVE_TYPE, 1, prim); else @@ -700,12 +700,12 @@ static void si_emit_draw_packets(struct si_context *sctx, break; case 2: index_type = V_028A7C_VGT_INDEX_16 | - (SI_BIG_ENDIAN && sctx->chip_class <= CIK ? + (SI_BIG_ENDIAN && sctx->chip_class <= GFX7 ? V_028A7C_VGT_DMA_SWAP_16_BIT : 0); break; case 4: index_type = V_028A7C_VGT_INDEX_32 | - (SI_BIG_ENDIAN && sctx->chip_class <= CIK ? + (SI_BIG_ENDIAN && sctx->chip_class <= GFX7 ? V_028A7C_VGT_DMA_SWAP_32_BIT : 0); break; default: @@ -733,10 +733,10 @@ static void si_emit_draw_packets(struct si_context *sctx, si_resource(indexbuf), RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER); } else { - /* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE, + /* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE, * so the state must be re-emitted before the next indexed draw. */ - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) sctx->last_index_size = -1; } @@ -918,7 +918,7 @@ void si_emit_cache_flush(struct si_context *sctx) if (flags & SI_CONTEXT_FLUSH_AND_INV_DB) sctx->num_db_cache_flushes++; - /* SI has a bug that it always flushes ICACHE and KCACHE if either + /* GFX6 has a bug that it always flushes ICACHE and KCACHE if either * bit is set. An alternative way is to write SQC_CACHES, but that * doesn't seem to work reliably. Since the bug doesn't affect * correctness (it only does more work than necessary) and @@ -931,7 +931,7 @@ void si_emit_cache_flush(struct si_context *sctx) if (flags & SI_CONTEXT_INV_SMEM_L1) cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1); - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) { cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) | @@ -944,7 +944,7 @@ void si_emit_cache_flush(struct si_context *sctx) S_0085F0_CB7_DEST_BASE_ENA(1); /* Necessary for DCC */ - if (sctx->chip_class == VI) + if (sctx->chip_class == GFX8) si_cp_release_mem(sctx, V_028A90_FLUSH_AND_INV_CB_DATA_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, @@ -1085,25 +1085,25 @@ void si_emit_cache_flush(struct si_context *sctx) radeon_emit(cs, 0); } - /* SI-CI-VI only: + /* GFX6-GFX8 only: * When one of the CP_COHER_CNTL.DEST_BASE flags is set, SURFACE_SYNC * waits for idle, so it should be last. SURFACE_SYNC is done in PFP. * * cp_coher_cntl should contain all necessary flags except TC flags * at this point. * - * SI-CIK don't support L2 write-back. + * GFX6-GFX7 don't support L2 write-back. */ if (flags & SI_CONTEXT_INV_GLOBAL_L2 || - (sctx->chip_class <= CIK && + (sctx->chip_class <= GFX7 && (flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2))) { - /* Invalidate L1 & L2. (L1 is always invalidated on SI) - * WB must be set on VI+ when TC_ACTION is set. + /* Invalidate L1 & L2. (L1 is always invalidated on GFX6) + * WB must be set on GFX8+ when TC_ACTION is set. */ si_emit_surface_sync(sctx, cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) | - S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= VI)); + S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= GFX8)); cp_coher_cntl = 0; sctx->num_L2_invalidates++; } else { @@ -1260,7 +1260,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i unsigned index_offset = info->indirect ? info->start * index_size : 0; if (likely(!info->indirect)) { - /* SI-CI treat instance_count==0 as instance_count==1. There is + /* GFX6-GFX7 treat instance_count==0 as instance_count==1. There is * no workaround for indirect draws, but we can at least skip * direct draws. */ @@ -1362,8 +1362,8 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i if (index_size) { /* Translate or upload, if needed. */ - /* 8-bit indices are supported on VI. */ - if (sctx->chip_class <= CIK && index_size == 1) { + /* 8-bit indices are supported on GFX8. */ + if (sctx->chip_class <= GFX7 && index_size == 1) { unsigned start, count, start_offset, size, offset; void *ptr; @@ -1403,9 +1403,9 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i /* info->start will be added by the drawing code */ index_offset -= start_offset; - } else if (sctx->chip_class <= CIK && + } else if (sctx->chip_class <= GFX7 && si_resource(indexbuf)->TC_L2_dirty) { - /* VI reads index buffers through TC L2, so it doesn't + /* GFX8 reads index buffers through TC L2, so it doesn't * need this. */ sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; si_resource(indexbuf)->TC_L2_dirty = false; @@ -1419,7 +1419,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i si_context_add_resource_size(sctx, indirect->buffer); /* Indirect buffers use TC L2 on GFX9, but not older hw. */ - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { if (si_resource(indirect->buffer)->TC_L2_dirty) { sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; si_resource(indirect->buffer)->TC_L2_dirty = false; @@ -1498,7 +1498,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i /* Start prefetches after the draw has been started. Both will run * in parallel, but starting the draw first is more important. */ - if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask) + if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask) cik_emit_prefetch_L2(sctx, false); } else { /* If we don't wait for idle, start prefetches first, then set @@ -1508,7 +1508,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i si_emit_cache_flush(sctx); /* Only prefetch the API VS and VBO descriptors. */ - if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask) + if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask) cik_emit_prefetch_L2(sctx, true); if (!si_upload_graphics_shader_descriptors(sctx)) @@ -1527,7 +1527,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i /* Prefetch the remaining shaders after the draw has been * started. */ - if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask) + if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask) cik_emit_prefetch_L2(sctx, false); } diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 51a3af92d0c..10677f175de 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -473,7 +473,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader) unsigned vgpr_comp_cnt; uint64_t va; - assert(sscreen->info.chip_class <= VI); + assert(sscreen->info.chip_class <= GFX8); pm4 = si_get_shader_pm4_state(shader); if (!pm4) @@ -547,7 +547,7 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) S_00B428_FLOAT_MODE(shader->config.float_mode) | S_00B428_LS_VGPR_COMP_CNT(ls_vgpr_comp_cnt)); - if (sscreen->info.chip_class <= VI) { + if (sscreen->info.chip_class <= GFX8) { si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, shader->config.rsrc2); } @@ -587,7 +587,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) uint64_t va; unsigned oc_lds_en; - assert(sscreen->info.chip_class <= VI); + assert(sscreen->info.chip_class <= GFX8); pm4 = si_get_shader_pm4_state(shader); if (!pm4) @@ -973,7 +973,7 @@ static void si_emit_shader_vs(struct si_context *sctx) SI_TRACKED_VGT_PRIMITIVEID_EN, shader->ctx_reg.vs.vgt_primitiveid_en); - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { radeon_opt_set_context_reg(sctx, R_028AB4_VGT_REUSE_OFF, SI_TRACKED_VGT_REUSE_OFF, shader->ctx_reg.vs.vgt_reuse_off); @@ -1052,7 +1052,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, shader->ctx_reg.vs.vgt_primitiveid_en = 0; } - if (sscreen->info.chip_class <= VI) { + if (sscreen->info.chip_class <= GFX8) { /* Reuse needs to be set off if we write oViewport. */ shader->ctx_reg.vs.vgt_reuse_off = S_028AB4_REUSE_OFF(info->writes_viewport_index); @@ -1602,11 +1602,11 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, blend && blend->alpha_to_coverage) key->part.ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR; - /* On SI and CIK except Hawaii, the CB doesn't clamp outputs + /* On GFX6 and GFX7 except Hawaii, the CB doesn't clamp outputs * to the range supported by the type if a channel has less * than 16 bits and the export format is 16_ABGR. */ - if (sctx->chip_class <= CIK && sctx->family != CHIP_HAWAII) { + if (sctx->chip_class <= GFX7 && sctx->family != CHIP_HAWAII) { key->part.ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8; key->part.ps.epilog.color_is_int10 = sctx->framebuffer.color_is_int10; } @@ -2706,10 +2706,10 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader) switch (shader->selector->type) { case PIPE_SHADER_VERTEX: if (shader->key.as_ls) { - assert(sctx->chip_class <= VI); + assert(sctx->chip_class <= GFX8); si_pm4_delete_state(sctx, ls, shader->pm4); } else if (shader->key.as_es) { - assert(sctx->chip_class <= VI); + assert(sctx->chip_class <= GFX8); si_pm4_delete_state(sctx, es, shader->pm4); } else { si_pm4_delete_state(sctx, vs, shader->pm4); @@ -2720,7 +2720,7 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader) break; case PIPE_SHADER_TESS_EVAL: if (shader->key.as_es) { - assert(sctx->chip_class <= VI); + assert(sctx->chip_class <= GFX8); si_pm4_delete_state(sctx, es, shader->pm4); } else { si_pm4_delete_state(sctx, vs, shader->pm4); @@ -2937,10 +2937,10 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) unsigned num_se = sctx->screen->info.max_se; unsigned wave_size = 64; unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */ - /* On SI-CI, the value comes from VGT_GS_VERTEX_REUSE = 16. - * On VI+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2). + /* On GFX6-GFX7, the value comes from VGT_GS_VERTEX_REUSE = 16. + * On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2). */ - unsigned gs_vertex_reuse = (sctx->chip_class >= VI ? 32 : 16) * num_se; + unsigned gs_vertex_reuse = (sctx->chip_class >= GFX8 ? 32 : 16) * num_se; unsigned alignment = 256 * num_se; /* The maximum size is 63.999 MB per SE. */ unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se; @@ -2967,7 +2967,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) * * GFX9 doesn't have the ESGS ring. */ - bool update_esgs = sctx->chip_class <= VI && + bool update_esgs = sctx->chip_class <= GFX8 && esgs_ring_size && (!sctx->esgs_ring || sctx->esgs_ring->width0 < esgs_ring_size); @@ -3005,9 +3005,9 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) if (!pm4) return false; - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { if (sctx->esgs_ring) { - assert(sctx->chip_class <= VI); + assert(sctx->chip_class <= GFX8); si_pm4_set_reg(pm4, R_030900_VGT_ESGS_RING_SIZE, sctx->esgs_ring->width0 / 256); } @@ -3039,7 +3039,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) /* Set ring bindings. */ if (sctx->esgs_ring) { - assert(sctx->chip_class <= VI); + assert(sctx->chip_class <= GFX8); si_set_ring_buffer(sctx, SI_ES_RING_ESGS, sctx->esgs_ring, 0, sctx->esgs_ring->width0, true, true, 4, 64, 0); @@ -3288,7 +3288,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx) sctx->screen->tess_offchip_ring_size; /* Append these registers to the init config state. */ - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(sctx->screen->tess_factor_ring_size / 4)); si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE, @@ -3376,7 +3376,7 @@ bool si_update_shaders(struct si_context *sctx) } /* VS as LS */ - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { r = si_shader_select(ctx, &sctx->vs_shader, &compiler_state); if (r) @@ -3408,7 +3408,7 @@ bool si_update_shaders(struct si_context *sctx) if (sctx->gs_shader.cso) { /* TES as ES */ - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { r = si_shader_select(ctx, &sctx->tes_shader, &compiler_state); if (r) @@ -3424,7 +3424,7 @@ bool si_update_shaders(struct si_context *sctx) si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4); } } else if (sctx->gs_shader.cso) { - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { /* VS as ES */ r = si_shader_select(ctx, &sctx->vs_shader, &compiler_state); @@ -3457,7 +3457,7 @@ bool si_update_shaders(struct si_context *sctx) return false; } else { si_pm4_bind_state(sctx, gs, NULL); - if (sctx->chip_class <= VI) + if (sctx->chip_class <= GFX8) si_pm4_bind_state(sctx, es, NULL); } @@ -3504,7 +3504,7 @@ bool si_update_shaders(struct si_context *sctx) sctx->smoothing_enabled = sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing; si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); - if (sctx->chip_class == SI) + if (sctx->chip_class == GFX6) si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); if (sctx->framebuffer.nr_samples <= 1) @@ -3522,7 +3522,7 @@ bool si_update_shaders(struct si_context *sctx) return false; } - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { if (si_pm4_state_enabled_and_changed(sctx, ls)) sctx->prefetch_L2_mask |= SI_PREFETCH_LS; else if (!sctx->queued.named.ls) diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c index 2a0a4bef9a2..e7058f19a8a 100644 --- a/src/gallium/drivers/radeonsi/si_state_streamout.c +++ b/src/gallium/drivers/radeonsi/si_state_streamout.c @@ -103,7 +103,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx, * to flush it. * * The only cases which requires flushing it is VGT DMA index - * fetching (on <= CIK) and indirect draw data, which are rare + * fetching (on <= GFX7) and indirect draw data, which are rare * cases. Thus, flag the TC L2 dirtiness in the resource and * handle it at draw call time. */ @@ -195,7 +195,7 @@ static void si_flush_vgt_streamout(struct si_context *sctx) unsigned reg_strmout_cntl; /* The register is at different places on different ASICs. */ - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0); } else { @@ -230,7 +230,7 @@ static void si_emit_streamout_begin(struct si_context *sctx) t[i]->stride_in_dw = stride_in_dw[i]; - /* SI binds streamout buffers as shader resources. + /* AMD GCN binds streamout buffers as shader resources. * VGT only counts primitives and tells the shader * through SGPRs what to do. */ radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2); diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c index 792d1c4efd1..a144d7b661c 100644 --- a/src/gallium/drivers/radeonsi/si_state_viewport.c +++ b/src/gallium/drivers/radeonsi/si_state_viewport.c @@ -126,10 +126,10 @@ static void si_emit_one_scissor(struct si_context *ctx, if (scissor) si_clip_scissor(&final, scissor); - /* Workaround for a hw bug on SI that occurs when PA_SU_HARDWARE_- + /* Workaround for a hw bug on GFX6 that occurs when PA_SU_HARDWARE_- * SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0. */ - if (ctx->chip_class == SI && (final.maxx == 0 || final.maxy == 0)) { + if (ctx->chip_class == GFX6 && (final.maxx == 0 || final.maxy == 0)) { radeon_emit(cs, S_028250_TL_X(1) | S_028250_TL_Y(1) | S_028250_WINDOW_OFFSET_DISABLE(1)); @@ -180,9 +180,9 @@ static void si_emit_guardband(struct si_context *ctx) int hw_screen_offset_x = (vp_as_scissor.maxx + vp_as_scissor.minx) / 2; int hw_screen_offset_y = (vp_as_scissor.maxy + vp_as_scissor.miny) / 2; - /* SI-CI need to align the offset to an ubertile consisting of all SEs. */ + /* GFX6-GFX7 need to align the offset to an ubertile consisting of all SEs. */ const unsigned hw_screen_offset_alignment = - ctx->chip_class >= VI ? 16 : MAX2(ctx->screen->se_tile_repeat, 16); + ctx->chip_class >= GFX8 ? 16 : MAX2(ctx->screen->se_tile_repeat, 16); /* Indexed by quantization modes */ static int max_viewport_size[] = {65535, 16383, 4095}; diff --git a/src/gallium/drivers/radeonsi/si_test_dma_perf.c b/src/gallium/drivers/radeonsi/si_test_dma_perf.c index 124f5bb5c12..263187d683f 100644 --- a/src/gallium/drivers/radeonsi/si_test_dma_perf.c +++ b/src/gallium/drivers/radeonsi/si_test_dma_perf.c @@ -112,11 +112,11 @@ void si_test_dma_perf(struct si_screen *sscreen) unsigned cs_dwords_per_thread = test_cs ? cs_dwords_per_thread_list[cs_method % NUM_SHADERS] : 0; - if (sctx->chip_class == SI) { - /* SI doesn't support CP DMA operations through L2. */ + if (sctx->chip_class == GFX6) { + /* GFX6 doesn't support CP DMA operations through L2. */ if (test_cp && cache_policy != L2_BYPASS) continue; - /* WAVES_PER_SH is in multiples of 16 on SI. */ + /* WAVES_PER_SH is in multiples of 16 on GFX6. */ if (test_cs && cs_waves_per_sh % 16 != 0) continue; } @@ -151,7 +151,7 @@ void si_test_dma_perf(struct si_screen *sscreen) unsigned query_type = PIPE_QUERY_TIME_ELAPSED; if (test_sdma) { - if (sctx->chip_class == SI) + if (sctx->chip_class == GFX6) query_type = SI_QUERY_TIME_ELAPSED_SDMA_SI; else query_type = SI_QUERY_TIME_ELAPSED_SDMA; @@ -346,10 +346,10 @@ void si_test_dma_perf(struct si_screen *sscreen) if (!r->is_valid) continue; - /* Ban CP DMA clears via MC on <= VI. They are super slow + /* Ban CP DMA clears via MC on <= GFX8. They are super slow * on GTT, which we can get due to BO evictions. */ - if (sctx->chip_class <= VI && placement == 1 && + if (sctx->chip_class <= GFX8 && placement == 1 && r->is_cp && r->cache_policy == L2_BYPASS) continue; diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index 59d50376438..74c9cf9d7bf 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -254,10 +254,10 @@ static int si_init_surface(struct si_screen *sscreen, array_mode == RADEON_SURF_MODE_2D)) { /* TC-compatible HTILE only supports Z32_FLOAT. * GFX9 also supports Z16_UNORM. - * On VI, promote Z16 to Z32. DB->CB copies will convert + * On GFX8, promote Z16 to Z32. DB->CB copies will convert * the format for transfers. */ - if (sscreen->info.chip_class == VI) + if (sscreen->info.chip_class == GFX8) bpe = 4; flags |= RADEON_SURF_TC_COMPATIBLE_HTILE; @@ -267,7 +267,7 @@ static int si_init_surface(struct si_screen *sscreen, flags |= RADEON_SURF_SBUFFER; } - if (sscreen->info.chip_class >= VI && + if (sscreen->info.chip_class >= GFX8 && (ptex->flags & SI_RESOURCE_FLAG_DISABLE_DCC || ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT || (ptex->nr_samples >= 2 && !sscreen->dcc_msaa_allowed))) @@ -278,8 +278,8 @@ static int si_init_surface(struct si_screen *sscreen, bpe == 16 && ptex->nr_samples >= 2) flags |= RADEON_SURF_DISABLE_DCC; - /* VI: DCC clear for 4x and 8x MSAA array textures unimplemented. */ - if (sscreen->info.chip_class == VI && + /* GFX8: DCC clear for 4x and 8x MSAA array textures unimplemented. */ + if (sscreen->info.chip_class == GFX8 && ptex->nr_storage_samples >= 4 && ptex->array_size > 1) flags |= RADEON_SURF_DISABLE_DCC; @@ -700,7 +700,7 @@ static void si_set_tex_bo_metadata(struct si_screen *sscreen, md.size_metadata = 10 * 4; /* Dwords [10:..] contain the mipmap level offsets. */ - if (sscreen->info.chip_class <= VI) { + if (sscreen->info.chip_class <= GFX8) { for (unsigned i = 0; i <= res->last_level; i++) md.metadata[10+i] = tex->surface.u.legacy.level[i].offset >> 8; @@ -716,7 +716,7 @@ static void si_get_opaque_metadata(struct si_screen *sscreen, { uint32_t *desc = &md->metadata[2]; - if (sscreen->info.chip_class < VI) + if (sscreen->info.chip_class < GFX8) return; /* Return if DCC is enabled. The texture should be set up with it @@ -757,7 +757,7 @@ static bool si_has_displayable_dcc(struct si_texture *tex) { struct si_screen *sscreen = (struct si_screen*)tex->buffer.b.b.screen; - if (sscreen->info.chip_class <= VI) + if (sscreen->info.chip_class <= GFX8) return false; /* This needs a cache flush before scanout. @@ -849,7 +849,7 @@ static boolean si_texture_get_handle(struct pipe_screen* screen, assert(tex->surface.tile_swizzle == 0); } - /* Since shader image stores don't support DCC on VI, + /* Since shader image stores don't support DCC on GFX8, * disable it for external clients that want write * access. */ @@ -974,7 +974,7 @@ static void si_texture_get_htile_size(struct si_screen *sscreen, unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align; unsigned num_pipes = sscreen->info.num_tile_pipes; - assert(sscreen->info.chip_class <= VI); + assert(sscreen->info.chip_class <= GFX8); tex->surface.htile_size = 0; @@ -989,7 +989,7 @@ static void si_texture_get_htile_size(struct si_screen *sscreen, * are always reproducible. I think I have seen the test hang * on Carrizo too, though it was very rare there. */ - if (sscreen->info.chip_class >= CIK && num_pipes < 4) + if (sscreen->info.chip_class >= GFX7 && num_pipes < 4) num_pipes = 4; switch (num_pipes) { @@ -1036,7 +1036,7 @@ static void si_texture_get_htile_size(struct si_screen *sscreen, static void si_texture_allocate_htile(struct si_screen *sscreen, struct si_texture *tex) { - if (sscreen->info.chip_class <= VI && !tex->tc_compatible_htile) + if (sscreen->info.chip_class <= GFX8 && !tex->tc_compatible_htile) si_texture_get_htile_size(sscreen, tex); if (!tex->surface.htile_size) @@ -1229,7 +1229,7 @@ si_texture_create_object(struct pipe_screen *screen, RADEON_SURF_TC_COMPATIBLE_HTILE); /* TC-compatible HTILE: - * - VI only supports Z32_FLOAT. + * - GFX8 only supports Z32_FLOAT. * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */ if (tex->tc_compatible_htile) { if (sscreen->info.chip_class >= GFX9 && @@ -1506,10 +1506,10 @@ si_choose_tiling(struct si_screen *sscreen, if (templ->flags & SI_RESOURCE_FLAG_TRANSFER) return RADEON_SURF_MODE_LINEAR_ALIGNED; - /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on VI, + /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on GFX8, * which requires 2D tiling. */ - if (sscreen->info.chip_class == VI && tc_compatible_htile) + if (sscreen->info.chip_class == GFX8 && tc_compatible_htile) return RADEON_SURF_MODE_2D; /* Handle common candidates for the linear mode. @@ -1525,7 +1525,7 @@ si_choose_tiling(struct si_screen *sscreen, if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) return RADEON_SURF_MODE_LINEAR_ALIGNED; - /* Cursors are linear on SI. + /* Cursors are linear on AMD GCN. * (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */ if (templ->bind & PIPE_BIND_CURSOR) return RADEON_SURF_MODE_LINEAR_ALIGNED; @@ -1582,7 +1582,7 @@ struct pipe_resource *si_texture_create(struct pipe_screen *screen, struct radeon_surf surface = {0}; bool is_flushed_depth = templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH; bool tc_compatible_htile = - sscreen->info.chip_class >= VI && + sscreen->info.chip_class >= GFX8 && /* There are issues with TC-compatible HTILE on Tonga (and * Iceland is the same design), and documented bug workarounds * don't help. For example, this fails: @@ -2450,7 +2450,7 @@ void vi_separate_dcc_try_enable(struct si_context *sctx, sctx->screen->debug_flags & DBG(NO_DCC_FB)) return; - assert(sctx->chip_class >= VI); + assert(sctx->chip_class >= GFX8); if (tex->dcc_offset) return; /* already enabled */ |