Diffstat (limited to 'src/gallium/drivers/radeonsi')
28 files changed, 282 insertions, 282 deletions
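For orientation before the per-file hunks: the change is a mechanical rename of the chip_class enumerants used in comparisons such as sctx->chip_class >= CIK. A minimal illustrative sketch of the correspondence is below; it is not the exact Mesa header (the real enum lives in the shared AMD code, includes older generations, and may differ in names and order), only a summary of the substitution this diff applies.

/* Illustrative sketch only: maps the old radeonsi chip_class names to the
 * new ones used throughout this diff. Not the actual Mesa definition. */
enum chip_class_sketch {
	GFX6,   /* previously SI  (Southern Islands)  */
	GFX7,   /* previously CIK (Sea Islands)       */
	GFX8,   /* previously VI  (Volcanic Islands)  */
	GFX9,   /* unchanged by this diff             */
};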
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index da9b25a442d..2728541dd29 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -180,12 +180,12 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, copy_width <= (1 << 14) && copy_height <= (1 << 14) && copy_depth <= (1 << 11) && - /* HW limitation - CIK: */ - (sctx->chip_class != CIK || + /* HW limitation - GFX7: */ + (sctx->chip_class != GFX7 || (copy_width < (1 << 14) && copy_height < (1 << 14) && copy_depth < (1 << 11))) && - /* HW limitation - some CIK parts: */ + /* HW limitation - some GFX7 parts: */ ((sctx->family != CHIP_BONAIRE && sctx->family != CHIP_KAVERI) || (srcx + copy_width != (1 << 14) && @@ -207,7 +207,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, radeon_emit(cs, dstx | (dsty << 16)); radeon_emit(cs, dstz | ((dst_pitch - 1) << 16)); radeon_emit(cs, dst_slice_pitch - 1); - if (sctx->chip_class == CIK) { + if (sctx->chip_class == GFX7) { radeon_emit(cs, copy_width | (copy_height << 16)); radeon_emit(cs, copy_depth); } else { @@ -264,7 +264,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, bpp == 16) return false; - if (sctx->chip_class == CIK && + if (sctx->chip_class == GFX7 && (copy_width_aligned == (1 << 14) || copy_height == (1 << 14) || copy_depth == (1 << 11))) @@ -371,7 +371,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, radeon_emit(cs, linear_x | (linear_y << 16)); radeon_emit(cs, linear_z | ((linear_pitch - 1) << 16)); radeon_emit(cs, linear_slice_pitch - 1); - if (sctx->chip_class == CIK) { + if (sctx->chip_class == GFX7) { radeon_emit(cs, copy_width_aligned | (copy_height << 16)); radeon_emit(cs, copy_depth); } else { @@ -394,9 +394,9 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, dsty % 8 == 0 && srcx % 8 == 0 && srcy % 8 == 0 && - /* this can either be equal, or display->rotated (VI+ only) */ + /* this can either be equal, or display->rotated (GFX8+ only) */ (src_micro_mode == dst_micro_mode || - (sctx->chip_class >= VI && + (sctx->chip_class >= GFX8 && src_micro_mode == V_009910_ADDR_SURF_DISPLAY_MICRO_TILING && dst_micro_mode == V_009910_ADDR_SURF_ROTATED_MICRO_TILING))) { assert(src_pitch % 8 == 0); @@ -434,12 +434,12 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, copy_depth <= (1 << 11) && copy_width_aligned % 8 == 0 && copy_height_aligned % 8 == 0 && - /* HW limitation - CIK: */ - (sctx->chip_class != CIK || + /* HW limitation - GFX7: */ + (sctx->chip_class != GFX7 || (copy_width_aligned < (1 << 14) && copy_height_aligned < (1 << 14) && copy_depth < (1 << 11))) && - /* HW limitation - some CIK parts: */ + /* HW limitation - some GFX7 parts: */ ((sctx->family != CHIP_BONAIRE && sctx->family != CHIP_KAVERI && sctx->family != CHIP_KABINI && @@ -465,7 +465,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, radeon_emit(cs, dstz | (dst_pitch_tile_max << 16)); radeon_emit(cs, dst_slice_tile_max); radeon_emit(cs, encode_tile_info(sctx, sdst, dst_level, false)); - if (sctx->chip_class == CIK) { + if (sctx->chip_class == GFX7) { radeon_emit(cs, copy_width_aligned | (copy_height_aligned << 16)); radeon_emit(cs, copy_depth); @@ -502,7 +502,7 @@ static void cik_sdma_copy(struct pipe_context *ctx, return; } - if ((sctx->chip_class == CIK || sctx->chip_class == VI) && + if ((sctx->chip_class == GFX7 || sctx->chip_class == GFX8) && cik_sdma_copy_texture(sctx, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box)) return; 
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 9d3d7d3d27a..5806342cca9 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -1151,7 +1151,7 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx, goto resolve_to_temp; /* This can happen with mipmapping. */ - if (sctx->chip_class == VI && + if (sctx->chip_class == GFX8 && !dst->surface.u.legacy.level[info->dst.level].dcc_fast_clear_size) goto resolve_to_temp; diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index d294f236914..d0094031a95 100644 --- a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -313,7 +313,7 @@ static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, assert(!"unexpected micro mode"); return; } - } else if (sscreen->info.chip_class >= CIK) { + } else if (sscreen->info.chip_class >= GFX7) { /* These magic numbers were copied from addrlib. It doesn't use * any definitions for them either. They are all 2D_TILED_THIN1 * modes with different bpp and micro tile mode. @@ -332,7 +332,7 @@ static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, assert(!"unexpected micro mode"); return; } - } else { /* SI */ + } else { /* GFX6 */ switch (tex->last_msaa_resolve_target_micro_mode) { case RADEON_MICRO_MODE_DISPLAY: switch (tex->surface.bpe) { @@ -434,7 +434,7 @@ static void si_do_fast_color_clear(struct si_context *sctx, !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) continue; - if (sctx->chip_class <= VI && + if (sctx->chip_class <= GFX8 && tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D && !sctx->screen->info.htile_cmask_support_1d_tiling) continue; @@ -474,7 +474,7 @@ static void si_do_fast_color_clear(struct si_context *sctx, continue; /* This can happen with mipmapping or MSAA. */ - if (sctx->chip_class == VI && + if (sctx->chip_class == GFX8 && !tex->surface.u.legacy.level[level].dcc_fast_clear_size) continue; diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index f1a433b72df..46a0ba76ed5 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -327,7 +327,7 @@ static void si_initialize_compute(struct si_context *sctx) radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff)); radeon_emit(cs, S_00B85C_SH0_CU_EN(0xffff) | S_00B85C_SH1_CU_EN(0xffff)); - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { /* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */ radeon_set_sh_reg_seq(cs, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2); @@ -342,7 +342,7 @@ static void si_initialize_compute(struct si_context *sctx) * kernel if we want to use something other than the default value, * which is now 0x22f. */ - if (sctx->chip_class <= SI) { + if (sctx->chip_class <= GFX6) { /* XXX: This should be: * (number of compute units) * 4 * (waves per simd) - 1 */ @@ -353,7 +353,7 @@ static void si_initialize_compute(struct si_context *sctx) /* Set the pointer to border colors. 
*/ bc_va = sctx->border_color_buffer->gpu_address; - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { radeon_set_uconfig_reg_seq(cs, R_030E00_TA_CS_BC_BASE_ADDR, 2); radeon_emit(cs, bc_va >> 8); /* R_030E00_TA_CS_BC_BASE_ADDR */ radeon_emit(cs, S_030E04_ADDRESS(bc_va >> 40)); /* R_030E04_TA_CS_BC_BASE_ADDR_HI */ @@ -434,12 +434,12 @@ static bool si_switch_compute_shader(struct si_context *sctx, } lds_blocks = config->lds_size; - /* XXX: We are over allocating LDS. For SI, the shader reports + /* XXX: We are over allocating LDS. For GFX6, the shader reports * LDS in blocks of 256 bytes, so if there are 4 bytes lds * allocated in the shader and 4 bytes allocated by the state * tracker, then we will set LDS_SIZE to 512 bytes rather than 256. */ - if (sctx->chip_class <= SI) { + if (sctx->chip_class <= GFX6) { lds_blocks += align(program->local_size, 256) >> 8; } else { lds_blocks += align(program->local_size, 512) >> 9; @@ -474,7 +474,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, * command. However, that would add more complexity and we're likely * to get a shader state change in that case anyway. */ - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { cik_prefetch_TC_L2_async(sctx, &program->shader.bo->b.b, 0, program->shader.bo->b.b.width0); } @@ -539,7 +539,7 @@ static void setup_scratch_rsrc_user_sgprs(struct si_context *sctx, } else { scratch_dword3 |= S_008F0C_ELEMENT_SIZE(max_private_element_size); - if (sctx->chip_class < VI) { + if (sctx->chip_class < GFX8) { /* BUF_DATA_FORMAT is ignored, but it cannot be * BUF_DATA_FORMAT_INVALID. */ scratch_dword3 |= @@ -764,7 +764,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, unsigned compute_resource_limits = S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0); - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { unsigned num_cu_per_se = sscreen->info.num_good_compute_units / sscreen->info.max_se; @@ -777,7 +777,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, compute_resource_limits |= S_00B854_WAVES_PER_SH(sctx->cs_max_waves_per_sh); } else { - /* SI */ + /* GFX6 */ if (sctx->cs_max_waves_per_sh) { unsigned limit_div16 = DIV_ROUND_UP(sctx->cs_max_waves_per_sh, 16); compute_resource_limits |= S_00B854_WAVES_PER_SH_SI(limit_div16); @@ -792,7 +792,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, S_00B800_FORCE_START_AT_000(1) | /* If the KMD allows it (there is a KMD hw register for it), * allow launching waves out-of-order. (same as Vulkan) */ - S_00B800_ORDER_MODE(sctx->chip_class >= CIK); + S_00B800_ORDER_MODE(sctx->chip_class >= GFX7); const uint *last_block = info->last_block; bool partial_block_en = last_block[0] || last_block[1] || last_block[2]; @@ -861,10 +861,10 @@ static void si_launch_grid( * compute isn't used, i.e. only one compute job can run at a time. * If async compute is possible, the threadgroup size must be limited * to 256 threads on all queues to avoid the bug. - * Only SI and certain CIK chips are affected. + * Only GFX6 and certain GFX7 chips are affected. */ bool cs_regalloc_hang = - (sctx->chip_class == SI || + (sctx->chip_class == GFX6 || sctx->family == CHIP_BONAIRE || sctx->family == CHIP_KABINI) && info->block[0] * info->block[1] * info->block[2] > 256; @@ -894,7 +894,7 @@ static void si_launch_grid( si_context_add_resource_size(sctx, info->indirect); /* Indirect buffers use TC L2 on GFX9, but not older hw. 
*/ - if (sctx->chip_class <= VI && + if (sctx->chip_class <= GFX8 && si_resource(info->indirect)->TC_L2_dirty) { sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; si_resource(info->indirect)->TC_L2_dirty = false; diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index fb0d8d2f1b6..1cfdc9b62c6 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -36,7 +36,7 @@ static enum si_cache_policy get_cache_policy(struct si_context *sctx, { if ((sctx->chip_class >= GFX9 && (coher == SI_COHERENCY_CB_META || coher == SI_COHERENCY_CP)) || - (sctx->chip_class >= CIK && coher == SI_COHERENCY_SHADER)) + (sctx->chip_class >= GFX7 && coher == SI_COHERENCY_SHADER)) return size <= 256 * 1024 ? L2_LRU : L2_STREAM; return L2_BYPASS; @@ -254,7 +254,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, (!force_cpdma && clear_value_size == 4 && offset % 4 == 0 && - (size > 32*1024 || sctx->chip_class <= VI))) { + (size > 32*1024 || sctx->chip_class <= GFX8))) { si_compute_do_clear_or_copy(sctx, dst, offset, NULL, 0, aligned_size, clear_value, clear_value_size, coher); @@ -418,7 +418,7 @@ void si_compute_copy_image(struct si_context *sctx, ctx->launch_grid(ctx, &info); sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | - (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) | + (sctx->chip_class <= GFX8 ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) | si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM); ctx->bind_compute_state(ctx, saved_cs); ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, saved_image); @@ -597,7 +597,7 @@ void si_compute_clear_render_target(struct pipe_context *ctx, ctx->launch_grid(ctx, &info); sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | - (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) | + (sctx->chip_class <= GFX8 ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) | si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM); ctx->bind_compute_state(ctx, saved_cs); ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &saved_image); diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 404117d1813..f5c54ca0d52 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -61,7 +61,7 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, uint32_t header = 0, command = 0; assert(size <= cp_dma_max_byte_count(sctx)); - assert(sctx->chip_class != SI || cache_policy == L2_BYPASS); + assert(sctx->chip_class != GFX6 || cache_policy == L2_BYPASS); if (sctx->chip_class >= GFX9) command |= S_414_BYTE_COUNT_GFX9(size); @@ -90,7 +90,7 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, /* GDS increments the address, not CP. */ command |= S_414_DAS(V_414_REGISTER) | S_414_DAIC(V_414_NO_INCREMENT); - } else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS) { + } else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) { header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2) | S_500_DST_CACHE_POLICY(cache_policy == L2_STREAM); } @@ -102,12 +102,12 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, /* Both of these are required for GDS. It does increment the address. 
*/ command |= S_414_SAS(V_414_REGISTER) | S_414_SAIC(V_414_NO_INCREMENT); - } else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS) { + } else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) { header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) | S_500_SRC_CACHE_POLICY(cache_policy == L2_STREAM); } - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0)); radeon_emit(cs, header); radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */ @@ -412,7 +412,7 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf, uint64_t offset, unsigned size) { - assert(sctx->chip_class >= CIK); + assert(sctx->chip_class >= GFX7); si_cp_dma_copy_buffer(sctx, buf, buf, offset, offset, size, SI_CPDMA_SKIP_ALL, SI_COHERENCY_SHADER, L2_LRU); @@ -491,7 +491,7 @@ void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only) } } } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ /* Choose the right spot for the VBO prefetch. */ if (sctx->tes_shader.cso) { if (mask & SI_PREFETCH_LS) @@ -591,7 +591,7 @@ void si_cp_write_data(struct si_context *sctx, struct si_resource *buf, assert(offset % 4 == 0); assert(size % 4 == 0); - if (sctx->chip_class == SI && dst_sel == V_370_MEM) + if (sctx->chip_class == GFX6 && dst_sel == V_370_MEM) dst_sel = V_370_MEM_GRBM; radeon_add_to_buffer_list(sctx, cs, buf, diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 9a4494a98fe..bd85fc49387 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -314,7 +314,7 @@ static void si_dump_debug_registers(struct si_context *sctx, FILE *f) si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3); si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG); si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG); - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS); si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2); si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index f795c33cf26..5b812149754 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -347,7 +347,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, base_level_info->mode == RADEON_SURF_MODE_2D) state[0] |= tex->surface.tile_swizzle; - if (sscreen->info.chip_class >= VI) { + if (sscreen->info.chip_class >= GFX8) { state[6] &= C_008F28_COMPRESSION_EN; state[7] = 0; @@ -355,7 +355,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, meta_va = (!tex->dcc_separate_buffer ? 
tex->buffer.gpu_address : 0) + tex->dcc_offset; - if (sscreen->info.chip_class == VI) { + if (sscreen->info.chip_class == GFX8) { meta_va += base_level_info->dcc_offset; assert(base_level_info->mode == RADEON_SURF_MODE_2D); } @@ -399,7 +399,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, S_008F24_META_RB_ALIGNED(meta.rb_aligned); } } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ unsigned pitch = base_level_info->nblk_x * block_width; unsigned index = si_tile_mode_index(tex, base_level, is_stencil); @@ -1141,7 +1141,7 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) uint64_t va = buf->gpu_address + offset; int64_t num_records = (int64_t)buf->b.b.width0 - offset; - if (sctx->chip_class != VI && vb->stride) { + if (sctx->chip_class != GFX8 && vb->stride) { /* Round up by rounding down and adding 1 */ num_records = (num_records - velems->format_size[i]) / vb->stride + 1; @@ -1210,9 +1210,9 @@ static void si_set_constant_buffer(struct si_context *sctx, assert(slot < descs->num_elements); pipe_resource_reference(&buffers->buffers[slot], NULL); - /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy + /* GFX7 cannot unbind a constant buffer (S_BUFFER_LOAD is buggy * with a NULL buffer). We need to use a dummy buffer instead. */ - if (sctx->chip_class == CIK && + if (sctx->chip_class == GFX7 && (!input || (!input->buffer && !input->user_buffer))) input = &sctx->null_const_buf; @@ -1467,7 +1467,7 @@ void si_set_ring_buffer(struct si_context *sctx, uint slot, break; } - if (sctx->chip_class >= VI && stride) + if (sctx->chip_class >= GFX8 && stride) num_records *= stride; /* Set the descriptor. */ diff --git a/src/gallium/drivers/radeonsi/si_dma_cs.c b/src/gallium/drivers/radeonsi/si_dma_cs.c index bba1bd95826..8f2e15833b6 100644 --- a/src/gallium/drivers/radeonsi/si_dma_cs.c +++ b/src/gallium/drivers/radeonsi/si_dma_cs.c @@ -30,7 +30,7 @@ static void si_dma_emit_wait_idle(struct si_context *sctx) struct radeon_cmdbuf *cs = sctx->dma_cs; /* NOP waits for idle. */ - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) radeon_emit(cs, 0x00000000); /* NOP */ else radeon_emit(cs, 0xf0000000); /* NOP */ @@ -42,7 +42,7 @@ void si_dma_emit_timestamp(struct si_context *sctx, struct si_resource *dst, struct radeon_cmdbuf *cs = sctx->dma_cs; uint64_t va = dst->gpu_address + offset; - if (sctx->chip_class == SI) { + if (sctx->chip_class == GFX6) { unreachable("SI DMA doesn't support the timestamp packet."); return; } @@ -87,7 +87,7 @@ void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, offset += sdst->gpu_address; - if (sctx->chip_class == SI) { + if (sctx->chip_class == GFX6) { /* the same maximum size as for copying */ ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE); si_need_dma_space(sctx, ncopy * 4, sdst, NULL); @@ -105,7 +105,7 @@ void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, return; } - /* The following code is for CI, VI, Vega/Raven, etc. */ + /* The following code is for Sea Islands and later. 
*/ /* the same maximum size as for copying */ ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE); si_need_dma_space(sctx, ncopy * 5, sdst, NULL); diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c index ffda98d2834..b3212c1db35 100644 --- a/src/gallium/drivers/radeonsi/si_fence.c +++ b/src/gallium/drivers/radeonsi/si_fence.c @@ -115,8 +115,8 @@ void si_cp_release_mem(struct si_context *ctx, radeon_emit(cs, 0); /* immediate data hi */ radeon_emit(cs, 0); /* unused */ } else { - if (ctx->chip_class == CIK || - ctx->chip_class == VI) { + if (ctx->chip_class == GFX7 || + ctx->chip_class == GFX8) { struct si_resource *scratch = ctx->eop_bug_scratch; uint64_t va = scratch->gpu_address; @@ -153,8 +153,8 @@ unsigned si_cp_write_fence_dwords(struct si_screen *screen) { unsigned dwords = 6; - if (screen->info.chip_class == CIK || - screen->info.chip_class == VI) + if (screen->info.chip_class == GFX7 || + screen->info.chip_class == GFX8) dwords *= 2; return dwords; diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index d97aca1de23..71350661c2b 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -254,7 +254,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 32; case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: - return sscreen->info.chip_class <= VI ? + return sscreen->info.chip_class <= GFX8 ? PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600 : 0; /* Stream output. */ diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index d0d405c473f..c81718950a4 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -82,7 +82,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_INV_GLOBAL_L2; - } else if (ctx->chip_class == SI) { + } else if (ctx->chip_class == GFX6) { /* The kernel flushes L2 before shaders are finished. */ wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH; @@ -147,7 +147,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, /* Make sure CP DMA is idle at the end of IBs after L2 prefetches * because the kernel doesn't wait for it. */ - if (ctx->chip_class >= CIK) + if (ctx->chip_class >= GFX7) si_cp_dma_wait_for_idle(ctx); /* Wait for draw calls to finish if needed. */ @@ -407,7 +407,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_COL_FORMAT] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_CB_SHADER_MASK] = 0xffffffff; ctx->tracked_regs.reg_value[SI_TRACKED_VGT_TF_PARAM] = 0x00000000; - ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL] = 0x0000001e; /* From VI */ + ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL] = 0x0000001e; /* From GFX8 */ /* Set all saved registers state to saved. 
*/ ctx->tracked_regs.reg_saved = 0xffffffffffffffff; diff --git a/src/gallium/drivers/radeonsi/si_gpu_load.c b/src/gallium/drivers/radeonsi/si_gpu_load.c index 481438f37bb..7c2e43b3fdd 100644 --- a/src/gallium/drivers/radeonsi/si_gpu_load.c +++ b/src/gallium/drivers/radeonsi/si_gpu_load.c @@ -102,7 +102,7 @@ static void si_update_mmio_counters(struct si_screen *sscreen, UPDATE_COUNTER(gui, GUI_ACTIVE); gui_busy = GUI_ACTIVE(value); - if (sscreen->info.chip_class == CIK || sscreen->info.chip_class == VI) { + if (sscreen->info.chip_class == GFX7 || sscreen->info.chip_class == GFX8) { /* SRBM_STATUS2 */ sscreen->ws->read_registers(sscreen->ws, SRBM_STATUS2, 1, &value); @@ -110,7 +110,7 @@ static void si_update_mmio_counters(struct si_screen *sscreen, sdma_busy = SDMA_BUSY(value); } - if (sscreen->info.chip_class >= VI) { + if (sscreen->info.chip_class >= GFX8) { /* CP_STAT */ sscreen->ws->read_registers(sscreen->ws, CP_STAT, 1, &value); diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index c15c444cc40..322950557e3 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -1284,11 +1284,11 @@ void si_init_perfcounters(struct si_screen *screen) unsigned i; switch (screen->info.chip_class) { - case CIK: + case GFX7: blocks = groups_CIK; num_blocks = ARRAY_SIZE(groups_CIK); break; - case VI: + case GFX8: blocks = groups_VI; num_blocks = ARRAY_SIZE(groups_VI); break; @@ -1296,13 +1296,13 @@ void si_init_perfcounters(struct si_screen *screen) blocks = groups_gfx9; num_blocks = ARRAY_SIZE(groups_gfx9); break; - case SI: + case GFX6: default: return; /* not implemented */ } if (screen->info.max_sh_per_se != 1) { - /* This should not happen on non-SI chips. */ + /* This should not happen on non-GFX6 chips. */ fprintf(stderr, "si_init_perfcounters: max_sh_per_se = %d not " "supported (inaccurate performance counters)\n", screen->info.max_sh_per_se); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 4d36fd46a9b..d9dae8363f0 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -115,7 +115,7 @@ static void si_init_compiler(struct si_screen *sscreen, /* Only create the less-optimizing version of the compiler on APUs * predating Ryzen (Raven). */ bool create_low_opt_compiler = !sscreen->info.has_dedicated_vram && - sscreen->info.chip_class <= VI; + sscreen->info.chip_class <= GFX8; enum ac_target_machine_options tm_options = (sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) | @@ -394,7 +394,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, if (!sctx) return NULL; - sctx->has_graphics = sscreen->info.chip_class == SI || + sctx->has_graphics = sscreen->info.chip_class == GFX6 || !(flags & PIPE_CONTEXT_COMPUTE_ONLY); if (flags & PIPE_CONTEXT_DEBUG) @@ -419,8 +419,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, } - if (sctx->chip_class == CIK || - sctx->chip_class == VI || + if (sctx->chip_class == GFX7 || + sctx->chip_class == GFX8 || sctx->chip_class == GFX9) { sctx->eop_bug_scratch = si_resource( pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT, @@ -536,7 +536,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, } /* Initialize SDMA functions. 
*/ - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) cik_init_sdma_functions(sctx); else si_init_dma_functions(sctx); @@ -563,9 +563,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, V_370_MEM, V_370_ME, &sctx->wait_mem_number); } - /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads + /* GFX7 cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */ - if (sctx->chip_class == CIK) { + if (sctx->chip_class == GFX7) { sctx->null_const_buf.buffer = pipe_aligned_buffer_create(screen, SI_RESOURCE_FLAG_32BIT, @@ -638,7 +638,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, /* this must be last */ si_begin_new_gfx_cs(sctx); - if (sctx->chip_class == CIK) { + if (sctx->chip_class == GFX7) { /* Clear the NULL constant buffer, because loads should return zeros. * Note that this forces CP DMA to be used, because clover deadlocks * for some reason when the compute codepath is used. @@ -1017,11 +1017,11 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, si_init_perfcounters(sscreen); /* Determine tessellation ring info. */ - bool double_offchip_buffers = sscreen->info.chip_class >= CIK && + bool double_offchip_buffers = sscreen->info.chip_class >= GFX7 && sscreen->info.family != CHIP_CARRIZO && sscreen->info.family != CHIP_STONEY; /* This must be one less than the maximum number due to a hw limitation. - * Various hardware bugs in SI, CIK, and GFX9 need this. + * Various hardware bugs need this. */ unsigned max_offchip_buffers_per_se; @@ -1052,8 +1052,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->tess_offchip_ring_size = max_offchip_buffers * sscreen->tess_offchip_block_dw_size * 4; - if (sscreen->info.chip_class >= CIK) { - if (sscreen->info.chip_class >= VI) + if (sscreen->info.chip_class >= GFX7) { + if (sscreen->info.chip_class >= GFX8) --max_offchip_buffers; sscreen->vgt_hs_offchip_param = S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) | @@ -1065,28 +1065,28 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, } /* The mere presense of CLEAR_STATE in the IB causes random GPU hangs - * on SI. Some CLEAR_STATE cause asic hang on radeon kernel, etc. - * SPI_VS_OUT_CONFIG. So only enable CI CLEAR_STATE on amdgpu kernel.*/ - sscreen->has_clear_state = sscreen->info.chip_class >= CIK && + * on GFX6. Some CLEAR_STATE cause asic hang on radeon kernel, etc. + * SPI_VS_OUT_CONFIG. 
So only enable GFX7 CLEAR_STATE on amdgpu kernel.*/ + sscreen->has_clear_state = sscreen->info.chip_class >= GFX7 && sscreen->info.drm_major == 3; sscreen->has_distributed_tess = - sscreen->info.chip_class >= VI && + sscreen->info.chip_class >= GFX8 && sscreen->info.max_se >= 2; sscreen->has_draw_indirect_multi = (sscreen->info.family >= CHIP_POLARIS10) || - (sscreen->info.chip_class == VI && + (sscreen->info.chip_class == GFX8 && sscreen->info.pfp_fw_version >= 121 && sscreen->info.me_fw_version >= 87) || - (sscreen->info.chip_class == CIK && + (sscreen->info.chip_class == GFX7 && sscreen->info.pfp_fw_version >= 211 && sscreen->info.me_fw_version >= 173) || - (sscreen->info.chip_class == SI && + (sscreen->info.chip_class == GFX6 && sscreen->info.pfp_fw_version >= 79 && sscreen->info.me_fw_version >= 142); - sscreen->has_out_of_order_rast = sscreen->info.chip_class >= VI && + sscreen->has_out_of_order_rast = sscreen->info.chip_class >= GFX8 && sscreen->info.max_se >= 2 && !(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER)); sscreen->assume_no_z_fights = @@ -1137,7 +1137,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, * by the reality that LLVM 5.0 doesn't have working VGPR indexing * on GFX9. */ - sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class <= VI; + sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class <= GFX8; /* Some chips have RB+ registers, but don't support RB+. Those must * always disable it. @@ -1157,7 +1157,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->dcc_msaa_allowed = !(sscreen->debug_flags & DBG(NO_DCC_MSAA)); - sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI; + sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= GFX8; (void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain); sscreen->use_monolithic_shaders = @@ -1165,7 +1165,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 | SI_CONTEXT_INV_VMEM_L1; - if (sscreen->info.chip_class <= VI) { + if (sscreen->info.chip_class <= GFX8) { sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2; sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 695827c9dd7..1c98f41b5f3 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -72,7 +72,7 @@ /* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */ #define SI_CONTEXT_INV_GLOBAL_L2 (1 << 6) /* Write dirty L2 lines back to memory (shader and CP DMA stores), but don't - * invalidate L2. SI-CIK can't do it, so they will do complete invalidation. */ + * invalidate L2. GFX6-GFX7 can't do it, so they will do complete invalidation. */ #define SI_CONTEXT_WRITEBACK_GLOBAL_L2 (1 << 7) /* Writeback & invalidate the L2 metadata cache. It can only be coupled with * a CB or DB flush. */ @@ -366,7 +366,7 @@ struct si_surface { unsigned cb_color_view; unsigned cb_color_attrib; unsigned cb_color_attrib2; /* GFX9 and later */ - unsigned cb_dcc_control; /* VI and later */ + unsigned cb_dcc_control; /* GFX8 and later */ unsigned spi_shader_col_format:8; /* no blending, no alpha-to-coverage. */ unsigned spi_shader_col_format_alpha:8; /* alpha-to-coverage */ unsigned spi_shader_col_format_blend:8; /* blending without alpha. 
*/ @@ -923,7 +923,7 @@ struct si_context { bool bo_list_add_all_compute_resources; /* other shader resources */ - struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */ + struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on GFX7 */ struct pipe_resource *esgs_ring; struct pipe_resource *gsvs_ring; struct pipe_resource *tess_rings; @@ -1572,7 +1572,7 @@ si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples, else if (shaders_read_metadata) sctx->flags |= SI_CONTEXT_INV_L2_METADATA; } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2; } } @@ -1594,7 +1594,7 @@ si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples, else if (shaders_read_metadata) sctx->flags |= SI_CONTEXT_INV_L2_METADATA; } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2; } } diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index 22c4a5b6e6e..0b7d53e745d 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -161,8 +161,8 @@ void si_pm4_upload_indirect_buffer(struct si_context *sctx, struct pipe_screen *screen = sctx->b.screen; unsigned aligned_ndw = align(state->ndw, 8); - /* only supported on CIK and later */ - if (sctx->chip_class < CIK) + /* only supported on GFX7 and later */ + if (sctx->chip_class < GFX7) return; assert(state->ndw); diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index d98bea2eeb3..cb42ebb92ce 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -1019,7 +1019,7 @@ static void si_emit_query_predication(struct si_context *ctx) /* Use the value written by compute shader as a workaround. Note that * the wait flag does not apply in this predication mode. * - * The shader outputs the result value to L2. Workarounds only affect VI + * The shader outputs the result value to L2. Workarounds only affect GFX8 * and later, where the CP reads data from L2, so we don't need an * additional flush. */ @@ -1608,11 +1608,11 @@ static void si_render_condition(struct pipe_context *ctx, if (query) { bool needs_workaround = false; - /* There was a firmware regression in VI which causes successive + /* There was a firmware regression in GFX8 which causes successive * SET_PREDICATION packets to give the wrong answer for * non-inverted stream overflow predication. 
*/ - if (((sctx->chip_class == VI && sctx->screen->info.pfp_fw_feature < 49) || + if (((sctx->chip_class == GFX8 && sctx->screen->info.pfp_fw_feature < 49) || (sctx->chip_class == GFX9 && sctx->screen->info.pfp_fw_feature < 38)) && !condition && (squery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || @@ -1786,7 +1786,7 @@ static unsigned si_get_num_queries(struct si_screen *sscreen) { /* amdgpu */ if (sscreen->info.drm_major == 3) { - if (sscreen->info.chip_class >= VI) + if (sscreen->info.chip_class >= GFX8) return ARRAY_SIZE(si_driver_query_list); else return ARRAY_SIZE(si_driver_query_list) - 7; @@ -1794,7 +1794,7 @@ static unsigned si_get_num_queries(struct si_screen *sscreen) /* radeon */ if (sscreen->info.has_read_registers_query) { - if (sscreen->info.chip_class == CIK) + if (sscreen->info.chip_class == GFX7) return ARRAY_SIZE(si_driver_query_list) - 6; else return ARRAY_SIZE(si_driver_query_list) - 7; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index f6d882cf583..98c11e1c98d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -105,7 +105,7 @@ static bool llvm_type_is_64bit(struct si_shader_context *ctx, static bool is_merged_shader(struct si_shader_context *ctx) { - if (ctx->screen->info.chip_class <= VI) + if (ctx->screen->info.chip_class <= GFX8) return false; return ctx->shader->key.as_ls || @@ -3082,7 +3082,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, /* Store the dynamic HS control word. */ offset = 0; - if (ctx->screen->info.chip_class <= VI) { + if (ctx->screen->info.chip_class <= GFX8) { ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->i32, 0x80000000, 0), 1, ctx->i32_0, tf_base, @@ -3719,7 +3719,7 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi, addrs[4 * i + 0], ""); break; default: - fprintf(stderr, "Warning: SI unhandled fs output type:%d\n", + fprintf(stderr, "Warning: GFX6 unhandled fs output type:%d\n", semantic_name); } } @@ -4215,11 +4215,11 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, { struct si_shader_context *ctx = si_shader_context(bld_base); - /* SI only (thanks to a hw bug workaround): + /* GFX6 only (thanks to a hw bug workaround): * The real barrier instruction isn’t needed, because an entire patch * always fits into a single wave. */ - if (ctx->screen->info.chip_class == SI && + if (ctx->screen->info.chip_class == GFX6 && ctx->type == PIPE_SHADER_TESS_CTRL) { ac_build_waitcnt(&ctx->ac, LGKM_CNT & VM_CNT); return; @@ -4327,7 +4327,7 @@ static unsigned si_get_max_workgroup_size(const struct si_shader *shader) case PIPE_SHADER_TESS_CTRL: /* Return this so that LLVM doesn't remove s_barrier * instructions on chips where we use s_barrier. */ - return shader->selector->screen->info.chip_class >= CIK ? 128 : 64; + return shader->selector->screen->info.chip_class >= GFX7 ? 128 : 64; case PIPE_SHADER_GEOMETRY: return shader->selector->screen->info.chip_class >= GFX9 ? 
128 : 64; @@ -4536,7 +4536,7 @@ static void create_function(struct si_shader_context *ctx) declare_vs_input_vgprs(ctx, &fninfo, &num_prolog_vgprs); break; - case PIPE_SHADER_TESS_CTRL: /* SI-CI-VI */ + case PIPE_SHADER_TESS_CTRL: /* GFX6-GFX8 */ declare_global_desc_pointers(ctx, &fninfo); declare_per_stage_desc_pointers(ctx, &fninfo, true); ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -4851,7 +4851,7 @@ static void preload_ring_buffers(struct si_shader_context *ctx) LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers); - if (ctx->screen->info.chip_class <= VI && + if (ctx->screen->info.chip_class <= GFX8 && (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY)) { unsigned ring = ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS @@ -4897,7 +4897,7 @@ static void preload_ring_buffers(struct si_shader_context *ctx) stride = 4 * num_components * sel->gs_max_out_vertices; - /* Limit on the stride field for <= CIK. */ + /* Limit on the stride field for <= GFX7. */ assert(stride < (1 << 14)); num_records = 64; @@ -5222,7 +5222,7 @@ static void si_calculate_max_simd_waves(struct si_shader *shader) struct si_screen *sscreen = shader->selector->screen; struct si_shader_config *conf = &shader->config; unsigned num_inputs = shader->selector->info.num_inputs; - unsigned lds_increment = sscreen->info.chip_class >= CIK ? 512 : 256; + unsigned lds_increment = sscreen->info.chip_class >= GFX7 ? 512 : 256; unsigned lds_per_wave = 0; unsigned max_simd_waves; @@ -5452,7 +5452,7 @@ static int si_compile_llvm(struct si_screen *sscreen, * - Floating-point output modifiers would be ignored by the hw. * - Some opcodes don't support denormals, such as v_mad_f32. We would * have to stop using those. - * - SI & CI would be very slow. + * - GFX6 & GFX7 would be very slow. */ conf->float_mode |= V_00B028_FP_64_DENORMS; @@ -6576,7 +6576,7 @@ static bool si_should_optimize_less(struct ac_llvm_compiler *compiler, /* Assume a slow CPU. */ assert(!sel->screen->info.has_dedicated_vram && - sel->screen->info.chip_class <= VI); + sel->screen->info.chip_class <= GFX8); /* For a crazy dEQP test containing 2597 memory opcodes, mostly * buffer stores. */ @@ -6831,7 +6831,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, if (sel->type == PIPE_SHADER_COMPUTE) { unsigned wave_size = 64; unsigned max_vgprs = 256; - unsigned max_sgprs = sscreen->info.chip_class >= VI ? 800 : 512; + unsigned max_sgprs = sscreen->info.chip_class >= GFX8 ? 800 : 512; unsigned max_sgprs_per_wave = 128; unsigned max_block_threads = si_get_max_workgroup_size(shader); unsigned min_waves_per_cu = DIV_ROUND_UP(max_block_threads, wave_size); @@ -7263,7 +7263,7 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, /* Create the function. */ si_create_function(ctx, "tcs_epilog", NULL, 0, &fninfo, - ctx->screen->info.chip_class >= CIK ? 128 : 64); + ctx->screen->info.chip_class >= GFX7 ? 128 : 64); ac_declare_lds_as_pointer(&ctx->ac); func = ctx->main_fn; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 82c521efcb7..16b78fbf43e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -247,7 +247,7 @@ enum { #define S_VS_STATE_LS_OUT_VERTEX_SIZE(x) (((unsigned)(x) & 0xFF) << 24) #define C_VS_STATE_LS_OUT_VERTEX_SIZE 0x00FFFFFF -/* SI-specific system values. */ +/* Driver-specific system values. */ enum { /* Values from set_tess_state. 
*/ TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI = TGSI_SEMANTIC_COUNT, diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index 5e540fc5098..be0cb89f722 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -48,8 +48,8 @@ static LLVMValueRef get_buffer_size( LLVMBuildExtractElement(builder, descriptor, LLVMConstInt(ctx->i32, 2, 0), ""); - if (ctx->screen->info.chip_class == VI) { - /* On VI, the descriptor contains the size in bytes, + if (ctx->screen->info.chip_class == GFX8) { + /* On GFX8, the descriptor contains the size in bytes, * but TXQ must return the size in elements. * The stride is always non-zero for resources using TXQ. */ @@ -132,7 +132,7 @@ ac_image_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type t /* Match the resource type set in the descriptor. */ if (dim == ac_image_cube || - (screen->info.chip_class <= VI && dim == ac_image_3d)) + (screen->info.chip_class <= GFX8 && dim == ac_image_3d)) dim = ac_image_2darray; else if (target == TGSI_TEXTURE_2D && screen->info.chip_class >= GFX9) { /* When a single layer of a 3D texture is bound, the shader @@ -161,7 +161,7 @@ ac_image_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type t static LLVMValueRef force_dcc_off(struct si_shader_context *ctx, LLVMValueRef rsrc) { - if (ctx->screen->info.chip_class <= CIK) { + if (ctx->screen->info.chip_class <= GFX7) { return rsrc; } else { LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0); @@ -327,11 +327,11 @@ static unsigned get_cache_policy(struct si_shader_context *ctx, unsigned cache_policy = 0; if (!atomic && - /* SI has a TC L1 bug causing corruption of 8bit/16bit stores. + /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. * All store opcodes not aligned to a dword are affected. * The only way to get unaligned stores in radeonsi is through * shader images. */ - ((may_store_unaligned && ctx->screen->info.chip_class == SI) || + ((may_store_unaligned && ctx->screen->info.chip_class == GFX6) || /* If this is write-only, don't keep data in L1 to prevent * evicting L1 cache lines that may be needed by other * instructions. */ @@ -1099,13 +1099,13 @@ LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. * - * SI-CI: + * GFX6-GFX7: * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic * filtering manually. The driver sets img7 to a mask clearing * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do: * s_and_b32 samp0, samp0, img7 * - * VI: + * GFX8: * The ANISO_OVERRIDE sampler field enables this fix in TA. */ static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx, @@ -1113,7 +1113,7 @@ static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx, { LLVMValueRef img7, samp0; - if (ctx->screen->info.chip_class >= VI) + if (ctx->screen->info.chip_class >= GFX8) return samp; img7 = LLVMBuildExtractElement(ctx->ac.builder, res, @@ -1446,7 +1446,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, * so the depth comparison value isn't clamped for Z16 and * Z24 anymore. Do it manually here. 
*/ - if (ctx->screen->info.chip_class >= VI) { + if (ctx->screen->info.chip_class >= GFX8) { LLVMValueRef upgraded; LLVMValueRef clamped; upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler, @@ -1530,7 +1530,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, } else if (tgsi_is_array_sampler(target) && opcode != TGSI_OPCODE_TXF && opcode != TGSI_OPCODE_TXF_LZ && - ctx->screen->info.chip_class <= VI) { + ctx->screen->info.chip_class <= GFX8) { unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2; args.coords[array_coord] = ac_build_round(&ctx->ac, args.coords[array_coord]); } @@ -1687,7 +1687,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, /* The hardware needs special lowering for Gather4 with integer formats. */ LLVMValueRef gather4_int_result_workaround = NULL; - if (ctx->screen->info.chip_class <= VI && + if (ctx->screen->info.chip_class <= GFX8 && opcode == TGSI_OPCODE_TG4) { assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 55965bc86a1..bc91e6f5148 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -103,12 +103,12 @@ static void si_emit_cb_render_state(struct si_context *sctx) radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK, cb_target_mask); - if (sctx->chip_class >= VI) { + if (sctx->chip_class >= GFX8) { /* DCC MSAA workaround for blending. * Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_- * COMBINER_DISABLE, but that would be more complicated. */ - bool oc_disable = (sctx->chip_class == VI || + bool oc_disable = (sctx->chip_class == GFX8 || sctx->chip_class == GFX9) && blend && blend->blend_enable_4bit & cb_target_mask && @@ -1391,7 +1391,7 @@ static void si_emit_db_render_state(struct si_context *sctx) !sctx->occlusion_queries_disabled) { bool perfect = sctx->num_perfect_occlusion_queries > 0; - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { unsigned log_sample_rate = sctx->framebuffer.log_samples; /* Stoney doesn't increment occlusion query counters @@ -1413,7 +1413,7 @@ static void si_emit_db_render_state(struct si_context *sctx) } } else { /* Disable occlusion queries. */ - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { db_count_control = 0; } else { db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1); @@ -1433,8 +1433,8 @@ static void si_emit_db_render_state(struct si_context *sctx) db_shader_control = sctx->ps_db_shader_control; - /* Bug workaround for smoothing (overrasterization) on SI. */ - if (sctx->chip_class == SI && sctx->smoothing_enabled) { + /* Bug workaround for smoothing (overrasterization) on GFX6. */ + if (sctx->chip_class == GFX6 && sctx->smoothing_enabled) { db_shader_control &= C_02880C_Z_ORDER; db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); } @@ -1589,7 +1589,7 @@ static uint32_t si_translate_dbformat(enum pipe_format format) case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_UINT: - return V_028040_Z_24; /* deprecated on SI */ + return V_028040_Z_24; /* deprecated on AMD GCN */ case PIPE_FORMAT_Z32_FLOAT: case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: return V_028040_Z_32_FLOAT; @@ -1623,9 +1623,9 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen, /* * Implemented as an 8_8_8_8 data format to fix texture * gathers in stencil sampling. 
This affects at least - * GL45-CTS.texture_cube_map_array.sampling on VI. + * GL45-CTS.texture_cube_map_array.sampling on GFX8. */ - if (sscreen->info.chip_class <= VI) + if (sscreen->info.chip_class <= GFX8) return V_008F14_IMG_DATA_FORMAT_8_8_8_8; if (format == PIPE_FORMAT_X24S8_UINT) @@ -2461,14 +2461,14 @@ static void si_initialize_color_surface(struct si_context *sctx, color_info |= S_028C70_COMPRESSION(1); unsigned fmask_bankh = util_logbase2(tex->surface.u.legacy.fmask.bankh); - if (sctx->chip_class == SI) { - /* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */ + if (sctx->chip_class == GFX6) { + /* due to a hw bug, FMASK_BANK_HEIGHT must be set on GFX6 too */ color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); } } } - if (sctx->chip_class >= VI) { + if (sctx->chip_class >= GFX8) { unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B; unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B; @@ -2492,7 +2492,7 @@ static void si_initialize_color_surface(struct si_context *sctx, } /* This must be set for fast clear to work without FMASK. */ - if (!tex->surface.fmask_size && sctx->chip_class == SI) { + if (!tex->surface.fmask_size && sctx->chip_class == GFX6) { unsigned bankh = util_logbase2(tex->surface.u.legacy.bankh); color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); } @@ -2576,7 +2576,7 @@ static void si_init_depth_surface(struct si_context *sctx, } if (tex->surface.has_stencil) { - /* Stencil buffer workaround ported from the SI-CI-VI code. + /* Stencil buffer workaround ported from the GFX6-GFX8 code. * See that for explanation. */ s_info |= S_02803C_ALLOW_EXPCLEAR(tex->buffer.b.b.nr_samples <= 1); @@ -2592,7 +2592,7 @@ static void si_init_depth_surface(struct si_context *sctx, S_028ABC_RB_ALIGNED(tex->surface.u.gfx9.htile.rb_aligned); } } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ struct legacy_surf_level *levelinfo = &tex->surface.u.legacy.level[level]; assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); @@ -2607,7 +2607,7 @@ static void si_init_depth_surface(struct si_context *sctx, s_info = S_028044_FORMAT(stencil_format); surf->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile); - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { struct radeon_info *info = &sctx->screen->info; unsigned index = tex->surface.u.legacy.tiling_index[level]; unsigned stencil_index = tex->surface.u.legacy.stencil_tiling_index[level]; @@ -2746,7 +2746,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, bool unbound = false; int i; - /* Reject zero-sized framebuffers due to a hw bug on SI that occurs + /* Reject zero-sized framebuffers due to a hw bug on GFX6 that occurs * when PA_SU_HARDWARE_SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0. * We could implement the full workaround here, but it's a useless case. */ @@ -2935,7 +2935,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, } /* For optimal DCC performance. */ - if (sctx->chip_class == VI) + if (sctx->chip_class == GFX8) sctx->framebuffer.dcc_overwrite_combiner_watermark = 4; else if (num_bpp64_colorbufs >= 5) sctx->framebuffer.dcc_overwrite_combiner_watermark = 8; @@ -3139,7 +3139,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) radeon_set_context_reg(cs, R_0287A0_CB_MRT0_EPITCH + i * 4, S_0287A0_EPITCH(tex->surface.u.gfx9.surf.epitch)); } else { - /* Compute mutable surface parameters (SI-CI-VI). */ + /* Compute mutable surface parameters (GFX6-GFX8). 
*/ const struct legacy_surf_level *level_info = &tex->surface.u.legacy.level[cb->base.u.tex.level]; unsigned pitch_tile_max, slice_tile_max, tile_mode_index; @@ -3167,20 +3167,20 @@ static void si_emit_framebuffer_state(struct si_context *sctx) cb_color_slice = S_028C68_TILE_MAX(slice_tile_max); if (tex->surface.fmask_size) { - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->surface.u.legacy.fmask.pitch_in_pixels / 8 - 1); cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->surface.u.legacy.fmask.tiling_index); cb_color_fmask_slice = S_028C88_TILE_MAX(tex->surface.u.legacy.fmask.slice_tile_max); } else { /* This must be set for fast clear to work without FMASK. */ - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max); } radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, - sctx->chip_class >= VI ? 14 : 13); + sctx->chip_class >= GFX8 ? 14 : 13); radeon_emit(cs, cb_color_base); /* CB_COLOR0_BASE */ radeon_emit(cs, cb_color_pitch); /* CB_COLOR0_PITCH */ radeon_emit(cs, cb_color_slice); /* CB_COLOR0_SLICE */ @@ -3195,7 +3195,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) radeon_emit(cs, tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ radeon_emit(cs, tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ - if (sctx->chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */ + if (sctx->chip_class >= GFX8) /* R_028C94_CB_COLOR0_DCC_BASE */ radeon_emit(cs, cb_dcc_base); } } @@ -3328,7 +3328,7 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx) /* The exclusion bits can be set to improve rasterization efficiency * if no sample lies on the pixel boundary (-8 sample offset). */ - bool exclusion = sctx->chip_class >= CIK && + bool exclusion = sctx->chip_class >= GFX7 && (!rs->multisample_enable || nr_samples != 16); radeon_opt_set_context_reg(sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL, SI_TRACKED_PA_SU_PRIM_FILTER_CNTL, @@ -3606,11 +3606,11 @@ si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf, /* The NUM_RECORDS field has a different meaning depending on the chip, * instruction type, STRIDE, and SWIZZLE_ENABLE. * - * SI-CIK: + * GFX6-GFX7: * - If STRIDE == 0, it's in byte units. * - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN. * - * VI: + * GFX8: * - For SMEM and STRIDE == 0, it's in byte units. * - For SMEM and STRIDE != 0, it's in units of STRIDE. * - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units. @@ -3633,7 +3633,7 @@ si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf, * the first element is readable when IDXEN == 0. */ num_records = num_records ? MAX2(num_records, stride) : 0; - else if (screen->info.chip_class == VI) + else if (screen->info.chip_class == GFX8) num_records *= stride; state[4] = 0; @@ -3720,9 +3720,9 @@ si_make_texture_descriptor(struct si_screen *screen, /* * X24S8 is implemented as an 8_8_8_8 data format, to * fix texture gathers. This affects at least - * GL45-CTS.texture_cube_map_array.sampling on VI. + * GL45-CTS.texture_cube_map_array.sampling on GFX8. 
*/ - if (screen->info.chip_class <= VI) + if (screen->info.chip_class <= GFX8) util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle); else util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); @@ -3816,7 +3816,7 @@ si_make_texture_descriptor(struct si_screen *screen, if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY || - (screen->info.chip_class <= VI && + (screen->info.chip_class <= GFX8 && res->target == PIPE_TEXTURE_3D))) { /* For the purpose of shader images, treat cube maps and 3D * textures as 2D arrays. For 3D textures, the address @@ -3887,7 +3887,7 @@ si_make_texture_descriptor(struct si_screen *screen, /* The last dword is unused by hw. The shader uses it to clear * bits in the first dword of sampler state. */ - if (screen->info.chip_class <= CIK && res->nr_samples <= 1) { + if (screen->info.chip_class <= GFX7 && res->nr_samples <= 1) { if (first_level == last_level) state[7] = C_008F30_MAX_ANISO_RATIO; else @@ -4094,7 +4094,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx, height = height0; depth = texture->depth0; - if (sctx->chip_class <= VI && force_level) { + if (sctx->chip_class <= GFX8 && force_level) { assert(force_level == first_level && force_level == last_level); base_level = force_level; @@ -4331,7 +4331,7 @@ static void *si_create_sampler_state(struct pipe_context *ctx, S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) | S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) | - S_008F30_COMPAT_MODE(sctx->chip_class >= VI)); + S_008F30_COMPAT_MODE(sctx->chip_class >= GFX8)); rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) | S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0)); @@ -4340,9 +4340,9 @@ static void *si_create_sampler_state(struct pipe_context *ctx, S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter, max_aniso)) | S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) | S_008F38_MIP_POINT_PRECLAMP(0) | - S_008F38_DISABLE_LSB_CEIL(sctx->chip_class <= VI) | + S_008F38_DISABLE_LSB_CEIL(sctx->chip_class <= GFX8) | S_008F38_FILTER_PREC_FIX(1) | - S_008F38_ANISO_OVERRIDE(sctx->chip_class >= VI)); + S_008F38_ANISO_OVERRIDE(sctx->chip_class >= GFX8)); rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color, false); /* Create sampler resource for integer textures. */ @@ -4537,9 +4537,9 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, /* The hardware always treats the 2-bit alpha channel as * unsigned, so a shader workaround is needed. The affected - * chips are VI and older except Stoney (GFX8.1). + * chips are GFX8 and older except Stoney (GFX8.1). */ - always_fix = sscreen->info.chip_class <= VI && + always_fix = sscreen->info.chip_class <= GFX8 && sscreen->info.family != CHIP_STONEY && channel->type == UTIL_FORMAT_TYPE_SIGNED; } else if (elements[i].src_format == PIPE_FORMAT_R11G11B10_FLOAT) { @@ -4585,7 +4585,7 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, * into account would complicate the fast path (where everything * is nicely aligned). 
*/ - bool check_alignment = log_hw_load_size >= 1 && sscreen->info.chip_class == SI; + bool check_alignment = log_hw_load_size >= 1 && sscreen->info.chip_class == GFX6; bool opencode = sscreen->options.vs_fetch_always_opencode; if (check_alignment && @@ -4810,10 +4810,10 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) } if (flags & PIPE_BARRIER_INDEX_BUFFER) { - /* Indices are read through TC L2 since VI. + /* Indices are read through TC L2 since GFX8. * L1 isn't used. */ - if (sctx->screen->info.chip_class <= CIK) + if (sctx->screen->info.chip_class <= GFX7) sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } @@ -4824,12 +4824,12 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) sctx->framebuffer.uncompressed_cb_mask) { sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; - if (sctx->chip_class <= VI) + if (sctx->chip_class <= GFX8) sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } /* Indirect buffers use TC L2 on GFX9, but not older hw. */ - if (sctx->screen->info.chip_class <= VI && + if (sctx->screen->info.chip_class <= GFX8 && flags & PIPE_BARRIER_INDIRECT_BUFFER) sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } @@ -4917,7 +4917,7 @@ void si_init_screen_state_functions(struct si_screen *sscreen) static void si_set_grbm_gfx_index(struct si_context *sctx, struct si_pm4_state *pm4, unsigned value) { - unsigned reg = sctx->chip_class >= CIK ? R_030800_GRBM_GFX_INDEX : + unsigned reg = sctx->chip_class >= GFX7 ? R_030800_GRBM_GFX_INDEX : R_00802C_GRBM_GFX_INDEX; si_pm4_set_reg(pm4, reg, value); } @@ -4954,7 +4954,7 @@ si_write_harvested_raster_configs(struct si_context *sctx, } si_set_grbm_gfx_index(sctx, pm4, ~0); - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); } } @@ -4973,7 +4973,7 @@ static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *p */ si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config); - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); } else { @@ -4988,8 +4988,8 @@ static void si_init_config(struct si_context *sctx) bool has_clear_state = sscreen->has_clear_state; struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); - /* SI, radeon kernel disabled CLEAR_STATE. */ - assert(has_clear_state || sscreen->info.chip_class == SI || + /* GFX6, radeon kernel disabled CLEAR_STATE. */ + assert(has_clear_state || sscreen->info.chip_class == GFX6 || sscreen->info.drm_major != 3); if (!pm4) @@ -5006,7 +5006,7 @@ static void si_init_config(struct si_context *sctx) si_pm4_cmd_end(pm4, false); } - if (sctx->chip_class <= VI) + if (sctx->chip_class <= GFX8) si_set_raster_config(sctx, pm4); si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); @@ -5014,7 +5014,7 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); /* FIXME calculate these values somehow ??? 
*/ - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); } @@ -5028,14 +5028,14 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); if (!has_clear_state) si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); - if (sctx->chip_class < CIK) + if (sctx->chip_class < GFX7) si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1)); /* CLEAR_STATE doesn't clear these correctly on certain generations. * I don't know why. Deduced by trial and error. */ - if (sctx->chip_class <= CIK) { + if (sctx->chip_class <= GFX7) { si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); @@ -5077,7 +5077,7 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); } - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { if (sctx->chip_class >= GFX9) { si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F)); @@ -5136,7 +5136,7 @@ static void si_init_config(struct si_context *sctx) S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F)); } - if (sctx->chip_class >= VI) { + if (sctx->chip_class >= GFX8) { unsigned vgt_tess_distribution; vgt_tess_distribution = @@ -5159,7 +5159,7 @@ static void si_init_config(struct si_context *sctx) } si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40)); } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 8e01e1b35e1..7bbe66d46ae 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -78,7 +78,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, struct si_shader_selector *tcs = sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso; unsigned tess_uses_primid = sctx->ia_multi_vgt_param_key.u.tess_uses_prim_id; - bool has_primid_instancing_bug = sctx->chip_class == SI && + bool has_primid_instancing_bug = sctx->chip_class == GFX6 && sctx->screen->info.max_se == 1; unsigned tes_sh_base = sctx->shader_pointers.sh_base[PIPE_SHADER_TESS_EVAL]; unsigned num_tcs_input_cp = info->vertices_per_patch; @@ -152,7 +152,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, /* Make sure that the data fits in LDS. This assumes the shaders only * use LDS for the inputs and outputs. * - * While CIK can use 64K per threadgroup, there is a hang on Stoney + * While GFX7 can use 64K per threadgroup, there is a hang on Stoney * with 2 CUs if we use more than 32K. The closed Vulkan driver also * uses 32K at most on all GCN chips. */ @@ -185,8 +185,8 @@ static void si_emit_derived_tess_state(struct si_context *sctx, if (temp_verts_per_tg > 64 && temp_verts_per_tg % 64 < 48) *num_patches = (temp_verts_per_tg & ~63) / max_verts_per_patch; - if (sctx->chip_class == SI) { - /* SI bug workaround, related to power management. Limit LS-HS + if (sctx->chip_class == GFX6) { + /* GFX6 bug workaround, related to power management. Limit LS-HS * threadgroups to only one wave. 
*/ unsigned one_wave = 64 / max_verts_per_patch; @@ -200,7 +200,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, * The intended solution is to restrict threadgroups to * a single instance by setting SWITCH_ON_EOI, which * should cause IA to split instances up. However, this - * doesn't work correctly on SI when there is no other + * doesn't work correctly on GFX6 when there is no other * SE to switch to. */ if (has_primid_instancing_bug && tess_uses_primid) @@ -238,7 +238,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, /* Compute the LDS size. */ lds_size = output_patch0_offset + output_patch_size * *num_patches; - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { assert(lds_size <= 65536); lds_size = align(lds_size, 512) / 512; } else { @@ -272,7 +272,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, /* Due to a hw bug, RSRC2_LS must be written twice with another * LS register written in between. */ - if (sctx->chip_class == CIK && sctx->family != CHIP_HAWAII) + if (sctx->chip_class == GFX7 && sctx->family != CHIP_HAWAII) radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2); radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2); radeon_emit(cs, ls_current->config.rsrc1); @@ -297,7 +297,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp); if (sctx->last_ls_hs_config != ls_hs_config) { - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config); } else { @@ -349,10 +349,10 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, key->u.uses_gs) partial_vs_wave = true; - /* Needed for 028B6C_DISTRIBUTION_MODE != 0. (implies >= VI) */ + /* Needed for 028B6C_DISTRIBUTION_MODE != 0. (implies >= GFX8) */ if (sscreen->has_distributed_tess) { if (key->u.uses_gs) { - if (sscreen->info.chip_class == VI) + if (sscreen->info.chip_class == GFX8) partial_es_wave = true; } else { partial_vs_wave = true; @@ -367,7 +367,7 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, wd_switch_on_eop = true; } - if (sscreen->info.chip_class >= CIK) { + if (sscreen->info.chip_class >= GFX7) { /* WD_SWITCH_ON_EOP has no effect on GPUs with less than * 4 shader engines. Set 1 to pass the assertion below. * The other cases are hardware requirements. @@ -400,12 +400,12 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, * Assume indirect draws always use small instances. * This is needed for good VS wave utilization. */ - if (sscreen->info.chip_class <= VI && + if (sscreen->info.chip_class <= GFX8 && sscreen->info.max_se == 4 && key->u.multi_instances_smaller_than_primgroup) wd_switch_on_eop = true; - /* Required on CIK and later. */ + /* Required on GFX7 and later. */ if (sscreen->info.max_se == 4 && !wd_switch_on_eop) ia_switch_on_eoi = true; @@ -421,10 +421,10 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, sscreen->info.family == CHIP_VEGAM)) partial_vs_wave = true; - /* Required by Hawaii and, for some special cases, by VI. */ + /* Required by Hawaii and, for some special cases, by GFX8. */ if (ia_switch_on_eoi && (sscreen->info.family == CHIP_HAWAII || - (sscreen->info.chip_class == VI && + (sscreen->info.chip_class == GFX8 && (key->u.uses_gs || max_primgroup_in_wave != 2)))) partial_vs_wave = true; @@ -444,16 +444,16 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, } /* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. 
*/ - if (sscreen->info.chip_class <= VI && ia_switch_on_eoi) + if (sscreen->info.chip_class <= GFX8 && ia_switch_on_eoi) partial_es_wave = true; return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) | S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) | - S_028AA8_WD_SWITCH_ON_EOP(sscreen->info.chip_class >= CIK ? wd_switch_on_eop : 0) | + S_028AA8_WD_SWITCH_ON_EOP(sscreen->info.chip_class >= GFX7 ? wd_switch_on_eop : 0) | /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */ - S_028AA8_MAX_PRIMGRP_IN_WAVE(sscreen->info.chip_class == VI ? + S_028AA8_MAX_PRIMGRP_IN_WAVE(sscreen->info.chip_class == GFX8 ? max_primgroup_in_wave : 0) | S_030960_EN_INST_OPT_BASIC(sscreen->info.chip_class >= GFX9) | S_030960_EN_INST_OPT_ADV(sscreen->info.chip_class >= GFX9); @@ -519,7 +519,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, if (sctx->gs_shader.cso) { /* GS requirement. */ - if (sctx->chip_class <= VI && + if (sctx->chip_class <= GFX8 && SI_GS_PER_ES / primgroup_size >= sctx->screen->gs_table_depth - 3) ia_multi_vgt_param |= S_028AA8_PARTIAL_ES_WAVE_ON(1); @@ -625,7 +625,7 @@ static void si_emit_draw_registers(struct si_context *sctx, radeon_set_uconfig_reg_idx(cs, sctx->screen, R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param); - else if (sctx->chip_class >= CIK) + else if (sctx->chip_class >= GFX7) radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); else radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param); @@ -633,7 +633,7 @@ static void si_emit_draw_registers(struct si_context *sctx, sctx->last_multi_vgt_param = ia_multi_vgt_param; } if (prim != sctx->last_prim) { - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) radeon_set_uconfig_reg_idx(cs, sctx->screen, R_030908_VGT_PRIMITIVE_TYPE, 1, prim); else @@ -700,12 +700,12 @@ static void si_emit_draw_packets(struct si_context *sctx, break; case 2: index_type = V_028A7C_VGT_INDEX_16 | - (SI_BIG_ENDIAN && sctx->chip_class <= CIK ? + (SI_BIG_ENDIAN && sctx->chip_class <= GFX7 ? V_028A7C_VGT_DMA_SWAP_16_BIT : 0); break; case 4: index_type = V_028A7C_VGT_INDEX_32 | - (SI_BIG_ENDIAN && sctx->chip_class <= CIK ? + (SI_BIG_ENDIAN && sctx->chip_class <= GFX7 ? V_028A7C_VGT_DMA_SWAP_32_BIT : 0); break; default: @@ -733,10 +733,10 @@ static void si_emit_draw_packets(struct si_context *sctx, si_resource(indexbuf), RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER); } else { - /* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE, + /* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE, * so the state must be re-emitted before the next indexed draw. */ - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) sctx->last_index_size = -1; } @@ -918,7 +918,7 @@ void si_emit_cache_flush(struct si_context *sctx) if (flags & SI_CONTEXT_FLUSH_AND_INV_DB) sctx->num_db_cache_flushes++; - /* SI has a bug that it always flushes ICACHE and KCACHE if either + /* GFX6 has a bug that it always flushes ICACHE and KCACHE if either * bit is set. An alternative way is to write SQC_CACHES, but that * doesn't seem to work reliably. 
Since the bug doesn't affect * correctness (it only does more work than necessary) and @@ -931,7 +931,7 @@ void si_emit_cache_flush(struct si_context *sctx) if (flags & SI_CONTEXT_INV_SMEM_L1) cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1); - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) { cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) | @@ -944,7 +944,7 @@ void si_emit_cache_flush(struct si_context *sctx) S_0085F0_CB7_DEST_BASE_ENA(1); /* Necessary for DCC */ - if (sctx->chip_class == VI) + if (sctx->chip_class == GFX8) si_cp_release_mem(sctx, V_028A90_FLUSH_AND_INV_CB_DATA_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, @@ -1085,25 +1085,25 @@ void si_emit_cache_flush(struct si_context *sctx) radeon_emit(cs, 0); } - /* SI-CI-VI only: + /* GFX6-GFX8 only: * When one of the CP_COHER_CNTL.DEST_BASE flags is set, SURFACE_SYNC * waits for idle, so it should be last. SURFACE_SYNC is done in PFP. * * cp_coher_cntl should contain all necessary flags except TC flags * at this point. * - * SI-CIK don't support L2 write-back. + * GFX6-GFX7 don't support L2 write-back. */ if (flags & SI_CONTEXT_INV_GLOBAL_L2 || - (sctx->chip_class <= CIK && + (sctx->chip_class <= GFX7 && (flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2))) { - /* Invalidate L1 & L2. (L1 is always invalidated on SI) - * WB must be set on VI+ when TC_ACTION is set. + /* Invalidate L1 & L2. (L1 is always invalidated on GFX6) + * WB must be set on GFX8+ when TC_ACTION is set. */ si_emit_surface_sync(sctx, cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) | - S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= VI)); + S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= GFX8)); cp_coher_cntl = 0; sctx->num_L2_invalidates++; } else { @@ -1260,7 +1260,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i unsigned index_offset = info->indirect ? info->start * index_size : 0; if (likely(!info->indirect)) { - /* SI-CI treat instance_count==0 as instance_count==1. There is + /* GFX6-GFX7 treat instance_count==0 as instance_count==1. There is * no workaround for indirect draws, but we can at least skip * direct draws. */ @@ -1362,8 +1362,8 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i if (index_size) { /* Translate or upload, if needed. */ - /* 8-bit indices are supported on VI. */ - if (sctx->chip_class <= CIK && index_size == 1) { + /* 8-bit indices are supported on GFX8. */ + if (sctx->chip_class <= GFX7 && index_size == 1) { unsigned start, count, start_offset, size, offset; void *ptr; @@ -1403,9 +1403,9 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i /* info->start will be added by the drawing code */ index_offset -= start_offset; - } else if (sctx->chip_class <= CIK && + } else if (sctx->chip_class <= GFX7 && si_resource(indexbuf)->TC_L2_dirty) { - /* VI reads index buffers through TC L2, so it doesn't + /* GFX8 reads index buffers through TC L2, so it doesn't * need this. */ sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; si_resource(indexbuf)->TC_L2_dirty = false; @@ -1419,7 +1419,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i si_context_add_resource_size(sctx, indirect->buffer); /* Indirect buffers use TC L2 on GFX9, but not older hw. 
*/ - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { if (si_resource(indirect->buffer)->TC_L2_dirty) { sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; si_resource(indirect->buffer)->TC_L2_dirty = false; @@ -1498,7 +1498,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i /* Start prefetches after the draw has been started. Both will run * in parallel, but starting the draw first is more important. */ - if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask) + if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask) cik_emit_prefetch_L2(sctx, false); } else { /* If we don't wait for idle, start prefetches first, then set @@ -1508,7 +1508,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i si_emit_cache_flush(sctx); /* Only prefetch the API VS and VBO descriptors. */ - if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask) + if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask) cik_emit_prefetch_L2(sctx, true); if (!si_upload_graphics_shader_descriptors(sctx)) @@ -1527,7 +1527,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i /* Prefetch the remaining shaders after the draw has been * started. */ - if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask) + if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask) cik_emit_prefetch_L2(sctx, false); } diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 51a3af92d0c..10677f175de 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -473,7 +473,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader) unsigned vgpr_comp_cnt; uint64_t va; - assert(sscreen->info.chip_class <= VI); + assert(sscreen->info.chip_class <= GFX8); pm4 = si_get_shader_pm4_state(shader); if (!pm4) @@ -547,7 +547,7 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) S_00B428_FLOAT_MODE(shader->config.float_mode) | S_00B428_LS_VGPR_COMP_CNT(ls_vgpr_comp_cnt)); - if (sscreen->info.chip_class <= VI) { + if (sscreen->info.chip_class <= GFX8) { si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, shader->config.rsrc2); } @@ -587,7 +587,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) uint64_t va; unsigned oc_lds_en; - assert(sscreen->info.chip_class <= VI); + assert(sscreen->info.chip_class <= GFX8); pm4 = si_get_shader_pm4_state(shader); if (!pm4) @@ -973,7 +973,7 @@ static void si_emit_shader_vs(struct si_context *sctx) SI_TRACKED_VGT_PRIMITIVEID_EN, shader->ctx_reg.vs.vgt_primitiveid_en); - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { radeon_opt_set_context_reg(sctx, R_028AB4_VGT_REUSE_OFF, SI_TRACKED_VGT_REUSE_OFF, shader->ctx_reg.vs.vgt_reuse_off); @@ -1052,7 +1052,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, shader->ctx_reg.vs.vgt_primitiveid_en = 0; } - if (sscreen->info.chip_class <= VI) { + if (sscreen->info.chip_class <= GFX8) { /* Reuse needs to be set off if we write oViewport. 
*/ shader->ctx_reg.vs.vgt_reuse_off = S_028AB4_REUSE_OFF(info->writes_viewport_index); @@ -1602,11 +1602,11 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, blend && blend->alpha_to_coverage) key->part.ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR; - /* On SI and CIK except Hawaii, the CB doesn't clamp outputs + /* On GFX6 and GFX7 except Hawaii, the CB doesn't clamp outputs * to the range supported by the type if a channel has less * than 16 bits and the export format is 16_ABGR. */ - if (sctx->chip_class <= CIK && sctx->family != CHIP_HAWAII) { + if (sctx->chip_class <= GFX7 && sctx->family != CHIP_HAWAII) { key->part.ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8; key->part.ps.epilog.color_is_int10 = sctx->framebuffer.color_is_int10; } @@ -2706,10 +2706,10 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader) switch (shader->selector->type) { case PIPE_SHADER_VERTEX: if (shader->key.as_ls) { - assert(sctx->chip_class <= VI); + assert(sctx->chip_class <= GFX8); si_pm4_delete_state(sctx, ls, shader->pm4); } else if (shader->key.as_es) { - assert(sctx->chip_class <= VI); + assert(sctx->chip_class <= GFX8); si_pm4_delete_state(sctx, es, shader->pm4); } else { si_pm4_delete_state(sctx, vs, shader->pm4); @@ -2720,7 +2720,7 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader) break; case PIPE_SHADER_TESS_EVAL: if (shader->key.as_es) { - assert(sctx->chip_class <= VI); + assert(sctx->chip_class <= GFX8); si_pm4_delete_state(sctx, es, shader->pm4); } else { si_pm4_delete_state(sctx, vs, shader->pm4); @@ -2937,10 +2937,10 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) unsigned num_se = sctx->screen->info.max_se; unsigned wave_size = 64; unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */ - /* On SI-CI, the value comes from VGT_GS_VERTEX_REUSE = 16. - * On VI+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2). + /* On GFX6-GFX7, the value comes from VGT_GS_VERTEX_REUSE = 16. + * On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2). */ - unsigned gs_vertex_reuse = (sctx->chip_class >= VI ? 32 : 16) * num_se; + unsigned gs_vertex_reuse = (sctx->chip_class >= GFX8 ? 32 : 16) * num_se; unsigned alignment = 256 * num_se; /* The maximum size is 63.999 MB per SE. */ unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se; @@ -2967,7 +2967,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) * * GFX9 doesn't have the ESGS ring. */ - bool update_esgs = sctx->chip_class <= VI && + bool update_esgs = sctx->chip_class <= GFX8 && esgs_ring_size && (!sctx->esgs_ring || sctx->esgs_ring->width0 < esgs_ring_size); @@ -3005,9 +3005,9 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) if (!pm4) return false; - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { if (sctx->esgs_ring) { - assert(sctx->chip_class <= VI); + assert(sctx->chip_class <= GFX8); si_pm4_set_reg(pm4, R_030900_VGT_ESGS_RING_SIZE, sctx->esgs_ring->width0 / 256); } @@ -3039,7 +3039,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) /* Set ring bindings. 
*/ if (sctx->esgs_ring) { - assert(sctx->chip_class <= VI); + assert(sctx->chip_class <= GFX8); si_set_ring_buffer(sctx, SI_ES_RING_ESGS, sctx->esgs_ring, 0, sctx->esgs_ring->width0, true, true, 4, 64, 0); @@ -3288,7 +3288,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx) sctx->screen->tess_offchip_ring_size; /* Append these registers to the init config state. */ - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(sctx->screen->tess_factor_ring_size / 4)); si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE, @@ -3376,7 +3376,7 @@ bool si_update_shaders(struct si_context *sctx) } /* VS as LS */ - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { r = si_shader_select(ctx, &sctx->vs_shader, &compiler_state); if (r) @@ -3408,7 +3408,7 @@ bool si_update_shaders(struct si_context *sctx) if (sctx->gs_shader.cso) { /* TES as ES */ - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { r = si_shader_select(ctx, &sctx->tes_shader, &compiler_state); if (r) @@ -3424,7 +3424,7 @@ bool si_update_shaders(struct si_context *sctx) si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4); } } else if (sctx->gs_shader.cso) { - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { /* VS as ES */ r = si_shader_select(ctx, &sctx->vs_shader, &compiler_state); @@ -3457,7 +3457,7 @@ bool si_update_shaders(struct si_context *sctx) return false; } else { si_pm4_bind_state(sctx, gs, NULL); - if (sctx->chip_class <= VI) + if (sctx->chip_class <= GFX8) si_pm4_bind_state(sctx, es, NULL); } @@ -3504,7 +3504,7 @@ bool si_update_shaders(struct si_context *sctx) sctx->smoothing_enabled = sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing; si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); - if (sctx->chip_class == SI) + if (sctx->chip_class == GFX6) si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); if (sctx->framebuffer.nr_samples <= 1) @@ -3522,7 +3522,7 @@ bool si_update_shaders(struct si_context *sctx) return false; } - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { if (si_pm4_state_enabled_and_changed(sctx, ls)) sctx->prefetch_L2_mask |= SI_PREFETCH_LS; else if (!sctx->queued.named.ls) diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c index 2a0a4bef9a2..e7058f19a8a 100644 --- a/src/gallium/drivers/radeonsi/si_state_streamout.c +++ b/src/gallium/drivers/radeonsi/si_state_streamout.c @@ -103,7 +103,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx, * to flush it. * * The only cases which requires flushing it is VGT DMA index - * fetching (on <= CIK) and indirect draw data, which are rare + * fetching (on <= GFX7) and indirect draw data, which are rare * cases. Thus, flag the TC L2 dirtiness in the resource and * handle it at draw call time. */ @@ -195,7 +195,7 @@ static void si_flush_vgt_streamout(struct si_context *sctx) unsigned reg_strmout_cntl; /* The register is at different places on different ASICs. */ - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0); } else { @@ -230,7 +230,7 @@ static void si_emit_streamout_begin(struct si_context *sctx) t[i]->stride_in_dw = stride_in_dw[i]; - /* SI binds streamout buffers as shader resources. + /* AMD GCN binds streamout buffers as shader resources. 
* VGT only counts primitives and tells the shader * through SGPRs what to do. */ radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2); diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c index 792d1c4efd1..a144d7b661c 100644 --- a/src/gallium/drivers/radeonsi/si_state_viewport.c +++ b/src/gallium/drivers/radeonsi/si_state_viewport.c @@ -126,10 +126,10 @@ static void si_emit_one_scissor(struct si_context *ctx, if (scissor) si_clip_scissor(&final, scissor); - /* Workaround for a hw bug on SI that occurs when PA_SU_HARDWARE_- + /* Workaround for a hw bug on GFX6 that occurs when PA_SU_HARDWARE_- * SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0. */ - if (ctx->chip_class == SI && (final.maxx == 0 || final.maxy == 0)) { + if (ctx->chip_class == GFX6 && (final.maxx == 0 || final.maxy == 0)) { radeon_emit(cs, S_028250_TL_X(1) | S_028250_TL_Y(1) | S_028250_WINDOW_OFFSET_DISABLE(1)); @@ -180,9 +180,9 @@ static void si_emit_guardband(struct si_context *ctx) int hw_screen_offset_x = (vp_as_scissor.maxx + vp_as_scissor.minx) / 2; int hw_screen_offset_y = (vp_as_scissor.maxy + vp_as_scissor.miny) / 2; - /* SI-CI need to align the offset to an ubertile consisting of all SEs. */ + /* GFX6-GFX7 need to align the offset to an ubertile consisting of all SEs. */ const unsigned hw_screen_offset_alignment = - ctx->chip_class >= VI ? 16 : MAX2(ctx->screen->se_tile_repeat, 16); + ctx->chip_class >= GFX8 ? 16 : MAX2(ctx->screen->se_tile_repeat, 16); /* Indexed by quantization modes */ static int max_viewport_size[] = {65535, 16383, 4095}; diff --git a/src/gallium/drivers/radeonsi/si_test_dma_perf.c b/src/gallium/drivers/radeonsi/si_test_dma_perf.c index 124f5bb5c12..263187d683f 100644 --- a/src/gallium/drivers/radeonsi/si_test_dma_perf.c +++ b/src/gallium/drivers/radeonsi/si_test_dma_perf.c @@ -112,11 +112,11 @@ void si_test_dma_perf(struct si_screen *sscreen) unsigned cs_dwords_per_thread = test_cs ? cs_dwords_per_thread_list[cs_method % NUM_SHADERS] : 0; - if (sctx->chip_class == SI) { - /* SI doesn't support CP DMA operations through L2. */ + if (sctx->chip_class == GFX6) { + /* GFX6 doesn't support CP DMA operations through L2. */ if (test_cp && cache_policy != L2_BYPASS) continue; - /* WAVES_PER_SH is in multiples of 16 on SI. */ + /* WAVES_PER_SH is in multiples of 16 on GFX6. */ if (test_cs && cs_waves_per_sh % 16 != 0) continue; } @@ -151,7 +151,7 @@ void si_test_dma_perf(struct si_screen *sscreen) unsigned query_type = PIPE_QUERY_TIME_ELAPSED; if (test_sdma) { - if (sctx->chip_class == SI) + if (sctx->chip_class == GFX6) query_type = SI_QUERY_TIME_ELAPSED_SDMA_SI; else query_type = SI_QUERY_TIME_ELAPSED_SDMA; @@ -346,10 +346,10 @@ void si_test_dma_perf(struct si_screen *sscreen) if (!r->is_valid) continue; - /* Ban CP DMA clears via MC on <= VI. They are super slow + /* Ban CP DMA clears via MC on <= GFX8. They are super slow * on GTT, which we can get due to BO evictions. */ - if (sctx->chip_class <= VI && placement == 1 && + if (sctx->chip_class <= GFX8 && placement == 1 && r->is_cp && r->cache_policy == L2_BYPASS) continue; diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index 59d50376438..74c9cf9d7bf 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -254,10 +254,10 @@ static int si_init_surface(struct si_screen *sscreen, array_mode == RADEON_SURF_MODE_2D)) { /* TC-compatible HTILE only supports Z32_FLOAT. 
* GFX9 also supports Z16_UNORM. - * On VI, promote Z16 to Z32. DB->CB copies will convert + * On GFX8, promote Z16 to Z32. DB->CB copies will convert * the format for transfers. */ - if (sscreen->info.chip_class == VI) + if (sscreen->info.chip_class == GFX8) bpe = 4; flags |= RADEON_SURF_TC_COMPATIBLE_HTILE; @@ -267,7 +267,7 @@ static int si_init_surface(struct si_screen *sscreen, flags |= RADEON_SURF_SBUFFER; } - if (sscreen->info.chip_class >= VI && + if (sscreen->info.chip_class >= GFX8 && (ptex->flags & SI_RESOURCE_FLAG_DISABLE_DCC || ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT || (ptex->nr_samples >= 2 && !sscreen->dcc_msaa_allowed))) @@ -278,8 +278,8 @@ static int si_init_surface(struct si_screen *sscreen, bpe == 16 && ptex->nr_samples >= 2) flags |= RADEON_SURF_DISABLE_DCC; - /* VI: DCC clear for 4x and 8x MSAA array textures unimplemented. */ - if (sscreen->info.chip_class == VI && + /* GFX8: DCC clear for 4x and 8x MSAA array textures unimplemented. */ + if (sscreen->info.chip_class == GFX8 && ptex->nr_storage_samples >= 4 && ptex->array_size > 1) flags |= RADEON_SURF_DISABLE_DCC; @@ -700,7 +700,7 @@ static void si_set_tex_bo_metadata(struct si_screen *sscreen, md.size_metadata = 10 * 4; /* Dwords [10:..] contain the mipmap level offsets. */ - if (sscreen->info.chip_class <= VI) { + if (sscreen->info.chip_class <= GFX8) { for (unsigned i = 0; i <= res->last_level; i++) md.metadata[10+i] = tex->surface.u.legacy.level[i].offset >> 8; @@ -716,7 +716,7 @@ static void si_get_opaque_metadata(struct si_screen *sscreen, { uint32_t *desc = &md->metadata[2]; - if (sscreen->info.chip_class < VI) + if (sscreen->info.chip_class < GFX8) return; /* Return if DCC is enabled. The texture should be set up with it @@ -757,7 +757,7 @@ static bool si_has_displayable_dcc(struct si_texture *tex) { struct si_screen *sscreen = (struct si_screen*)tex->buffer.b.b.screen; - if (sscreen->info.chip_class <= VI) + if (sscreen->info.chip_class <= GFX8) return false; /* This needs a cache flush before scanout. @@ -849,7 +849,7 @@ static boolean si_texture_get_handle(struct pipe_screen* screen, assert(tex->surface.tile_swizzle == 0); } - /* Since shader image stores don't support DCC on VI, + /* Since shader image stores don't support DCC on GFX8, * disable it for external clients that want write * access. */ @@ -974,7 +974,7 @@ static void si_texture_get_htile_size(struct si_screen *sscreen, unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align; unsigned num_pipes = sscreen->info.num_tile_pipes; - assert(sscreen->info.chip_class <= VI); + assert(sscreen->info.chip_class <= GFX8); tex->surface.htile_size = 0; @@ -989,7 +989,7 @@ static void si_texture_get_htile_size(struct si_screen *sscreen, * are always reproducible. I think I have seen the test hang * on Carrizo too, though it was very rare there. */ - if (sscreen->info.chip_class >= CIK && num_pipes < 4) + if (sscreen->info.chip_class >= GFX7 && num_pipes < 4) num_pipes = 4; switch (num_pipes) { @@ -1036,7 +1036,7 @@ static void si_texture_get_htile_size(struct si_screen *sscreen, static void si_texture_allocate_htile(struct si_screen *sscreen, struct si_texture *tex) { - if (sscreen->info.chip_class <= VI && !tex->tc_compatible_htile) + if (sscreen->info.chip_class <= GFX8 && !tex->tc_compatible_htile) si_texture_get_htile_size(sscreen, tex); if (!tex->surface.htile_size) @@ -1229,7 +1229,7 @@ si_texture_create_object(struct pipe_screen *screen, RADEON_SURF_TC_COMPATIBLE_HTILE); /* TC-compatible HTILE: - * - VI only supports Z32_FLOAT. 
+ * - GFX8 only supports Z32_FLOAT. * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */ if (tex->tc_compatible_htile) { if (sscreen->info.chip_class >= GFX9 && @@ -1506,10 +1506,10 @@ si_choose_tiling(struct si_screen *sscreen, if (templ->flags & SI_RESOURCE_FLAG_TRANSFER) return RADEON_SURF_MODE_LINEAR_ALIGNED; - /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on VI, + /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on GFX8, * which requires 2D tiling. */ - if (sscreen->info.chip_class == VI && tc_compatible_htile) + if (sscreen->info.chip_class == GFX8 && tc_compatible_htile) return RADEON_SURF_MODE_2D; /* Handle common candidates for the linear mode. @@ -1525,7 +1525,7 @@ si_choose_tiling(struct si_screen *sscreen, if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) return RADEON_SURF_MODE_LINEAR_ALIGNED; - /* Cursors are linear on SI. + /* Cursors are linear on AMD GCN. * (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */ if (templ->bind & PIPE_BIND_CURSOR) return RADEON_SURF_MODE_LINEAR_ALIGNED; @@ -1582,7 +1582,7 @@ struct pipe_resource *si_texture_create(struct pipe_screen *screen, struct radeon_surf surface = {0}; bool is_flushed_depth = templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH; bool tc_compatible_htile = - sscreen->info.chip_class >= VI && + sscreen->info.chip_class >= GFX8 && /* There are issues with TC-compatible HTILE on Tonga (and * Iceland is the same design), and documented bug workarounds * don't help. For example, this fails: @@ -2450,7 +2450,7 @@ void vi_separate_dcc_try_enable(struct si_context *sctx, sctx->screen->debug_flags & DBG(NO_DCC_FB)) return; - assert(sctx->chip_class >= VI); + assert(sctx->chip_class >= GFX8); if (tex->dcc_offset) return; /* already enabled */ |
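
A note for readers following the rename: the sketch below is a minimal, self-contained illustration (not code from the Mesa tree) of why the mechanical SI/CIK/VI → GFX6/GFX7/GFX8 substitution is safe in ordered comparisons such as `sctx->chip_class >= GFX7` or `<= GFX8` — the enumerators keep their relative order, so every generation gate behaves exactly as before. The enum values, the aliasing comments, and the helper name are illustrative assumptions; the real definitions live in the shared AMD headers. The helper mirrors the register-selection pattern visible in the si_emit_draw_registers() hunk above (GFX9 uses the uconfig copy of IA_MULTI_VGT_PARAM, GFX7-GFX8 the indexed context register, GFX6 the plain context register).

```c
#include <stdio.h>

/* Illustrative stand-in for the renamed generation enum; the real one
 * is defined in the common AMD headers.  Only the ordering matters for
 * the >= / <= gates used throughout this patch. */
enum chip_class {
	GFX6 = 1,	/* formerly SI  */
	GFX7,		/* formerly CIK */
	GFX8,		/* formerly VI  */
	GFX9,
};

/* Hypothetical helper mirroring the draw-register selection shown in
 * the si_emit_draw_registers() hunk: newer generations take earlier
 * branches, so a pure rename of the enumerators changes nothing. */
static const char *ia_multi_vgt_param_path(enum chip_class chip_class)
{
	if (chip_class >= GFX9)
		return "uconfig reg (R_030960_IA_MULTI_VGT_PARAM)";
	if (chip_class >= GFX7)
		return "indexed context reg (R_028AA8_IA_MULTI_VGT_PARAM)";
	return "plain context reg (R_028AA8_IA_MULTI_VGT_PARAM)";
}

int main(void)
{
	static const struct { enum chip_class cc; const char *name; } gens[] = {
		{ GFX6, "GFX6 (was SI)"  },
		{ GFX7, "GFX7 (was CIK)" },
		{ GFX8, "GFX8 (was VI)"  },
		{ GFX9, "GFX9"           },
	};

	/* Print which register path each generation would take. */
	for (unsigned i = 0; i < sizeof(gens) / sizeof(gens[0]); i++)
		printf("%-15s -> %s\n", gens[i].name,
		       ia_multi_vgt_param_path(gens[i].cc));
	return 0;
}
```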