diff options
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_compute.c')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_compute.c | 26 |
1 files changed, 13 insertions, 13 deletions
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index f1a433b72df..46a0ba76ed5 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -327,7 +327,7 @@ static void si_initialize_compute(struct si_context *sctx) radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff)); radeon_emit(cs, S_00B85C_SH0_CU_EN(0xffff) | S_00B85C_SH1_CU_EN(0xffff)); - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { /* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */ radeon_set_sh_reg_seq(cs, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2); @@ -342,7 +342,7 @@ static void si_initialize_compute(struct si_context *sctx) * kernel if we want to use something other than the default value, * which is now 0x22f. */ - if (sctx->chip_class <= SI) { + if (sctx->chip_class <= GFX6) { /* XXX: This should be: * (number of compute units) * 4 * (waves per simd) - 1 */ @@ -353,7 +353,7 @@ static void si_initialize_compute(struct si_context *sctx) /* Set the pointer to border colors. */ bc_va = sctx->border_color_buffer->gpu_address; - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { radeon_set_uconfig_reg_seq(cs, R_030E00_TA_CS_BC_BASE_ADDR, 2); radeon_emit(cs, bc_va >> 8); /* R_030E00_TA_CS_BC_BASE_ADDR */ radeon_emit(cs, S_030E04_ADDRESS(bc_va >> 40)); /* R_030E04_TA_CS_BC_BASE_ADDR_HI */ @@ -434,12 +434,12 @@ static bool si_switch_compute_shader(struct si_context *sctx, } lds_blocks = config->lds_size; - /* XXX: We are over allocating LDS. For SI, the shader reports + /* XXX: We are over allocating LDS. For GFX6, the shader reports * LDS in blocks of 256 bytes, so if there are 4 bytes lds * allocated in the shader and 4 bytes allocated by the state * tracker, then we will set LDS_SIZE to 512 bytes rather than 256. */ - if (sctx->chip_class <= SI) { + if (sctx->chip_class <= GFX6) { lds_blocks += align(program->local_size, 256) >> 8; } else { lds_blocks += align(program->local_size, 512) >> 9; @@ -474,7 +474,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, * command. However, that would add more complexity and we're likely * to get a shader state change in that case anyway. */ - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { cik_prefetch_TC_L2_async(sctx, &program->shader.bo->b.b, 0, program->shader.bo->b.b.width0); } @@ -539,7 +539,7 @@ static void setup_scratch_rsrc_user_sgprs(struct si_context *sctx, } else { scratch_dword3 |= S_008F0C_ELEMENT_SIZE(max_private_element_size); - if (sctx->chip_class < VI) { + if (sctx->chip_class < GFX8) { /* BUF_DATA_FORMAT is ignored, but it cannot be * BUF_DATA_FORMAT_INVALID. */ scratch_dword3 |= @@ -764,7 +764,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, unsigned compute_resource_limits = S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0); - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { unsigned num_cu_per_se = sscreen->info.num_good_compute_units / sscreen->info.max_se; @@ -777,7 +777,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, compute_resource_limits |= S_00B854_WAVES_PER_SH(sctx->cs_max_waves_per_sh); } else { - /* SI */ + /* GFX6 */ if (sctx->cs_max_waves_per_sh) { unsigned limit_div16 = DIV_ROUND_UP(sctx->cs_max_waves_per_sh, 16); compute_resource_limits |= S_00B854_WAVES_PER_SH_SI(limit_div16); @@ -792,7 +792,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, S_00B800_FORCE_START_AT_000(1) | /* If the KMD allows it (there is a KMD hw register for it), * allow launching waves out-of-order. (same as Vulkan) */ - S_00B800_ORDER_MODE(sctx->chip_class >= CIK); + S_00B800_ORDER_MODE(sctx->chip_class >= GFX7); const uint *last_block = info->last_block; bool partial_block_en = last_block[0] || last_block[1] || last_block[2]; @@ -861,10 +861,10 @@ static void si_launch_grid( * compute isn't used, i.e. only one compute job can run at a time. * If async compute is possible, the threadgroup size must be limited * to 256 threads on all queues to avoid the bug. - * Only SI and certain CIK chips are affected. + * Only GFX6 and certain GFX7 chips are affected. */ bool cs_regalloc_hang = - (sctx->chip_class == SI || + (sctx->chip_class == GFX6 || sctx->family == CHIP_BONAIRE || sctx->family == CHIP_KABINI) && info->block[0] * info->block[1] * info->block[2] > 256; @@ -894,7 +894,7 @@ static void si_launch_grid( si_context_add_resource_size(sctx, info->indirect); /* Indirect buffers use TC L2 on GFX9, but not older hw. */ - if (sctx->chip_class <= VI && + if (sctx->chip_class <= GFX8 && si_resource(info->indirect)->TC_L2_dirty) { sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; si_resource(info->indirect)->TC_L2_dirty = false; |