diff options
author | Tom Stellard <[email protected]> | 2012-06-05 13:11:11 -0400 |
---|---|---|
committer | Tom Stellard <[email protected]> | 2012-06-06 10:49:36 -0400 |
commit | 0c4b19ac63efa41242c515824301e6161aceeea5 (patch) | |
tree | 7ae03e15d081e785b1a7d4b172f086f5000a2404 | |
parent | 2bb2e6a6e3017d462be0ae9308955f37c5ee03c6 (diff) |
r600g: Compute support for Cayman
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute.c | 47 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute_internal.c | 16 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreend.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/R600Instructions.td | 92 |
5 files changed, 95 insertions, 66 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 7aeb4038ae1..ceb3b3a841d 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -165,8 +165,10 @@ static void evergreen_bind_compute_state(struct pipe_context *ctx_, void *state) struct evergreen_compute_resource* res = get_empty_res(ctx->cs_shader, COMPUTE_RESOURCE_SHADER, 0); - evergreen_reg_set(res, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, + if (ctx->chip_class < CAYMAN) { + evergreen_reg_set(res, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, S_008C0C_NUM_LS_GPRS(ctx->cs_shader->bc.ngpr)); + } ///maybe we can use it later evergreen_reg_set(res, R_0286C8_SPI_THREAD_GROUPING, 0); @@ -606,31 +608,48 @@ void evergreen_compute_init_config(struct r600_context *ctx) evergreen_reg_set(res, R_008C04_SQ_GPR_RESOURCE_MGMT_1, S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); - evergreen_reg_set(res, R_008C08_SQ_GPR_RESOURCE_MGMT_2, 0); + if (ctx->chip_class < CAYMAN) { + evergreen_reg_set(res, R_008C08_SQ_GPR_RESOURCE_MGMT_2, 0); + } evergreen_reg_set(res, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0); evergreen_reg_set(res, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0); evergreen_reg_set(res, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8)); + /* workaround for hw issues with dyn gpr - must set all limits to 240 * instead of 0, 0x1e == 240/8 */ - evergreen_reg_set(res, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, + if (ctx->chip_class < CAYMAN) { + evergreen_reg_set(res, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, S_028838_PS_GPRS(0x1e) | S_028838_VS_GPRS(0x1e) | S_028838_GS_GPRS(0x1e) | S_028838_ES_GPRS(0x1e) | S_028838_HS_GPRS(0x1e) | S_028838_LS_GPRS(0x1e)); + } else { + evergreen_reg_set(res, 0x286f8, + S_028838_PS_GPRS(0x1e) | + S_028838_VS_GPRS(0x1e) | + S_028838_GS_GPRS(0x1e) | + S_028838_ES_GPRS(0x1e) | + S_028838_HS_GPRS(0x1e) | + S_028838_LS_GPRS(0x1e)); + } - - evergreen_reg_set(res, R_008E20_SQ_STATIC_THREAD_MGMT1, 0xFFFFFFFF); - evergreen_reg_set(res, R_008E24_SQ_STATIC_THREAD_MGMT2, 0xFFFFFFFF); - evergreen_reg_set(res, R_008E28_SQ_STATIC_THREAD_MGMT3, 0xFFFFFFFF); - evergreen_reg_set(res, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 0); - tmp = S_008C1C_NUM_LS_THREADS(num_threads); - evergreen_reg_set(res, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp); - evergreen_reg_set(res, R_008C20_SQ_STACK_RESOURCE_MGMT_1, 0); - evergreen_reg_set(res, R_008C24_SQ_STACK_RESOURCE_MGMT_2, 0); - tmp = S_008C28_NUM_LS_STACK_ENTRIES(num_stack_entries); - evergreen_reg_set(res, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp); + if (ctx->chip_class < CAYMAN) { + + evergreen_reg_set(res, R_008E20_SQ_STATIC_THREAD_MGMT1, 0xFFFFFFFF); + evergreen_reg_set(res, R_008E24_SQ_STATIC_THREAD_MGMT2, 0xFFFFFFFF); + evergreen_reg_set(res, R_008E20_SQ_STATIC_THREAD_MGMT1, 0xFFFFFFFF); + evergreen_reg_set(res, R_008E24_SQ_STATIC_THREAD_MGMT2, 0xFFFFFFFF); + evergreen_reg_set(res, R_008E28_SQ_STATIC_THREAD_MGMT3, 0xFFFFFFFF); + evergreen_reg_set(res, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 0); + tmp = S_008C1C_NUM_LS_THREADS(num_threads); + evergreen_reg_set(res, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp); + evergreen_reg_set(res, R_008C20_SQ_STACK_RESOURCE_MGMT_1, 0); + evergreen_reg_set(res, R_008C24_SQ_STACK_RESOURCE_MGMT_2, 0); + tmp = S_008C28_NUM_LS_STACK_ENTRIES(num_stack_entries); + evergreen_reg_set(res, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp); + } evergreen_reg_set(res, R_0286CC_SPI_PS_IN_CONTROL_0, S_0286CC_LINEAR_GRADIENT_ENA(1)); evergreen_reg_set(res, R_0286D0_SPI_PS_IN_CONTROL_1, 0); evergreen_reg_set(res, R_0286E4_SPI_PS_IN_CONTROL_2, 0); diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c index 209f064d1de..d846cbe758a 100644 --- a/src/gallium/drivers/r600/evergreen_compute_internal.c +++ b/src/gallium/drivers/r600/evergreen_compute_internal.c @@ -438,8 +438,13 @@ void evergreen_set_lds( struct evergreen_compute_resource* res = get_empty_res(pipe, COMPUTE_RESOURCE_LDS, 0); - evergreen_reg_set(res, R_008E2C_SQ_LDS_RESOURCE_MGMT, - S_008E2C_NUM_LS_LDS(num_lds)); + if (pipe->ctx->chip_class < CAYMAN) { + evergreen_reg_set(res, R_008E2C_SQ_LDS_RESOURCE_MGMT, + S_008E2C_NUM_LS_LDS(num_lds)); + } else { + evergreen_reg_set(res, CM_R_0286FC_SPI_LDS_MGMT, + S_0286FC_NUM_LS_LDS(num_lds)); + } evergreen_reg_set(res, CM_R_0288E8_SQ_LDS_ALLOC, size | num_waves << 14); } @@ -620,6 +625,7 @@ void evergreen_set_vtx_resource( assert(id < 16); uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4; struct number_type_and_format fmt; + uint64_t va; fmt.format = 0; @@ -639,11 +645,13 @@ void evergreen_set_vtx_resource( // size = (size * util_format_get_blockwidth(bo->b.b.b.format) * // util_format_get_blocksize(bo->b.b.b.format)); + va = r600_resource_va(&pipe->ctx->screen->screen, &bo->b.b) + offset; + COMPUTE_DBG("id: %i vtx size: %i byte, width0: %i elem\n", id, size, bo->b.b.width0); sq_vtx_constant_word2 = - S_030008_BASE_ADDRESS_HI(offset >> 32) | + S_030008_BASE_ADDRESS_HI(va >> 32) | S_030008_STRIDE(stride) | S_030008_DATA_FORMAT(fmt.format) | S_030008_NUM_FORMAT_ALL(fmt.num_format_all) | @@ -662,7 +670,7 @@ void evergreen_set_vtx_resource( evergreen_emit_raw_value(res, PKT3C(PKT3_SET_RESOURCE, 8, 0)); evergreen_emit_raw_value(res, (id+816)*32 >> 2); - evergreen_emit_raw_value(res, (unsigned)((offset) & 0xffffffff)); + evergreen_emit_raw_value(res, (unsigned)((va) & 0xffffffff)); evergreen_emit_raw_value(res, size - 1); evergreen_emit_raw_value(res, sq_vtx_constant_word2); evergreen_emit_raw_value(res, sq_vtx_constant_word3); diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 3b6d7304551..5d57ce3a9a2 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -2129,6 +2129,9 @@ #define ENDIAN_8IN32 2 #define ENDIAN_8IN64 3 +#define CM_R_0286FC_SPI_LDS_MGMT 0x286fc +#define S_0286FC_NUM_PS_LDS(x) ((x) & 0xff) +#define S_0286FC_NUM_LS_LDS(x) ((x) & 0xff) << 8 #define CM_R_0288E8_SQ_LDS_ALLOC 0x000288E8 #define CM_R_028804_DB_EQAA 0x00028804 diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 8a80dba38a4..63b9a037692 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -247,6 +247,9 @@ int r600_compute_shader_create(struct pipe_context * ctx, r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family); shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE; r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count); + if (shader_ctx.bc->chip_class == CAYMAN) { + cm_bytecode_add_cf_end(shader_ctx.bc); + } r600_bytecode_build(shader_ctx.bc); if (dump) { r600_bytecode_dump(shader_ctx.bc); diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index 9caaf1c86a0..12900fb40ab 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -784,54 +784,6 @@ class TRIG_HELPER_r700 <InstR600 trig_inst>: Pat < >; */ -/* ---------------------- */ -/* Evergreen Instructions */ -/* ---------------------- */ - - -let Predicates = [isEG] in { - -let usesCustomInserter = 1 in { - -def RAT_WRITE_CACHELESS_eg : EG_CF_RAT <0x57, 0x2, 0, (outs), - (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr), - "RAT_WRITE_CACHELESS_eg $rw_gpr, $index_gpr", - [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)]> -{ - let RIM = 0; - /* XXX: Have a separate instruction for non-indexed writes. */ - let TYPE = 1; - let RW_REL = 0; - let ELEM_SIZE = 0; - - let ARRAY_SIZE = 0; - let COMP_MASK = 1; - let BURST_COUNT = 0; - let VPM = 0; - let EOP = 0; - let MARK = 0; - let BARRIER = 1; -} - -} // End usesCustomInserter = 1 - -class VTX_READ_eg <int buffer_id, list<dag> pattern> : InstR600ISA < - (outs R600_TReg32_X:$dst), - (ins MEMxi:$ptr), - "VTX_READ_eg $dst, $ptr", - pattern ->; - -def VTX_READ_PARAM_eg : VTX_READ_eg <0, - [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))] ->; - -def VTX_READ_GLOBAL_eg : VTX_READ_eg <1, - [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))] ->; - -} // End isEG Predicate - /* ------------------------------- */ /* Evergreen / Cayman Instructions */ /* ------------------------------- */ @@ -893,6 +845,50 @@ class TRIG_eg <InstR600 trig, Intrinsic intr> : Pat< def : Pat<(fp_to_uint R600_Reg32:$src), (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src))>; + +//===----------------------------------------------------------------------===// +// Memory read/write instructions +//===----------------------------------------------------------------------===// + +let usesCustomInserter = 1 in { + +def RAT_WRITE_CACHELESS_eg : EG_CF_RAT <0x57, 0x2, 0, (outs), + (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr), + "RAT_WRITE_CACHELESS_eg $rw_gpr, $index_gpr", + [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)]> +{ + let RIM = 0; + /* XXX: Have a separate instruction for non-indexed writes. */ + let TYPE = 1; + let RW_REL = 0; + let ELEM_SIZE = 0; + + let ARRAY_SIZE = 0; + let COMP_MASK = 1; + let BURST_COUNT = 0; + let VPM = 0; + let EOP = 0; + let MARK = 0; + let BARRIER = 1; +} + +} // End usesCustomInserter = 1 + +class VTX_READ_eg <int buffer_id, list<dag> pattern> : InstR600ISA < + (outs R600_TReg32_X:$dst), + (ins MEMxi:$ptr), + "VTX_READ_eg $dst, $ptr", + pattern +>; + +def VTX_READ_PARAM_eg : VTX_READ_eg <0, + [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))] +>; + +def VTX_READ_GLOBAL_eg : VTX_READ_eg <1, + [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))] +>; + } let Predicates = [isCayman] in { |