diff options
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r-- | src/gallium/drivers/r600/eg_asm.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 151 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreend.h | 43 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 133 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_opcodes.h | 42 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 833 |
9 files changed, 1018 insertions, 200 deletions
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 3793b919dde..fb0b0f104bf 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -62,14 +62,17 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type); - bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | + bc->bytecode[id] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) | - S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) | - S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); + S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst); + if (bc->chiprev == CHIPREV_EVERGREEN) /* no EOP on cayman */ + bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); + id++; + break; case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP: case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE: @@ -80,6 +83,7 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: case EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN: + case CM_V_SQ_CF_WORD1_SQ_CF_INST_END: bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) | S_SQ_CF_WORD1_BARRIER(1) | diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 187f00e1e52..dcb28065499 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -71,15 +71,19 @@ static void evergreen_set_blend_color(struct pipe_context *ctx, static void *evergreen_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend); struct r600_pipe_state *rstate; u32 color_control, target_mask; /* FIXME there is more then 8 framebuffer */ unsigned blend_cntl[8]; + enum radeon_family family; if (blend == NULL) { return NULL; } + + family = r600_get_family(rctx->radeon); rstate = &blend->rstate; rstate->id = R600_PIPE_STATE_BLEND; @@ -102,9 +106,16 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, } } blend->cb_target_mask = target_mask; + r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, color_control, 0xFFFFFFFD, NULL); - r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); + + if (family != CHIP_CAYMAN) + r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); + else { + r600_pipe_state_add_reg(rstate, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0xFFFFFFFF, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, 0xFFFFFFFF, 0xFFFFFFFF, NULL); + } for (int i = 0; i < 8; i++) { /* state->rt entries > 0 only written if independent blending */ @@ -229,11 +240,15 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx, static void *evergreen_create_rs_state(struct pipe_context *ctx, const struct pipe_rasterizer_state *state) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer); struct r600_pipe_state *rstate; unsigned tmp; unsigned prov_vtx = 1, polygon_dual_mode; unsigned clip_rule; + enum radeon_family family; + + family = r600_get_family(rctx->radeon); if (rs == NULL) { return NULL; @@ -290,17 +305,30 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, tmp = (unsigned)state->line_width * 8; r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL); + if (family == CHIP_CAYMAN) { + r600_pipe_state_add_reg(rstate, CM_R_028BDC_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, CM_R_028BE4_PA_SU_VTX_CNTL, + S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, - S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), - 0xFFFFFFFF, NULL); + } else { + r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, + S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), + 0xFFFFFFFF, NULL); + } + r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL); return rstate; } @@ -832,10 +860,13 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); u32 shader_mask, tl, br, target_mask; + enum radeon_family family; if (rstate == NULL) return; + family = r600_get_family(rctx->radeon); + evergreen_context_flush_dest_caches(&rctx->ctx); rctx->ctx.num_dest_buffers = state->nr_cbufs; @@ -898,10 +929,17 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, 0x00000000, target_mask, NULL); r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK, shader_mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, - 0x00000000, 0xFFFFFFFF, NULL); + + + if (family == CHIP_CAYMAN) { + r600_pipe_state_add_reg(rstate, CM_R_028BE0_PA_SC_AA_CONFIG, + 0x00000000, 0xFFFFFFFF, NULL); + } else { + r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, + 0x00000000, 0xFFFFFFFF, NULL); + } free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate; @@ -968,6 +1006,85 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx) rctx->context.texture_barrier = evergreen_texture_barrier; } +static void cayman_init_config(struct r600_pipe_context *rctx) +{ + struct r600_pipe_state *rstate = &rctx->config; + unsigned tmp; + + tmp = 0x00000000; + tmp |= S_008C00_EXPORT_SRC_C(1); + r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, (4 << 28), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210, 0xffffffff, 0); + r600_pipe_state_add_reg(rstate, CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98, 0xffffffff, 0); + + r600_pipe_state_add_reg(rstate, CM_R_0288E8_SQ_LDS_ALLOC, 0, 0xffffffff, NULL); + r600_pipe_state_add_reg(rstate, R_0288EC_SQ_LDS_ALLOC_PS, 0, 0xffffffff, NULL); + + r600_pipe_state_add_reg(rstate, CM_R_028804_DB_EQAA, 0x110000, 0xffffffff, NULL); + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + void evergreen_init_config(struct r600_pipe_context *rctx) { struct r600_pipe_state *rstate = &rctx->config; @@ -999,6 +1116,12 @@ void evergreen_init_config(struct r600_pipe_context *rctx) unsigned tmp; family = r600_get_family(rctx->radeon); + + if (family == CHIP_CAYMAN) { + cayman_init_config(rctx); + return; + } + ps_prio = 0; vs_prio = 1; gs_prio = 2; diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 3e878106bea..ee0c7c9ed9b 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -33,15 +33,19 @@ #define EVERGREEN_CONTEXT_REG_END 0X00029000 #define EVERGREEN_RESOURCE_OFFSET 0x00030000 #define EVERGREEN_RESOURCE_END 0x00034000 +#define CAYMAN_RESOURCE_END 0x00038000 #define EVERGREEN_LOOP_CONST_OFFSET 0x0003A200 #define EVERGREEN_LOOP_CONST_END 0x0003A26C #define EVERGREEN_BOOL_CONST_OFFSET 0x0003A500 #define EVERGREEN_BOOL_CONST_END 0x0003A506 +#define CAYMAN_BOOL_CONST_END 0x0003A518 #define EVERGREEN_SAMPLER_OFFSET 0X0003C000 #define EVERGREEN_SAMPLER_END 0X0003CFF0 +#define CAYMAN_SAMPLER_END 0X0003C600 #define EVERGREEN_CTL_CONST_OFFSET 0x0003CFF0 #define EVERGREEN_CTL_CONST_END 0x0003E200 +#define CAYMAN_CTL_CONST_END 0x0003FF0C #define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10 #define EVENT_TYPE_ZPASS_DONE 0x15 @@ -1907,4 +1911,43 @@ #define ENDIAN_8IN32 2 #define ENDIAN_8IN64 3 +#define CM_R_0288E8_SQ_LDS_ALLOC 0x000288E8 + +#define CM_R_028804_DB_EQAA 0x00028804 + +#define CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0 0x00028BD4 +#define CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1 0x00028BD8 +#define CM_R_028BDC_PA_SC_LINE_CNTL 0x28bdc +#define CM_R_028BE0_PA_SC_AA_CONFIG 0x28be0 +#define CM_R_028BE4_PA_SU_VTX_CNTL 0x28be4 +#define CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ 0x28be8 +#define CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ 0x28bec +#define CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ 0x28bf0 +#define CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ 0x28bf4 + +#define CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 0x28bf8 +#define CM_R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1 0x28bfc +#define CM_R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2 0x28c00 +#define CM_R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3 0x28c04 + +#define CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0 0x28c08 +#define CM_R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1 0x28c0c +#define CM_R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2 0x28c10 +#define CM_R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3 0x28c14 + +#define CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0 0x28c18 +#define CM_R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1 0x28c1c +#define CM_R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2 0x28c20 +#define CM_R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3 0x28c24 + +#define CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0 0x28c28 +#define CM_R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1 0x28c2c +#define CM_R_028C30_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2 0x28c30 +#define CM_R_028C34_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3 0x28c34 + +#define CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0 0x28c38 +#define CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1 0x28c3c + +#define CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 0x00008C10 +#define CM_R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 0x00008C14 #endif diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 7b57fc80dc2..996418aa03a 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -95,6 +95,7 @@ enum radeon_family { CHIP_BARTS, CHIP_TURKS, CHIP_CAICOS, + CHIP_CAYMAN, CHIP_LAST, }; @@ -102,6 +103,7 @@ enum chip_class { R600, R700, EVERGREEN, + CAYMAN, }; struct r600_tiling_info { diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 033e84665f5..00572cbd5bd 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -94,6 +94,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r } break; case CHIPREV_EVERGREEN: + case CHIPREV_CAYMAN: switch (alu->inst) { case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: return 0; @@ -226,6 +227,9 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family) case CHIP_CAICOS: bc->chiprev = CHIPREV_EVERGREEN; break; + case CHIP_CAYMAN: + bc->chiprev = CHIPREV_CAYMAN; + break; default: R600_ERR("unknown family %d\n", bc->family); return -EINVAL; @@ -334,6 +338,7 @@ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT); case CHIPREV_EVERGREEN: + case CHIPREV_CAYMAN: default: return !alu->is_op3 && ( alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || @@ -384,6 +389,7 @@ static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4); case CHIPREV_EVERGREEN: + case CHIPREV_CAYMAN: default: return !alu->is_op3 && ( alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || @@ -401,6 +407,7 @@ static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) return !alu->is_op3 && alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; case CHIPREV_EVERGREEN: + case CHIPREV_CAYMAN: default: return !alu->is_op3 && alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; @@ -417,6 +424,7 @@ static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); case CHIPREV_EVERGREEN: + case CHIPREV_CAYMAN: default: return !alu->is_op3 && ( alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); @@ -469,6 +477,7 @@ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 || alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4; case CHIPREV_EVERGREEN: + case CHIPREV_CAYMAN: default: if (!alu->is_op3) /* Note that FLT_TO_INT_* instructions are vector-only instructions @@ -514,13 +523,16 @@ static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first, { struct r600_bc_alu *alu; unsigned i, chan, trans; + int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; - for (i = 0; i < 5; i++) + for (i = 0; i < max_slots; i++) assignment[i] = NULL; for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) { chan = alu->dst.chan; - if (is_alu_trans_unit_inst(bc, alu)) + if (max_slots == 4) + trans = 0; + else if (is_alu_trans_unit_inst(bc, alu)) trans = 1; else if (is_alu_vec_unit_inst(bc, alu)) trans = 0; @@ -719,8 +731,10 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, struct alu_bank_swizzle bs; int bank_swizzle[5]; int i, r = 0, forced = 0; - boolean scalar_only = true; - for (i = 0; i < 5; i++) { + boolean scalar_only = bc->chiprev == CHIPREV_CAYMAN ? false : true; + int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + + for (i = 0; i < max_slots; i++) { if (slots[i] && slots[i]->bank_swizzle_force) { slots[i]->bank_swizzle = slots[i]->bank_swizzle_force; forced = 1; @@ -737,6 +751,13 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, bank_swizzle[i] = SQ_ALU_VEC_012; bank_swizzle[4] = SQ_ALU_SCL_210; while(bank_swizzle[4] <= SQ_ALU_SCL_221) { + + if (max_slots == 4) { + for (i = 0; i < max_slots; i++) { + if (bank_swizzle[i] == SQ_ALU_VEC_210) + return -1; + } + } init_bank_swizzle(&bs); if (scalar_only == false) { for (i = 0; i < 4; i++) { @@ -749,11 +770,11 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, } else r = 0; - if (!r && slots[4]) { + if (!r && slots[4] && max_slots == 5) { r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]); } if (!r) { - for (i = 0; i < 5; i++) { + for (i = 0; i < max_slots; i++) { if (slots[i]) slots[i]->bank_swizzle = bank_swizzle[i]; } @@ -763,7 +784,7 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, if (scalar_only) { bank_swizzle[4]++; } else { - for (i = 0; i < 5; i++) { + for (i = 0; i < max_slots; i++) { bank_swizzle[i]++; if (bank_swizzle[i] <= SQ_ALU_VEC_210) break; @@ -783,12 +804,13 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, struct r600_bc_alu *prev[5]; int gpr[5], chan[5]; int i, j, r, src, num_src; + int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; r = assign_alu_units(bc, alu_prev, prev); if (r) return r; - for (i = 0; i < 5; ++i) { + for (i = 0; i < max_slots; ++i) { if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) { gpr[i] = prev[i]->dst.sel; /* cube writes more than PV.X */ @@ -800,7 +822,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, gpr[i] = -1; } - for (i = 0; i < 5; ++i) { + for (i = 0; i < max_slots; ++i) { struct r600_bc_alu *alu = slots[i]; if(!alu) continue; @@ -810,11 +832,13 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) continue; - if (alu->src[src].sel == gpr[4] && - alu->src[src].chan == chan[4]) { - alu->src[src].sel = V_SQ_ALU_SRC_PS; - alu->src[src].chan = 0; - continue; + if (bc->chiprev < CHIPREV_CAYMAN) { + if (alu->src[src].sel == gpr[4] && + alu->src[src].chan == chan[4]) { + alu->src[src].sel = V_SQ_ALU_SRC_PS; + alu->src[src].chan = 0; + continue; + } } for (j = 0; j < 4; ++j) { @@ -922,12 +946,13 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], int i, j, r, src, num_src; int num_once_inst = 0; int have_mova = 0, have_rel = 0; + int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; r = assign_alu_units(bc, alu_prev, prev); if (r) return r; - for (i = 0; i < 5; ++i) { + for (i = 0; i < max_slots; ++i) { struct r600_bc_alu *alu; /* check number of literals */ @@ -951,7 +976,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], result[i] = prev[i]; continue; } else if (prev[i] && slots[i]) { - if (result[4] == NULL && prev[4] == NULL && slots[4] == NULL) { + if (max_slots == 5 && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) { /* Trans unit is still free try to use it. */ if (is_alu_any_unit_inst(bc, slots[i])) { result[i] = prev[i]; @@ -991,7 +1016,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], if (!is_gpr(alu->src[src].sel)) continue; - for (j = 0; j < 5; ++j) { + for (j = 0; j < max_slots; ++j) { if (!prev[j] || !prev[j]->dst.write) continue; @@ -1019,7 +1044,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], bc->cf_last->ndw -= align(prev_nliteral, 2); /* sort instructions */ - for (i = 0; i < 5; ++i) { + for (i = 0; i < max_slots; ++i) { slots[i] = result[i]; if (result[i]) { LIST_DEL(&result[i]->list); @@ -1032,7 +1057,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1; /* determine new first instruction */ - for (i = 0; i < 5; ++i) { + for (i = 0; i < max_slots; ++i) { if (result[i]) { bc->cf_last->curr_bs_head = result[i]; break; @@ -1225,6 +1250,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int uint32_t literal[4]; unsigned nliteral; struct r600_bc_alu *slots[5]; + int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots); if (r) return r; @@ -1245,7 +1271,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int if (r) return r; - for (i = 0, nliteral = 0; i < 5; i++) { + for (i = 0, nliteral = 0; i < max_slots; i++) { if (slots[i]) { r = r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral); if (r) @@ -1282,6 +1308,7 @@ static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc) return 16; case CHIPREV_EVERGREEN: + case CHIPREV_CAYMAN: return 64; default: @@ -1290,6 +1317,19 @@ static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc) } } +static inline boolean last_inst_was_vtx_fetch(struct r600_bc *bc) +{ + if (bc->chiprev == CHIPREV_CAYMAN) { + if (bc->cf_last->inst != CM_V_SQ_CF_WORD1_SQ_CF_INST_TC) + return TRUE; + } else { + if (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX && + bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) + return TRUE; + } + return FALSE; +} + int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) { struct r600_bc_vtx *nvtx = r600_bc_vtx(); @@ -1301,15 +1341,17 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || - (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX && - bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) || - bc->force_add_cf) { + last_inst_was_vtx_fetch(bc) || + bc->force_add_cf) { r = r600_bc_add_cf(bc); if (r) { free(nvtx); return r; } - bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX; + if (bc->chiprev == CHIPREV_CAYMAN) + bc->cf_last->inst = CM_V_SQ_CF_WORD1_SQ_CF_INST_TC; + else + bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX; } LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx); /* each fetch use 4 dwords */ @@ -1379,14 +1421,21 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst) return 0; } +int cm_bc_add_cf_end(struct r600_bc *bc) +{ + return r600_bc_add_cfinst(bc, CM_V_SQ_CF_WORD1_SQ_CF_INST_END); +} + /* common to all 3 families */ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) { - bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | + bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | - S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) | - S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); + S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x); + if (bc->chiprev < CHIPREV_CAYMAN) + bc->bytecode[id] |= S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); + id++; bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) | S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) | S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) | @@ -1397,9 +1446,11 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) | S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) | S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); - bc->bytecode[id++] = S_SQ_VTX_WORD2_OFFSET(vtx->offset) | - S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian) | - S_SQ_VTX_WORD2_MEGA_FETCH(1); + bc->bytecode[id] = S_SQ_VTX_WORD2_OFFSET(vtx->offset)| + S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian); + if (bc->chiprev < CHIPREV_CAYMAN) + bc->bytecode[id] |= S_SQ_VTX_WORD2_MEGA_FETCH(1); + id++; bc->bytecode[id++] = 0; return 0; } @@ -1601,6 +1652,7 @@ int r600_bc_build(struct r600_bc *bc) case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: + case CM_V_SQ_CF_WORD1_SQ_CF_INST_END: break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); @@ -1616,7 +1668,7 @@ int r600_bc_build(struct r600_bc *bc) return -ENOMEM; LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { addr = cf->addr; - if (bc->chiprev == CHIPREV_EVERGREEN) + if (bc->chiprev >= CHIPREV_EVERGREEN) r = eg_bc_cf_build(bc, cf); else r = r600_bc_cf_build(bc, cf); @@ -1640,6 +1692,7 @@ int r600_bc_build(struct r600_bc *bc) break; case CHIPREV_R700: case CHIPREV_EVERGREEN: /* eg alu is same encoding as r700 */ + case CHIPREV_CAYMAN: /* eg alu is same encoding as r700 */ r = r700_bc_alu_build(bc, alu, addr); break; default: @@ -1668,6 +1721,14 @@ int r600_bc_build(struct r600_bc *bc) } break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: + if (bc->chiprev == CHIPREV_CAYMAN) { + LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { + r = r600_bc_vtx_build(bc, vtx, addr); + if (r) + return r; + addr += 4; + } + } LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { r = r600_bc_tex_build(bc, tex, addr); if (r) @@ -1688,6 +1749,7 @@ int r600_bc_build(struct r600_bc *bc) case V_SQ_CF_WORD1_SQ_CF_INST_POP: case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: + case CM_V_SQ_CF_WORD1_SQ_CF_INST_END: break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); @@ -1752,6 +1814,9 @@ void r600_bc_dump(struct r600_bc *bc) case 2: chip = 'E'; break; + case 3: + chip = 'C'; + break; case 0: default: chip = '6'; @@ -1818,6 +1883,7 @@ void r600_bc_dump(struct r600_bc *bc) case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: + case CM_V_SQ_CF_WORD1_SQ_CF_INST_END: fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]); fprintf(stderr, "ADDR:%d\n", cf->cf_addr); id++; @@ -1920,7 +1986,10 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); fprintf(stderr, "SRC(GPR:%d ", vtx->src_gpr); fprintf(stderr, "SEL_X:%d) ", vtx->src_sel_x); - fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count); + if (bc->chiprev < CHIPREV_CAYMAN) + fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count); + else + fprintf(stderr, "SEL_Y:%d) ", 0); fprintf(stderr, "DST(GPR:%d ", vtx->dst_gpr); fprintf(stderr, "SEL_X:%d ", vtx->dst_sel_x); fprintf(stderr, "SEL_Y:%d ", vtx->dst_sel_y); diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 26d337fe125..540f45bbd06 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -205,6 +205,8 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg); void r600_bc_dump(struct r600_bc *bc); +int cm_bc_add_cf_end(struct r600_bc *bc); + int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve); /* r700_asm.c */ diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h index a85d0bbf1e1..b19cc2b078e 100644 --- a/src/gallium/drivers/r600/r600_opcodes.h +++ b/src/gallium/drivers/r600/r600_opcodes.h @@ -171,9 +171,12 @@ #define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT 0x00000027 #define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE 0x00000028 +/* cayman doesn't have VTX */ #define EG_V_SQ_CF_WORD1_SQ_CF_INST_NOP 0x00000000 #define EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX 0x00000001 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_TC 0x00000001 #define EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX 0x00000002 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_2 0x00000002 #define EG_V_SQ_CF_WORD1_SQ_CF_INST_GDS 0x00000003 #define EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START 0x00000004 #define EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END 0x00000005 @@ -200,19 +203,37 @@ #define EG_V_SQ_CF_WORD1_SQ_CF_INST_WAIT_ACK 0x0000001a #define EG_V_SQ_CF_WORD1_SQ_CF_INST_TC_ACK 0x0000001b #define EG_V_SQ_CF_WORD1_SQ_CF_INST_VC_ACK 0x0000001c +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_28 0x0000001c #define EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMPTABLE 0x0000001d #define EG_V_SQ_CF_WORD1_SQ_CF_INST_GLOBAL_WAVE_SYNC 0x0000001e #define EG_V_SQ_CF_WORD1_SQ_CF_INST_HALT 0x0000001f +#define EG_V_SQ_CF_WORD1_SQ_CF_INST_HALT 0x0000001f + +/* cayman extras */ +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_END 0x00000020 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_LDS_DEALLOC 0x00000021 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_PUSH_WQM 0x00000022 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_POP_WQM 0x00000023 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_ELSE_WQM 0x00000024 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_JUMP_ANY 0x00000025 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_REACTIVATE 0x00000026 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_REACTIVATE_WQM 0x00000027 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_INTERRUPT 0x00000028 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_INTERRUPT_AND_SLEEP 0x00000029 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_SET_PRIORITY 0x00000030 #define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU 0x00000008 #define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE 0x00000009 #define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER 0x0000000A #define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER 0x0000000B #define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_EXTENDED 0x0000000C -#define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_CONTINUE 0x0000000D -#define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_BREAK 0x0000000E +#define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_CONTINUE 0x0000000D /* different on CAYMAN */ +#define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_BREAK 0x0000000E /* different on CAYMAN */ #define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_ELSE_AFTER 0x0000000F +#define CM_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_REACTIVATE_BEFORE 0x0000000D +#define CM_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_VALID_PIXEL_MODE 0x0000000E + #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD 0x00000000 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL 0x00000001 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE 0x00000002 @@ -299,11 +320,11 @@ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADDC_UINT 0x00000052 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUBB_UINT 0x00000053 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_BARRIER 0x00000054 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_SEQ_BEGIN 0x00000055 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_SEQ_END 0x00000056 +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_SEQ_BEGIN 0x00000055 /* not on CAYMAN */ +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_SEQ_END 0x00000056 /* not on CAYMAN */ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_MODE 0x00000057 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_CF_IDX0 0x00000058 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_CF_IDX1 0x00000059 +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_CF_IDX0 0x00000058 /* not on CAYMAN */ +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_CF_IDX1 0x00000059 /* not on CAYMAN */ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_LDS_SIZE 0x0000005A #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE 0x00000081 @@ -322,8 +343,8 @@ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT 0x00000090 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT 0x00000091 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT 0x00000092 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT 0x00000093 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT 0x00000094 +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT 0x00000093 /* not on CAYMAN */ +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT 0x00000094 /* not on CAYMAN */ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_64 0x00000095 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED_64 0x00000096 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_64 0x00000097 @@ -393,9 +414,10 @@ #define CHIPREV_R600 0 #define CHIPREV_R700 1 #define CHIPREV_EVERGREEN 2 +#define CHIPREV_CAYMAN 3 -#define BC_INST(bc, x) ((bc)->chiprev == CHIPREV_EVERGREEN ? EG_##x : x) +#define BC_INST(bc, x) ((bc)->chiprev >= CHIPREV_EVERGREEN ? EG_##x : x) -#define CTX_INST(x) (ctx->bc->chiprev == CHIPREV_EVERGREEN ? EG_##x : x) +#define CTX_INST(x) (ctx->bc->chiprev >= CHIPREV_EVERGREEN ? EG_##x : x) #endif diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 080180ffea3..402ccb27fe3 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -262,6 +262,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void case CHIP_BARTS: case CHIP_TURKS: case CHIP_CAICOS: + case CHIP_CAYMAN: evergreen_init_state_functions(rctx); if (evergreen_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); @@ -336,6 +337,7 @@ static const char *r600_get_family_name(enum radeon_family family) case CHIP_BARTS: return "AMD BARTS"; case CHIP_TURKS: return "AMD TURKS"; case CHIP_CAICOS: return "AMD CAICOS"; + case CHIP_CAYMAN: return "AMD CAYMAN"; default: return "AMD unknown"; } } diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index a27f4950dd7..39e6d85d7b4 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -36,6 +36,28 @@ #include <errno.h> #include <byteswap.h> +/* CAYMAN notes +Why CAYMAN got loops for lots of instructions is explained here. + +-These 8xx t-slot only ops are implemented in all vector slots. +MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT +These 8xx t-slot only opcodes become vector ops, with all four +slots expecting the arguments on sources a and b. Result is +broadcast to all channels. +MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT +These 8xx t-slot only opcodes become vector ops in the z, y, and +x slots. +EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 +RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 +SQRT_IEEE/_64 +SIN/COS +The w slot may have an independent co-issued operation, or if the +result is required to be in the w slot, the opcode above may be +issued in the w slot as well. +The compiler must issue the source argument to slots z, y, and x +*/ + + int r600_find_vs_semantic_index(struct r600_shader *vs, struct r600_shader *ps, int id) { @@ -185,7 +207,7 @@ struct r600_shader_tgsi_instruction { int (*process)(struct r600_shader_ctx *ctx); }; -static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; +static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); static int tgsi_is_supported(struct r600_shader_ctx *ctx) @@ -296,7 +318,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->input[i].interpolate = d->Declaration.Interpolate; ctx->shader->input[i].centroid = d->Declaration.Centroid; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; - if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) { + if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev >= CHIPREV_EVERGREEN) { /* turn input into interpolate on EG */ if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { if (ctx->shader->input[i].interpolate > 0) { @@ -623,13 +645,13 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh } if (ctx.type == TGSI_PROCESSOR_VERTEX) { ctx.file_offset[TGSI_FILE_INPUT] = 1; - if (ctx.bc->chiprev == CHIPREV_EVERGREEN) { + if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) { r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); } else { r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); } } - if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) { + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev >= CHIPREV_EVERGREEN) { ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); } ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + @@ -683,7 +705,9 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh goto out_err; if ((r = tgsi_split_literal_constant(&ctx))) goto out_err; - if (ctx.bc->chiprev == CHIPREV_EVERGREEN) + if (ctx.bc->chiprev == CHIPREV_CAYMAN) + ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; + else if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; else ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; @@ -804,8 +828,10 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh } /* set export done on last export of each type */ for (i = noutput - 1, output_done = 0; i >= 0; i--) { - if (i == (noutput - 1)) { - output[i].end_of_program = 1; + if (ctx.bc->chiprev < CHIPREV_CAYMAN) { + if (i == (noutput - 1)) { + output[i].end_of_program = 1; + } } if (!(output_done & (1 << output[i].type))) { output_done |= (1 << output[i].type); @@ -818,6 +844,10 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh if (r) goto out_err; } + /* add program end */ + if (ctx.bc->chiprev == CHIPREV_CAYMAN) + cm_bc_add_cf_end(ctx.bc); + free(ctx.literals); tgsi_parse_free(&ctx.parse); return 0; @@ -937,6 +967,31 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx) return tgsi_op2_s(ctx, 1); } +static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + int i, j, r; + struct r600_bc_alu alu; + int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; + + for (i = 0 ; i < last_slot; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = ctx->inst_info->r600_opcode; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + r600_bc_src(&alu.src[j], &ctx->src[j], 0); + } + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; + + if (i == last_slot - 1) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + /* * r600 - trunc to -PI..PI range * r700 - normalize by dividing by 2PI @@ -1017,6 +1072,37 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) return 0; } +static int cayman_trig(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; + int i, r; + + r = tgsi_setup_trig(ctx); + if (r) + return r; + + + for (i = 0; i < last_slot; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = ctx->inst_info->r600_opcode; + alu.dst.chan = i; + + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; + + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + if (i == last_slot - 1) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + static int tgsi_trig(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -1064,7 +1150,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; - int r; + int i, r; /* We'll only need the trig stuff if we are going to write to the * X or Y components of the destination vector. @@ -1077,30 +1163,69 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) /* dst.x = COS */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); - tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0 ; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + + if (i == 0) + alu.dst.write = 1; + else + alu.dst.write = 0; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + if (i == 2) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 0; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } } /* dst.y = SIN */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); - tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0 ; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (i == 1) + alu.dst.write = 1; + else + alu.dst.write = 0; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + if (i == 2) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 0; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } } /* dst.z = 0.0; */ @@ -1220,16 +1345,36 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) { int chan; int sel; + int i; - /* dst.z = log(src.y) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); - r600_bc_src(&alu.src[0], &ctx->src[0], 1); - tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + /* dst.z = log(src.y) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); + r600_bc_src(&alu.src[0], &ctx->src[0], 1); + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (i == 2) { + alu.dst.write = 1; + alu.last = 1; + } else + alu.dst.write = 0; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + /* dst.z = log(src.y) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); + r600_bc_src(&alu.src[0], &ctx->src[0], 1); + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } chan = alu.dst.chan; sel = alu.dst.sel; @@ -1251,16 +1396,35 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; - /* dst.z = exp(tmp.x) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 0; - tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + /* dst.z = exp(tmp.x) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (i == 2) { + alu.dst.write = 1; + alu.last = 1; + } else + alu.dst.write = 0; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + /* dst.z = exp(tmp.x) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } } return 0; } @@ -1336,6 +1500,56 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) return tgsi_helper_tempx_replicate(ctx); } +static int cayman_pow(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + int i, r; + struct r600_bc_alu alu; + int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; + + for (i = 0; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + alu.dst.write = 1; + if (i == 2) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + /* b * LOG2(a) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); + r600_bc_src(&alu.src[0], &ctx->src[1], 0); + alu.src[1].sel = ctx->temp_reg; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + for (i = 0; i < last_slot; i++) { + /* POW(a,b) = EXP2(b * LOG2(a))*/ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.src[0].sel = ctx->temp_reg; + + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; + if (i == last_slot - 1) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + static int tgsi_pow(struct r600_shader_ctx *ctx) { struct r600_bc_alu alu; @@ -1554,24 +1768,46 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { + int out_chan; /* Add perspective divide */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 3); + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + out_chan = 2; + for (i = 0; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 3); - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 3; - alu.last = 1; - alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 2) + alu.last = 1; + if (out_chan == i) + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + } else { + out_chan = 3; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 3); + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = out_chan; + alu.last = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 3; + alu.src[0].chan = out_chan; r600_bc_src(&alu.src[1], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -1616,18 +1852,37 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } /* tmp1.z = RCP_e(|tmp1.z|) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 2; - alu.src[0].abs = 1; - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 2; - alu.dst.write = 1; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 2; + alu.src[0].abs = 1; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 2) + alu.dst.write = 1; + if (i == 2) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 2; + alu.src[0].abs = 1; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 2; + alu.dst.write = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x @@ -1962,6 +2217,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; + int i; /* result.x = 2^floor(src); */ if (inst->Dst[0].Register.WriteMask & 1) { @@ -1978,17 +2234,35 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if (r) return r; - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 0; + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 0; - alu.dst.write = 1; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 0) + alu.dst.write = 1; + if (i == 2) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + alu.dst.write = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } } /* result.y = tmp - floor(tmp); */ @@ -2016,19 +2290,38 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) /* result.z = RoughApprox2ToX(tmp);*/ if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); - alu.dst.sel = ctx->temp_reg; - alu.dst.write = 1; - alu.dst.chan = 2; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 2) { + alu.dst.write = 1; + alu.last = 1; + } - alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + alu.dst.chan = 2; + + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } } /* result.w = 1.0;*/ @@ -2055,21 +2348,42 @@ static int tgsi_log(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; + int i; /* result.x = floor(log2(src)); */ if (inst->Dst[0].Register.WriteMask & 1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 0) + alu.dst.write = 1; + if (i == 2) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 0; - alu.dst.write = 1; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + } else { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + alu.dst.write = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); alu.src[0].sel = ctx->temp_reg; @@ -2087,19 +2401,40 @@ static int tgsi_log(struct r600_shader_ctx *ctx) /* result.y = src.x / (2 ^ floor(log2(src.x))); */ if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 1; - alu.dst.write = 1; - alu.last = 1; + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 1) + alu.dst.write = 1; + if (i == 2) + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 1; + alu.dst.write = 1; + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -2116,35 +2451,73 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 1; + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 1; + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 1) + alu.dst.write = 1; + if (i == 2) + alu.last = 1; - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 1; - alu.dst.write = 1; - alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 1; + alu.dst.write = 1; + alu.last = 1; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 1; + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 1; + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 1) + alu.dst.write = 1; + if (i == 2) + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 1; - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 1; - alu.dst.write = 1; - alu.last = 1; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 1; + alu.dst.write = 1; + alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -2167,19 +2540,39 @@ static int tgsi_log(struct r600_shader_ctx *ctx) /* result.z = log2(src);*/ if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); - alu.dst.sel = ctx->temp_reg; - alu.dst.write = 1; - alu.dst.chan = 2; - alu.last = 1; + alu.dst.sel = ctx->temp_reg; + if (i == 2) + alu.dst.write = 1; + alu.dst.chan = i; + if (i == 2) + alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + alu.dst.chan = 2; + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } } /* result.w = 1.0; */ @@ -2950,3 +3343,161 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, }; + +static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { + {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, + {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, + {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, + {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr}, + {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr}, + {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, + {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, + {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, + {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, + {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, + {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, + {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, + {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, + {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, + {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, + {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, + {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, + {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, + {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, + {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, + {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, + {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr}, + {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr}, + {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow}, + {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, + /* gap */ + {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, + {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, + {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig}, + {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, + {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, + {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ + {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, + {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, + {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig}, + {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, + {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, + {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, + {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, + {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, + {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, + {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, + {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, + {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, + {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, + {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, + {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, + {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, + /* gap */ + {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, + {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, + /* gap */ + {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, + {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, + {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, + {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, + {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ + {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ + /* gap */ + {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, +}; |