summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/r600
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r--src/gallium/drivers/r600/eg_asm.c10
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c268
-rw-r--r--src/gallium/drivers/r600/evergreend.h43
-rw-r--r--src/gallium/drivers/r600/r600.h60
-rw-r--r--src/gallium/drivers/r600/r600_asm.c139
-rw-r--r--src/gallium/drivers/r600/r600_asm.h2
-rw-r--r--src/gallium/drivers/r600/r600_opcodes.h41
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c6
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h21
-rw-r--r--src/gallium/drivers/r600/r600_shader.c837
-rw-r--r--src/gallium/drivers/r600/r600_state.c69
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c206
12 files changed, 1371 insertions, 331 deletions
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 3793b919dde..fb0b0f104bf 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -62,14 +62,17 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
- bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
+ bc->bytecode[id] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+ S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst);
+ if (bc->chiprev == CHIPREV_EVERGREEN) /* no EOP on cayman */
+ bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+ id++;
+
break;
case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
@@ -80,6 +83,7 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+ case CM_V_SQ_CF_WORD1_SQ_CF_INST_END:
bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
S_SQ_CF_WORD1_BARRIER(1) |
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 187f00e1e52..54f5410c324 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -71,15 +71,19 @@ static void evergreen_set_blend_color(struct pipe_context *ctx,
static void *evergreen_create_blend_state(struct pipe_context *ctx,
const struct pipe_blend_state *state)
{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend);
struct r600_pipe_state *rstate;
u32 color_control, target_mask;
/* FIXME there is more then 8 framebuffer */
unsigned blend_cntl[8];
+ enum radeon_family family;
if (blend == NULL) {
return NULL;
}
+
+ family = r600_get_family(rctx->radeon);
rstate = &blend->rstate;
rstate->id = R600_PIPE_STATE_BLEND;
@@ -102,9 +106,16 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx,
}
}
blend->cb_target_mask = target_mask;
+
r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
color_control, 0xFFFFFFFD, NULL);
- r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
+
+ if (family != CHIP_CAYMAN)
+ r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
+ else {
+ r600_pipe_state_add_reg(rstate, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
+ }
for (int i = 0; i < 8; i++) {
/* state->rt entries > 0 only written if independent blending */
@@ -143,6 +154,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx,
static void *evergreen_create_dsa_state(struct pipe_context *ctx,
const struct pipe_depth_stencil_alpha_state *state)
{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_dsa *dsa = CALLOC_STRUCT(r600_pipe_dsa);
unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control;
unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control;
@@ -229,11 +241,15 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
static void *evergreen_create_rs_state(struct pipe_context *ctx,
const struct pipe_rasterizer_state *state)
{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer);
struct r600_pipe_state *rstate;
unsigned tmp;
unsigned prov_vtx = 1, polygon_dual_mode;
unsigned clip_rule;
+ enum radeon_family family;
+
+ family = r600_get_family(rctx->radeon);
if (rs == NULL) {
return NULL;
@@ -290,17 +306,30 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
tmp = (unsigned)state->line_width * 8;
r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL);
+ if (family == CHIP_CAYMAN) {
+ r600_pipe_state_add_reg(rstate, CM_R_028BDC_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, CM_R_028BE4_PA_SU_VTX_CNTL,
+ S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL,
- S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules),
- 0xFFFFFFFF, NULL);
+ } else {
+ r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL);
+
+ r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+
+ r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL,
+ S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules),
+ 0xFFFFFFFF, NULL);
+ }
+ r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL);
return rstate;
}
@@ -318,7 +347,7 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
rstate->id = R600_PIPE_STATE_SAMPLER;
util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
- r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0,
+ r600_pipe_state_add_reg_noblock(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0,
S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) |
S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) |
S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) |
@@ -328,21 +357,21 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) |
S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0,
+ r600_pipe_state_add_reg_noblock(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0,
S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)),
0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0,
- S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
- (state->seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) |
- S_03C008_TYPE(1),
- 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0,
+ S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
+ (state->seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) |
+ S_03C008_TYPE(1),
+ 0xFFFFFFFF, NULL);
if (uc.ui) {
- r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg_noblock(rstate, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL);
}
return rstate;
}
@@ -351,6 +380,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
struct pipe_resource *texture,
const struct pipe_sampler_view *state)
{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view);
struct r600_pipe_state *rstate;
const struct util_format_description *desc;
@@ -832,10 +862,14 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
u32 shader_mask, tl, br, target_mask;
+ enum radeon_family family;
+ int tl_x, tl_y, br_x, br_y;
if (rstate == NULL)
return;
+ family = r600_get_family(rctx->radeon);
+
evergreen_context_flush_dest_caches(&rctx->ctx);
rctx->ctx.num_dest_buffers = state->nr_cbufs;
@@ -860,8 +894,22 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
target_mask ^= 0xf << (i * 4);
shader_mask |= 0xf << (i * 4);
}
- tl = S_028240_TL_X(0) | S_028240_TL_Y(0);
- br = S_028244_BR_X(state->width) | S_028244_BR_Y(state->height);
+ tl_x = 0;
+ tl_y = 0;
+ br_x = state->width;
+ br_y = state->height;
+ /* EG hw workaround */
+ if (br_x == 0)
+ tl_x = 1;
+ if (br_y == 0)
+ tl_y = 1;
+ /* cayman hw workaround */
+ if (family == CHIP_CAYMAN) {
+ if (br_x == 1 && br_y == 1)
+ br_x = 2;
+ }
+ tl = S_028240_TL_X(tl_x) | S_028240_TL_Y(tl_y);
+ br = S_028244_BR_X(br_x) | S_028244_BR_Y(br_y);
r600_pipe_state_add_reg(rstate,
R_028240_PA_SC_GENERIC_SCISSOR_TL, tl,
@@ -898,10 +946,17 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
0x00000000, target_mask, NULL);
r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK,
shader_mask, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX,
- 0x00000000, 0xFFFFFFFF, NULL);
+
+
+ if (family == CHIP_CAYMAN) {
+ r600_pipe_state_add_reg(rstate, CM_R_028BE0_PA_SC_AA_CONFIG,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ } else {
+ r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ }
free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]);
rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate;
@@ -968,6 +1023,85 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx)
rctx->context.texture_barrier = evergreen_texture_barrier;
}
+static void cayman_init_config(struct r600_pipe_context *rctx)
+{
+ struct r600_pipe_state *rstate = &rctx->config;
+ unsigned tmp;
+
+ tmp = 0x00000000;
+ tmp |= S_008C00_EXPORT_SRC_C(1);
+ r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL);
+
+ r600_pipe_state_add_reg(rstate, CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, (4 << 28), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL);
+
+ r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL);
+
+ r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL);
+
+ r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL);
+
+ r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL);
+
+ r600_pipe_state_add_reg(rstate, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210, 0xffffffff, 0);
+ r600_pipe_state_add_reg(rstate, CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98, 0xffffffff, 0);
+
+ r600_pipe_state_add_reg(rstate, CM_R_0288E8_SQ_LDS_ALLOC, 0, 0xffffffff, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288EC_SQ_LDS_ALLOC_PS, 0, 0xffffffff, NULL);
+
+ r600_pipe_state_add_reg(rstate, CM_R_028804_DB_EQAA, 0x110000, 0xffffffff, NULL);
+ r600_context_pipe_state_set(&rctx->ctx, rstate);
+}
+
void evergreen_init_config(struct r600_pipe_context *rctx)
{
struct r600_pipe_state *rstate = &rctx->config;
@@ -999,6 +1133,12 @@ void evergreen_init_config(struct r600_pipe_context *rctx)
unsigned tmp;
family = r600_get_family(rctx->radeon);
+
+ if (family == CHIP_CAYMAN) {
+ cayman_init_config(rctx);
+ return;
+ }
+
ps_prio = 0;
vs_prio = 1;
gs_prio = 2;
@@ -1115,6 +1255,48 @@ void evergreen_init_config(struct r600_pipe_context *rctx)
num_hs_stack_entries = 42;
num_ls_stack_entries = 42;
break;
+ case CHIP_SUMO:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 96;
+ num_vs_threads = 25;
+ num_gs_threads = 25;
+ num_es_threads = 25;
+ num_hs_threads = 25;
+ num_ls_threads = 25;
+ num_ps_stack_entries = 42;
+ num_vs_stack_entries = 42;
+ num_gs_stack_entries = 42;
+ num_es_stack_entries = 42;
+ num_hs_stack_entries = 42;
+ num_ls_stack_entries = 42;
+ break;
+ case CHIP_SUMO2:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 96;
+ num_vs_threads = 25;
+ num_gs_threads = 25;
+ num_es_threads = 25;
+ num_hs_threads = 25;
+ num_ls_threads = 25;
+ num_ps_stack_entries = 85;
+ num_vs_stack_entries = 85;
+ num_gs_stack_entries = 85;
+ num_es_stack_entries = 85;
+ num_hs_stack_entries = 85;
+ num_ls_stack_entries = 85;
+ break;
case CHIP_BARTS:
num_ps_gprs = 93;
num_vs_gprs = 46;
@@ -1184,6 +1366,8 @@ void evergreen_init_config(struct r600_pipe_context *rctx)
switch (family) {
case CHIP_CEDAR:
case CHIP_PALM:
+ case CHIP_SUMO:
+ case CHIP_SUMO2:
case CHIP_CAICOS:
break;
default:
@@ -1374,6 +1558,7 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx)
void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state *rstate = &shader->rstate;
struct r600_shader *rshader = &shader->shader;
unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control;
@@ -1502,6 +1687,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state *rstate = &shader->rstate;
struct r600_shader *rshader = &shader->shader;
unsigned spi_vs_out_id[10];
@@ -1545,8 +1731,10 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
0xFFFFFFFF, NULL);
}
-void evergreen_fetch_shader(struct r600_vertex_element *ve)
+void evergreen_fetch_shader(struct pipe_context *ctx,
+ struct r600_vertex_element *ve)
{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state *rstate = &ve->rstate;
rstate->id = R600_PIPE_STATE_FETCH_SHADER;
rstate->nregs = 0;
@@ -1580,11 +1768,13 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx)
return rstate;
}
-void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
- struct r600_pipe_state *rstate,
- struct r600_resource *rbuffer,
- unsigned offset, unsigned stride)
+void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
+ struct r600_pipe_state *rstate,
+ struct r600_resource *rbuffer,
+ unsigned offset, unsigned stride)
{
+ rstate->id = R600_PIPE_STATE_RESOURCE;
+ rstate->nregs = 0;
r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0,
offset, 0xFFFFFFFF, rbuffer->bo);
r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1,
@@ -1607,3 +1797,17 @@ void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7,
0xC0000000, 0xFFFFFFFF, NULL);
}
+
+
+void evergreen_pipe_mod_buffer_resource(struct r600_pipe_state *rstate,
+ struct r600_resource *rbuffer,
+ unsigned offset, unsigned stride)
+{
+ rstate->nregs = 0;
+ r600_pipe_state_mod_reg_bo(rstate, offset, rbuffer->bo);
+ r600_pipe_state_mod_reg(rstate, rbuffer->bo_size - offset - 1);
+ r600_pipe_state_mod_reg(rstate, S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
+ S_030008_STRIDE(stride));
+ rstate->nregs = 8;
+
+}
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index 3e878106bea..ee0c7c9ed9b 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -33,15 +33,19 @@
#define EVERGREEN_CONTEXT_REG_END 0X00029000
#define EVERGREEN_RESOURCE_OFFSET 0x00030000
#define EVERGREEN_RESOURCE_END 0x00034000
+#define CAYMAN_RESOURCE_END 0x00038000
#define EVERGREEN_LOOP_CONST_OFFSET 0x0003A200
#define EVERGREEN_LOOP_CONST_END 0x0003A26C
#define EVERGREEN_BOOL_CONST_OFFSET 0x0003A500
#define EVERGREEN_BOOL_CONST_END 0x0003A506
+#define CAYMAN_BOOL_CONST_END 0x0003A518
#define EVERGREEN_SAMPLER_OFFSET 0X0003C000
#define EVERGREEN_SAMPLER_END 0X0003CFF0
+#define CAYMAN_SAMPLER_END 0X0003C600
#define EVERGREEN_CTL_CONST_OFFSET 0x0003CFF0
#define EVERGREEN_CTL_CONST_END 0x0003E200
+#define CAYMAN_CTL_CONST_END 0x0003FF0C
#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10
#define EVENT_TYPE_ZPASS_DONE 0x15
@@ -1907,4 +1911,43 @@
#define ENDIAN_8IN32 2
#define ENDIAN_8IN64 3
+#define CM_R_0288E8_SQ_LDS_ALLOC 0x000288E8
+
+#define CM_R_028804_DB_EQAA 0x00028804
+
+#define CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0 0x00028BD4
+#define CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1 0x00028BD8
+#define CM_R_028BDC_PA_SC_LINE_CNTL 0x28bdc
+#define CM_R_028BE0_PA_SC_AA_CONFIG 0x28be0
+#define CM_R_028BE4_PA_SU_VTX_CNTL 0x28be4
+#define CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ 0x28be8
+#define CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ 0x28bec
+#define CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ 0x28bf0
+#define CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ 0x28bf4
+
+#define CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 0x28bf8
+#define CM_R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1 0x28bfc
+#define CM_R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2 0x28c00
+#define CM_R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3 0x28c04
+
+#define CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0 0x28c08
+#define CM_R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1 0x28c0c
+#define CM_R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2 0x28c10
+#define CM_R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3 0x28c14
+
+#define CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0 0x28c18
+#define CM_R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1 0x28c1c
+#define CM_R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2 0x28c20
+#define CM_R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3 0x28c24
+
+#define CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0 0x28c28
+#define CM_R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1 0x28c2c
+#define CM_R_028C30_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2 0x28c30
+#define CM_R_028C34_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3 0x28c34
+
+#define CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0 0x28c38
+#define CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1 0x28c3c
+
+#define CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 0x00008C10
+#define CM_R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 0x00008C14
#endif
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 7b57fc80dc2..23e7181a86e 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -92,9 +92,12 @@ enum radeon_family {
CHIP_CYPRESS,
CHIP_HEMLOCK,
CHIP_PALM,
+ CHIP_SUMO,
+ CHIP_SUMO2,
CHIP_BARTS,
CHIP_TURKS,
CHIP_CAICOS,
+ CHIP_CAYMAN,
CHIP_LAST,
};
@@ -102,6 +105,7 @@ enum chip_class {
R600,
R700,
EVERGREEN,
+ CAYMAN,
};
struct r600_tiling_info {
@@ -141,11 +145,23 @@ static INLINE unsigned r600_bo_offset(struct r600_bo *bo)
#define R600_BLOCK_MAX_BO 32
#define R600_BLOCK_MAX_REG 128
+/* each range covers 9 bits of dword space = 512 dwords = 2k bytes */
+/* there is a block entry for each register so 512 blocks */
+/* we have no registers to read/write below 0x8000 (0x2000 in dw space) */
+/* we use some fake offsets at 0x40000 to do evergreen sampler borders so take 0x42000 as a max bound*/
+#define RANGE_OFFSET_START 0x8000
+#define HASH_SHIFT 9
+#define NUM_RANGES (0x42000 - RANGE_OFFSET_START) / (4 << HASH_SHIFT) /* 128 << 9 = 64k */
+
+#define CTX_RANGE_ID(offset) ((((offset - RANGE_OFFSET_START) >> 2) >> HASH_SHIFT) & 255)
+#define CTX_BLOCK_ID(offset) (((offset - RANGE_OFFSET_START) >> 2) & ((1 << HASH_SHIFT) - 1))
+
struct r600_pipe_reg {
- u32 offset;
- u32 mask;
u32 value;
- struct r600_bo *bo;
+ u32 mask;
+ struct r600_block *block;
+ struct r600_bo *bo;
+ u32 id;
};
struct r600_pipe_state {
@@ -154,18 +170,6 @@ struct r600_pipe_state {
struct r600_pipe_reg regs[R600_BLOCK_MAX_REG];
};
-static inline void r600_pipe_state_add_reg(struct r600_pipe_state *state,
- u32 offset, u32 value, u32 mask,
- struct r600_bo *bo)
-{
- state->regs[state->nregs].offset = offset;
- state->regs[state->nregs].value = value;
- state->regs[state->nregs].mask = mask;
- state->regs[state->nregs].bo = bo;
- state->nregs++;
- assert(state->nregs < R600_BLOCK_MAX_REG);
-}
-
#define R600_BLOCK_STATUS_ENABLED (1 << 0)
#define R600_BLOCK_STATUS_DIRTY (1 << 1)
@@ -307,4 +311,30 @@ void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struc
struct radeon *radeon_decref(struct radeon *radeon);
+void _r600_pipe_state_add_reg(struct r600_context *ctx,
+ struct r600_pipe_state *state,
+ u32 offset, u32 value, u32 mask,
+ u32 range_id, u32 block_id,
+ struct r600_bo *bo);
+
+void r600_pipe_state_add_reg_noblock(struct r600_pipe_state *state,
+ u32 offset, u32 value, u32 mask,
+ struct r600_bo *bo);
+#define r600_pipe_state_add_reg(state, offset, value, mask, bo) _r600_pipe_state_add_reg(&rctx->ctx, state, offset, value, mask, CTX_RANGE_ID(offset), CTX_BLOCK_ID(offset), bo)
+
+static inline void r600_pipe_state_mod_reg(struct r600_pipe_state *state,
+ u32 value)
+{
+ state->regs[state->nregs].value = value;
+ state->nregs++;
+}
+
+static inline void r600_pipe_state_mod_reg_bo(struct r600_pipe_state *state,
+ u32 value, struct r600_bo *bo)
+{
+ state->regs[state->nregs].value = value;
+ state->regs[state->nregs].bo = bo;
+ state->nregs++;
+}
+
#endif
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 033e84665f5..65e539eba35 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -94,6 +94,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r
}
break;
case CHIPREV_EVERGREEN:
+ case CHIPREV_CAYMAN:
switch (alu->inst) {
case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
return 0;
@@ -221,11 +222,16 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family)
case CHIP_CYPRESS:
case CHIP_HEMLOCK:
case CHIP_PALM:
+ case CHIP_SUMO:
+ case CHIP_SUMO2:
case CHIP_BARTS:
case CHIP_TURKS:
case CHIP_CAICOS:
bc->chiprev = CHIPREV_EVERGREEN;
break;
+ case CHIP_CAYMAN:
+ bc->chiprev = CHIPREV_CAYMAN;
+ break;
default:
R600_ERR("unknown family %d\n", bc->family);
return -EINVAL;
@@ -334,6 +340,7 @@ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT ||
alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT);
case CHIPREV_EVERGREEN:
+ case CHIPREV_CAYMAN:
default:
return !alu->is_op3 && (
alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
@@ -384,6 +391,7 @@ static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE ||
alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4);
case CHIPREV_EVERGREEN:
+ case CHIPREV_CAYMAN:
default:
return !alu->is_op3 && (
alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE ||
@@ -401,6 +409,7 @@ static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
return !alu->is_op3 &&
alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
case CHIPREV_EVERGREEN:
+ case CHIPREV_CAYMAN:
default:
return !alu->is_op3 &&
alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
@@ -417,6 +426,7 @@ static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR ||
alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
case CHIPREV_EVERGREEN:
+ case CHIPREV_CAYMAN:
default:
return !alu->is_op3 && (
alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
@@ -469,6 +479,7 @@ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 ||
alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4;
case CHIPREV_EVERGREEN:
+ case CHIPREV_CAYMAN:
default:
if (!alu->is_op3)
/* Note that FLT_TO_INT_* instructions are vector-only instructions
@@ -514,13 +525,16 @@ static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first,
{
struct r600_bc_alu *alu;
unsigned i, chan, trans;
+ int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5;
- for (i = 0; i < 5; i++)
+ for (i = 0; i < max_slots; i++)
assignment[i] = NULL;
for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) {
chan = alu->dst.chan;
- if (is_alu_trans_unit_inst(bc, alu))
+ if (max_slots == 4)
+ trans = 0;
+ else if (is_alu_trans_unit_inst(bc, alu))
trans = 1;
else if (is_alu_vec_unit_inst(bc, alu))
trans = 0;
@@ -719,8 +733,10 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc,
struct alu_bank_swizzle bs;
int bank_swizzle[5];
int i, r = 0, forced = 0;
- boolean scalar_only = true;
- for (i = 0; i < 5; i++) {
+ boolean scalar_only = bc->chiprev == CHIPREV_CAYMAN ? false : true;
+ int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5;
+
+ for (i = 0; i < max_slots; i++) {
if (slots[i] && slots[i]->bank_swizzle_force) {
slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
forced = 1;
@@ -737,6 +753,13 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc,
bank_swizzle[i] = SQ_ALU_VEC_012;
bank_swizzle[4] = SQ_ALU_SCL_210;
while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
+
+ if (max_slots == 4) {
+ for (i = 0; i < max_slots; i++) {
+ if (bank_swizzle[i] == SQ_ALU_VEC_210)
+ return -1;
+ }
+ }
init_bank_swizzle(&bs);
if (scalar_only == false) {
for (i = 0; i < 4; i++) {
@@ -749,11 +772,11 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc,
} else
r = 0;
- if (!r && slots[4]) {
+ if (!r && slots[4] && max_slots == 5) {
r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]);
}
if (!r) {
- for (i = 0; i < 5; i++) {
+ for (i = 0; i < max_slots; i++) {
if (slots[i])
slots[i]->bank_swizzle = bank_swizzle[i];
}
@@ -763,7 +786,7 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc,
if (scalar_only) {
bank_swizzle[4]++;
} else {
- for (i = 0; i < 5; i++) {
+ for (i = 0; i < max_slots; i++) {
bank_swizzle[i]++;
if (bank_swizzle[i] <= SQ_ALU_VEC_210)
break;
@@ -783,12 +806,13 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc,
struct r600_bc_alu *prev[5];
int gpr[5], chan[5];
int i, j, r, src, num_src;
+ int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5;
r = assign_alu_units(bc, alu_prev, prev);
if (r)
return r;
- for (i = 0; i < 5; ++i) {
+ for (i = 0; i < max_slots; ++i) {
if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) {
gpr[i] = prev[i]->dst.sel;
/* cube writes more than PV.X */
@@ -800,7 +824,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc,
gpr[i] = -1;
}
- for (i = 0; i < 5; ++i) {
+ for (i = 0; i < max_slots; ++i) {
struct r600_bc_alu *alu = slots[i];
if(!alu)
continue;
@@ -810,11 +834,13 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc,
if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
continue;
- if (alu->src[src].sel == gpr[4] &&
- alu->src[src].chan == chan[4]) {
- alu->src[src].sel = V_SQ_ALU_SRC_PS;
- alu->src[src].chan = 0;
- continue;
+ if (bc->chiprev < CHIPREV_CAYMAN) {
+ if (alu->src[src].sel == gpr[4] &&
+ alu->src[src].chan == chan[4]) {
+ alu->src[src].sel = V_SQ_ALU_SRC_PS;
+ alu->src[src].chan = 0;
+ continue;
+ }
}
for (j = 0; j < 4; ++j) {
@@ -922,12 +948,13 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
int i, j, r, src, num_src;
int num_once_inst = 0;
int have_mova = 0, have_rel = 0;
+ int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5;
r = assign_alu_units(bc, alu_prev, prev);
if (r)
return r;
- for (i = 0; i < 5; ++i) {
+ for (i = 0; i < max_slots; ++i) {
struct r600_bc_alu *alu;
/* check number of literals */
@@ -951,7 +978,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
result[i] = prev[i];
continue;
} else if (prev[i] && slots[i]) {
- if (result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
+ if (max_slots == 5 && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
/* Trans unit is still free try to use it. */
if (is_alu_any_unit_inst(bc, slots[i])) {
result[i] = prev[i];
@@ -991,7 +1018,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
if (!is_gpr(alu->src[src].sel))
continue;
- for (j = 0; j < 5; ++j) {
+ for (j = 0; j < max_slots; ++j) {
if (!prev[j] || !prev[j]->dst.write)
continue;
@@ -1019,7 +1046,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
bc->cf_last->ndw -= align(prev_nliteral, 2);
/* sort instructions */
- for (i = 0; i < 5; ++i) {
+ for (i = 0; i < max_slots; ++i) {
slots[i] = result[i];
if (result[i]) {
LIST_DEL(&result[i]->list);
@@ -1032,7 +1059,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1;
/* determine new first instruction */
- for (i = 0; i < 5; ++i) {
+ for (i = 0; i < max_slots; ++i) {
if (result[i]) {
bc->cf_last->curr_bs_head = result[i];
break;
@@ -1225,6 +1252,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
uint32_t literal[4];
unsigned nliteral;
struct r600_bc_alu *slots[5];
+ int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5;
r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots);
if (r)
return r;
@@ -1245,7 +1273,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
if (r)
return r;
- for (i = 0, nliteral = 0; i < 5; i++) {
+ for (i = 0, nliteral = 0; i < max_slots; i++) {
if (slots[i]) {
r = r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral);
if (r)
@@ -1282,6 +1310,7 @@ static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc)
return 16;
case CHIPREV_EVERGREEN:
+ case CHIPREV_CAYMAN:
return 64;
default:
@@ -1290,6 +1319,19 @@ static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc)
}
}
+static inline boolean last_inst_was_vtx_fetch(struct r600_bc *bc)
+{
+ if (bc->chiprev == CHIPREV_CAYMAN) {
+ if (bc->cf_last->inst != CM_V_SQ_CF_WORD1_SQ_CF_INST_TC)
+ return TRUE;
+ } else {
+ if (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX &&
+ bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC)
+ return TRUE;
+ }
+ return FALSE;
+}
+
int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
{
struct r600_bc_vtx *nvtx = r600_bc_vtx();
@@ -1301,15 +1343,17 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
/* cf can contains only alu or only vtx or only tex */
if (bc->cf_last == NULL ||
- (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX &&
- bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) ||
- bc->force_add_cf) {
+ last_inst_was_vtx_fetch(bc) ||
+ bc->force_add_cf) {
r = r600_bc_add_cf(bc);
if (r) {
free(nvtx);
return r;
}
- bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX;
+ if (bc->chiprev == CHIPREV_CAYMAN)
+ bc->cf_last->inst = CM_V_SQ_CF_WORD1_SQ_CF_INST_TC;
+ else
+ bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX;
}
LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx);
/* each fetch use 4 dwords */
@@ -1379,14 +1423,21 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst)
return 0;
}
+int cm_bc_add_cf_end(struct r600_bc *bc)
+{
+ return r600_bc_add_cfinst(bc, CM_V_SQ_CF_WORD1_SQ_CF_INST_END);
+}
+
/* common to all 3 families */
static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id)
{
- bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
+ bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
- S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) |
- S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count);
+ S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x);
+ if (bc->chiprev < CHIPREV_CAYMAN)
+ bc->bytecode[id] |= S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count);
+ id++;
bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) |
S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) |
S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) |
@@ -1397,9 +1448,11 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign
S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) |
S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) |
S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr);
- bc->bytecode[id++] = S_SQ_VTX_WORD2_OFFSET(vtx->offset) |
- S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian) |
- S_SQ_VTX_WORD2_MEGA_FETCH(1);
+ bc->bytecode[id] = S_SQ_VTX_WORD2_OFFSET(vtx->offset)|
+ S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian);
+ if (bc->chiprev < CHIPREV_CAYMAN)
+ bc->bytecode[id] |= S_SQ_VTX_WORD2_MEGA_FETCH(1);
+ id++;
bc->bytecode[id++] = 0;
return 0;
}
@@ -1601,6 +1654,7 @@ int r600_bc_build(struct r600_bc *bc)
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+ case CM_V_SQ_CF_WORD1_SQ_CF_INST_END:
break;
default:
R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
@@ -1616,7 +1670,7 @@ int r600_bc_build(struct r600_bc *bc)
return -ENOMEM;
LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
addr = cf->addr;
- if (bc->chiprev == CHIPREV_EVERGREEN)
+ if (bc->chiprev >= CHIPREV_EVERGREEN)
r = eg_bc_cf_build(bc, cf);
else
r = r600_bc_cf_build(bc, cf);
@@ -1640,6 +1694,7 @@ int r600_bc_build(struct r600_bc *bc)
break;
case CHIPREV_R700:
case CHIPREV_EVERGREEN: /* eg alu is same encoding as r700 */
+ case CHIPREV_CAYMAN: /* eg alu is same encoding as r700 */
r = r700_bc_alu_build(bc, alu, addr);
break;
default:
@@ -1668,6 +1723,14 @@ int r600_bc_build(struct r600_bc *bc)
}
break;
case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+ if (bc->chiprev == CHIPREV_CAYMAN) {
+ LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+ r = r600_bc_vtx_build(bc, vtx, addr);
+ if (r)
+ return r;
+ addr += 4;
+ }
+ }
LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
r = r600_bc_tex_build(bc, tex, addr);
if (r)
@@ -1688,6 +1751,7 @@ int r600_bc_build(struct r600_bc *bc)
case V_SQ_CF_WORD1_SQ_CF_INST_POP:
case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+ case CM_V_SQ_CF_WORD1_SQ_CF_INST_END:
break;
default:
R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
@@ -1752,6 +1816,9 @@ void r600_bc_dump(struct r600_bc *bc)
case 2:
chip = 'E';
break;
+ case 3:
+ chip = 'C';
+ break;
case 0:
default:
chip = '6';
@@ -1818,6 +1885,7 @@ void r600_bc_dump(struct r600_bc *bc)
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+ case CM_V_SQ_CF_WORD1_SQ_CF_INST_END:
fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
fprintf(stderr, "ADDR:%d\n", cf->cf_addr);
id++;
@@ -1920,7 +1988,10 @@ void r600_bc_dump(struct r600_bc *bc)
fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
fprintf(stderr, "SRC(GPR:%d ", vtx->src_gpr);
fprintf(stderr, "SEL_X:%d) ", vtx->src_sel_x);
- fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count);
+ if (bc->chiprev < CHIPREV_CAYMAN)
+ fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count);
+ else
+ fprintf(stderr, "SEL_Y:%d) ", 0);
fprintf(stderr, "DST(GPR:%d ", vtx->dst_gpr);
fprintf(stderr, "SEL_X:%d ", vtx->dst_sel_x);
fprintf(stderr, "SEL_Y:%d ", vtx->dst_sel_y);
@@ -2212,9 +2283,9 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
r600_bc_clear(&bc);
if (rctx->family >= CHIP_CEDAR)
- evergreen_fetch_shader(ve);
+ evergreen_fetch_shader(&rctx->context, ve);
else
- r600_fetch_shader(ve);
+ r600_fetch_shader(&rctx->context, ve);
return 0;
}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 26d337fe125..540f45bbd06 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -205,6 +205,8 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg);
void r600_bc_dump(struct r600_bc *bc);
+int cm_bc_add_cf_end(struct r600_bc *bc);
+
int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve);
/* r700_asm.c */
diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h
index a85d0bbf1e1..184f32c9960 100644
--- a/src/gallium/drivers/r600/r600_opcodes.h
+++ b/src/gallium/drivers/r600/r600_opcodes.h
@@ -171,9 +171,12 @@
#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT 0x00000027
#define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE 0x00000028
+/* cayman doesn't have VTX */
#define EG_V_SQ_CF_WORD1_SQ_CF_INST_NOP 0x00000000
#define EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX 0x00000001
+#define CM_V_SQ_CF_WORD1_SQ_CF_INST_TC 0x00000001
#define EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX 0x00000002
+#define CM_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_2 0x00000002
#define EG_V_SQ_CF_WORD1_SQ_CF_INST_GDS 0x00000003
#define EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START 0x00000004
#define EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END 0x00000005
@@ -200,19 +203,36 @@
#define EG_V_SQ_CF_WORD1_SQ_CF_INST_WAIT_ACK 0x0000001a
#define EG_V_SQ_CF_WORD1_SQ_CF_INST_TC_ACK 0x0000001b
#define EG_V_SQ_CF_WORD1_SQ_CF_INST_VC_ACK 0x0000001c
+#define CM_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_28 0x0000001c
#define EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMPTABLE 0x0000001d
#define EG_V_SQ_CF_WORD1_SQ_CF_INST_GLOBAL_WAVE_SYNC 0x0000001e
#define EG_V_SQ_CF_WORD1_SQ_CF_INST_HALT 0x0000001f
+/* cayman extras */
+#define CM_V_SQ_CF_WORD1_SQ_CF_INST_END 0x00000020
+#define CM_V_SQ_CF_WORD1_SQ_CF_INST_LDS_DEALLOC 0x00000021
+#define CM_V_SQ_CF_WORD1_SQ_CF_INST_PUSH_WQM 0x00000022
+#define CM_V_SQ_CF_WORD1_SQ_CF_INST_POP_WQM 0x00000023
+#define CM_V_SQ_CF_WORD1_SQ_CF_INST_ELSE_WQM 0x00000024
+#define CM_V_SQ_CF_WORD1_SQ_CF_INST_JUMP_ANY 0x00000025
+#define CM_V_SQ_CF_WORD1_SQ_CF_INST_REACTIVATE 0x00000026
+#define CM_V_SQ_CF_WORD1_SQ_CF_INST_REACTIVATE_WQM 0x00000027
+#define CM_V_SQ_CF_WORD1_SQ_CF_INST_INTERRUPT 0x00000028
+#define CM_V_SQ_CF_WORD1_SQ_CF_INST_INTERRUPT_AND_SLEEP 0x00000029
+#define CM_V_SQ_CF_WORD1_SQ_CF_INST_SET_PRIORITY 0x00000030
+
#define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU 0x00000008
#define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE 0x00000009
#define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER 0x0000000A
#define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER 0x0000000B
#define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_EXTENDED 0x0000000C
-#define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_CONTINUE 0x0000000D
-#define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_BREAK 0x0000000E
+#define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_CONTINUE 0x0000000D /* different on CAYMAN */
+#define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_BREAK 0x0000000E /* different on CAYMAN */
#define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_ELSE_AFTER 0x0000000F
+#define CM_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_REACTIVATE_BEFORE 0x0000000D
+#define CM_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_VALID_PIXEL_MODE 0x0000000E
+
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD 0x00000000
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL 0x00000001
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE 0x00000002
@@ -299,11 +319,11 @@
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADDC_UINT 0x00000052
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUBB_UINT 0x00000053
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_BARRIER 0x00000054
-#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_SEQ_BEGIN 0x00000055
-#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_SEQ_END 0x00000056
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_SEQ_BEGIN 0x00000055 /* not on CAYMAN */
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_SEQ_END 0x00000056 /* not on CAYMAN */
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_MODE 0x00000057
-#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_CF_IDX0 0x00000058
-#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_CF_IDX1 0x00000059
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_CF_IDX0 0x00000058 /* not on CAYMAN */
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_CF_IDX1 0x00000059 /* not on CAYMAN */
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_LDS_SIZE 0x0000005A
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE 0x00000081
@@ -322,8 +342,8 @@
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT 0x00000090
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT 0x00000091
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT 0x00000092
-#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT 0x00000093
-#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT 0x00000094
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT 0x00000093 /* not on CAYMAN */
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT 0x00000094 /* not on CAYMAN */
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_64 0x00000095
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED_64 0x00000096
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_64 0x00000097
@@ -393,9 +413,10 @@
#define CHIPREV_R600 0
#define CHIPREV_R700 1
#define CHIPREV_EVERGREEN 2
+#define CHIPREV_CAYMAN 3
-#define BC_INST(bc, x) ((bc)->chiprev == CHIPREV_EVERGREEN ? EG_##x : x)
+#define BC_INST(bc, x) ((bc)->chiprev >= CHIPREV_EVERGREEN ? EG_##x : x)
-#define CTX_INST(x) (ctx->bc->chiprev == CHIPREV_EVERGREEN ? EG_##x : x)
+#define CTX_INST(x) (ctx->bc->chiprev >= CHIPREV_EVERGREEN ? EG_##x : x)
#endif
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index ec13e48e14e..70e3619de4b 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -260,9 +260,12 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
case CHIP_CYPRESS:
case CHIP_HEMLOCK:
case CHIP_PALM:
+ case CHIP_SUMO:
+ case CHIP_SUMO2:
case CHIP_BARTS:
case CHIP_TURKS:
case CHIP_CAICOS:
+ case CHIP_CAYMAN:
evergreen_init_state_functions(rctx);
if (evergreen_context_init(&rctx->ctx, rctx->radeon)) {
r600_destroy_context(&rctx->context);
@@ -334,9 +337,12 @@ static const char *r600_get_family_name(enum radeon_family family)
case CHIP_CYPRESS: return "AMD CYPRESS";
case CHIP_HEMLOCK: return "AMD HEMLOCK";
case CHIP_PALM: return "AMD PALM";
+ case CHIP_SUMO: return "AMD SUMO";
+ case CHIP_SUMO2: return "AMD SUMO2";
case CHIP_BARTS: return "AMD BARTS";
case CHIP_TURKS: return "AMD TURKS";
case CHIP_CAICOS: return "AMD CAICOS";
+ case CHIP_CAYMAN: return "AMD CAYMAN";
default: return "AMD unknown";
}
}
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 0e4cfeb5b80..8002d943abd 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -65,6 +65,7 @@ enum r600_pipe_state_id {
R600_PIPE_STATE_RESOURCE,
R600_PIPE_STATE_POLYGON_OFFSET,
R600_PIPE_STATE_FETCH_SHADER,
+ R600_PIPE_STATE_SPI,
R600_PIPE_NSTATES
};
@@ -188,6 +189,8 @@ struct r600_pipe_context {
struct r600_pipe_state ps_const_buffer;
struct r600_pipe_state ps_const_buffer_resource[R600_MAX_CONST_BUFFERS];
struct r600_pipe_rasterizer *rasterizer;
+ struct r600_pipe_state vgt;
+ struct r600_pipe_state spi;
/* shader information */
unsigned sprite_coord_enable;
bool flatshade;
@@ -217,11 +220,14 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx);
void evergreen_init_config(struct r600_pipe_context *rctx);
void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader);
-void evergreen_fetch_shader(struct r600_vertex_element *ve);
+void evergreen_fetch_shader(struct pipe_context *ctx, struct r600_vertex_element *ve);
void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx);
void evergreen_polygon_offset_update(struct r600_pipe_context *rctx);
-void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
- struct r600_pipe_state *rstate,
+void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
+ struct r600_pipe_state *rstate,
+ struct r600_resource *rbuffer,
+ unsigned offset, unsigned stride);
+void evergreen_pipe_mod_buffer_resource(struct r600_pipe_state *rstate,
struct r600_resource *rbuffer,
unsigned offset, unsigned stride);
@@ -258,11 +264,14 @@ void r600_init_state_functions(struct r600_pipe_context *rctx);
void r600_init_config(struct r600_pipe_context *rctx);
void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader);
-void r600_fetch_shader(struct r600_vertex_element *ve);
+void r600_fetch_shader(struct pipe_context *ctx, struct r600_vertex_element *ve);
void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx);
void r600_polygon_offset_update(struct r600_pipe_context *rctx);
-void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
- struct r600_pipe_state *rstate,
+void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
+ struct r600_pipe_state *rstate,
+ struct r600_resource *rbuffer,
+ unsigned offset, unsigned stride);
+void r600_pipe_mod_buffer_resource(struct r600_pipe_state *rstate,
struct r600_resource *rbuffer,
unsigned offset, unsigned stride);
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 845d41ace02..39e6d85d7b4 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -36,11 +36,37 @@
#include <errno.h>
#include <byteswap.h>
+/* CAYMAN notes
+Why CAYMAN got loops for lots of instructions is explained here.
+
+-These 8xx t-slot only ops are implemented in all vector slots.
+MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
+These 8xx t-slot only opcodes become vector ops, with all four
+slots expecting the arguments on sources a and b. Result is
+broadcast to all channels.
+MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
+These 8xx t-slot only opcodes become vector ops in the z, y, and
+x slots.
+EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
+RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
+SQRT_IEEE/_64
+SIN/COS
+The w slot may have an independent co-issued operation, or if the
+result is required to be in the w slot, the opcode above may be
+issued in the w slot as well.
+The compiler must issue the source argument to slots z, y, and x
+*/
+
+
int r600_find_vs_semantic_index(struct r600_shader *vs,
struct r600_shader *ps, int id)
{
struct r600_shader_io *input = &ps->input[id];
+ /* position/face doesn't get/need a semantic index */
+ if (input->name == TGSI_SEMANTIC_POSITION || input->name == TGSI_SEMANTIC_FACE)
+ return 0;
+
for (int i = 0; i < vs->noutput; i++) {
if (input->name == vs->output[i].name &&
input->sid == vs->output[i].sid) {
@@ -181,7 +207,7 @@ struct r600_shader_tgsi_instruction {
int (*process)(struct r600_shader_ctx *ctx);
};
-static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
+static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
static int tgsi_is_supported(struct r600_shader_ctx *ctx)
@@ -292,7 +318,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
ctx->shader->input[i].centroid = d->Declaration.Centroid;
ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
- if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
+ if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev >= CHIPREV_EVERGREEN) {
/* turn input into interpolate on EG */
if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
if (ctx->shader->input[i].interpolate > 0) {
@@ -619,13 +645,13 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
}
if (ctx.type == TGSI_PROCESSOR_VERTEX) {
ctx.file_offset[TGSI_FILE_INPUT] = 1;
- if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
+ if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) {
r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
} else {
r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
}
}
- if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
+ if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev >= CHIPREV_EVERGREEN) {
ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
}
ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
@@ -679,7 +705,9 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
goto out_err;
if ((r = tgsi_split_literal_constant(&ctx)))
goto out_err;
- if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
+ if (ctx.bc->chiprev == CHIPREV_CAYMAN)
+ ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
+ else if (ctx.bc->chiprev >= CHIPREV_EVERGREEN)
ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
else
ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
@@ -800,8 +828,10 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
}
/* set export done on last export of each type */
for (i = noutput - 1, output_done = 0; i >= 0; i--) {
- if (i == (noutput - 1)) {
- output[i].end_of_program = 1;
+ if (ctx.bc->chiprev < CHIPREV_CAYMAN) {
+ if (i == (noutput - 1)) {
+ output[i].end_of_program = 1;
+ }
}
if (!(output_done & (1 << output[i].type))) {
output_done |= (1 << output[i].type);
@@ -814,6 +844,10 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
if (r)
goto out_err;
}
+ /* add program end */
+ if (ctx.bc->chiprev == CHIPREV_CAYMAN)
+ cm_bc_add_cf_end(ctx.bc);
+
free(ctx.literals);
tgsi_parse_free(&ctx.parse);
return 0;
@@ -933,6 +967,31 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
return tgsi_op2_s(ctx, 1);
}
+static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ int i, j, r;
+ struct r600_bc_alu alu;
+ int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
+
+ for (i = 0 ; i < last_slot; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = ctx->inst_info->r600_opcode;
+ for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+ r600_bc_src(&alu.src[j], &ctx->src[j], 0);
+ }
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
+
+ if (i == last_slot - 1)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
/*
* r600 - trunc to -PI..PI range
* r700 - normalize by dividing by 2PI
@@ -1013,6 +1072,37 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
return 0;
}
+static int cayman_trig(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
+ int i, r;
+
+ r = tgsi_setup_trig(ctx);
+ if (r)
+ return r;
+
+
+ for (i = 0; i < last_slot; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = ctx->inst_info->r600_opcode;
+ alu.dst.chan = i;
+
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
+
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ if (i == last_slot - 1)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
static int tgsi_trig(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -1060,7 +1150,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
- int r;
+ int i, r;
/* We'll only need the trig stuff if we are going to write to the
* X or Y components of the destination vector.
@@ -1073,30 +1163,69 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
/* dst.x = COS */
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
- tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
+ if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+ for (i = 0 ; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+
+ if (i == 0)
+ alu.dst.write = 1;
+ else
+ alu.dst.write = 0;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ if (i == 2)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
+ tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 0;
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
}
/* dst.y = SIN */
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
- tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
+ if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+ for (i = 0 ; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (i == 1)
+ alu.dst.write = 1;
+ else
+ alu.dst.write = 0;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ if (i == 2)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
+ tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 0;
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
}
/* dst.z = 0.0; */
@@ -1216,16 +1345,36 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
{
int chan;
int sel;
+ int i;
- /* dst.z = log(src.y) */
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
- r600_bc_src(&alu.src[0], &ctx->src[0], 1);
- tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+ for (i = 0; i < 3; i++) {
+ /* dst.z = log(src.y) */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 1);
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (i == 2) {
+ alu.dst.write = 1;
+ alu.last = 1;
+ } else
+ alu.dst.write = 0;
+
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ /* dst.z = log(src.y) */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 1);
+ tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
chan = alu.dst.chan;
sel = alu.dst.sel;
@@ -1247,16 +1396,35 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
if (r)
return r;
- /* dst.z = exp(tmp.x) */
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 0;
- tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+ for (i = 0; i < 3; i++) {
+ /* dst.z = exp(tmp.x) */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (i == 2) {
+ alu.dst.write = 1;
+ alu.last = 1;
+ } else
+ alu.dst.write = 0;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ /* dst.z = exp(tmp.x) */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
}
return 0;
}
@@ -1332,6 +1500,56 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
return tgsi_helper_tempx_replicate(ctx);
}
+static int cayman_pow(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ int i, r;
+ struct r600_bc_alu alu;
+ int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
+
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ if (i == 2)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ /* b * LOG2(a) */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
+ r600_bc_src(&alu.src[0], &ctx->src[1], 0);
+ alu.src[1].sel = ctx->temp_reg;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ for (i = 0; i < last_slot; i++) {
+ /* POW(a,b) = EXP2(b * LOG2(a))*/
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
+ alu.src[0].sel = ctx->temp_reg;
+
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
+ if (i == last_slot - 1)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
static int tgsi_pow(struct r600_shader_ctx *ctx)
{
struct r600_bc_alu alu;
@@ -1550,24 +1768,46 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
+ int out_chan;
/* Add perspective divide */
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
- r600_bc_src(&alu.src[0], &ctx->src[0], 3);
+ if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+ out_chan = 2;
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 3);
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = 3;
- alu.last = 1;
- alu.dst.write = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ if (i == 2)
+ alu.last = 1;
+ if (out_chan == i)
+ alu.dst.write = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ } else {
+ out_chan = 3;
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 3);
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = out_chan;
+ alu.last = 1;
+ alu.dst.write = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
for (i = 0; i < 3; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 3;
+ alu.src[0].chan = out_chan;
r600_bc_src(&alu.src[1], &ctx->src[0], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
@@ -1612,18 +1852,37 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
}
/* tmp1.z = RCP_e(|tmp1.z|) */
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 2;
- alu.src[0].abs = 1;
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = 2;
- alu.dst.write = 1;
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 2;
+ alu.src[0].abs = 1;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ if (i == 2)
+ alu.dst.write = 1;
+ if (i == 2)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 2;
+ alu.src[0].abs = 1;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 2;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
/* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
* MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
@@ -1958,6 +2217,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
+ int i;
/* result.x = 2^floor(src); */
if (inst->Dst[0].Register.WriteMask & 1) {
@@ -1974,17 +2234,35 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
if (r)
return r;
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 0;
+ if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+ for (i = 0; i < 3; i++) {
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = 0;
- alu.dst.write = 1;
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ if (i == 0)
+ alu.dst.write = 1;
+ if (i == 2)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
}
/* result.y = tmp - floor(tmp); */
@@ -2012,19 +2290,38 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
/* result.z = RoughApprox2ToX(tmp);*/
if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
- r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+ if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
- alu.dst.sel = ctx->temp_reg;
- alu.dst.write = 1;
- alu.dst.chan = 2;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ if (i == 2) {
+ alu.dst.write = 1;
+ alu.last = 1;
+ }
- alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+ alu.dst.chan = 2;
+
+ alu.last = 1;
+
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
}
/* result.w = 1.0;*/
@@ -2051,21 +2348,42 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
+ int i;
/* result.x = floor(log2(src)); */
if (inst->Dst[0].Register.WriteMask & 1) {
- memset(&alu, 0, sizeof(struct r600_bc_alu));
+ if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ if (i == 0)
+ alu.dst.write = 1;
+ if (i == 2)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = 0;
- alu.dst.write = 1;
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
alu.src[0].sel = ctx->temp_reg;
@@ -2083,19 +2401,40 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
/* result.y = src.x / (2 ^ floor(log2(src.x))); */
if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+ if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = 1;
- alu.dst.write = 1;
- alu.last = 1;
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ if (i == 1)
+ alu.dst.write = 1;
+ if (i == 2)
+ alu.last = 1;
+
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 1;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
memset(&alu, 0, sizeof(struct r600_bc_alu));
@@ -2112,35 +2451,73 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- memset(&alu, 0, sizeof(struct r600_bc_alu));
-
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 1;
+ if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 1;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ if (i == 1)
+ alu.dst.write = 1;
+ if (i == 2)
+ alu.last = 1;
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = 1;
- alu.dst.write = 1;
- alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 1;
+ alu.dst.write = 1;
+ alu.last = 1;
- memset(&alu, 0, sizeof(struct r600_bc_alu));
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 1;
+ if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 1;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ if (i == 1)
+ alu.dst.write = 1;
+ if (i == 2)
+ alu.last = 1;
+
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 1;
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = 1;
- alu.dst.write = 1;
- alu.last = 1;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 1;
+ alu.dst.write = 1;
+ alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
memset(&alu, 0, sizeof(struct r600_bc_alu));
@@ -2163,19 +2540,39 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
/* result.z = log2(src);*/
if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
- memset(&alu, 0, sizeof(struct r600_bc_alu));
+ if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
- alu.dst.sel = ctx->temp_reg;
- alu.dst.write = 1;
- alu.dst.chan = 2;
- alu.last = 1;
+ alu.dst.sel = ctx->temp_reg;
+ if (i == 2)
+ alu.dst.write = 1;
+ alu.dst.chan = i;
+ if (i == 2)
+ alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+ alu.dst.chan = 2;
+ alu.last = 1;
+
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
}
/* result.w = 1.0; */
@@ -2946,3 +3343,161 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
};
+
+static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
+ {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
+ {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
+ {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
+ {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
+ {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
+ {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
+ {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
+ {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
+ {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
+ {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
+ {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
+ {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
+ {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
+ {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
+ {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
+ {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
+ {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
+ {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
+ {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
+ {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ /* gap */
+ {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ /* gap */
+ {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
+ {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
+ {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
+ {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
+ {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
+ {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
+ /* gap */
+ {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
+ {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
+ {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
+ {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
+ {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
+ {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
+ {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
+ {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
+ {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
+ {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
+ {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
+ {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
+ {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
+ {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
+ {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
+ {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
+ {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
+ {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
+ {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
+ {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
+ {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
+ {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
+ /* gap */
+ {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
+ {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
+ /* gap */
+ {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
+ {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ /* gap */
+ {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
+ {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
+ {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
+ {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ /* gap */
+ {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ /* gap */
+ {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
+ {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
+ /* gap */
+ {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+};
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 0a1fa723e13..46fdbfed34a 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -199,6 +199,7 @@ static void *r600_create_blend_state(struct pipe_context *ctx,
static void *r600_create_dsa_state(struct pipe_context *ctx,
const struct pipe_depth_stencil_alpha_state *state)
{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_dsa *dsa = CALLOC_STRUCT(r600_pipe_dsa);
unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control;
unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control;
@@ -286,6 +287,7 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
static void *r600_create_rs_state(struct pipe_context *ctx,
const struct pipe_rasterizer_state *state)
{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer);
struct r600_pipe_state *rstate;
unsigned tmp, cb;
@@ -382,26 +384,26 @@ static void *r600_create_sampler_state(struct pipe_context *ctx,
rstate->id = R600_PIPE_STATE_SAMPLER;
util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
- r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0,
- S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) |
- S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) |
- S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) |
- S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
- S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter) | aniso_flag_offset) |
- S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) |
- S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) |
- S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
- S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0,
- S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) |
- S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) |
- S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg_noblock(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0,
+ S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) |
+ S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) |
+ S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) |
+ S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
+ S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter) | aniso_flag_offset) |
+ S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) |
+ S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) |
+ S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
+ S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg_noblock(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0,
+ S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) |
+ S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) |
+ S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL);
if (uc.ui) {
- r600_pipe_state_add_reg(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg_noblock(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL);
}
return rstate;
}
@@ -410,6 +412,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
struct pipe_resource *texture,
const struct pipe_sampler_view *state)
{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view);
struct r600_pipe_state *rstate;
const struct util_format_description *desc;
@@ -1285,6 +1288,7 @@ void r600_init_config(struct r600_pipe_context *rctx)
void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state *rstate = &shader->rstate;
struct r600_shader *rshader = &shader->shader;
unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control;
@@ -1378,6 +1382,7 @@ void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shad
void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state *rstate = &shader->rstate;
struct r600_shader *rshader = &shader->shader;
unsigned spi_vs_out_id[10];
@@ -1424,9 +1429,11 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad
0xFFFFFFFF, NULL);
}
-void r600_fetch_shader(struct r600_vertex_element *ve)
+void r600_fetch_shader(struct pipe_context *ctx,
+ struct r600_vertex_element *ve)
{
struct r600_pipe_state *rstate;
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
rstate = &ve->rstate;
rstate->id = R600_PIPE_STATE_FETCH_SHADER;
@@ -1478,11 +1485,13 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx)
return rstate;
}
-void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
- struct r600_pipe_state *rstate,
- struct r600_resource *rbuffer,
- unsigned offset, unsigned stride)
+void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
+ struct r600_pipe_state *rstate,
+ struct r600_resource *rbuffer,
+ unsigned offset, unsigned stride)
{
+ rstate->id = R600_PIPE_STATE_RESOURCE;
+ rstate->nregs = 0;
r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0,
offset, 0xFFFFFFFF, rbuffer->bo);
r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
@@ -1499,3 +1508,15 @@ void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
0xC0000000, 0xFFFFFFFF, NULL);
}
+
+void r600_pipe_mod_buffer_resource(struct r600_pipe_state *rstate,
+ struct r600_resource *rbuffer,
+ unsigned offset, unsigned stride)
+{
+ rstate->nregs = 0;
+ r600_pipe_state_mod_reg_bo(rstate, offset, rbuffer->bo);
+ r600_pipe_state_mod_reg(rstate, rbuffer->bo_size - offset - 1);
+ r600_pipe_state_mod_reg(rstate, S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
+ S_038008_STRIDE(stride));
+ rstate->nregs = 7;
+}
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index de1b811ce89..48ab15f9323 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -32,43 +32,33 @@
#include "r600_pipe.h"
#include "r600d.h"
+static void r600_spi_update(struct r600_pipe_context *rctx);
+
static int r600_conv_pipe_prim(unsigned pprim, unsigned *prim)
{
- switch (pprim) {
- case PIPE_PRIM_POINTS:
- *prim = V_008958_DI_PT_POINTLIST;
- return 0;
- case PIPE_PRIM_LINES:
- *prim = V_008958_DI_PT_LINELIST;
- return 0;
- case PIPE_PRIM_LINE_STRIP:
- *prim = V_008958_DI_PT_LINESTRIP;
- return 0;
- case PIPE_PRIM_LINE_LOOP:
- *prim = V_008958_DI_PT_LINELOOP;
- return 0;
- case PIPE_PRIM_TRIANGLES:
- *prim = V_008958_DI_PT_TRILIST;
- return 0;
- case PIPE_PRIM_TRIANGLE_STRIP:
- *prim = V_008958_DI_PT_TRISTRIP;
- return 0;
- case PIPE_PRIM_TRIANGLE_FAN:
- *prim = V_008958_DI_PT_TRIFAN;
- return 0;
- case PIPE_PRIM_POLYGON:
- *prim = V_008958_DI_PT_POLYGON;
- return 0;
- case PIPE_PRIM_QUADS:
- *prim = V_008958_DI_PT_QUADLIST;
- return 0;
- case PIPE_PRIM_QUAD_STRIP:
- *prim = V_008958_DI_PT_QUADSTRIP;
- return 0;
- default:
+ static const int prim_conv[] = {
+ V_008958_DI_PT_POINTLIST,
+ V_008958_DI_PT_LINELIST,
+ V_008958_DI_PT_LINELOOP,
+ V_008958_DI_PT_LINESTRIP,
+ V_008958_DI_PT_TRILIST,
+ V_008958_DI_PT_TRISTRIP,
+ V_008958_DI_PT_TRIFAN,
+ V_008958_DI_PT_QUADLIST,
+ V_008958_DI_PT_QUADSTRIP,
+ V_008958_DI_PT_POLYGON,
+ -1,
+ -1,
+ -1,
+ -1
+ };
+
+ *prim = prim_conv[pprim];
+ if (*prim == -1) {
fprintf(stderr, "%s:%d unsupported %d\n", __func__, __LINE__, pprim);
return -1;
}
+ return 0;
}
/* common state between evergreen and r600 */
@@ -121,6 +111,8 @@ void r600_bind_rs_state(struct pipe_context *ctx, void *state)
} else {
r600_polygon_offset_update(rctx);
}
+ if (rctx->ps_shader && rctx->vs_shader)
+ r600_spi_update(rctx);
}
void r600_delete_rs_state(struct pipe_context *ctx, void *state)
@@ -281,6 +273,8 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state)
if (state) {
r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_shader->rstate);
}
+ if (rctx->ps_shader && rctx->vs_shader)
+ r600_spi_update(rctx);
}
void r600_bind_vs_shader(struct pipe_context *ctx, void *state)
@@ -292,6 +286,8 @@ void r600_bind_vs_shader(struct pipe_context *ctx, void *state)
if (state) {
r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_shader->rstate);
}
+ if (rctx->ps_shader && rctx->vs_shader)
+ r600_spi_update(rctx);
}
void r600_delete_ps_shader(struct pipe_context *ctx, void *state)
@@ -338,14 +334,27 @@ static void r600_update_alpha_ref(struct r600_pipe_context *rctx)
}
/* FIXME optimize away spi update when it's not needed */
-static void r600_spi_update(struct r600_pipe_context *rctx, unsigned prim)
+static void r600_spi_block_init(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate)
+{
+ int i;
+ rstate->nregs = 0;
+ rstate->id = R600_PIPE_STATE_SPI;
+ for (i = 0; i < 32; i++) {
+ r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, 0, 0xFFFFFFFF, NULL);
+ }
+}
+
+static void r600_spi_update(struct r600_pipe_context *rctx)
{
struct r600_pipe_shader *shader = rctx->ps_shader;
- struct r600_pipe_state rstate;
+ struct r600_pipe_state *rstate = &rctx->spi;
struct r600_shader *rshader = &shader->shader;
unsigned i, tmp;
- rstate.nregs = 0;
+ if (rctx->spi.id == 0)
+ r600_spi_block_init(rctx, &rctx->spi);
+
+ rstate->nregs = 0;
for (i = 0; i < rshader->ninput; i++) {
tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
@@ -368,15 +377,10 @@ static void r600_spi_update(struct r600_pipe_context *rctx, unsigned prim)
tmp |= S_028644_SEL_LINEAR(1);
}
- r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
+ r600_pipe_state_mod_reg(rstate, tmp);
}
- if (prim == PIPE_PRIM_QUADS || prim == PIPE_PRIM_QUAD_STRIP || prim == PIPE_PRIM_POLYGON) {
- r600_pipe_state_add_reg(&rstate, R_028814_PA_SU_SC_MODE_CNTL,
- S_028814_PROVOKING_VTX_LAST(1),
- S_028814_PROVOKING_VTX_LAST(1), NULL);
- }
- r600_context_pipe_state_set(&rctx->ctx, &rstate);
+ r600_context_pipe_state_set(&rctx->ctx, rstate);
}
void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
@@ -410,13 +414,19 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
rstate = &rctx->vs_const_buffer_resource[index];
- rstate->id = R600_PIPE_STATE_RESOURCE;
- rstate->nregs = 0;
+ if (!rstate->id) {
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ } else {
+ r600_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ }
+ }
+
if (rctx->family >= CHIP_CEDAR) {
- evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16);
evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
} else {
- r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16);
r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
}
break;
@@ -432,13 +442,18 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
rstate = &rctx->ps_const_buffer_resource[index];
- rstate->id = R600_PIPE_STATE_RESOURCE;
- rstate->nregs = 0;
+ if (!rstate->id) {
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ } else {
+ r600_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ }
+ }
if (rctx->family >= CHIP_CEDAR) {
- evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16);
evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
} else {
- r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16);
r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
}
break;
@@ -468,8 +483,6 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
for (i = 0 ; i < count; i++) {
rstate = &rctx->fs_resource[i];
- rstate->id = R600_PIPE_STATE_RESOURCE;
- rstate->nregs = 0;
if (rctx->vertex_elements->vbuffer_need_offset) {
/* one resource per vertex elements */
@@ -488,11 +501,19 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
continue;
offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo);
+ if (!rstate->id) {
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_pipe_init_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride);
+ } else {
+ r600_pipe_init_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride);
+ }
+ }
+
if (rctx->family >= CHIP_CEDAR) {
- evergreen_pipe_set_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride);
+ evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride);
evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
} else {
- r600_pipe_set_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride);
+ r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride);
r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
}
}
@@ -504,7 +525,6 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
struct r600_resource *rbuffer;
u32 vgt_dma_index_type, vgt_dma_swap_mode, vgt_draw_initiator, mask;
struct r600_draw rdraw;
- struct r600_pipe_state vgt;
struct r600_drawl draw = {};
unsigned prim;
@@ -576,23 +596,41 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
}
r600_update_alpha_ref(rctx);
- r600_spi_update(rctx, draw.info.mode);
mask = 0;
for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
mask |= (0xF << (i * 4));
}
- vgt.id = R600_PIPE_STATE_VGT;
- vgt.nregs = 0;
- r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL);
- r600_context_pipe_state_set(&rctx->ctx, &vgt);
+ if (rctx->vgt.id != R600_PIPE_STATE_VGT) {
+ rctx->vgt.id = R600_PIPE_STATE_VGT;
+ rctx->vgt.nregs = 0;
+ r600_pipe_state_add_reg(&rctx->vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&rctx->vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&rctx->vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&rctx->vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&rctx->vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&rctx->vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&rctx->vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&rctx->vgt, R_028814_PA_SU_SC_MODE_CNTL,
+ 0,
+ S_028814_PROVOKING_VTX_LAST(1), NULL);
+
+ }
+
+ rctx->vgt.nregs = 0;
+ r600_pipe_state_mod_reg(&rctx->vgt, prim);
+ r600_pipe_state_mod_reg(&rctx->vgt, rctx->cb_target_mask & mask);
+ r600_pipe_state_mod_reg(&rctx->vgt, draw.info.max_index);
+ r600_pipe_state_mod_reg(&rctx->vgt, draw.info.min_index);
+ r600_pipe_state_mod_reg(&rctx->vgt, draw.info.index_bias);
+ r600_pipe_state_mod_reg(&rctx->vgt, 0);
+ r600_pipe_state_mod_reg(&rctx->vgt, draw.info.start_instance);
+ if (draw.info.mode == PIPE_PRIM_QUADS || draw.info.mode == PIPE_PRIM_QUAD_STRIP || draw.info.mode == PIPE_PRIM_POLYGON) {
+ r600_pipe_state_mod_reg(&rctx->vgt, S_028814_PROVOKING_VTX_LAST(1));
+ }
+
+ r600_context_pipe_state_set(&rctx->ctx, &rctx->vgt);
rdraw.vgt_num_indices = draw.info.count;
rdraw.vgt_num_instances = draw.info.instance_count;
@@ -621,3 +659,39 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
u_vbuf_mgr_draw_end(rctx->vbuf_mgr);
}
+
+void _r600_pipe_state_add_reg(struct r600_context *ctx,
+ struct r600_pipe_state *state,
+ u32 offset, u32 value, u32 mask,
+ u32 range_id, u32 block_id,
+ struct r600_bo *bo)
+{
+ struct r600_range *range;
+ struct r600_block *block;
+
+ range = &ctx->range[range_id];
+ block = range->blocks[block_id];
+ state->regs[state->nregs].block = block;
+ state->regs[state->nregs].id = (offset - block->start_offset) >> 2;
+
+ state->regs[state->nregs].value = value;
+ state->regs[state->nregs].mask = mask;
+ state->regs[state->nregs].bo = bo;
+
+ state->nregs++;
+ assert(state->nregs < R600_BLOCK_MAX_REG);
+}
+
+void r600_pipe_state_add_reg_noblock(struct r600_pipe_state *state,
+ u32 offset, u32 value, u32 mask,
+ struct r600_bo *bo)
+{
+ state->regs[state->nregs].id = offset;
+ state->regs[state->nregs].block = NULL;
+ state->regs[state->nregs].value = value;
+ state->regs[state->nregs].mask = mask;
+ state->regs[state->nregs].bo = bo;
+
+ state->nregs++;
+ assert(state->nregs < R600_BLOCK_MAX_REG);
+}