1 files changed, 166 insertions, 1 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 4210c3af685..fece8073452 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -2564,6 +2564,168 @@ static int r600_store_tcs_output(struct r600_shader_ctx *ctx)
 	return 0;
 }
 
+static int r600_tess_factor_read(struct r600_shader_ctx *ctx,
+				 int output_idx)
+{
+	int param;
+	unsigned temp_reg = r600_get_temp(ctx);
+	unsigned name = ctx->shader->output[output_idx].name;
+	int dreg = ctx->shader->output[output_idx].gpr;
+	int r;
+
+	param = r600_get_lds_unique_index(name, 0);
+	r = get_lds_offset0(ctx, 1, temp_reg, true);
+	if (r)
+		return r;
+
+	r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
+			   temp_reg, 0,
+			   temp_reg, 0,
+			   V_SQ_ALU_SRC_LITERAL, param * 16);
+	if (r)
+		return r;
+
+	do_lds_fetch_values(ctx, temp_reg, dreg);
+	return 0;
+}
+
+static int r600_emit_tess_factor(struct r600_shader_ctx *ctx)
+{
+	int i;
+	int stride, outer_comps, inner_comps;
+	int tessinner_idx = -1, tessouter_idx = -1;
+	int r;
+	int temp_reg = r600_get_temp(ctx);
+	int treg[3] = {-1, -1, -1};
+	struct r600_bytecode_alu alu;
+	struct r600_bytecode_cf *cf_jump, *cf_pop;
+
+	/* only execute factor emission for invocation 0 */
+	/* PRED_SETE_INT __, R0.x, 0 */
+	memset(&alu, 0, sizeof(alu));
+	alu.op = ALU_OP2_PRED_SETE_INT;
+	alu.src[0].chan = 2;
+	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+	alu.execute_mask = 1;
+	alu.update_pred = 1;
+	alu.last = 1;
+	r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE);
+
+	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
+	cf_jump = ctx->bc->cf_last;
+
+	treg[0] = r600_get_temp(ctx);
+	switch (ctx->shader->tcs_prim_mode) {
+	case PIPE_PRIM_LINES:
+		stride = 8; /* 2 dwords, 1 vec2 store */
+		outer_comps = 2;
+		inner_comps = 0;
+		break;
+	case PIPE_PRIM_TRIANGLES:
+		stride = 16; /* 4 dwords, 1 vec4 store */
+		outer_comps = 3;
+		inner_comps = 1;
+		treg[1] = r600_get_temp(ctx);
+		break;
+	case PIPE_PRIM_QUADS:
+		stride = 24; /* 6 dwords, 2 stores (vec4 + vec2) */
+		outer_comps = 4;
+		inner_comps = 2;
+		treg[1] = r600_get_temp(ctx);
+		treg[2] = r600_get_temp(ctx);
+		break;
+	default:
+		assert(0);
+		return -1;
+	}
+
+	/* R0 is InvocationID, RelPatchID, PatchID, tf_base */
+	/* TF_WRITE takes index in R.x, value in R.y */
+	for (i = 0; i < ctx->shader->noutput; i++) {
+		if (ctx->shader->output[i].name == TGSI_SEMANTIC_TESSINNER)
+			tessinner_idx = i;
+		if (ctx->shader->output[i].name == TGSI_SEMANTIC_TESSOUTER)
+			tessouter_idx = i;
+	}
+
+	if (tessouter_idx == -1)
+		return -1;
+
+	if (tessinner_idx == -1 && inner_comps)
+		return -1;
+
+	if (tessouter_idx != -1) {
+		r = r600_tess_factor_read(ctx, tessouter_idx);
+		if (r)
+			return r;
+	}
+
+	if (tessinner_idx != -1) {
+		r = r600_tess_factor_read(ctx, tessinner_idx);
+		if (r)
+			return r;
+	}
+
+	/* r.x = tf_base(r0.w) + relpatchid(r0.y) * tf_stride */
+	/* r.x = relpatchid(r0.y) * tf_stride */
+
+	/* multiply incoming r0.y * stride - t.x = r0.y * stride */
+	/* add incoming r0.w to it: t.x = t.x + r0.w */
+	r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
+			   temp_reg, 0,
+			   0, 1,
+			   V_SQ_ALU_SRC_LITERAL, stride,
+			   0, 3);
+	if (r)
+		return r;
+
+	for (i = 0; i < outer_comps + inner_comps; i++) {
+		int out_idx = i >= outer_comps ? tessinner_idx : tessouter_idx;
+		int out_comp = i >= outer_comps ? i - outer_comps : i;
+
+		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
+				   treg[i / 2], (2 * (i % 2)),
+				   temp_reg, 0,
+				   V_SQ_ALU_SRC_LITERAL, 4 * i);
+		if (r)
+			return r;
+		r = single_alu_op2(ctx, ALU_OP1_MOV,
+				   treg[i / 2], 1 + (2 * (i%2)),
+				   ctx->shader->output[out_idx].gpr, out_comp,
+				   0, 0);
+		if (r)
+			return r;
+	}
+	for (i = 0; i < outer_comps + inner_comps; i++) {
+		struct r600_bytecode_gds gds;
+
+		memset(&gds, 0, sizeof(struct r600_bytecode_gds));
+		gds.src_gpr = treg[i / 2];
+		gds.src_sel_x = 2 * (i % 2);
+		gds.src_sel_y = 1 + (2 * (i % 2));
+		gds.src_sel_z = 4;
+		gds.dst_sel_x = 7;
+		gds.dst_sel_y = 7;
+		gds.dst_sel_z = 7;
+		gds.dst_sel_w = 7;
+		gds.op = FETCH_OP_TF_WRITE;
+		r = r600_bytecode_add_gds(ctx->bc, &gds);
+		if (r)
+			return r;
+	}
+
+	// Patch up jump label
+	r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP);
+	cf_pop = ctx->bc->cf_last;
+
+	cf_jump->cf_addr = cf_pop->id + 2;
+	cf_jump->pop_count = 1;
+	cf_pop->cf_addr = cf_pop->id + 2;
+	cf_pop->pop_count = 1;
+
+	return 0;
+}
+
 static int r600_shader_from_tgsi(struct r600_context *rctx,
 				 struct r600_pipe_shader *pipeshader,
 				 union r600_shader_key key)
@@ -3076,6 +3238,9 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
 	convert_edgeflag_to_int(&ctx);
 
+	if (ctx.type == TGSI_PROCESSOR_TESS_CTRL)
+		r600_emit_tess_factor(&ctx);
+
 	if (lds_outputs) {
 		if (ctx.type == TGSI_PROCESSOR_VERTEX) {
 			if (ctx.shader->noutput)
@@ -3341,7 +3506,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 				last = r600_isa_cf(ctx.bc->cf_last->op);
 
 			/* alu clause instructions don't have EOP bit, so add NOP */
-			if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS)
+			if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS)
 				r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
 
 			ctx.bc->cf_last->end_of_program = 1;