summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/r600/r600_llvm.c
blob: b3d4e6bab6886793f2368252460be221d9ad2d22 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
#include "r600_llvm.h"

#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_gather.h"
#include "tgsi/tgsi_parse.h"
#include "util/u_double_list.h"
#include "util/u_memory.h"

#include "r600.h"
#include "r600_asm.h"
#include "r600_opcodes.h"
#include "r600_shader.h"
#include "radeon_llvm.h"
#include "radeon_llvm_emit.h"

#include <stdio.h>

#if defined R600_USE_LLVM || defined HAVE_OPENCL

static LLVMValueRef llvm_fetch_const(
	struct lp_build_tgsi_context * bld_base,
	const struct tgsi_full_src_register *reg,
	enum tgsi_opcode_type type,
	unsigned swizzle)
{
	LLVMValueRef idx = lp_build_const_int32(bld_base->base.gallivm,
			radeon_llvm_reg_index_soa(reg->Register.Index, swizzle));
	LLVMValueRef cval = build_intrinsic(bld_base->base.gallivm->builder,
		"llvm.AMDGPU.load.const", bld_base->base.elem_type,
		&idx, 1, LLVMReadNoneAttribute);

	return bitcast(bld_base, type, cval);
}

/**
 * Load a TGSI system value and cache it in ctx->system_values[index].
 *
 * The value is read with the llvm.R600.load.input intrinsic; the argument
 * passed to the intrinsic depends on the declared semantic:
 *   - TGSI_SEMANTIC_INSTANCEID -> 3
 *   - TGSI_SEMANTIC_VERTEXID   -> 0
 *
 * Any other semantic trips an assertion.  When assertions are compiled
 * out (NDEBUG) the function returns without emitting a load and without
 * touching ctx->system_values[index], instead of reading an
 * uninitialized channel number.
 */
static void llvm_load_system_value(
		struct radeon_llvm_context * ctx,
		unsigned index,
		const struct tgsi_full_declaration *decl)
{
	unsigned chan;

	switch (decl->Semantic.Name) {
	case TGSI_SEMANTIC_INSTANCEID: chan = 3; break;
	case TGSI_SEMANTIC_VERTEXID: chan = 0; break;
	default:
		assert(!"unknown system value");
		/* Never fall through with 'chan' uninitialized. */
		return;
	}

	LLVMValueRef reg = lp_build_const_int32(
			ctx->soa.bld_base.base.gallivm, chan);
	ctx->system_values[index] = build_intrinsic(
			ctx->soa.bld_base.base.gallivm->builder,
			"llvm.R600.load.input",
			ctx->soa.bld_base.base.elem_type, &reg, 1,
			LLVMReadNoneAttribute);
}

/**
 * Fetch a previously loaded system value.
 *
 * Looks up ctx->system_values[] by the source register's index and
 * bitcasts the cached value to the requested TGSI opcode type.  The
 * swizzle argument is not used.
 */
static LLVMValueRef llvm_fetch_system_value(
		struct lp_build_tgsi_context * bld_base,
		const struct tgsi_full_src_register *reg,
		enum tgsi_opcode_type type,
		unsigned swizzle)
{
	struct radeon_llvm_context * radeon_ctx = radeon_llvm_context(bld_base);

	return bitcast(bld_base, type,
			radeon_ctx->system_values[reg->Register.Index]);
}

static LLVMValueRef
llvm_load_input_helper(
	struct radeon_llvm_context * ctx,
	const char *intrinsic, unsigned idx)
{
	LLVMValueRef reg = lp_build_const_int32(
		ctx->soa.bld_base.base.gallivm,
		idx);
	return build_intrinsic(
		ctx->soa.bld_base.base.gallivm->builder,
		intrinsic,
		ctx->soa.bld_base.base.elem_type, &reg, 1,
		LLVMReadNoneAttribute);
}

static LLVMValueRef
llvm_face_select_helper(
	struct radeon_llvm_context * ctx,
	const char *intrinsic, unsigned face_register,
	unsigned frontcolor_register, unsigned backcolor_regiser)
{

	LLVMValueRef backcolor = llvm_load_input_helper(
		ctx,
		intrinsic,
		backcolor_regiser);
	LLVMValueRef front_color = llvm_load_input_helper(
		ctx,
		intrinsic,
		frontcolor_register);
	LLVMValueRef face = llvm_load_input_helper(
		ctx,
		"llvm.R600.load.input",
		face_register);
	LLVMValueRef is_face_positive = LLVMBuildFCmp(
		ctx->soa.bld_base.base.gallivm->builder,
		LLVMRealUGT, face,
		lp_build_const_float(ctx->soa.bld_base.base.gallivm, 0.0f),
		"");
	return LLVMBuildSelect(
		ctx->soa.bld_base.base.gallivm->builder,
		is_face_positive,
		front_color,
		backcolor,
		"");
}

/**
 * Load the four channels of one shader input into ctx->inputs[].
 *
 * For fragment shaders on EVERGREEN and newer chips the load intrinsic is
 * chosen from the declared interpolation mode; everything else uses the
 * plain llvm.R600.load.input intrinsic.
 *
 * Per-semantic handling:
 *   - FACE:     every channel is loaded from location 4 * ctx->face_input.
 *   - POSITION: loaded with llvm.R600.load.input from the SoA index
 *               shifted past the reserved registers; in fragment shaders
 *               channel 3 is replaced by 1.0 / loaded value.
 *   - COLOR:    with ctx->two_side set, front/back colors are selected
 *               through llvm_face_select_helper(); otherwise handled like
 *               any other input.
 *   - others:   loaded from the LDS position (EVERGREEN+) or from the SoA
 *               index shifted past the reserved registers.
 *
 * \param ctx          radeon LLVM context
 * \param input_index  index of the input register being declared
 * \param decl         TGSI declaration describing the input
 */
static void llvm_load_input(
	struct radeon_llvm_context * ctx,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	unsigned chan;
	const char *intrinsics = "llvm.R600.load.input";

	if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->chip_class >= EVERGREEN) {
		switch (decl->Interp.Interpolate) {
		case TGSI_INTERPOLATE_COLOR:
		case TGSI_INTERPOLATE_PERSPECTIVE:
			intrinsics = "llvm.R600.load.input.perspective";
			break;
		case TGSI_INTERPOLATE_LINEAR:
			intrinsics = "llvm.R600.load.input.linear";
			break;
		case TGSI_INTERPOLATE_CONSTANT:
			intrinsics = "llvm.R600.load.input.constant";
			break;
		default:
			/* Without assertions the plain load intrinsic is kept. */
			assert(0 && "Unknow Interpolate mode");
		}
	}

	for (chan = 0; chan < 4; chan++) {
		unsigned soa_index = radeon_llvm_reg_index_soa(input_index,
								chan);

		switch (decl->Semantic.Name) {
		case TGSI_SEMANTIC_FACE:
			ctx->inputs[soa_index] = llvm_load_input_helper(ctx,
				"llvm.R600.load.input",
				4 * ctx->face_input);
			break;
		case TGSI_SEMANTIC_POSITION:
			{
				LLVMValueRef position = llvm_load_input_helper(ctx,
					"llvm.R600.load.input",
					soa_index + (ctx->reserved_reg_count * 4));

				/* Fragment shaders get the reciprocal of the
				 * loaded value in the fourth channel. */
				if (ctx->type == TGSI_PROCESSOR_FRAGMENT && chan == 3) {
					position = LLVMBuildFDiv(ctx->gallivm.builder,
						lp_build_const_float(&(ctx->gallivm), 1.0f),
						position, "");
				}
				ctx->inputs[soa_index] = position;
			}
			break;
		case TGSI_SEMANTIC_COLOR:
			if (ctx->two_side) {
				unsigned front_location, back_location;
				unsigned back_reg = ctx->r600_inputs[input_index]
					.potential_back_facing_reg;
				if (ctx->chip_class >= EVERGREEN) {
					front_location = 4 * ctx->r600_inputs[input_index].lds_pos + chan;
					back_location = 4 * ctx->r600_inputs[back_reg].lds_pos + chan;
				} else {
					front_location = soa_index + 4 * ctx->reserved_reg_count;
					back_location = radeon_llvm_reg_index_soa(
						ctx->r600_inputs[back_reg].gpr,
						chan);
				}
				ctx->inputs[soa_index] = llvm_face_select_helper(ctx,
					intrinsics,
					4 * ctx->face_input, front_location, back_location);
				break;
			}
			/* fallthrough: single-sided colors are ordinary inputs */
		default:
			{
				unsigned location;
				/* NOTE(review): this branch is taken for every
				 * shader type on EVERGREEN+, not only fragment
				 * shaders - confirm lds_pos is meaningful for
				 * the other types. */
				if (ctx->chip_class >= EVERGREEN) {
					location = 4 * ctx->r600_inputs[input_index].lds_pos + chan;
				} else {
					location = soa_index + 4 * ctx->reserved_reg_count;
				}
				ctx->inputs[soa_index] = llvm_load_input_helper(ctx,
					intrinsics, location);
			}
			break;
		}
	}
}

static void llvm_emit_prologue(struct lp_build_tgsi_context * bld_base)
{
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
	struct lp_build_context * base = &bld_base->base;
	unsigned i;

	/* Reserve special input registers */
	for (i = 0; i < ctx->reserved_reg_count; i++) {
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			LLVMValueRef reg_index = lp_build_const_int32(
					base->gallivm,
					radeon_llvm_reg_index_soa(i, chan));
			lp_build_intrinsic_unary(base->gallivm->builder,
				"llvm.AMDGPU.reserve.reg",
				LLVMVoidTypeInContext(base->gallivm->context),
				reg_index);
		}
	}
}

/**
 * Shader epilogue: emit the store/export intrinsics for every output.
 *
 * Each channel of each output register is loaded from ctx->soa.outputs
 * and then, depending on the shader type:
 *   - vertex shaders: stored with llvm.AMDGPU.store.output at the SoA
 *     index of (output index + reserved register count, channel);
 *   - fragment shaders: dispatched on the output's semantic name to the
 *     pixel color / depth / stencil store intrinsics.
 *
 * A fragment shader for which no COLOR output was seen additionally gets
 * a llvm.R600.store.pixel.dummy call.
 */
static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
	struct lp_build_context * base = &bld_base->base;
	unsigned i;
	
	/* Counts color channels stored through the non-broadcast path below;
	 * color_count / 4 is used as the color buffer number. */
	unsigned color_count = 0;
	/* Set as soon as any output with COLOR semantic is encountered. */
	boolean has_color = false;

	/* Add the necessary export instructions */
	for (i = 0; i < ctx->output_reg_count; i++) {
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			LLVMValueRef output;
			/* Output registers are placed after the reserved ones. */
			unsigned adjusted_reg_idx = i +
					ctx->reserved_reg_count;

			/* The load is emitted for every channel, even those
			 * that end up not being stored below. */
			output = LLVMBuildLoad(base->gallivm->builder,
				ctx->soa.outputs[i][chan], "");

			if (ctx->type == TGSI_PROCESSOR_VERTEX) {
				LLVMValueRef reg_index = lp_build_const_int32(
					base->gallivm,
					radeon_llvm_reg_index_soa(adjusted_reg_idx, chan));
				lp_build_intrinsic_binary(
					base->gallivm->builder,
					"llvm.AMDGPU.store.output",
					LLVMVoidTypeInContext(base->gallivm->context),
					output, reg_index);
			} else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
				switch (ctx->r600_outputs[i].name) {
				case TGSI_SEMANTIC_COLOR:
					has_color = true;
					/* Skip the store once color_count / 4 reaches
					 * the number of color buffers. */
					if ( color_count/4 < ctx->color_buffer_count) {
						if (ctx->fs_color_all) {
							/* Broadcast: store this channel to every
							 * color buffer.  color_count is not
							 * advanced on this path. */
							for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
								LLVMValueRef reg_index = lp_build_const_int32(
									base->gallivm,
									(j * 4) + chan);
								lp_build_intrinsic_binary(
									base->gallivm->builder,
									"llvm.R600.store.pixel.color",
									LLVMVoidTypeInContext(base->gallivm->context),
									output, reg_index);
							}
						} else {
							/* color_count is bumped once per channel,
							 * so the buffer number advances after
							 * every four stored channels. */
							LLVMValueRef reg_index = lp_build_const_int32(
								base->gallivm,
								(color_count++/4) * 4 + chan);
							lp_build_intrinsic_binary(
								base->gallivm->builder,
								"llvm.R600.store.pixel.color",
								LLVMVoidTypeInContext(base->gallivm->context),
								output, reg_index);
						}
					}
					break;
				case TGSI_SEMANTIC_POSITION:
					/* Only channel 2 is stored as depth; 'continue'
					 * moves on to the next channel. */
					if (chan != 2)
						continue;
					lp_build_intrinsic_unary(
						base->gallivm->builder,
						"llvm.R600.store.pixel.depth",
						LLVMVoidTypeInContext(base->gallivm->context),
						output);
					break;
				case TGSI_SEMANTIC_STENCIL:
					/* Only channel 1 is stored as stencil. */
					if (chan != 1)
						continue;
					lp_build_intrinsic_unary(
						base->gallivm->builder,
						"llvm.R600.store.pixel.stencil",
						LLVMVoidTypeInContext(base->gallivm->context),
						output);
					break;
				}
			}
		}
	}

	/* Fragment shader without any color output: emit the dummy store. */
	if (!has_color && ctx->type == TGSI_PROCESSOR_FRAGMENT)
		lp_build_intrinsic(base->gallivm->builder, "llvm.R600.store.pixel.dummy", LLVMVoidTypeInContext(base->gallivm->context), 0, 0);
}

/**
 * Emit a texture-style intrinsic call.
 *
 * The already fetched arguments in emit_data->args are forwarded
 * unchanged, followed by two extra 32-bit integer constants: the register
 * index of the instruction's last source operand and the instruction's
 * Texture.Texture field.  The intrinsic named by action->intr_name is
 * called and its result stored in emit_data->output[0].
 */
static void llvm_emit_tex(
	const struct lp_build_tgsi_action * action,
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	struct gallivm_state * gallivm = bld_base->base.gallivm;
	LLVMValueRef args[6];
	unsigned count = 0;
	unsigned sampler_src;

	/* Room is needed for the two trailing constants. */
	assert(emit_data->arg_count + 2 <= Elements(args));

	while (count < emit_data->arg_count) {
		args[count] = emit_data->args[count];
		count++;
	}

	/* The last source operand is used as the sampler operand. */
	sampler_src = emit_data->inst->Instruction.NumSrcRegs-1;

	args[count] = lp_build_const_int32(gallivm,
			emit_data->inst->Src[sampler_src].Register.Index);
	args[count + 1] = lp_build_const_int32(gallivm,
			emit_data->inst->Texture.Texture);
	count += 2;

	emit_data->output[0] = build_intrinsic(gallivm->builder,
					action->intr_name,
					emit_data->dst_type, args, count,
					LLVMReadNoneAttribute);
}

static void emit_cndlt(
		const struct lp_build_tgsi_action * action,
		struct lp_build_tgsi_context * bld_base,
		struct lp_build_emit_data * emit_data)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMValueRef float_zero = lp_build_const_float(
		bld_base->base.gallivm, 0.0f);
	LLVMValueRef cmp = LLVMBuildFCmp(
		builder, LLVMRealULT, emit_data->args[0], float_zero, "");
	emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
		cmp, emit_data->args[1], emit_data->args[2], "");
}

/**
 * Build the two 4-element operands for the dot-product opcodes.
 *
 * DP2 fetches two channels and DP3 three; any other opcode fetches four
 * channels of each source.  Channels that are not fetched are filled with
 * zero.  For DPH the W channel of the first operand is replaced by one
 * after fetching.  The gathered operands become args[0] and args[1].
 */
static void dp_fetch_args(
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	struct lp_build_context * base = &bld_base->base;
	struct gallivm_state * gallivm = bld_base->base.gallivm;
	unsigned opcode = emit_data->inst->Instruction.Opcode;
	LLVMValueRef elements[2][4];
	unsigned dp_components;
	unsigned chan;

	switch (opcode) {
	case TGSI_OPCODE_DP2:
		dp_components = 2;
		break;
	case TGSI_OPCODE_DP3:
		dp_components = 3;
		break;
	default:
		dp_components = 4;
		break;
	}

	for (chan = 0; chan < 4; chan++) {
		if (chan < dp_components) {
			/* Source 0 is fetched before source 1 per channel. */
			elements[0][chan] = lp_build_emit_fetch(bld_base,
							emit_data->inst, 0, chan);
			elements[1][chan] = lp_build_emit_fetch(bld_base,
							emit_data->inst, 1, chan);
		} else {
			elements[0][chan] = base->zero;
			elements[1][chan] = base->zero;
		}
	}

	/* Fix up for DPH */
	if (opcode == TGSI_OPCODE_DPH)
		elements[0][TGSI_CHAN_W] = base->one;

	emit_data->args[0] = lp_build_gather_values(gallivm, elements[0], 4);
	emit_data->args[1] = lp_build_gather_values(gallivm, elements[1], 4);
	emit_data->arg_count = 2;

	emit_data->dst_type = base->elem_type;
}

/*
 * Action shared by the dot-product opcodes (installed for DP2, DP3, DP4
 * and DPH in r600_tgsi_llvm()): operands are prepared by dp_fetch_args()
 * and the llvm.AMDGPU.dp4 intrinsic is emitted through
 * build_tgsi_intrinsic_nomem.
 */
static struct lp_build_tgsi_action dot_action = {
	.fetch_args = dp_fetch_args,
	.emit = build_tgsi_intrinsic_nomem,
	.intr_name = "llvm.AMDGPU.dp4"
};



/**
 * Translate a TGSI token stream into an LLVM module.
 *
 * Initializes the radeon LLVM context, scans the shader, installs the
 * r600-specific fetch/prologue/epilogue/input callbacks and opcode
 * actions defined in this file, runs the TGSI -> LLVM translation and
 * finalizes the module.
 *
 * \param ctx     radeon LLVM context to (re)initialize and fill in
 * \param tokens  TGSI tokens of the shader to translate
 * \return ctx->gallivm.module
 */
LLVMModuleRef r600_tgsi_llvm(
	struct radeon_llvm_context * ctx,
	const struct tgsi_token * tokens)
{
	struct tgsi_shader_info shader_info;
	struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
	radeon_llvm_context_init(ctx);
	tgsi_scan_shader(tokens, &shader_info);

	/* shader_info lives on this function's stack; the pointer is only
	 * valid until we return and is cleared again below. */
	bld_base->info = &shader_info;
	bld_base->userdata = ctx;
	bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = llvm_fetch_const;
	bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = llvm_fetch_system_value;
	bld_base->emit_prologue = llvm_emit_prologue;
	bld_base->emit_epilogue = llvm_emit_epilogue;
	ctx->userdata = ctx;
	ctx->load_input = llvm_load_input;
	ctx->load_system_value = llvm_load_system_value;

	/* Dot products replace the whole action (fetch + emit + name). */
	bld_base->op_actions[TGSI_OPCODE_DP2] = dot_action;
	bld_base->op_actions[TGSI_OPCODE_DP3] = dot_action;
	bld_base->op_actions[TGSI_OPCODE_DP4] = dot_action;
	bld_base->op_actions[TGSI_OPCODE_DPH] = dot_action;
	/* These opcodes only override the emit callback. */
	bld_base->op_actions[TGSI_OPCODE_DDX].emit = llvm_emit_tex;
	bld_base->op_actions[TGSI_OPCODE_DDY].emit = llvm_emit_tex;
	bld_base->op_actions[TGSI_OPCODE_TEX].emit = llvm_emit_tex;
	bld_base->op_actions[TGSI_OPCODE_TXB].emit = llvm_emit_tex;
	bld_base->op_actions[TGSI_OPCODE_TXD].emit = llvm_emit_tex;
	bld_base->op_actions[TGSI_OPCODE_TXL].emit = llvm_emit_tex;
	bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex;
	bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex;
	bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex;
	bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cndlt;

	lp_build_tgsi_llvm(bld_base, tokens);

	radeon_llvm_finalize_module(ctx);

	/* Do not leave a dangling pointer to the stack-local shader_info
	 * behind in the context. */
	bld_base->info = NULL;

	return ctx->gallivm.module;
}

/**
 * Map a radeon chip family to the GPU name string handed to the LLVM
 * backend.
 *
 * \param family  chip family to look up
 * \return the GPU name for the family, or "" (after printing a message on
 *         stderr) when the family is not listed here
 */
const char * r600_llvm_gpu_string(enum radeon_family family)
{
	switch (family) {
	case CHIP_R600:
	case CHIP_RV610:
	case CHIP_RV630:
	case CHIP_RV620:
	case CHIP_RV635:
	case CHIP_RS780:
	case CHIP_RS880:
		return "r600";
	case CHIP_RV710:
		return "rv710";
	case CHIP_RV730:
		return "rv730";
	case CHIP_RV670:
	case CHIP_RV740:
	case CHIP_RV770:
		return "rv770";
	case CHIP_PALM:
	case CHIP_CEDAR:
		return "cedar";
	case CHIP_SUMO:
	case CHIP_SUMO2:
	case CHIP_REDWOOD:
		return "redwood";
	case CHIP_JUNIPER:
		return "juniper";
	case CHIP_HEMLOCK:
	case CHIP_CYPRESS:
		return "cypress";
	case CHIP_BARTS:
		return "barts";
	case CHIP_TURKS:
		return "turks";
	case CHIP_CAICOS:
		return "caicos";
	case CHIP_CAYMAN:
	case CHIP_ARUBA:
		return "cayman";
	default:
		break;
	}

	/* Family not listed above. */
	fprintf(stderr, "Chip not supported by r600 llvm "
		"backend, please file a bug at bugs.freedesktop.org\n");
	return "";
}

/**
 * Compile an LLVM module for the given chip family.
 *
 * Resolves the family to its GPU name with r600_llvm_gpu_string() and
 * forwards all other arguments unchanged to radeon_llvm_compile(),
 * returning its result.
 */
unsigned r600_llvm_compile(
	LLVMModuleRef mod,
	unsigned char ** inst_bytes,
	unsigned * inst_byte_count,
	enum radeon_family family,
	unsigned dump)
{
	return radeon_llvm_compile(mod, inst_bytes, inst_byte_count,
					r600_llvm_gpu_string(family), dump);
}

#endif