summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/r600/r600_asm.h
blob: 5841044bf819103d6a91e07443475470361ad3e5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#ifndef R600_ASM_H
#define R600_ASM_H

#include "r600_pipe.h"
#include "r600_isa.h"
#include "tgsi/tgsi_exec.h"

/*
 * Description of one source operand of an ALU instruction.
 * struct r600_bytecode_alu embeds three of these as src[3].
 *
 * NOTE(review): field meanings are not established in this header;
 * sel/chan presumably select register and channel, neg/abs/rel are
 * presumably modifier flags, and kc_bank/kc_rel presumably relate to
 * struct r600_bytecode_kcache -- confirm against r600_asm.c.
 */
struct r600_bytecode_alu_src {
	unsigned			sel;
	unsigned			chan;
	unsigned			neg;
	unsigned			abs;
	unsigned			rel;
	unsigned			kc_bank;
	unsigned			kc_rel;
	/* 32-bit payload; presumably the literal constant -- TODO confirm */
	uint32_t			value;
};

/*
 * Description of the destination operand of an ALU instruction.
 * struct r600_bytecode_alu embeds exactly one of these as dst.
 *
 * NOTE(review): clamp/write/rel look like flags stored as unsigned;
 * their exact encoding is not visible here -- confirm in r600_asm.c.
 */
struct r600_bytecode_alu_dst {
	unsigned			sel;
	unsigned			chan;
	unsigned			clamp;
	unsigned			write;
	unsigned			rel;
};

/*
 * One ALU instruction: up to three source operands, one destination,
 * an opcode and assorted per-instruction settings.
 *
 * NOTE(review): 'list' is presumably the node that links this entry
 * into the 'alu' list head of struct r600_bytecode_cf -- confirm.
 */
struct r600_bytecode_alu {
	struct list_head		list;
	struct r600_bytecode_alu_src		src[3];
	struct r600_bytecode_alu_dst		dst;
	unsigned			op;
	unsigned			last;
	unsigned			is_op3;
	unsigned			is_lds_idx_op;
	unsigned			execute_mask;
	unsigned			update_pred;
	unsigned			pred_sel;
	unsigned			bank_swizzle;
	unsigned			bank_swizzle_force;
	unsigned			omod;
	unsigned                        index_mode;
	unsigned                        lds_idx;
};

/*
 * One texture instruction.  Holds the opcode, source/destination GPR
 * numbers, per-component destination and source selects (x/y/z/w),
 * per-component coordinate types, signed x/y/z offsets and the
 * sampler/resource identifiers.
 *
 * NOTE(review): 'list' is presumably the node that links this entry
 * into the 'tex' list head of struct r600_bytecode_cf -- confirm.
 */
struct r600_bytecode_tex {
	struct list_head		list;
	unsigned			op;
	unsigned			inst_mod;
	unsigned			resource_id;
	unsigned			src_gpr;
	unsigned			src_rel;
	unsigned			dst_gpr;
	unsigned			dst_rel;
	unsigned			dst_sel_x;
	unsigned			dst_sel_y;
	unsigned			dst_sel_z;
	unsigned			dst_sel_w;
	unsigned			lod_bias;
	unsigned			coord_type_x;
	unsigned			coord_type_y;
	unsigned			coord_type_z;
	unsigned			coord_type_w;
	/* the offsets are the only signed fields in this struct */
	int				offset_x;
	int				offset_y;
	int				offset_z;
	unsigned			sampler_id;
	unsigned			src_sel_x;
	unsigned			src_sel_y;
	unsigned			src_sel_z;
	unsigned			src_sel_w;
	/* indexed samplers/resources only on evergreen/cayman */
	unsigned			sampler_index_mode;
	unsigned			resource_index_mode;
};

/*
 * One vertex-fetch style instruction.  The first group of fields
 * covers the fetch itself (buffer, GPRs, selects, data format); the
 * second group is only relevant for READ_SCRATCH.
 *
 * The same struct is also passed as the 'mem' argument of
 * r700_bytecode_fetch_mem_build().
 *
 * NOTE(review): 'list' is presumably the node that links this entry
 * into the 'vtx' list head of struct r600_bytecode_cf -- confirm.
 */
struct r600_bytecode_vtx {
	struct list_head		list;
	unsigned			op;
	unsigned			fetch_type;
	unsigned			buffer_id;
	unsigned			src_gpr;
	unsigned			src_sel_x;
	unsigned			mega_fetch_count;
	unsigned			dst_gpr;
	unsigned			dst_sel_x;
	unsigned			dst_sel_y;
	unsigned			dst_sel_z;
	unsigned			dst_sel_w;
	unsigned			use_const_fields;
	unsigned			data_format;
	unsigned			num_format_all;
	unsigned			format_comp_all;
	unsigned			srf_mode_all;
	unsigned			offset;
	unsigned			endian;
	unsigned			buffer_index_mode;

	/* READ_SCRATCH fields */
	unsigned			uncached;
	unsigned			indexed;
	unsigned			src_sel_y;
	unsigned			src_rel;
	unsigned			elem_size;
	unsigned			array_size;
	unsigned			array_base;
	unsigned			burst_count;
	unsigned			dst_rel;
};

/*
 * One GDS instruction: opcode, two source GPRs (src_gpr, src_gpr2)
 * with x/y/z source selects, a destination GPR with x/y/z/w selects,
 * and UAV-related settings.
 *
 * Consumed by eg_bytecode_gds_build().
 *
 * NOTE(review): 'list' is presumably the node that links this entry
 * into the 'gds' list head of struct r600_bytecode_cf -- confirm.
 */
struct r600_bytecode_gds {
	struct list_head		list;
	unsigned			op;
	unsigned			src_gpr;
	unsigned			src_rel;
	unsigned			src_sel_x;
	unsigned			src_sel_y;
	unsigned			src_sel_z;
	unsigned			src_gpr2;
	unsigned			dst_gpr;
	unsigned			dst_rel;
	unsigned			dst_sel_x;
	unsigned			dst_sel_y;
	unsigned			dst_sel_z;
	unsigned			dst_sel_w;
	unsigned			uav_index_mode;
	unsigned                        uav_id;
	unsigned                        alloc_consume;
	unsigned                        bcast_first_req;
};

/*
 * Description of one output (export / memory write) operation.
 *
 * Used in three places in this header: embedded in struct
 * r600_bytecode_cf as 'output', queued in struct r600_bytecode as
 * 'pending_outputs[]', and filled in by the *_bytecode_export_read()
 * functions.
 */
struct r600_bytecode_output {
	unsigned			array_base;
	unsigned			array_size;
	unsigned			comp_mask;
	unsigned			type;

	unsigned			op;

	unsigned			elem_size;
	unsigned			gpr;
	unsigned			swizzle_x;
	unsigned			swizzle_y;
	unsigned			swizzle_z;
	unsigned			swizzle_w;
	unsigned			burst_count;
	unsigned			index_gpr;
	unsigned			mark; /* used by MEM_SCRATCH */
};

/*
 * RAT settings attached to a CF instruction (see the 'rat' member of
 * struct r600_bytecode_cf).
 *
 * NOTE(review): the value ranges of id/inst/index_mode are not visible
 * in this header -- confirm against the code that encodes them.
 */
struct r600_bytecode_rat {
	unsigned			id;
	unsigned			inst;
	unsigned			index_mode;
};

/*
 * One kcache entry.  struct r600_bytecode_cf carries an array of four
 * of these as kcache[4].
 *
 * NOTE(review): the encoding of 'mode' and the units of 'addr' are not
 * visible in this header -- confirm against r600_asm.c.
 */
struct r600_bytecode_kcache {
	unsigned			bank;
	unsigned			mode;
	unsigned			addr;
	unsigned			index_mode;
};

/*
 * One control-flow (CF) instruction.
 *
 * Besides its own opcode and settings, a CF entry owns four list heads
 * (alu, tex, vtx, gds), one embedded output descriptor, one RAT
 * descriptor, four kcache entries and three pointers to ALU
 * instructions (curr/prev/prev2 "bs_head").
 *
 * NOTE(review): 'list' is presumably the node linking this entry into
 * the 'cf' list head of struct r600_bytecode; the four list heads
 * presumably hold the instructions belonging to this clause -- confirm.
 */
struct r600_bytecode_cf {
	struct list_head		list;

	unsigned			op;
	unsigned			addr;
	unsigned			ndw;
	unsigned			id;
	unsigned			cond;
	unsigned			pop_count;
	unsigned			count;
	unsigned			cf_addr; /* control flow addr */
	struct r600_bytecode_kcache		kcache[4];
	unsigned			r6xx_uses_waterfall;
	unsigned			eg_alu_extended;
	unsigned			barrier;
	unsigned			end_of_program;
	unsigned                        mark;
	unsigned                        vpm;
	struct list_head		alu;
	struct list_head		tex;
	struct list_head		vtx;
	struct list_head		gds;
	struct r600_bytecode_output		output;
	struct r600_bytecode_rat		rat;
	/* NOTE(review): "bs" presumably stands for bank swizzle -- confirm */
	struct r600_bytecode_alu		*curr_bs_head;
	struct r600_bytecode_alu		*prev_bs_head;
	struct r600_bytecode_alu		*prev2_bs_head;
	/* two words; presumably the encoded form of this CF -- TODO confirm */
	unsigned isa[2];
};

/*
 * Flow-control kinds, numbered 0..5.
 * NOTE(review): presumably the values stored in
 * r600_cf_stack_entry.type -- confirm against the users of fc_stack.
 */
#define FC_NONE				0
#define FC_IF				1
#define FC_LOOP				2
#define FC_REP				3
#define FC_PUSH_VPM			4
#define FC_PUSH_WQM			5

/*
 * One entry of the flow-control stack kept in struct r600_bytecode
 * (fc_stack[]).  Records a type, the CF instruction at which the
 * construct starts, and an array of 'num_mid' intermediate CF pointers.
 *
 * NOTE(review): ownership/allocation of the 'mid' array is not visible
 * in this header -- confirm who allocates and frees it.
 */
struct r600_cf_stack_entry {
	int				type;
	struct r600_bytecode_cf		*start;
	struct r600_bytecode_cf		**mid; /* used to store the else point */
	int				num_mid;
};

/* 0x20 == 32 */
#define SQ_MAX_CALL_DEPTH 0x00000020

/*
 * NOTE(review): presumably the values stored in
 * r600_bytecode.ar_handling -- confirm against r600_asm.c.
 */
#define AR_HANDLE_NORMAL 0
#define AR_HANDLE_RV6XX 1 /* except RV670 */

/*
 * Stack usage bookkeeping, embedded in struct r600_bytecode as
 * 'stack'.  Tracks the current nesting levels (push, push_wqm, loop)
 * together with the resulting depth requirement and per-entry size.
 */
struct r600_stack_info {
	/* current level of non-WQM PUSH operations
	 * (PUSH, PUSH_ELSE, ALU_PUSH_BEFORE) */
	int push;
	/* current level of WQM PUSH operations
	 * (PUSH, PUSH_ELSE, PUSH_WQM) */
	int push_wqm;
	/* current loop level */
	int loop;

	/* required depth */
	int max_entries;
	/* subentries per entry */
	int entry_size;
};

/*
 * Top-level state for one bytecode program under construction.
 *
 * Every function declared in this header that operates on a program
 * takes a pointer to this struct as its first argument.  It holds the
 * target chip identification, the list of CF instructions, size
 * counters, the output buffer pointer, the flow-control stack, stack
 * usage info, address/index register tracking and a small queue of
 * pending outputs.
 */
struct r600_bytecode {
	enum chip_class			chip_class;
	enum radeon_family		family;
	bool				has_compressed_msaa_texturing;
	int				type;
	/* head of the CF list; cf_last points at one struct r600_bytecode_cf */
	struct list_head		cf;
	struct r600_bytecode_cf		*cf_last;
	/* NOTE(review): counters; "ndw" presumably counts dwords -- confirm */
	unsigned			ndw;
	unsigned			ncf;
	unsigned			ngpr;
	unsigned			nstack;
	unsigned			nlds_dw;
	unsigned			nresource;
	unsigned			force_add_cf;
	uint32_t			*bytecode;
	/* NOTE(review): presumably the current depth of fc_stack[] -- confirm */
	uint32_t			fc_sp;
	struct r600_cf_stack_entry	fc_stack[TGSI_EXEC_MAX_NESTING];
	struct r600_stack_info		stack;
	unsigned	ar_loaded;
	unsigned	ar_reg;
	unsigned	ar_chan;
	unsigned        ar_handling;
	unsigned        r6xx_nop_after_rel_dst;
	bool            index_loaded[2];
	unsigned        index_reg[2]; /* indexing register CF_INDEX_[01] */
	unsigned        debug_id;
	struct r600_isa* isa;
	/* fixed-capacity queue (5 slots) with its fill count */
	struct r600_bytecode_output pending_outputs[5];
	int n_pending_outputs;
	boolean			need_wait_ack; /* emit a pending WAIT_ACK prior to control flow */
};

/* eg_asm.c */
/*
 * Entry points with the eg_/egcm_ prefix.  All take the program state
 * as first argument and return int.
 * NOTE(review): the int return is presumably 0 on success and a
 * negative errno-style value on failure -- confirm in eg_asm.c.
 */
int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf);
int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_clause);
int eg_bytecode_gds_build(struct r600_bytecode *bc, struct r600_bytecode_gds *gds, unsigned id);
int eg_bytecode_alu_build(struct r600_bytecode *bc,
			  struct r600_bytecode_alu *alu, unsigned id);
/* r600_asm.c */
/*
 * Generic bytecode construction API.
 *
 * The r600_bytecode_add_*() functions take the instruction description
 * through a const pointer, so the caller's struct is not written
 * through that pointer.
 * NOTE(review): int returns are presumably 0 on success and a negative
 * errno-style value on failure -- confirm in r600_asm.c.
 */
void r600_bytecode_init(struct r600_bytecode *bc,
			enum chip_class chip_class,
			enum radeon_family family,
			bool has_compressed_msaa_texturing);
void r600_bytecode_clear(struct r600_bytecode *bc);
int r600_bytecode_add_alu(struct r600_bytecode *bc,
		const struct r600_bytecode_alu *alu);
int r600_bytecode_add_vtx(struct r600_bytecode *bc,
		const struct r600_bytecode_vtx *vtx);
int r600_bytecode_add_vtx_tc(struct r600_bytecode *bc,
			     const struct r600_bytecode_vtx *vtx);
int r600_bytecode_add_tex(struct r600_bytecode *bc,
		const struct r600_bytecode_tex *tex);
int r600_bytecode_add_gds(struct r600_bytecode *bc,
		const struct r600_bytecode_gds *gds);
int r600_bytecode_add_output(struct r600_bytecode *bc,
		const struct r600_bytecode_output *output);
int r600_bytecode_add_pending_output(struct r600_bytecode *bc,
		const struct r600_bytecode_output *output);
/* setter/getter pair; presumably for r600_bytecode.need_wait_ack -- confirm */
void r600_bytecode_need_wait_ack(struct r600_bytecode *bc, boolean needed);
boolean r600_bytecode_get_need_wait_ack(struct r600_bytecode *bc);
int r600_bytecode_build(struct r600_bytecode *bc);
int r600_bytecode_add_cf(struct r600_bytecode *bc);
int r600_bytecode_add_cfinst(struct r600_bytecode *bc,
		unsigned op);
int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
		const struct r600_bytecode_alu *alu, unsigned type);
/* sel and neg are out-parameters (pointers); abs is passed by value */
void r600_bytecode_special_constants(uint32_t value,
		unsigned *sel, unsigned *neg, unsigned abs);
void r600_bytecode_disasm(struct r600_bytecode *bc);
/* fills *alu from two 32-bit words -- presumably the encoded instruction */
void r600_bytecode_alu_read(struct r600_bytecode *bc,
		struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);

int cm_bytecode_add_cf_end(struct r600_bytecode *bc);

/*
 * Takes 'count' vertex elements and returns an opaque pointer.
 * NOTE(review): the ownership of the returned object and the value
 * returned on failure are not visible here -- confirm in r600_asm.c.
 */
void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
				      unsigned count,
				      const struct pipe_vertex_element *elements);

/* r700_asm.c */
/*
 * r700-prefixed build/read helpers.  r700_bytecode_cf_vtx_build()
 * writes into a caller-supplied uint32_t buffer and only reads the CF
 * description (const pointer).
 */
void r700_bytecode_cf_vtx_build(uint32_t *bytecode,
		const struct r600_bytecode_cf *cf);
int r700_bytecode_alu_build(struct r600_bytecode *bc,
		struct r600_bytecode_alu *alu, unsigned id);
void r700_bytecode_alu_read(struct r600_bytecode *bc,
		struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
int r700_bytecode_fetch_mem_build(struct r600_bytecode *bc,
		struct r600_bytecode_vtx *mem, unsigned id);

/*
 * NOTE(review): the declarations below sit under the r700_asm.c
 * heading but carry r600_/eg_ prefixes; confirm which source files
 * actually define them.
 */
void r600_bytecode_export_read(struct r600_bytecode *bc,
		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
void eg_bytecode_export_read(struct r600_bytecode *bc,
		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);

/* format, num_format, format_comp and endian are out-parameters */
void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
			   unsigned *num_format, unsigned *format_comp, unsigned *endian);

/*
 * Map an index in 0..3 to its partner within the same pair:
 * 0 <-> 1 and 2 <-> 3.  Any index outside 0..3 maps to 0.
 */
static inline int fp64_switch(int i)
{
	/* Out-of-range input falls back to 0. */
	if (i < 0 || i > 3)
		return 0;

	/* Toggling the lowest bit swaps 0/1 and 2/3. */
	return i ^ 1;
}
#endif