1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
|
/*
* Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Vadim Girlin
*/
#ifndef SB_SCHED_H_
#define SB_SCHED_H_
namespace r600_sb {
// Associates a node with an unsigned counter ("uc" = use count, see the
// glossary below).
typedef sb_map<node*, unsigned> uc_map;
// resource trackers for scheduler
// Abbreviations used in the tracker classes that follow:
// rp = read port
// uc = use count
// Set of unsigned values; passed by reference to
// rp_kcache_tracker::get_lines(). Presumably holds kcache line indices -
// confirm against the implementation.
typedef sb_set<unsigned> kc_lines;
// Scheduler resource tracker for kcache read ports (rp = read port,
// uc = use count). Holds up to four tracked entries.
class rp_kcache_tracker {
	// Tracked entries; num_sels() treats a zero entry as unused.
	unsigned rp[4];
	// Counters kept alongside rp[] (use counts).
	unsigned uc[4];
	// Fixed at construction time; kc_sel() special-cases the value 4.
	const unsigned sel_count;

	// Maps a sel_chan to the key used for tracking.
	// With sel_count == 4 the value is used as is; otherwise it is folded
	// with ((r - 1) >> 1) + 1, so values 1,2 -> 1, values 3,4 -> 2, etc.
	unsigned kc_sel(sel_chan r) {
		if (sel_count == 4)
			return (unsigned)r;
		return ((r - 1) >> 1) + 1;
	}

public:
	rp_kcache_tracker(shader &sh);

	// Reserve / release on behalf of a whole node.
	// The bool result presumably reports whether the reservation
	// succeeded - confirm against the implementation.
	bool try_reserve(node *n);
	void unreserve(node *n);

	// Reserve / release a single sel_chan.
	bool try_reserve(sel_chan r);
	void unreserve(sel_chan r);

	void reset();

	// Number of non-zero entries in rp[].
	unsigned num_sels() {
		unsigned used = 0;
		for (unsigned i = 0; i < 4; ++i) {
			if (rp[i])
				++used;
		}
		return used;
	}

	// Takes the output set by reference; the meaning of the returned
	// count is defined by the implementation (not visible here).
	unsigned get_lines(kc_lines &lines);
};
// Scheduler resource tracker for literal constants: four literal entries,
// each paired with a use count (uc).
class literal_tracker {
	// Tracked literal values.
	literal lt[4];
	// Use count per entry; count() treats a zero count as "entry unused".
	unsigned uc[4];

public:
	// Value-initializes both arrays.
	literal_tracker() : lt(), uc() {}

	// Reserve / release on behalf of an ALU node.
	// The bool result presumably reports whether the reservation
	// succeeded - confirm against the implementation.
	bool try_reserve(alu_node *n);
	void unreserve(alu_node *n);

	// Reserve / release a single literal value.
	bool try_reserve(literal l);
	void unreserve(literal l);

	void reset();

	// Number of entries whose use count is non-zero.
	unsigned count() {
		unsigned in_use = 0;
		for (unsigned i = 0; i < 4; ++i) {
			if (uc[i])
				++in_use;
		}
		return in_use;
	}

	void init_group_literals(alu_group_node *g);
};
// Scheduler resource tracker for GPR read ports (rp = read port,
// uc = use count). Both tables are indexed as [cycle][elem], with
// 3 cycles and 4 elements per cycle.
class rp_gpr_tracker {
	// rp[cycle][elem]
	unsigned rp[3][4];
	// uc[cycle][elem], laid out like rp
	unsigned uc[3][4];

public:
	// Value-initializes both tables.
	rp_gpr_tracker() : rp(), uc() {}

	// Reserve / release on behalf of an ALU node.
	// The bool result presumably reports whether the reservation
	// succeeded - confirm against the implementation.
	bool try_reserve(alu_node *n);
	void unreserve(alu_node *n);

	// Reserve / release one (cycle, sel, chan) combination.
	bool try_reserve(unsigned cycle, unsigned sel, unsigned chan);
	void unreserve(unsigned cycle, unsigned sel, unsigned chan);

	void reset();
	void dump();
};
// Tracks the state of one ALU instruction group while it is being built:
// slot occupancy plus the kcache, GPR read-port and literal trackers.
class alu_group_tracker {
	shader &sh;

	// Per-group resource trackers.
	rp_kcache_tracker kc;
	rp_gpr_tracker gpr;
	literal_tracker lt;

	// Instruction held by each slot (indexed by slot()).
	alu_node * slots[5];

	// Bitmask of slots; used_slots() inverts it, so a set bit here means
	// the slot is not in use.
	unsigned available_slots;
	// Number of low bits of the mask that used_slots() considers.
	unsigned max_slots;

	typedef std::map<value*, unsigned> value_index_map;

	value_index_map vmap;

	// Group-level flags (see update_flags()).
	bool has_mova;
	bool uses_ar;
	bool has_predset;
	bool has_kill;
	bool updates_exec_mask;
	bool consumes_lds_oqa;
	bool produces_lds_oqa;

	unsigned chan_count[4];

	// param index + 1 (0 means that group doesn't refer to Params)
	// we can't use more than one param index in a group
	unsigned interp_param;

	unsigned next_id;

	node_vec packed_ops;

	void assign_slot(unsigned slot, alu_node *n);

public:
	alu_group_tracker(shader &sh);

	// FIXME use fast bs correctness check (values for same chan <= 3) ??
	bool try_reserve(alu_node *n);
	bool try_reserve(alu_packed_node *p);

	void reinit();
	void reset(bool keep_packed = false);

	sel_chan get_value_id(value *v);
	void update_flags(alu_node *n);

	// Instruction stored in slot i; no bounds check is performed.
	alu_node* slot(unsigned i) {
		return slots[i];
	}

	// Bitmask of occupied slots, limited to the low max_slots bits.
	unsigned used_slots() {
		const unsigned slot_mask = (1 << max_slots) - 1;
		return ~available_slots & slot_mask;
	}

	// Number of occupied slots (population count of used_slots()).
	unsigned inst_count() {
		const unsigned occupied = used_slots();
		return __builtin_popcount(occupied);
	}

	// Number of literal entries currently in use.
	unsigned literal_count() {
		return lt.count();
	}

	// Literal count halved and rounded up, i.e. two literals per slot.
	unsigned literal_slot_count() {
		return (literal_count() + 1) / 2;
	}

	// Instruction count plus literal slot count.
	unsigned slot_count() {
		const unsigned insts = inst_count();
		return insts + literal_slot_count();
	}

	bool get_consumes_lds_oqa() {
		return consumes_lds_oqa;
	}

	bool get_produces_lds_oqa() {
		return produces_lds_oqa;
	}

	alu_group_node* emit();

	// Direct access to the group's kcache read-port tracker.
	rp_kcache_tracker& kcache() {
		return kc;
	}

	bool has_update_exec_mask() {
		return updates_exec_mask;
	}

	// Raw availability mask (not limited to max_slots bits).
	unsigned avail_slots() {
		return available_slots;
	}

	void discard_all_slots(container_node &removed_nodes);
	void discard_slots(unsigned slot_mask, container_node &removed_nodes);

	// Reports the has_mova flag.
	bool has_ar_load() {
		return has_mova;
	}
};
// Clause-level kcache tracker: holds up to four bc_kcache entries and the
// set of lines tracked for the clause.
class alu_kcache_tracker {
	// Kcache entries; copied verbatim into the clause by init_clause().
	bc_kcache kc[4];
	sb_set<unsigned> lines;
	// Limit on kcache entries: 4 for HW_CLASS_EVERGREEN and later
	// hardware classes, 2 otherwise.
	unsigned max_kcs;

public:

	alu_kcache_tracker(sb_hw_class hc)
		: kc(), lines(), max_kcs(hc >= HW_CLASS_EVERGREEN ? 4 : 2) {}

	void reset();

	// Takes the group tracker by reference. The parameter name "gt" had
	// been swallowed as an HTML entity ("&gt" -> ">"), leaving an invalid
	// declaration; it is restored here.
	// The bool result presumably reports whether the reservation
	// succeeded - confirm against the implementation.
	bool try_reserve(alu_group_tracker &gt);

	bool update_kc();

	// Copies all of kc[] (sizeof(kc) bytes) into bc.kc.
	// NOTE(review): assumes bc.kc is at least sizeof(kc) bytes - confirm
	// against the bc_cf definition.
	void init_clause(bc_cf &bc) {
		memcpy(bc.kc, kc, sizeof(kc));
	}
};
// Tracks the ALU clause under construction. Two group trackers are kept
// and "group" selects which one is the current group; the other one is
// the previous group.
class alu_clause_tracker {
	shader &sh;

	alu_kcache_tracker kt;
	// Reported by total_slots().
	unsigned slot_count;

	alu_group_tracker grp0;
	alu_group_tracker grp1;

	// Selector for grp()/prev_grp(): zero -> grp0 is current,
	// non-zero -> grp1 is current.
	unsigned group;

	cf_node *clause;

	bool push_exec_mask;

	unsigned outstanding_lds_oqa_reads;

public:
	container_node conflict_nodes;

	// current values of AR and PR registers that we have to preload
	// till the end of clause (in fact, beginning, because we're scheduling
	// bottom-up)
	value *current_ar;
	value *current_pr;

	// current values of CF_IDX registers that need preloading
	value *current_idx[2];

	alu_clause_tracker(shader &sh);

	void reset();

	// current group
	alu_group_tracker& grp() {
		if (group)
			return grp1;
		return grp0;
	}

	// previous group
	alu_group_tracker& prev_grp() {
		if (group)
			return grp0;
		return grp1;
	}

	void emit_group();
	void emit_clause(container_node *c);
	bool check_clause_limits();
	void new_group();
	bool is_empty();

	alu_node* create_ar_load(value *v, chan_select ar_channel);

	void discard_current_group();

	// Current value of slot_count.
	unsigned total_slots() {
		return slot_count;
	}
};
class post_scheduler : public pass {
container_node ready, ready_copies; // alu only
container_node pending, bb_pending;
bb_node *cur_bb;
val_set live; // values live at the end of the alu clause
uc_map ucm;
alu_clause_tracker alu;
typedef std::map<sel_chan, value*> rv_map;
rv_map regmap, prev_regmap;
val_set cleared_interf;
void emit_index_registers();
public:
post_scheduler(shader &sh) : pass(sh),
ready(), ready_copies(), pending(), cur_bb(),
live(), ucm(), alu(sh), regmap(), cleared_interf() {}
virtual int run();
bool run_on(container_node *n);
bool schedule_bb(bb_node *bb);
void load_index_register(value *v, unsigned idx);
void process_fetch(container_node *c);
bool process_alu(container_node *c);
bool schedule_alu(container_node *c);
bool prepare_alu_group();
void release_op(node *n);
void release_src_values(node *n);
void release_src_vec(vvec &vv, bool src);
void release_src_val(value *v);
void init_uc_val(container_node *c, value *v);
void init_uc_vec(container_node *c, vvec &vv, bool src);
unsigned init_ucm(container_node *c, node *n);
void init_regmap();
bool check_interferences();
unsigned try_add_instruction(node *n);
bool check_copy(node *n);
void dump_group(alu_group_tracker &rt);
bool unmap_dst(alu_node *n);
bool unmap_dst_val(value *d);
bool map_src(alu_node *n);
bool map_src_vec(vvec &vv, bool src);
bool map_src_val(value *v);
bool recolor_local(value *v);
void update_local_interferences();
void update_live_src_vec(vvec &vv, val_set *born, bool src);
void update_live_dst_vec(vvec &vv);
void update_live(node *n, val_set *born);
void process_group();
void set_color_local_val(value *v, sel_chan color);
void set_color_local(value *v, sel_chan color);
void add_interferences(value *v, sb_bitset &rb, val_set &vs);
void init_globals(val_set &s, bool prealloc);
void recolor_locals();
void dump_regmap();
void emit_load_ar();
void emit_clause();
void process_ready_copies();
};
} // namespace r600_sb
#endif /* SB_SCHED_H_ */
|