1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
|
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "genX_boilerplate.h"
#include "brw_defines.h"
#include "brw_state.h"
/**
 * Translate the PIPE_CONTROL_WRITE_* bits in \p flags into the value that
 * is programmed into the PIPE_CONTROL "Post Sync Operation" field.
 *
 * If several write bits are set, the first match wins in this order:
 * immediate data, PS depth count, timestamp.  Returns 0 when no write bit
 * is set.
 */
static unsigned
flags_to_post_sync_op(uint32_t flags)
{
   unsigned op = 0;

   if (flags & PIPE_CONTROL_WRITE_IMMEDIATE) {
      op = WriteImmediateData;
   } else if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) {
      op = WritePSDepthCount;
   } else if (flags & PIPE_CONTROL_WRITE_TIMESTAMP) {
      op = WriteTimestamp;
   }

   return op;
}
/**
 * Extract the Post Sync / LRI Post Sync bits from \p flags.
 *
 * The result contains only the three "Post Sync Op" write bits and the
 * "LRI Post Sync Operation" bit, so it is non-zero exactly when the caller
 * asked for one of those operations.
 */
static enum pipe_control_flags
get_post_sync_flags(enum pipe_control_flags flags)
{
   const uint32_t post_sync_mask = PIPE_CONTROL_WRITE_IMMEDIATE |
                                   PIPE_CONTROL_WRITE_DEPTH_COUNT |
                                   PIPE_CONTROL_WRITE_TIMESTAMP |
                                   PIPE_CONTROL_LRI_POST_SYNC_OP;
   const uint32_t selected = flags & post_sync_mask;

   /* The hardware accepts a single "Post Sync Op", and that field cannot be
    * combined with "LRI Post Sync Operation".  Anything with two or more of
    * these bits set is therefore an illegal request.
    */
   assert(util_bitcount(selected) <= 1);

   return selected;
}
/* True on Gen7+ when brw->last_pipeline is BRW_COMPUTE_PIPELINE.
 * The argument is parenthesized so any pointer expression may be passed.
 */
#define IS_COMPUTE_PIPELINE(brw) \
   (GEN_GEN >= 7 && (brw)->last_pipeline == BRW_COMPUTE_PIPELINE)

/* Closed interval - GEN_GEN \in [x, y].
 * Arguments are parenthesized so that expressions containing operators of
 * lower precedence than >= / <= (e.g. `a | b`) are compared as a whole.
 */
#define IS_GEN_BETWEEN(x, y) (GEN_GEN >= (x) && GEN_GEN <= (y))

/* Closed interval - GEN_VERSIONx10 \in [x, y] (same argument rules). */
#define IS_GENx10_BETWEEN(x, y) \
   (GEN_VERSIONx10 >= (x) && GEN_VERSIONx10 <= (y))
/**
* Emit a series of PIPE_CONTROL commands, taking into account any
* workarounds necessary to actually accomplish the caller's request.
*
* Unless otherwise noted, spec quotations in this function come from:
*
* Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming
* Restrictions for PIPE_CONTROL.
*
* You should not use this function directly. Use the helpers in
* brw_pipe_control.c instead, which may split the pipe control further.
*
* \param brw     Context whose batch receives the PIPE_CONTROL packet(s).
* \param flags   Bitmask of PIPE_CONTROL_* bits requested by the caller. The
*                workarounds below may OR additional bits into it before
*                the final packet is emitted.
* \param bo      Buffer used, together with \p offset, to form the packet's
*                Address field. May be NULL; one workaround substitutes
*                brw->workaround_bo in that case.
* \param offset  Offset passed along with \p bo to build the Address field.
* \param imm     Value programmed into the packet's ImmediateData field.
*
* The function may call itself to emit extra PIPE_CONTROLs ahead of the
* requested one, and on Gen7 (non-Haswell) it updates
* brw->pipe_controls_since_last_cs_stall.
*/
void
genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
struct brw_bo *bo, uint32_t offset, uint64_t imm)
{
UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
/* post_sync_flags holds the Post Sync Op / LRI Post Sync bits of the
* request; non_lri_post_sync_flags is the same set without the LRI bit,
* i.e. only the three "write" post-sync operations.
*/
enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags);
enum pipe_control_flags non_lri_post_sync_flags =
post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP;
/* Recursive PIPE_CONTROL workarounds --------------------------------
* (http://knowyourmeme.com/memes/xzibit-yo-dawg)
*
* We do these first because we want to look at the original operation,
* rather than any workarounds we set.
*/
if (GEN_GEN == 6 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
/* Hardware workaround: SNB B-Spec says:
*
* "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
* Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
* required."
*/
brw_emit_post_sync_nonzero_flush(brw);
}
if (GEN_GEN == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
/* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
* lists several workarounds:
*
* "Project: SKL, KBL, BXT
*
* If the VF Cache Invalidation Enable is set to a 1 in a
* PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
* sets to 0, with the VF Cache Invalidation Enable set to 0
* needs to be sent prior to the PIPE_CONTROL with VF Cache
* Invalidation Enable set to a 1."
*/
genX(emit_raw_pipe_control)(brw, 0, NULL, 0, 0);
}
if (GEN_GEN == 9 && IS_COMPUTE_PIPELINE(brw) && post_sync_flags) {
/* Project: SKL / Argument: LRI Post Sync Operation [23]
*
* "PIPECONTROL command with “Command Streamer Stall Enable” must be
* programmed prior to programming a PIPECONTROL command with "LRI
* Post Sync Operation" in GPGPU mode of operation (i.e when
* PIPELINE_SELECT command is set to GPGPU mode of operation)."
*
* The same text exists a few rows below for Post Sync Op.
*/
genX(emit_raw_pipe_control)(brw, PIPE_CONTROL_CS_STALL, NULL, 0, 0);
}
/* "Flush Types" workarounds ---------------------------------------------
* We do these now because they may add post-sync operations or CS stalls.
*/
if (IS_GEN_BETWEEN(8, 10) && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
/* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate
*
* "'Post Sync Operation' must be enabled to 'Write Immediate Data' or
* 'Write PS Depth Count' or 'Write Timestamp'."
*/
/* Only when the caller gave no destination buffer do we add an
* immediate write aimed at the workaround BO. All three flag variables
* are updated so the later checks see the added post-sync operation.
* NOTE(review): presumably a caller-supplied bo implies a post-sync
* write is already requested - confirm against callers.
*/
if (!bo) {
flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
bo = brw->workaround_bo;
offset = brw->workaround_bo_offset;
}
}
if (GEN_VERSIONx10 < 75 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
/* Project: PRE-HSW / Argument: Depth Stall
*
* "The following bits must be clear:
* - Render Target Cache Flush Enable ([12] of DW1)
* - Depth Cache Flush Enable ([0] of DW1)"
*/
assert(!(flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
}
if (GEN_GEN >= 6 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
/* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):
*
* "This bit must be DISABLED for operations other than writing
* PS_DEPTH_COUNT."
*
* This seems like nonsense. An Ivybridge workaround requires us to
* emit a PIPE_CONTROL with a depth stall and write immediate post-sync
* operation. Gen8+ requires us to emit depth stalls and depth cache
* flushes together. So, it's hard to imagine this means anything other
* than "we originally intended this to be used for PS_DEPTH_COUNT".
*
* We ignore the supposed restriction and do nothing.
*/
}
if (GEN_VERSIONx10 < 75 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) {
/* Project: PRE-HSW / Argument: Depth Cache Flush
*
* "Depth Stall must be clear ([13] of DW1)."
*/
assert(!(flags & PIPE_CONTROL_DEPTH_STALL));
}
if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
/* From the PIPE_CONTROL instruction table, bit 12 and bit 1:
*
* "This bit must be DISABLED for End-of-pipe (Read) fences,
* PS_DEPTH_COUNT or TIMESTAMP queries."
*
* TODO: Implement end-of-pipe checking.
*/
assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT |
PIPE_CONTROL_WRITE_TIMESTAMP)));
}
if (GEN_GEN < 11 && (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
/* From the PIPE_CONTROL instruction table, bit 1:
*
* "This bit is ignored if Depth Stall Enable is set.
* Further, the render cache is not flushed even if Write Cache
* Flush Enable bit is set."
*
* We assert that the caller doesn't do this combination, to try and
* prevent mistakes. It shouldn't hurt the GPU, though.
*
* We skip this check on Gen11+ as the "Stall and Pixel Scoreboard"
* and "Render Target Flush" combo is explicitly required for BTI
* update workarounds.
*/
assert(!(flags & (PIPE_CONTROL_DEPTH_STALL |
PIPE_CONTROL_RENDER_TARGET_FLUSH)));
}
/* PIPE_CONTROL page workarounds ------------------------------------- */
if (IS_GEN_BETWEEN(7, 8) && (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) {
/* From the PIPE_CONTROL page itself:
*
* "IVB, HSW, BDW
* Restriction: Pipe_control with CS-stall bit set must be issued
* before a pipe-control command that has the State Cache
* Invalidate bit set."
*
* Note that the CS stall is added to this same PIPE_CONTROL rather
* than being emitted as a separate, earlier packet.
*/
flags |= PIPE_CONTROL_CS_STALL;
}
if (GEN_IS_HASWELL) {
/* From the PIPE_CONTROL page itself:
*
* "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation:
* Prior to programming a PIPECONTROL command with any of the RO
* cache invalidation bit set, program a PIPECONTROL flush command
* with “CS stall” bit and “HDC Flush” bit set."
*
* TODO: Actually implement this. What's an HDC Flush?
*/
}
if (flags & PIPE_CONTROL_FLUSH_LLC) {
/* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):
*
* "Project: ALL
* SW must always program Post-Sync Operation to "Write Immediate
* Data" when Flush LLC is set."
*
* For now, we just require the caller to do it.
*/
assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE);
}
/* "Post-Sync Operation" workarounds -------------------------------- */
/* Project: All / Argument: Global Snapshot Count Reset [19]
*
* "This bit must not be exercised on any product.
* Requires stall bit ([20] of DW1) set."
*
* We don't use this, so we just assert that it isn't used. The
* PIPE_CONTROL instruction page indicates that they intended this
* as a debug feature and don't think it is useful in production,
* but it may actually be usable, should we ever want to.
*/
assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0);
if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR |
PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) {
/* Project: All / Arguments:
*
* - Generic Media State Clear [16]
* - Indirect State Pointers Disable [16]
*
* "Requires stall bit ([20] of DW1) set."
*
* Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media
* State Clear) says:
*
* "PIPECONTROL command with “Command Streamer Stall Enable” must be
* programmed prior to programming a PIPECONTROL command with "Media
* State Clear" set in GPGPU mode of operation"
*
* This is a subset of the earlier rule, so there's nothing to do.
*/
flags |= PIPE_CONTROL_CS_STALL;
}
if (flags & PIPE_CONTROL_STORE_DATA_INDEX) {
/* Project: All / Argument: Store Data Index
*
* "Post-Sync Operation ([15:14] of DW1) must be set to something other
* than '0'."
*
* For now, we just assert that the caller does this. We might want to
* automatically add a write to the workaround BO...
*/
assert(non_lri_post_sync_flags != 0);
}
if (flags & PIPE_CONTROL_SYNC_GFDT) {
/* Project: All / Argument: Sync GFDT
*
* "Post-Sync Operation ([15:14] of DW1) must be set to something other
* than '0' or 0x2520[13] must be set."
*
* For now, we just assert that the caller does this.
*/
assert(non_lri_post_sync_flags != 0);
}
if (IS_GENx10_BETWEEN(60, 75) && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
/* Project: SNB, IVB, HSW / Argument: TLB inv
*
* "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1)
* must be set to something other than '0'."
*
* For now, we just assert that the caller does this.
*/
assert(non_lri_post_sync_flags != 0);
}
if (GEN_GEN >= 7 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
/* Project: IVB+ / Argument: TLB inv
*
* "Requires stall bit ([20] of DW1) set."
*
* Also, from the PIPE_CONTROL instruction table:
*
* "Project: SKL+
* Post Sync Operation or CS stall must be set to ensure a TLB
* invalidation occurs. Otherwise no cycle will occur to the TLB
* cache to invalidate."
*
* The second rule is not a subset of the first, but unconditionally
* setting the CS stall bit satisfies both, so nothing more is needed.
*/
flags |= PIPE_CONTROL_CS_STALL;
}
if (GEN_GEN == 9 && devinfo->gt == 4) {
/* TODO: The big Skylake GT4 post sync op workaround */
}
/* "GPGPU specific workarounds" (both post-sync and flush) ------------ */
if (IS_COMPUTE_PIPELINE(brw)) {
if (GEN_GEN >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) {
/* Project: SKL+ / Argument: Tex Invalidate
* "Requires stall bit ([20] of DW) set for all GPGPU Workloads."
*/
flags |= PIPE_CONTROL_CS_STALL;
}
if (GEN_GEN == 8 && (post_sync_flags ||
(flags & (PIPE_CONTROL_NOTIFY_ENABLE |
PIPE_CONTROL_DEPTH_STALL |
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DATA_CACHE_FLUSH)))) {
/* Project: BDW / Arguments:
*
* - LRI Post Sync Operation [23]
* - Post Sync Op [15:14]
* - Notify En [8]
* - Depth Stall [13]
* - Render Target Cache Flush [12]
* - Depth Cache Flush [0]
* - DC Flush Enable [5]
*
* "Requires stall bit ([20] of DW) set for all GPGPU and Media
* Workloads."
*
* (The docs have separate table rows for each bit, with essentially
* the same workaround text. We've combined them here.)
*/
flags |= PIPE_CONTROL_CS_STALL;
/* Also, from the PIPE_CONTROL instruction table, bit 20:
*
* "Project: BDW
* This bit must be always set when PIPE_CONTROL command is
* programmed by GPGPU and MEDIA workloads, except for the cases
* when only Read Only Cache Invalidation bits are set (State
* Cache Invalidation Enable, Instruction cache Invalidation
* Enable, Texture Cache Invalidation Enable, Constant Cache
* Invalidation Enable). This is to WA FFDOP CG issue, this WA
* need not implemented when FF_DOP_CG is disable via "Fixed
* Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."
*
* It sounds like we could avoid CS stalls in some cases, but we
* don't currently bother. This list isn't exactly the list above,
* either...
*/
}
}
/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
*
* "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
* only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
*
* Note that the kernel does CS stalls between batches, so we only need
* to count them within a batch. We currently naively count every 4, and
* don't skip the ones with only read-cache-invalidate bits set. This
* may or may not be a problem...
*
* The counter is pre-incremented for every PIPE_CONTROL, including one
* that already carries a CS stall, so it reads 1 (not 0) right after such
* a packet. The forced stall therefore lands on the third packet after a
* caller-requested stall, and on the fourth after a forced one. This only
* makes stalls slightly more frequent than the rule requires.
*/
if (GEN_GEN == 7 && !GEN_IS_HASWELL) {
if (flags & PIPE_CONTROL_CS_STALL) {
/* If we're doing a CS stall, reset the counter and carry on. */
brw->pipe_controls_since_last_cs_stall = 0;
}
/* If this is the fourth pipe control without a CS stall, do one now. */
if (++brw->pipe_controls_since_last_cs_stall == 4) {
brw->pipe_controls_since_last_cs_stall = 0;
flags |= PIPE_CONTROL_CS_STALL;
}
}
/* "Stall" workarounds ----------------------------------------------
* These have to come after the earlier ones because we may have added
* some additional CS stalls above.
*/
if (GEN_GEN < 9 && (flags & PIPE_CONTROL_CS_STALL)) {
/* Project: PRE-SKL, VLV, CHV
*
* "[All Stepping][All SKUs]:
*
* One of the following must also be set:
*
* - Render Target Cache Flush Enable ([12] of DW1)
* - Depth Cache Flush Enable ([0] of DW1)
* - Stall at Pixel Scoreboard ([1] of DW1)
* - Depth Stall ([13] of DW1)
* - Post-Sync Operation ([13] of DW1)
* - DC Flush Enable ([5] of DW1)"
*
* If we don't already have one of those bits set, we choose to add
* "Stall at Pixel Scoreboard". Some of the other bits require a
* CS stall as a workaround (see above), which would send us into
* an infinite recursion of PIPE_CONTROLs. "Stall at Pixel Scoreboard"
* appears to be safe, so we choose that.
*/
const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_WRITE_IMMEDIATE |
PIPE_CONTROL_WRITE_DEPTH_COUNT |
PIPE_CONTROL_WRITE_TIMESTAMP |
PIPE_CONTROL_STALL_AT_SCOREBOARD |
PIPE_CONTROL_DEPTH_STALL |
PIPE_CONTROL_DATA_CACHE_FLUSH;
if (!(flags & wa_bits))
flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
}
/* Emit --------------------------------------------------------------- */
/* Fill in the packet from the final flags. Fields are grouped by the
* hardware generation that has them, hence the preprocessor conditionals.
*
* NOTE(review): FlushLLC, LRIPostSyncOperation and StoreDataIndex are
* programmed to fixed "off" values here even if the corresponding
* PIPE_CONTROL_FLUSH_LLC / PIPE_CONTROL_LRI_POST_SYNC_OP /
* PIPE_CONTROL_STORE_DATA_INDEX bits are set in flags - confirm that
* ignoring those requests is intentional.
*/
brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) {
#if GEN_GEN >= 9
pc.FlushLLC = 0;
#endif
#if GEN_GEN >= 7
pc.LRIPostSyncOperation = NoLRIOperation;
pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
#endif
#if GEN_GEN >= 6
pc.StoreDataIndex = 0;
pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
pc.GlobalSnapshotCountReset =
flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;
pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;
pc.RenderTargetCacheFlushEnable =
flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
pc.StateCacheInvalidationEnable =
flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
pc.ConstantCacheInvalidationEnable =
flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
#else
/* Before Gen6 the render target flush request maps to Write Cache Flush. */
pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
#endif
pc.PostSyncOperation = flags_to_post_sync_op(flags);
pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
pc.InstructionCacheInvalidateEnable =
flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
#if GEN_GEN >= 5 || GEN_IS_G4X
pc.IndirectStatePointersDisable =
flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
#endif
#if GEN_GEN >= 6
pc.TextureCacheInvalidationEnable =
flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
#elif GEN_GEN == 5 || GEN_IS_G4X
/* On Gen5 / G4X the same flag drives the Texture Cache Flush field. */
pc.TextureCacheFlushEnable =
flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
#endif
pc.Address = ggtt_bo(bo, offset);
/* Before Gen7, select the GGTT destination address type whenever a
* buffer is supplied.
*/
if (GEN_GEN < 7 && bo)
pc.DestinationAddressType = DAT_GGTT;
pc.ImmediateData = imm;
}
}
|