/*
* Copyright (C) 2019 Collabora, Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors (Collabora):
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
*
*/
#include <stdio.h>
#include "util/u_memory.h"
#include "pan_blend_shaders.h"
#include "pan_blending.h"
#include "pan_bo.h"
#include "panfrost-quirks.h"

/* A given Gallium blend state can be encoded to the hardware in numerous,
 * dramatically divergent ways due to the interactions of blending with
 * framebuffer formats. Conceptually, there are two modes:
 *
 * - Fixed-function blending (for suitable framebuffer formats, suitable blend
 *   state, and suitable blend constant)
 *
 * - Blend shaders (for everything else)
 *
 * A given Gallium blend configuration will compile to exactly one
 * fixed-function blend state, if it compiles to any, although the constant
 * will vary across runs as that is tracked outside of the Gallium CSO.
 *
 * However, that same blend configuration will compile to many different blend
 * shaders, depending on the framebuffer formats active. The rationale is that
 * blend shaders override not just fixed-function blending but also
 * fixed-function format conversion. As such, each blend shader must be
 * hardcoded to a particular framebuffer format to correctly pack/unpack it. As
 * a concrete example, to the hardware there is no difference (!) between RG16F
 * and RG16UI -- both are simply 4-byte-per-pixel chunks. Thus both formats
 * require a blend shader (even when blending is totally disabled!), responsible
 * for performing whatever format conversion is needed.
 *
 * All of this state is encapsulated in the panfrost_blend_state struct
 * (our subclass of pipe_blend_state).
 */

/* Given an initialized CSO and a particular framebuffer format, grab a
 * blend shader, generating and compiling it if it doesn't exist
 * (lazy-loading, in a way). When the cache hits, this routine should be
 * fast, suitable for calling every draw to avoid wacky dirty-tracking
 * paths. If the cache hits, boom, done. */
static struct panfrost_blend_shader *
panfrost_get_blend_shader(
        struct panfrost_context *ctx,
        struct panfrost_blend_state *blend,
        enum pipe_format fmt,
        unsigned rt)
{
        /* Prevent NULL collision issues.. */
        assert(fmt != 0);

        /* Check the cache. Key by the RT and format */
        struct hash_table_u64 *shaders = blend->rt[rt].shaders;
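        /* Pack the format and RT index into a single cache key; rt is at
         * most PIPE_MAX_COLOR_BUFS - 1 (i.e. 7), so three low bits are
         * enough to hold it under the shifted format */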
        unsigned key = (fmt << 3) | rt;

        struct panfrost_blend_shader *shader =
                _mesa_hash_table_u64_search(shaders, key);

        if (shader)
                return shader;

        /* Cache miss. Build one instead, cache it, and go */
        struct panfrost_blend_shader generated =
                panfrost_compile_blend_shader(ctx, &blend->base, fmt, rt);
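
        /* The compiler returns the shader struct by value; duplicate it on
         * the heap so the cached entry outlives this stack frame */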
        shader = mem_dup(&generated, sizeof(generated));
        _mesa_hash_table_u64_insert(shaders, key, shader);

        return shader;
}

/* Create a blend CSO. Essentially, try to compile a fixed-function
 * expression and initialize blend shaders */
static void *
panfrost_create_blend_state(struct pipe_context *pipe,
                            const struct pipe_blend_state *blend)
{
        struct panfrost_context *ctx = pan_context(pipe);
        struct panfrost_blend_state *so = rzalloc(ctx, struct panfrost_blend_state);
        so->base = *blend;

        /* TODO: The following features are not yet implemented */
        assert(!blend->alpha_to_coverage);
        assert(!blend->alpha_to_one);

        for (unsigned c = 0; c < PIPE_MAX_COLOR_BUFS; ++c) {
                struct panfrost_blend_rt *rt = &so->rt[c];

                /* There are two paths. First, we would like to try the
                 * fixed-function path if we can */

                /* Without independent blending, the first RT's settings replicate */

                if (!blend->logicop_enable) {
                        unsigned g =
                                blend->independent_blend_enable ? c : 0;

                        rt->has_fixed_function =
                                panfrost_make_fixed_blend_mode(
                                        &blend->rt[g],
                                        &rt->equation,
                                        &rt->constant_mask,
                                        blend->rt[g].colormask);
                }

                /* Regardless of whether that works, we also need to initialize
                 * the blend shaders */
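                /* (The framebuffer format is not known until draw time, so
                 * even a fixed-function-capable CSO may still need a blend
                 * shader for format conversion) */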
                rt->shaders = _mesa_hash_table_u64_create(so);
        }

        return so;
}

static void
panfrost_bind_blend_state(struct pipe_context *pipe,
                          void *cso)
{
        struct panfrost_context *ctx = pan_context(pipe);
        struct pipe_blend_state *blend = (struct pipe_blend_state *) cso;
        struct panfrost_blend_state *pblend = (struct panfrost_blend_state *) cso;
        ctx->blend = pblend;

        if (!blend)
                return;
}

static void
panfrost_delete_blend_shader(struct hash_entry *entry)
{
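        /* The cached shader struct and its code buffer are plain heap
         * allocations (see mem_dup in panfrost_get_blend_shader), so free()
         * them directly; the CSO itself is ralloc'd and freed separately */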
        struct panfrost_blend_shader *shader = (struct panfrost_blend_shader *) entry->data;
        free(shader->buffer);
        free(shader);
}

static void
panfrost_delete_blend_state(struct pipe_context *pipe,
                            void *cso)
{
        struct panfrost_blend_state *blend = (struct panfrost_blend_state *) cso;

        for (unsigned c = 0; c < PIPE_MAX_COLOR_BUFS; ++c) {
                struct panfrost_blend_rt *rt = &blend->rt[c];
                _mesa_hash_table_u64_clear(rt->shaders, panfrost_delete_blend_shader);
        }

        ralloc_free(blend);
}

static void
panfrost_set_blend_color(struct pipe_context *pipe,
                         const struct pipe_blend_color *blend_color)
{
        struct panfrost_context *ctx = pan_context(pipe);

        if (blend_color)
                ctx->blend_color = *blend_color;
}

/* Given a vec4 of constants, reduce it to just a single constant according to
 * the mask (if we can) */
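/* For example, a constant of (0.5, 0.5, 0.5, 1.0) reduces to 0.5 under a
 * mask of 0x7 (RGB only), but cannot be reduced under a mask of 0xf,
 * since 1.0 != 0.5 */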
static bool
panfrost_blend_constant(float *out, float *in, unsigned mask)
{
        /* If no components are used, it automatically works. Set a dummy
         * constant just to avoid reading uninitialized memory. */

        if (!mask) {
                *out = 0.0;
                return true;
        }

        /* Find some starter mask */
        unsigned first = ffs(mask) - 1;
        float cons = in[first];
        mask ^= (1 << first);

        /* Ensure the rest are equal */
        while (mask) {
                unsigned i = u_bit_scan(&mask);

                if (in[i] != cons) {
                        *out = 0.0;
                        return false;
                }
        }

        /* Otherwise, we're good to go */
        *out = cons;
        return true;
}

/* Create a final blend given the context */
struct panfrost_blend_final
panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti, struct panfrost_bo **bo, unsigned *shader_offset)
{
        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);

        /* Grab the format, falling back gracefully if there is no valid
         * color buffer bound (as happens for no-color-attachment FBOs,
         * for instance) */

        struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
        enum pipe_format fmt = PIPE_FORMAT_R8G8B8A8_UNORM;

        if ((fb->nr_cbufs > rti) && fb->cbufs[rti])
                fmt = fb->cbufs[rti]->format;

        /* Grab the blend state */
        struct panfrost_blend_state *blend = ctx->blend;
        assert(blend);

        struct panfrost_blend_rt *rt = &blend->rt[rti];
        struct panfrost_blend_final final;

        /* First, we'll try the fixed-function path */

        if (rt->has_fixed_function && panfrost_can_fixed_blend(fmt)) {
                if (panfrost_blend_constant(
                            &final.equation.constant,
                            ctx->blend_color.color,
                            rt->constant_mask)) {
                        /* There's an equation and a suitable constant, so we're good to go */
                        final.is_shader = false;
                        final.equation.equation = &rt->equation;
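
                        /* An rgb/alpha mode of 0x122 is the fixed-function
                         * "replace" equation; combined with a full colormask,
                         * that means no actual blending takes place */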
                        final.no_blending =
                                (rt->equation.rgb_mode == 0x122) &&
                                (rt->equation.alpha_mode == 0x122) &&
                                (rt->equation.color_mask == 0xf);

                        return final;
                }
        }

        /* Otherwise, we need to grab a shader */
        struct panfrost_blend_shader *shader = panfrost_get_blend_shader(ctx, blend, fmt, rti);

        final.is_shader = true;
        final.no_blending = false;
        final.shader.work_count = shader->work_count;
        final.shader.first_tag = shader->first_tag;

        /* Upload the shader, sharing a BO */
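        /* (All blend shaders for this batch are packed into a single 4 kB
         * executable BO, with *shader_offset tracking how much of it has
         * been used so far) */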
        if (!(*bo)) {
                *bo = panfrost_batch_create_bo(batch, 4096,
                                               PAN_BO_EXECUTE,
                                               PAN_BO_ACCESS_PRIVATE |
                                               PAN_BO_ACCESS_READ |
                                               PAN_BO_ACCESS_VERTEX_TILER |
                                               PAN_BO_ACCESS_FRAGMENT);
        }

        /* Size check */
        assert((*shader_offset + shader->size) < 4096);

        memcpy((*bo)->cpu + *shader_offset, shader->buffer, shader->size);
        final.shader.gpu = (*bo)->gpu + *shader_offset;

        if (shader->patch_index) {
                /* We have to specialize the blend shader to use constants, so
                 * patch in the current constants */
                float *patch = (float *) ((*bo)->cpu + *shader_offset + shader->patch_index);
                memcpy(patch, ctx->blend_color.color, sizeof(float) * 4);
        }

        *shader_offset += shader->size;

        return final;
}

void
panfrost_blend_context_init(struct pipe_context *pipe)
{
        pipe->create_blend_state = panfrost_create_blend_state;
        pipe->bind_blend_state = panfrost_bind_blend_state;
        pipe->delete_blend_state = panfrost_delete_blend_state;
        pipe->set_blend_color = panfrost_set_blend_color;
}