src/mesa/drivers/dri/i965/intel_pixel_read.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300

/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "main/enums.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/fbobject.h"
#include "main/image.h"
#include "main/bufferobj.h"
#include "main/readpix.h"
#include "main/state.h"
#include "main/glformats.h"
#include "program/prog_instruction.h"
#include "drivers/common/meta.h"

#include "brw_context.h"
#include "brw_blorp.h"
#include "intel_screen.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_buffer_objects.h"
#include "intel_tiled_memcpy.h"

#define FILE_DEBUG_FLAG DEBUG_PIXEL

/**
 * \brief A fast path for glReadPixels
 *
 * This fast path is taken when the source format is BGRA, RGBA,
 * A or L and when the texture memory is X- or Y-tiled.  It downloads
 * the source data by directly mapping the memory without a GTT fence.
 * This then needs to be de-tiled on the CPU before presenting the data to
 * the user in the linear fasion.
 *
 * This is a performance win over the conventional texture download path.
 * In the conventional texture download path, the texture is either mapped
 * through the GTT or copied to a linear buffer with the blitter before
 * handing off to a software path.  This allows us to avoid round-tripping
 * through the GPU (in the case where we would be blitting) and do only a
 * single copy operation.
 */
static bool
intel_readpixels_tiled_memcpy(struct gl_context * ctx,
                              GLint xoffset, GLint yoffset,
                              GLsizei width, GLsizei height,
                              GLenum format, GLenum type,
                              GLvoid * pixels,
                              const struct gl_pixelstore_attrib *pack)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* This path supports reading from color buffers only */
   if (rb == NULL)
      return false;

   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   int dst_pitch;

   /* The miptree's buffer. */
   struct brw_bo *bo;

   uint32_t cpp;
   mem_copy_fn mem_copy = NULL;

   /* This fastpath is restricted to specific renderbuffer types:
    * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
    * more types.
    */
   if (!devinfo->has_llc ||
       !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
       pixels == NULL ||
       _mesa_is_bufferobj(pack->BufferObj) ||
       pack->Alignment > 4 ||
       pack->SkipPixels > 0 ||
       pack->SkipRows > 0 ||
       (pack->RowLength != 0 && pack->RowLength != width) ||
       pack->SwapBytes ||
       pack->LsbFirst ||
       pack->Invert)
      return false;

   /* Only a simple blit, no scale, bias or other mapping. */
   if (ctx->_ImageTransferState)
      return false;

   /* It is possible that the renderbuffer (or underlying texture) is
    * multisampled.  Since ReadPixels from a multisampled buffer requires a
    * multisample resolve, we can't handle this here
    */
   if (rb->NumSamples > 1)
      return false;

   /* We can't handle copying from RGBX or BGRX because the tiled_memcpy
    * function doesn't set the last channel to 1. Note this checks BaseFormat
    * rather than TexFormat in case the RGBX format is being simulated with an
    * RGBA format.
    */
   if (rb->_BaseFormat == GL_RGB)
      return false;

   if (!intel_get_memcpy(rb->Format, format, type, &mem_copy, &cpp))
      return false;

   if (!irb->mt ||
       (irb->mt->surf.tiling != ISL_TILING_X &&
        irb->mt->surf.tiling != ISL_TILING_Y0)) {
      /* The algorithm is written only for X- or Y-tiled memory. */
      return false;
   }

   /* tiled_to_linear() assumes that if the object is swizzled, it is using
    * I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y.  This is only
    * true on gen5 and above.
    *
    * The killer on top is that some gen4 have an L-shaped swizzle mode, where
    * parts of the memory aren't swizzled at all. Userspace just can't handle
    * that.
    */
   if (devinfo->gen < 5 && brw->has_swizzling)
      return false;

   /* Since we are going to read raw data to the miptree, we need to resolve
    * any pending fast color clears before we start.
    */
   intel_miptree_access_raw(brw, irb->mt, irb->mt_level, irb->mt_layer, false);

   bo = irb->mt->bo;

   if (brw_batch_references(&brw->batch, bo)) {
      perf_debug("Flushing before mapping a referenced bo.\n");
      intel_batchbuffer_flush(brw);
   }

   void *map = brw_bo_map(brw, bo, MAP_READ | MAP_RAW);
   if (map == NULL) {
      DBG("%s: failed to map bo\n", __func__);
      return false;
   }

   unsigned slice_offset_x, slice_offset_y;
   intel_miptree_get_image_offset(irb->mt, irb->mt_level, irb->mt_layer,
                                  &slice_offset_x, &slice_offset_y);
   xoffset += slice_offset_x;
   yoffset += slice_offset_y;

   dst_pitch = _mesa_image_row_stride(pack, width, format, type);

   /* For a window-system renderbuffer, the buffer is actually flipped
    * vertically, so we need to handle that.  Since the detiling function
    * can only really work in the forwards direction, we have to be a
    * little creative.  First, we compute the Y-offset of the first row of
    * the renderbuffer (in renderbuffer coordinates).  We then match that
    * with the last row of the client's data.  Finally, we give
    * tiled_to_linear a negative pitch so that it walks through the
    * client's data backwards as it walks through the renderbufer forwards.
    */
   if (ctx->ReadBuffer->FlipY) {
      yoffset = rb->Height - yoffset - height;
      pixels += (ptrdiff_t) (height - 1) * dst_pitch;
      dst_pitch = -dst_pitch;
   }

   /* We postponed printing this message until having committed to executing
    * the function.
    */
   DBG("%s: x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x "
       "mesa_format=0x%x tiling=%d "
       "pack=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d)\n",
       __func__, xoffset, yoffset, width, height,
       format, type, rb->Format, irb->mt->surf.tiling,
       pack->Alignment, pack->RowLength, pack->SkipPixels,
       pack->SkipRows);

   tiled_to_linear(
      xoffset * cpp, (xoffset + width) * cpp,
      yoffset, yoffset + height,
      pixels,
      map + irb->mt->offset,
      dst_pitch, irb->mt->surf.row_pitch_B,
      brw->has_swizzling,
      irb->mt->surf.tiling,
      mem_copy
   );

   brw_bo_unmap(bo);
   return true;
}

static bool
intel_readpixels_blorp(struct gl_context *ctx,
                       unsigned x, unsigned y,
                       unsigned w, unsigned h,
                       GLenum format, GLenum type, const void *pixels,
                       const struct gl_pixelstore_attrib *packing)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
   if (!rb)
      return false;

   struct intel_renderbuffer *irb = intel_renderbuffer(rb);

   /* _mesa_get_readpixels_transfer_ops() includes the cases of read
    * color clamping along with the ctx->_ImageTransferState.
    */
   if (_mesa_get_readpixels_transfer_ops(ctx, rb->Format, format,
                                         type, GL_FALSE))
      return false;

   GLenum dst_base_format = _mesa_unpack_format_to_base_format(format);
   if (_mesa_need_rgb_to_luminance_conversion(rb->_BaseFormat,
                                              dst_base_format))
      return false;

   unsigned swizzle;
   if (irb->Base.Base._BaseFormat == GL_RGB) {
      swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
   } else {
      swizzle = SWIZZLE_XYZW;
   }

   return brw_blorp_download_miptree(brw, irb->mt, rb->Format, swizzle,
                                     irb->mt_level, x, y, irb->mt_layer,
                                     w, h, 1, GL_TEXTURE_2D, format, type,
                                     ctx->ReadBuffer->FlipY, pixels, packing);
}

void
intelReadPixels(struct gl_context * ctx,
                GLint x, GLint y, GLsizei width, GLsizei height,
                GLenum format, GLenum type,
                const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
{
   bool ok;

   struct brw_context *brw = brw_context(ctx);
   bool dirty;

   DBG("%s\n", __func__);

   /* Reading pixels wont dirty the front buffer, so reset the dirty
    * flag after calling intel_prepare_render().
    */
   dirty = brw->front_buffer_dirty;
   intel_prepare_render(brw);
   brw->front_buffer_dirty = dirty;

   if (_mesa_is_bufferobj(pack->BufferObj)) {
      if (intel_readpixels_blorp(ctx, x, y, width, height,
                                 format, type, pixels, pack))
         return;

      perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
   }

   ok = intel_readpixels_tiled_memcpy(ctx, x, y, width, height,
                                      format, type, pixels, pack);
   if(ok)
      return;

   /* Update Mesa state before calling _mesa_readpixels().
    * XXX this may not be needed since ReadPixels no longer uses the
    * span code.
    */

   if (ctx->NewState)
      _mesa_update_state(ctx);

   _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);

   /* There's an intel_prepare_render() call in intelSpanRenderStart(). */
   brw->front_buffer_dirty = dirty;
}