diff options
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_depth.c | 19 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_depth.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast.c | 8 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_scene.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_scene.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_state_fs.c | 121 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_state_fs.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_texture.c | 24 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_texture.h | 21 |
9 files changed, 158 insertions, 44 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index df6a6c41bbf..a8bd15f8751 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -525,6 +525,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm, * * \param type the data type of the fragment depth/stencil values * \param format_desc description of the depth/stencil surface + * \param is_1d whether this resource has only one dimension * \param loop_counter the current loop iteration * \param depth_ptr pointer to the depth/stencil values of this 4x4 block * \param depth_stride stride of the depth/stencil buffer @@ -535,6 +536,7 @@ void lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, + boolean is_1d, LLVMValueRef depth_ptr, LLVMValueRef depth_stride, LLVMValueRef *z_fb, @@ -592,9 +594,14 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, ""); - zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); - zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); - zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, ""); + if (is_1d) { + zs_dst2 = lp_build_undef(gallivm, zs_load_type); + } + else { + zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); + zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, ""); + } *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2, LLVMConstVector(shuffles, zs_type.length), ""); @@ -648,6 +655,7 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, * * \param type the data type of the fragment depth/stencil values * \param format_desc description of the depth/stencil surface + * \param is_1d whether this resource has only one dimension * \param mask the alive/dead pixel mask for the quad (vector) * \param z_fb z values read from fb (with padding) * \param s_fb s values read from fb (with padding) @@ -661,6 +669,7 @@ void lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, + boolean is_1d, struct lp_build_mask_context *mask, LLVMValueRef z_fb, LLVMValueRef s_fb, @@ -791,7 +800,9 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, } LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1); - LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2); + if (!is_1d) { + LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2); + } } /** diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index 2534dc309ce..d169c896711 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -74,6 +74,7 @@ void lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, + boolean is_1d, LLVMValueRef depth_ptr, LLVMValueRef depth_stride, LLVMValueRef *z_fb, @@ -84,6 +85,7 @@ void lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, + boolean is_1d, struct lp_build_mask_context *mask, LLVMValueRef z_fb, LLVMValueRef s_fb, diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index be5a286e3da..981dd712126 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -95,10 +95,10 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task, task->bin = bin; task->x = x * TILE_SIZE; task->y = y * TILE_SIZE; - task->width = TILE_SIZE + x * TILE_SIZE > task->scene->width_aligned ? - task->scene->width_aligned - x * TILE_SIZE : TILE_SIZE; - task->height = TILE_SIZE + y * TILE_SIZE > task->scene->height_aligned ? - task->scene->height_aligned - y * TILE_SIZE : TILE_SIZE; + task->width = TILE_SIZE + x * TILE_SIZE > task->scene->fb.width ? + task->scene->fb.width - x * TILE_SIZE : TILE_SIZE; + task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ? + task->scene->fb.height - y * TILE_SIZE : TILE_SIZE; /* reset pointers to color and depth tile(s) */ memset(task->color_tiles, 0, sizeof(task->color_tiles)); diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 2dfc7ff9ce7..771ad085a12 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -505,8 +505,6 @@ void lp_scene_begin_binning( struct lp_scene *scene, scene->tiles_x = align(fb->width, TILE_SIZE) / TILE_SIZE; scene->tiles_y = align(fb->height, TILE_SIZE) / TILE_SIZE; - scene->width_aligned = align(fb->width, LP_RASTER_BLOCK_SIZE); - scene->height_aligned = align(fb->height, LP_RASTER_BLOCK_SIZE); assert(scene->tiles_x <= TILES_X); assert(scene->tiles_y <= TILES_Y); diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index bc6c448bc7f..fa5bbcaf013 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -144,10 +144,6 @@ struct lp_scene { /** list of resources referenced by the scene commands */ struct resource_ref *resources; - /** aligned scene width, height */ - unsigned width_aligned; - unsigned height_aligned; - /** Total memory used by the scene (in bytes). This sums all the * data blocks and counts all bins, state, resource references and * other random allocations within the scene. diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index a7bd836918e..260d93ce98e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -353,7 +353,7 @@ generate_fs_loop(struct gallivm_state *gallivm, if (depth_mode & EARLY_DEPTH_TEST) { lp_build_depth_stencil_load_swizzled(gallivm, type, - zs_format_desc, + zs_format_desc, key->resource_1d, depth_ptr, depth_stride, &z_fb, &s_fb, loop_state.counter); lp_build_depth_stencil_test(gallivm, @@ -369,7 +369,8 @@ generate_fs_loop(struct gallivm_state *gallivm, !simple_shader); if (depth_mode & EARLY_DEPTH_WRITE) { - lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc, + lp_build_depth_stencil_write_swizzled(gallivm, type, + zs_format_desc, key->resource_1d, NULL, NULL, NULL, loop_state.counter, depth_ptr, depth_stride, z_value, s_value); @@ -424,7 +425,7 @@ generate_fs_loop(struct gallivm_state *gallivm, } lp_build_depth_stencil_load_swizzled(gallivm, type, - zs_format_desc, + zs_format_desc, key->resource_1d, depth_ptr, depth_stride, &z_fb, &s_fb, loop_state.counter); @@ -441,7 +442,8 @@ generate_fs_loop(struct gallivm_state *gallivm, !simple_shader); /* Late Z write */ if (depth_mode & LATE_DEPTH_WRITE) { - lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc, + lp_build_depth_stencil_write_swizzled(gallivm, type, + zs_format_desc, key->resource_1d, NULL, NULL, NULL, loop_state.counter, depth_ptr, depth_stride, z_value, s_value); @@ -454,7 +456,8 @@ generate_fs_loop(struct gallivm_state *gallivm, * depth value, update from zs_value with the new mask value and * write that out. */ - lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc, + lp_build_depth_stencil_write_swizzled(gallivm, type, + zs_format_desc, key->resource_1d, &mask, z_fb, s_fb, loop_state.counter, depth_ptr, depth_stride, z_value, s_value); @@ -508,6 +511,7 @@ generate_fs_loop(struct gallivm_state *gallivm, * * @param type fragment shader type (4x or 8x float) * @param num_fs number of fs_src + * @param is_1d whether we're outputting to a 1d resource * @param dst_channels number of output channels * @param fs_src output from fragment shader * @param dst pointer to store result @@ -1345,6 +1349,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, LLVMValueRef blend_alpha; LLVMValueRef i32_zero; LLVMValueRef check_mask; + LLVMValueRef undef_src_val; struct lp_build_mask_context mask_ctx; struct lp_type mask_type; @@ -1369,9 +1374,16 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, const boolean dual_source_blend = variant->key.blend.rt[0].blend_enable && util_blend_state_is_dual(&variant->key.blend, 0); + const boolean is_1d = variant->key.resource_1d; + unsigned num_fullblock_fs = is_1d ? 2 * num_fs : num_fs; + mask_type = lp_int32_vec4_type(); mask_type.length = fs_type.length; + for (i = num_fs; i < num_fullblock_fs; i++) { + fs_mask[i] = lp_build_zero(gallivm, mask_type); + } + /* Compute the alignment of the destination pointer in bytes */ #if 0 dst_alignment = (block_width * out_format_desc->block.bits + 7)/(out_format_desc->block.width * 8); @@ -1388,7 +1400,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, if (do_branch) { check_mask = LLVMConstNull(lp_build_int_vec_type(gallivm, mask_type)); - for (i = 0; i < num_fs; ++i) { + for (i = 0; i < num_fullblock_fs; ++i) { check_mask = LLVMBuildOr(builder, check_mask, fs_mask[i], ""); } @@ -1399,6 +1411,17 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, partial_mask |= !variant->opaque; i32_zero = lp_build_const_int32(gallivm, 0); +#if HAVE_LLVM < 0x0302 + /* + * undef triggers a crash in LLVMBuildTrunc in convert_from_blend_type in some + * cases (seen with r10g10b10a2, 128bit wide vectors) (only used for 1d case). + */ + undef_src_val = lp_build_zero(gallivm, fs_type); +#else + undef_src_val = lp_build_undef(gallivm, fs_type); +#endif + + /* Get type from output format */ lp_blend_type_from_format_desc(out_format_desc, &row_type); lp_mem_type_from_format_desc(out_format_desc, &dst_type); @@ -1459,14 +1482,25 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, /* * Load shader output */ - for (i = 0; i < num_fs; ++i) { + for (i = 0; i < num_fullblock_fs; ++i) { /* Always load alpha for use in blending */ - LLVMValueRef alpha = LLVMBuildLoad(builder, fs_out_color[rt][alpha_channel][i], ""); + LLVMValueRef alpha; + if (i < num_fs) { + alpha = LLVMBuildLoad(builder, fs_out_color[rt][alpha_channel][i], ""); + } + else { + alpha = undef_src_val; + } /* Load each channel */ for (j = 0; j < dst_channels; ++j) { assert(swizzle[j] < 4); - fs_src[i][j] = LLVMBuildLoad(builder, fs_out_color[rt][swizzle[j]][i], ""); + if (i < num_fs) { + fs_src[i][j] = LLVMBuildLoad(builder, fs_out_color[rt][swizzle[j]][i], ""); + } + else { + fs_src[i][j] = undef_src_val; + } } /* If 3 channels then pad to include alpha for 4 element transpose */ @@ -1492,12 +1526,23 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, } if (dual_source_blend) { /* same as above except different src/dst, skip masks and comments... */ - for (i = 0; i < num_fs; ++i) { - LLVMValueRef alpha = LLVMBuildLoad(builder, fs_out_color[1][alpha_channel][i], ""); + for (i = 0; i < num_fullblock_fs; ++i) { + LLVMValueRef alpha; + if (i < num_fs) { + alpha = LLVMBuildLoad(builder, fs_out_color[1][alpha_channel][i], ""); + } + else { + alpha = undef_src_val; + } for (j = 0; j < dst_channels; ++j) { assert(swizzle[j] < 4); - fs_src1[i][j] = LLVMBuildLoad(builder, fs_out_color[1][swizzle[j]][i], ""); + if (i < num_fs) { + fs_src1[i][j] = LLVMBuildLoad(builder, fs_out_color[1][swizzle[j]][i], ""); + } + else { + fs_src1[i][j] = undef_src_val; + } } if (dst_channels == 3 && !has_alpha) { fs_src1[i][3] = alpha; @@ -1518,7 +1563,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, */ fs_type.floating = 0; fs_type.sign = dst_type.sign; - for (i = 0; i < num_fs; ++i) { + for (i = 0; i < num_fullblock_fs; ++i) { for (j = 0; j < dst_channels; ++j) { fs_src[i][j] = LLVMBuildBitCast(builder, fs_src[i][j], lp_build_vec_type(gallivm, fs_type), ""); @@ -1533,16 +1578,16 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, /* * Pixel twiddle from fragment shader order to memory order */ - src_count = generate_fs_twiddle(gallivm, fs_type, num_fs, + src_count = generate_fs_twiddle(gallivm, fs_type, num_fullblock_fs, dst_channels, fs_src, src, pad_inline); if (dual_source_blend) { - generate_fs_twiddle(gallivm, fs_type, num_fs, dst_channels, + generate_fs_twiddle(gallivm, fs_type, num_fullblock_fs, dst_channels, fs_src1, src1, pad_inline); } src_channels = dst_channels < 3 ? dst_channels : 4; - if (src_count != num_fs * src_channels) { - unsigned ds = src_count / (num_fs * src_channels); + if (src_count != num_fullblock_fs * src_channels) { + unsigned ds = src_count / (num_fullblock_fs * src_channels); row_type.length /= ds; fs_type.length = row_type.length; } @@ -1685,8 +1730,18 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, dst_type.length = block_width; } - load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height, - dst, dst_type, dst_count, dst_alignment); + if (is_1d) { + load_unswizzled_block(gallivm, color_ptr, stride, block_width, 1, + dst, dst_type, dst_count / 4, dst_alignment); + for (i = dst_count / 4; i < dst_count; i++) { + dst[i] = lp_build_undef(gallivm, dst_type); + } + + } + else { + load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height, + dst, dst_type, dst_count, dst_alignment); + } /* @@ -1761,8 +1816,14 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, /* * Store blend result to memory */ - store_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height, - dst, dst_type, dst_count, dst_alignment); + if (is_1d) { + store_unswizzled_block(gallivm, color_ptr, stride, block_width, 1, + dst, dst_type, dst_count / 4, dst_alignment); + } + else { + store_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height, + dst, dst_type, dst_count, dst_alignment); + } if (do_branch) { lp_build_mask_end(&mask_ctx); @@ -1855,7 +1916,6 @@ generate_fragment(struct llvmpipe_context *lp, fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ fs_type.width = 32; /* 32-bit float */ fs_type.length = MIN2(lp_native_vector_width / 32, 16); /* n*4 elements per vector */ - num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */ memset(&blend_type, 0, sizeof blend_type); blend_type.floating = FALSE; /* values are integers */ @@ -1944,6 +2004,11 @@ generate_fragment(struct llvmpipe_context *lp, /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->state, context_ptr); + num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */ + /* for 1d resources only run "upper half" of stamp */ + if (key->resource_1d) + num_fs /= 2; + { LLVMValueRef num_loop = lp_build_const_int32(gallivm, num_fs); LLVMTypeRef mask_type = lp_build_int_vec_type(gallivm, fs_type); @@ -2533,6 +2598,9 @@ make_variant_key(struct llvmpipe_context *lp, key->zsbuf_format = zsbuf_format; memcpy(&key->stencil, &lp->depth_stencil->stencil, sizeof key->stencil); } + if (llvmpipe_resource_is_1d(lp->framebuffer.zsbuf->texture)) { + key->resource_1d = TRUE; + } } /* alpha test only applies if render buffer 0 is non-integer (or does not exist) */ @@ -2570,6 +2638,15 @@ make_variant_key(struct llvmpipe_context *lp, key->cbuf_format[i] = format; + /* + * Figure out if this is a 1d resource. Note that OpenGL allows crazy + * mixing of 2d textures with height 1 and 1d textures, so make sure + * we pick 1d if any cbuf or zsbuf is 1d. + */ + if (llvmpipe_resource_is_1d(lp->framebuffer.cbufs[0]->texture)) { + key->resource_1d = TRUE; + } + format_desc = util_format_description(format); assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index c8dc1c33cfe..33140901c18 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -75,6 +75,7 @@ struct lp_fragment_shader_variant_key unsigned nr_sampler_views:8; /* actually derivable from just the shader */ unsigned flatshade:1; unsigned occlusion_count:1; + unsigned resource_1d:1; enum pipe_format zsbuf_format; enum pipe_format cbuf_format[PIPE_MAX_COLOR_BUFS]; diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 22f952c8924..f1a1ed0960a 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -83,22 +83,30 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, /* Row stride and image stride */ { - unsigned alignment, nblocksx, nblocksy, block_size; + unsigned align_x, align_y, nblocksx, nblocksy, block_size; /* For non-compressed formats we need 4x4 pixel alignment - * (for now). We also want cache line size in x direction, + * so we can read/write LP_RASTER_BLOCK_SIZE when rendering to them. + * We also want cache line size in x direction, * otherwise same cache line could end up in multiple threads. - * XXX this blows up 1d/1d array textures by a factor of 4. + * For explicit 1d resources however we reduce this to 4x1 and + * handle specially in render output code (as we need to do special + * handling there for buffers in any case). */ if (util_format_is_compressed(pt->format)) - alignment = 1; - else - alignment = LP_RASTER_BLOCK_SIZE; + align_x = align_y = 1; + else { + align_x = LP_RASTER_BLOCK_SIZE; + if (llvmpipe_resource_is_1d(&lpr->base)) + align_y = 1; + else + align_y = LP_RASTER_BLOCK_SIZE; + } nblocksx = util_format_get_nblocksx(pt->format, - align(width, alignment)); + align(width, align_x)); nblocksy = util_format_get_nblocksy(pt->format, - align(height, alignment)); + align(height, align_y)); block_size = util_format_get_blocksize(pt->format); if (util_format_is_compressed(pt->format)) diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index faba6f21025..e73d44946d2 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -159,6 +159,27 @@ llvmpipe_resource_is_texture(const struct pipe_resource *resource) } +static INLINE boolean +llvmpipe_resource_is_1d(const struct pipe_resource *resource) +{ + switch (resource->target) { + case PIPE_BUFFER: + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + return TRUE; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_3D: + case PIPE_TEXTURE_CUBE: + return FALSE; + default: + assert(0); + return FALSE; + } +} + + static INLINE unsigned llvmpipe_resource_stride(struct pipe_resource *resource, unsigned level) |