diff options
author | Roland Scheidegger <sroland@vmware.com> | 2013-03-22 20:09:18 +0100 |
---|---|---|
committer | Roland Scheidegger <sroland@vmware.com> | 2013-03-22 20:10:53 +0100 |
commit | b101a094b5576337f5bb7aecf525eb1755757b9c (patch) | |
tree | d429450eb17043860c80edc726aa5df008f66474 /src/gallium/drivers/llvmpipe/lp_state_fs.c | |
parent | 31009b45213ae9efab6af9d8efcc061194919b12 (diff) |
llvmpipe: add EXT_packed_float render target format support
New conversion code to handle conversion from/to r11g11b10 AoS to/from
SoA floats, and also add code for conversion from rgb9e5 AoS to float SoA
(which works pretty much the same as r11g11b10 except for the packing).
(This code should also be used for texture sampling instead of
relying on u_format conversion but it's not yet, so rgb9e5 is unused.)
Unfortunately a crazy amount of hacks is necessary to get the conversion
code running in llvmpipe's generate_unswizzled_blend, which isn't well
suited for formats where the storage representation has nothing to do
with what's needed for blending (moreover, the conversion will convert
from packed AoS values, which is the storage format, to float SoA values,
because this is much more natural for the conversion, and likewise from
SoA values to packed AoS values - but the "blend" (which includes
trivial things like partial mask) works on AoS values, so incoming fs
values will go SoA->AoS, values from destination will go packed
AoS->SoA->AoS, then do blend, then AoS->SoA->packed AoS which probably
isn't the most efficient way though the shuffles are probably bearable).
Passes piglit fbo-blending-formats (with GL_EXT_packed_float parameter),
still need to verify Inf/NaNs (where most of the complexity in the
conversion comes from actually).
v2: drop the (very bogus) rgb9e5 part, and do component extraction
in the helper code for r11g11b10 to float conversion, making the code
slightly more compact (suggested by Jose), now that there are no other
callers left this works quite well. (Could do the same for the
opposite way but it's less than ideal there, final part of packing
needs to be done in caller anyway and there'd be another conditional.)
v3: minor style and comment fixes. Also fix a potential issue with
negative zero being potentially returned by max(src, zero) as we
don't have well-defined min/max behavior (fortunately no additonal cost).
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Diffstat (limited to 'src/gallium/drivers/llvmpipe/lp_state_fs.c')
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_state_fs.c | 126 |
1 files changed, 126 insertions, 0 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index d8369b4d807..953a5c1aa44 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -972,6 +972,17 @@ lp_mem_type_from_format_desc(const struct util_format_description *format_desc, unsigned i; unsigned chan; + if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) { + /* just make this a 32bit uint */ + type->floating = false; + type->fixed = false; + type->sign = false; + type->norm = false; + type->width = 32; + type->length = 1; + return; + } + for (i = 0; i < 4; i++) if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) break; @@ -1009,6 +1020,17 @@ lp_blend_type_from_format_desc(const struct util_format_description *format_desc unsigned i; unsigned chan; + if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) { + /* always use ordinary floats for blending */ + type->floating = true; + type->fixed = false; + type->sign = true; + type->norm = false; + type->width = 32; + type->length = 4; + return; + } + for (i = 0; i < 4; i++) if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) break; @@ -1122,6 +1144,48 @@ convert_to_blend_type(struct gallivm_state *gallivm, unsigned pixels = 16 / num_srcs; bool is_arith; + /* + * full custom path for packed floats - none of the later functions would do + * anything useful, and given the lp_type representation they can't be fixed. + */ + if (src_fmt->format == PIPE_FORMAT_R11G11B10_FLOAT) { + LLVMValueRef tmpsrc[4]; + /* + * This is pretty suboptimal for this case blending in SoA would be much + * better, since conversion gets us SoA values so need to convert back. + */ + assert(src_type.width == 32); + assert(dst_type.floating); + assert(dst_type.width = 32); + assert(dst_type.length % 4 == 0); + for (i = 0; i < 4; i++) { + tmpsrc[i] = src[i]; + } + for (i = 0; i < num_srcs / 4; i++) { + LLVMValueRef tmpsoa[4]; + LLVMValueRef tmps = tmpsrc[i]; + if (num_srcs == 8) { + LLVMValueRef shuffles[8]; + unsigned j; + /* fetch was 4 values but need 8-wide output values */ + tmps = lp_build_concat(gallivm, &tmpsrc[i * 2], src_type, 2); + /* + * for 8-wide aos transpose would give us wrong order not matching + * incoming converted fs values and mask. ARGH. + */ + for (j = 0; j < 4; j++) { + shuffles[j] = lp_build_const_int32(gallivm, j * 2); + shuffles[j + 4] = lp_build_const_int32(gallivm, j * 2 + 1); + } + tmps = LLVMBuildShuffleVector(builder, tmps, tmps, + LLVMConstVector(shuffles, 8), ""); + } + lp_build_r11g11b10_to_float(gallivm, tmps, tmpsoa); + lp_build_transpose_aos(gallivm, dst_type, tmpsoa, &src[i * 4]); + } + return; + } + lp_mem_type_from_format_desc(src_fmt, &mem_type); lp_blend_type_from_format_desc(src_fmt, &blend_type); @@ -1225,6 +1289,47 @@ convert_from_blend_type(struct gallivm_state *gallivm, unsigned pixels = 16 / num_srcs; bool is_arith; + /* + * full custom path for packed floats - none of the later functions would do + * anything useful, and given the lp_type representation they can't be fixed. + */ + if (src_fmt->format == PIPE_FORMAT_R11G11B10_FLOAT) { + /* + * This is pretty suboptimal for this case blending in SoA would be much + * better - we need to transpose the AoS values back to SoA values for + * conversion/packing. + */ + assert(src_type.floating); + assert(src_type.width = 32); + assert(src_type.length % 4 == 0); + assert(dst_type.width == 32); + for (i = 0; i < num_srcs / 4; i++) { + LLVMValueRef tmpsoa[4], tmpdst; + lp_build_transpose_aos(gallivm, src_type, &src[i * 4], tmpsoa); + tmpdst = lp_build_float_to_r11g11b10(gallivm, tmpsoa); + if (num_srcs == 8) { + LLVMValueRef tmpaos, shuffles[8]; + unsigned j; + /* + * for 8-wide aos transpose has given us wrong order not matching + * output order. HMPF. Also need to split the output values manually. + */ + for (j = 0; j < 4; j++) { + shuffles[j * 2] = lp_build_const_int32(gallivm, j); + shuffles[j * 2 + 1] = lp_build_const_int32(gallivm, j + 4); + } + tmpaos = LLVMBuildShuffleVector(builder, tmpdst, tmpdst, + LLVMConstVector(shuffles, 8), ""); + src[i * 2] = lp_build_extract_range(gallivm, tmpaos, 0, 4); + src[i * 2 + 1] = lp_build_extract_range(gallivm, tmpaos, 4, 4); + } + else { + src[i] = tmpdst; + } + } + return; + } + lp_mem_type_from_format_desc(src_fmt, &mem_type); lp_blend_type_from_format_desc(src_fmt, &blend_type); @@ -1532,6 +1637,17 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, } } + if (out_format == PIPE_FORMAT_R11G11B10_FLOAT) { + /* the code above can't work for layout_other */ + dst_channels = 4; /* HACK: this is fake 4 really but need it due to transpose stuff later */ + has_alpha = true; + swizzle[0] = 0; + swizzle[1] = 1; + swizzle[2] = 2; + swizzle[3] = 3; + pad_inline = true; /* HACK: prevent rgbxrgbx->rgbrgbxx conversion later */ + } + /* If 3 channels then pad to include alpha for 4 element transpose */ if (dst_channels == 3 && !has_alpha) { for (i = 0; i < TGSI_NUM_CHANNELS; i++) { @@ -1756,6 +1872,16 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, dst_type.length *= 16 / dst_count; + if (out_format == PIPE_FORMAT_R11G11B10_FLOAT) { + /* + * we need multiple values at once for the conversion, so can as well + * load them vectorized here too instead of concatenating later. + * (Still need concatenation later for 8-wide vectors). + */ + dst_count = block_height; + dst_type.length = block_width; + } + load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height, dst, dst_type, dst_count, dst_alignment); |