diff options
Diffstat (limited to 'src/gallium/drivers/llvmpipe/lp_state_fs.c')
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_state_fs.c | 39 |
1 file changed, 35 insertions, 4 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 2f9f907edd6..5e28f0ec4ea 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -868,12 +868,12 @@ lp_mem_type_from_format_desc(const struct util_format_description *format_desc, unsigned chan; if (format_expands_to_float_soa(format_desc)) { - /* just make this a 32bit uint */ + /* just make this a uint with width of block */ type->floating = false; type->fixed = false; type->sign = false; type->norm = false; - type->width = 32; + type->width = format_desc->block.bits; type->length = 1; return; } @@ -1137,12 +1137,24 @@ convert_to_blend_type(struct gallivm_state *gallivm, * This is pretty suboptimal for this case blending in SoA would be much * better, since conversion gets us SoA values so need to convert back. */ - assert(src_type.width == 32); + assert(src_type.width == 32 || src_type.width == 16); assert(dst_type.floating); assert(dst_type.width == 32); assert(dst_type.length % 4 == 0); assert(num_srcs % 4 == 0); + if (src_type.width == 16) { + /* expand 4x16bit values to 4x32bit */ + struct lp_type type32x4 = src_type; + LLVMTypeRef ltype32x4; + unsigned num_fetch = dst_type.length == 8 ? 
num_srcs / 2 : num_srcs / 4; + type32x4.width = 32; + ltype32x4 = lp_build_vec_type(gallivm, type32x4); + for (i = 0; i < num_fetch; i++) { + src[i] = LLVMBuildZExt(builder, src[i], ltype32x4, ""); + } + src_type.width = 32; + } for (i = 0; i < 4; i++) { tmpsrc[i] = src[i]; } @@ -1298,7 +1310,7 @@ convert_from_blend_type(struct gallivm_state *gallivm, assert(src_type.floating); assert(src_type.width == 32); assert(src_type.length % 4 == 0); - assert(dst_type.width == 32); + assert(dst_type.width == 32 || dst_type.width == 16); for (i = 0; i < num_srcs / 4; i++) { LLVMValueRef tmpsoa[4], tmpdst; @@ -1333,6 +1345,25 @@ convert_from_blend_type(struct gallivm_state *gallivm, src[i] = tmpdst; } } + if (dst_type.width == 16) { + struct lp_type type16x8 = dst_type; + struct lp_type type32x4 = dst_type; + LLVMTypeRef ltype16x4, ltypei64, ltypei128; + unsigned num_fetch = src_type.length == 8 ? num_srcs / 2 : num_srcs / 4; + type16x8.length = 8; + type32x4.width = 32; + ltypei128 = LLVMIntTypeInContext(gallivm->context, 128); + ltypei64 = LLVMIntTypeInContext(gallivm->context, 64); + ltype16x4 = lp_build_vec_type(gallivm, dst_type); + /* We could do vector truncation but it doesn't generate very good code */ + for (i = 0; i < num_fetch; i++) { + src[i] = lp_build_pack2(gallivm, type32x4, type16x8, + src[i], lp_build_zero(gallivm, type32x4)); + src[i] = LLVMBuildBitCast(builder, src[i], ltypei128, ""); + src[i] = LLVMBuildTrunc(builder, src[i], ltypei64, ""); + src[i] = LLVMBuildBitCast(builder, src[i], ltype16x4, ""); + } + } return; } |