From 4c36553a46b14f5485140bbb51d3aa35d2b79e14 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 10 Feb 2017 01:16:34 +0100 Subject: radeonsi: implement legacy GL_DOUBLE vertex formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit so that we can disable u_vbuf for GL core profiles. Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_shader.c | 78 ++++++++++++++++++++++++++------ src/gallium/drivers/radeonsi/si_shader.h | 4 ++ src/gallium/drivers/radeonsi/si_state.c | 56 ++++++++++++++++++++--- 3 files changed, 117 insertions(+), 21 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 9196de4f926..cfff54a5e00 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -319,6 +319,21 @@ static LLVMValueRef get_instance_index_for_fetch( LLVMGetParam(radeon_bld->main_fn, param_start_instance), ""); } +/* Bitcast <4 x float> to <2 x double>, extract the component, and convert + * to float. */ +static LLVMValueRef extract_double_to_float(struct si_shader_context *ctx, + LLVMValueRef vec4, + unsigned double_index) +{ + LLVMBuilderRef builder = ctx->gallivm.builder; + LLVMTypeRef f64 = LLVMDoubleTypeInContext(ctx->gallivm.context); + LLVMValueRef dvec2 = LLVMBuildBitCast(builder, vec4, + LLVMVectorType(f64, 2), ""); + LLVMValueRef index = LLVMConstInt(ctx->i32, double_index, 0); + LLVMValueRef value = LLVMBuildExtractElement(builder, dvec2, index, ""); + return LLVMBuildFPTrunc(builder, value, ctx->f32, ""); +} + static void declare_input_vs( struct si_shader_context *ctx, unsigned input_index, @@ -330,14 +345,15 @@ static void declare_input_vs( unsigned chan; unsigned fix_fetch; + unsigned num_fetches; + unsigned fetch_stride; LLVMValueRef t_list_ptr; LLVMValueRef t_offset; LLVMValueRef t_list; - LLVMValueRef attribute_offset; - LLVMValueRef buffer_index; + LLVMValueRef vertex_index; LLVMValueRef args[3]; - LLVMValueRef input; + LLVMValueRef input[3]; /* Load the T list */ t_list_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_VERTEX_BUFFERS); @@ -346,29 +362,42 @@ static void declare_input_vs( t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset); - /* Build the attribute offset */ - attribute_offset = lp_build_const_int32(gallivm, 0); - - buffer_index = LLVMGetParam(ctx->main_fn, + vertex_index = LLVMGetParam(ctx->main_fn, ctx->param_vertex_index0 + input_index); + fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (4 * input_index)) & 0xf; + + /* Do multiple loads for double formats. */ + if (fix_fetch == SI_FIX_FETCH_RGB_64_FLOAT) { + num_fetches = 3; /* 3 2-dword loads */ + fetch_stride = 8; + } else if (fix_fetch == SI_FIX_FETCH_RGBA_64_FLOAT) { + num_fetches = 2; /* 2 4-dword loads */ + fetch_stride = 16; + } else { + num_fetches = 1; + fetch_stride = 0; + } + args[0] = t_list; - args[1] = attribute_offset; - args[2] = buffer_index; - input = lp_build_intrinsic(gallivm->builder, - "llvm.SI.vs.load.input", ctx->v4f32, args, 3, - LP_FUNC_ATTR_READNONE); + args[2] = vertex_index; + + for (unsigned i = 0; i < num_fetches; i++) { + args[1] = LLVMConstInt(ctx->i32, fetch_stride * i, 0); + + input[i] = lp_build_intrinsic(gallivm->builder, + "llvm.SI.vs.load.input", ctx->v4f32, args, 3, + LP_FUNC_ATTR_READNONE); + } /* Break up the vec4 into individual components */ for (chan = 0; chan < 4; chan++) { LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan); out[chan] = LLVMBuildExtractElement(gallivm->builder, - input, llvm_chan, ""); + input[0], llvm_chan, ""); } - fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (4 * input_index)) & 0xf; - switch (fix_fetch) { case SI_FIX_FETCH_A2_SNORM: case SI_FIX_FETCH_A2_SSCALED: @@ -464,6 +493,25 @@ static void declare_input_vs( out[chan], ctx->f32, ""); } break; + case SI_FIX_FETCH_RG_64_FLOAT: + for (chan = 0; chan < 2; chan++) + out[chan] = extract_double_to_float(ctx, input[0], chan); + + out[2] = LLVMConstReal(ctx->f32, 0); + out[3] = LLVMConstReal(ctx->f32, 1); + break; + case SI_FIX_FETCH_RGB_64_FLOAT: + for (chan = 0; chan < 3; chan++) + out[chan] = extract_double_to_float(ctx, input[chan], 0); + + out[3] = LLVMConstReal(ctx->f32, 1); + break; + case SI_FIX_FETCH_RGBA_64_FLOAT: + for (chan = 0; chan < 4; chan++) { + out[chan] = extract_double_to_float(ctx, input[chan / 2], + chan % 2); + } + break; } } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 5464d6747d8..6398b39a0ec 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -247,6 +247,10 @@ enum { SI_FIX_FETCH_RGBA_32_SSCALED, SI_FIX_FETCH_RGBA_32_FIXED, SI_FIX_FETCH_RGBX_32_FIXED, + SI_FIX_FETCH_RG_64_FLOAT, + SI_FIX_FETCH_RGB_64_FLOAT, + SI_FIX_FETCH_RGBA_64_FLOAT, + SI_FIX_FETCH_RESERVED_15, /* maximum */ }; struct si_shader; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 1e0729c3b70..107bc06597d 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1762,6 +1762,19 @@ static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; } break; + case 64: + /* Legacy double formats. */ + switch (desc->nr_channels) { + case 1: /* 1 load */ + return V_008F0C_BUF_DATA_FORMAT_32_32; + case 2: /* 1 load */ + return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; + case 3: /* 3 loads */ + return V_008F0C_BUF_DATA_FORMAT_32_32; + case 4: /* 2 loads */ + return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; + } + break; } return V_008F0C_BUF_DATA_FORMAT_INVALID; @@ -3359,6 +3372,7 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, unsigned data_format, num_format; int first_non_void; unsigned vbo_index = elements[i].vertex_buffer_index; + unsigned char swizzle[4]; if (vbo_index >= SI_NUM_VERTEX_BUFFERS) { FREE(v); @@ -3375,13 +3389,8 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL; + memcpy(swizzle, desc->swizzle, sizeof(swizzle)); - v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | - S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | - S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | - S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | - S_008F0C_NUM_FORMAT(num_format) | - S_008F0C_DATA_FORMAT(data_format); v->format_size[i] = desc->block.bits / 8; /* The hardware always treats the 2-bit alpha channel as @@ -3421,8 +3430,43 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_USCALED << (4 * i); } } + } else if (channel && channel->size == 64 && + channel->type == UTIL_FORMAT_TYPE_FLOAT) { + switch (desc->nr_channels) { + case 1: + case 2: + v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RG_64_FLOAT << (4 * i); + swizzle[0] = PIPE_SWIZZLE_X; + swizzle[1] = PIPE_SWIZZLE_Y; + swizzle[2] = desc->nr_channels == 2 ? PIPE_SWIZZLE_Z : PIPE_SWIZZLE_0; + swizzle[3] = desc->nr_channels == 2 ? PIPE_SWIZZLE_W : PIPE_SWIZZLE_0; + break; + case 3: + v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGB_64_FLOAT << (4 * i); + swizzle[0] = PIPE_SWIZZLE_X; /* 3 loads */ + swizzle[1] = PIPE_SWIZZLE_Y; + swizzle[2] = PIPE_SWIZZLE_0; + swizzle[3] = PIPE_SWIZZLE_0; + break; + case 4: + v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_64_FLOAT << (4 * i); + swizzle[0] = PIPE_SWIZZLE_X; /* 2 loads */ + swizzle[1] = PIPE_SWIZZLE_Y; + swizzle[2] = PIPE_SWIZZLE_Z; + swizzle[3] = PIPE_SWIZZLE_W; + break; + default: + assert(0); + } } + v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(swizzle[0])) | + S_008F0C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | + S_008F0C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | + S_008F0C_DST_SEL_W(si_map_swizzle(swizzle[3])) | + S_008F0C_NUM_FORMAT(num_format) | + S_008F0C_DATA_FORMAT(data_format); + /* We work around the fact that 8_8_8 and 16_16_16 data formats * do not exist by using the corresponding 4-component formats. * This requires a fixup of the descriptor for bounds checks. -- cgit v1.2.3