diff options
Diffstat (limited to 'src/gallium/auxiliary')
66 files changed, 2744 insertions, 756 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 1d0930e024b..0ac18426d97 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -107,6 +107,7 @@ C_SOURCES = \ util/u_draw_quad.c \ util/u_format_access.c \ util/u_format_table.c \ + util/u_format_tests.c \ util/u_gen_mipmap.c \ util/u_handle_table.c \ util/u_hash_table.c \ @@ -130,7 +131,8 @@ C_SOURCES = \ vl/vl_mpeg12_mc_renderer.c \ vl/vl_compositor.c \ vl/vl_csc.c \ - vl/vl_shader_build.c + vl/vl_shader_build.c \ + target-helpers/wrap_screen.c GALLIVM_SOURCES = \ gallivm/lp_bld_alpha.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index f365c4bbdd6..2be16776fb8 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -7,6 +7,8 @@ env.Append(CPPPATH = [ 'util', ]) +env.Tool('udis86') + env.CodeGenerate( target = 'indices/u_indices_gen.c', script = 'indices/u_indices_gen.py', @@ -149,6 +151,7 @@ source = [ 'util/u_draw_quad.c', 'util/u_format_access.c', 'util/u_format_table.c', + 'util/u_format_tests.c', 'util/u_gen_mipmap.c', 'util/u_handle_table.c', 'util/u_hash.c', @@ -172,6 +175,7 @@ source = [ 'vl/vl_compositor.c', 'vl/vl_csc.c', 'vl/vl_shader_build.c', + 'target-helpers/wrap_screen.c', ] if drawllvm: diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index a6a07e72c2f..900c64df4b9 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -43,6 +43,7 @@ struct cso_cache { struct cso_hash *vs_hash; struct cso_hash *rasterizer_hash; struct cso_hash *sampler_hash; + struct cso_hash *velements_hash; int max_size; cso_sanitize_callback sanitize_cb; @@ -108,6 +109,9 @@ static struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_ case CSO_VERTEX_SHADER: hash = sc->vs_hash; break; + case CSO_VELEMENTS: + hash = sc->velements_hash; + break; } return hash; @@ -161,6 +165,13 @@ static void delete_vs_state(void *state, void *data) FREE(state); } +static void delete_velements(void *state, void *data) +{ + struct cso_velements *cso = (struct cso_velements *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); +} static INLINE void delete_cso(void *state, enum cso_cache_type type) { @@ -183,6 +194,9 @@ static INLINE void delete_cso(void *state, enum cso_cache_type type) case CSO_VERTEX_SHADER: delete_vs_state(state, 0); break; + case CSO_VELEMENTS: + delete_velements(state, 0); + break; default: assert(0); FREE(state); @@ -294,6 +308,7 @@ struct cso_cache *cso_cache_create(void) sc->rasterizer_hash = cso_hash_create(); sc->fs_hash = cso_hash_create(); sc->vs_hash = cso_hash_create(); + sc->velements_hash = cso_hash_create(); sc->sanitize_cb = sanitize_cb; sc->sanitize_data = 0; @@ -325,6 +340,9 @@ void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, case CSO_VERTEX_SHADER: hash = sc->vs_hash; break; + case CSO_VELEMENTS: + hash = sc->velements_hash; + break; } iter = cso_hash_first_node(hash); @@ -351,6 +369,7 @@ void cso_cache_delete(struct cso_cache *sc) cso_for_each_state(sc, CSO_VERTEX_SHADER, delete_vs_state, 0); cso_for_each_state(sc, CSO_RASTERIZER, delete_rasterizer_state, 0); cso_for_each_state(sc, CSO_SAMPLER, delete_sampler_state, 0); + cso_for_each_state(sc, CSO_VELEMENTS, delete_velements, 0); cso_hash_delete(sc->blend_hash); cso_hash_delete(sc->sampler_hash); @@ -358,6 +377,7 @@ void cso_cache_delete(struct cso_cache *sc) cso_hash_delete(sc->rasterizer_hash); cso_hash_delete(sc->fs_hash); cso_hash_delete(sc->vs_hash); + cso_hash_delete(sc->velements_hash); FREE(sc); } @@ -372,6 +392,7 @@ void cso_set_maximum_cache_size(struct cso_cache *sc, int number) sanitize_hash(sc, sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size); sanitize_hash(sc, sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size); sanitize_hash(sc, sc->sampler_hash, CSO_SAMPLER, sc->max_size); + sanitize_hash(sc, sc->velements_hash, CSO_VELEMENTS, sc->max_size); } int cso_maximum_cache_size(const struct cso_cache *sc) diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index eea60b940bb..fb09b83c623 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -53,6 +53,7 @@ * - rasterizer (old setup) * - sampler * - vertex shader + * - vertex elements * * Things that are not constant state objects include: * - blend_color @@ -90,7 +91,8 @@ enum cso_cache_type { CSO_DEPTH_STENCIL_ALPHA, CSO_RASTERIZER, CSO_FRAGMENT_SHADER, - CSO_VERTEX_SHADER + CSO_VERTEX_SHADER, + CSO_VELEMENTS }; typedef void (*cso_state_callback)(void *ctx, void *obj); @@ -144,6 +146,18 @@ struct cso_sampler { struct pipe_context *context; }; +struct cso_velems_state { + unsigned count; + struct pipe_vertex_element velems[PIPE_MAX_ATTRIBS]; +}; + +struct cso_velements { + struct cso_velems_state state; + void *data; + cso_state_callback delete_state; + struct pipe_context *context; +}; + unsigned cso_construct_key(void *item, int item_size); struct cso_cache *cso_cache_create(void); diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index a7335c340ca..6500891a10c 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -89,6 +89,7 @@ struct cso_context { void *rasterizer, *rasterizer_saved; void *fragment_shader, *fragment_shader_saved, *geometry_shader; void *vertex_shader, *vertex_shader_saved, *geometry_shader_saved; + void *velements, *velements_saved; struct pipe_clip_state clip; struct pipe_clip_state clip_saved; @@ -174,6 +175,20 @@ static boolean delete_vs_state(struct cso_context *ctx, void *state) return FALSE; } +static boolean delete_vertex_elements(struct cso_context *ctx, + void *state) +{ + struct cso_velements *cso = (struct cso_velements *)state; + + if (ctx->velements == cso->data) + return FALSE; + + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + return TRUE; +} + static INLINE boolean delete_cso(struct cso_context *ctx, void *state, enum cso_cache_type type) @@ -197,6 +212,9 @@ static INLINE boolean delete_cso(struct cso_context *ctx, case CSO_VERTEX_SHADER: return delete_vs_state(ctx, state); break; + case CSO_VELEMENTS: + return delete_vertex_elements(ctx, state); + break; default: assert(0); FREE(state); @@ -271,6 +289,7 @@ void cso_release_all( struct cso_context *ctx ) ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL ); ctx->pipe->bind_fs_state( ctx->pipe, NULL ); ctx->pipe->bind_vs_state( ctx->pipe, NULL ); + ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL ); } for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { @@ -1130,7 +1149,6 @@ void cso_restore_geometry_shader(struct cso_context *ctx) ctx->geometry_shader_saved = NULL; } - /* clip state */ static INLINE void @@ -1180,3 +1198,66 @@ cso_restore_clip(struct cso_context *ctx) ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip_saved); } } + +enum pipe_error cso_set_vertex_elements(struct cso_context *ctx, + unsigned count, + const struct pipe_vertex_element *states) +{ + unsigned key_size, hash_key; + struct cso_hash_iter iter; + void *handle; + struct cso_velems_state velems_state; + + /* need to include the count into the stored state data too. + Otherwise first few count pipe_vertex_elements could be identical even if count + is different, and there's no guarantee the hash would be different in that + case neither */ + key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned); + velems_state.count = count; + memcpy(velems_state.velems, states, sizeof(struct pipe_vertex_element) * count); + hash_key = cso_construct_key((void*)&velems_state, key_size); + iter = cso_find_state_template(ctx->cache, hash_key, CSO_VELEMENTS, (void*)&velems_state, key_size); + + if (cso_hash_iter_is_null(iter)) { + struct cso_velements *cso = MALLOC(sizeof(struct cso_velements)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + + memcpy(&cso->state, &velems_state, key_size); + cso->data = ctx->pipe->create_vertex_elements_state(ctx->pipe, count, &cso->state.velems[0]); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_vertex_elements_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_VELEMENTS, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + handle = cso->data; + } + else { + handle = ((struct cso_velements *)cso_hash_iter_data(iter))->data; + } + + if (ctx->velements != handle) { + ctx->velements = handle; + ctx->pipe->bind_vertex_elements_state(ctx->pipe, handle); + } + return PIPE_OK; +} + +void cso_save_vertex_elements(struct cso_context *ctx) +{ + assert(!ctx->velements_saved); + ctx->velements_saved = ctx->velements; +} + +void cso_restore_vertex_elements(struct cso_context *ctx) +{ + if (ctx->velements != ctx->velements_saved) { + ctx->velements = ctx->velements_saved; + ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->velements_saved); + } + ctx->velements_saved = NULL; +} diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index 251a9a644f8..9c16abd28dd 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -122,6 +122,12 @@ void cso_restore_vertex_sampler_textures(struct cso_context *cso); +enum pipe_error cso_set_vertex_elements(struct cso_context *ctx, + unsigned count, + const struct pipe_vertex_element *states); +void cso_save_vertex_elements(struct cso_context *ctx); +void cso_restore_vertex_elements(struct cso_context *ctx); + /* These aren't really sensible -- most of the time the api provides * object semantics for shaders anyway, and the cases where it doesn't @@ -157,7 +163,6 @@ void cso_save_geometry_shader(struct cso_context *cso); void cso_restore_geometry_shader(struct cso_context *cso); - enum pipe_error cso_set_framebuffer(struct cso_context *cso, const struct pipe_framebuffer_state *fb); void cso_save_framebuffer(struct cso_context *cso); diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 8f6ca15dfa2..1c07ab13654 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -422,9 +422,9 @@ aaline_create_texture(struct aaline_stage *aaline) /* This texture is new, no need to flush. */ - transfer = screen->get_tex_transfer(screen, aaline->texture, 0, level, 0, + transfer = pipe->get_tex_transfer(pipe, aaline->texture, 0, level, 0, PIPE_TRANSFER_WRITE, 0, 0, size, size); - data = screen->transfer_map(screen, transfer); + data = pipe->transfer_map(pipe, transfer); if (data == NULL) return FALSE; @@ -448,8 +448,8 @@ aaline_create_texture(struct aaline_stage *aaline) } /* unmap */ - screen->transfer_unmap(screen, transfer); - screen->tex_transfer_destroy(transfer); + pipe->transfer_unmap(pipe, transfer); + pipe->tex_transfer_destroy(pipe, transfer); } return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index d0d99aa331a..38c22bf4e94 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -374,19 +374,21 @@ pstip_update_texture(struct pstip_stage *pstip) { static const uint bit31 = 1 << 31; struct pipe_context *pipe = pstip->pipe; - struct pipe_screen *screen = pipe->screen; struct pipe_transfer *transfer; const uint *stipple = pstip->state.stipple->stipple; uint i, j; ubyte *data; /* XXX: want to avoid flushing just because we use stipple: + * + * Flush should no longer be necessary if driver is properly + * interleaving drawing and transfers on a given context: */ pipe->flush( pipe, PIPE_FLUSH_TEXTURE_CACHE, NULL ); - transfer = screen->get_tex_transfer(screen, pstip->texture, 0, 0, 0, - PIPE_TRANSFER_WRITE, 0, 0, 32, 32); - data = screen->transfer_map(screen, transfer); + transfer = pipe->get_tex_transfer(pipe, pstip->texture, 0, 0, 0, + PIPE_TRANSFER_WRITE, 0, 0, 32, 32); + data = pipe->transfer_map(pipe, transfer); /* * Load alpha texture. @@ -408,8 +410,8 @@ pstip_update_texture(struct pstip_stage *pstip) } /* unmap */ - screen->transfer_unmap(screen, transfer); - screen->tex_transfer_destroy(transfer); + pipe->transfer_unmap(pipe, transfer); + pipe->tex_transfer_destroy(pipe, transfer); } diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 6d90a6c42fd..a8cdc57ad96 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -307,9 +307,8 @@ draw_arrays_instanced(struct draw_context *draw, tgsi_dump(draw->vs.vertex_shader->state.tokens, 0); debug_printf("Elements:\n"); for (i = 0; i < draw->pt.nr_vertex_elements; i++) { - debug_printf(" format=%s comps=%u\n", - util_format_name(draw->pt.vertex_element[i].src_format), - draw->pt.vertex_element[i].nr_components); + debug_printf(" format=%s\n", + util_format_name(draw->pt.vertex_element[i].src_format)); } debug_printf("Buffers:\n"); for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h index 4ca5b520204..3c44f7c11ee 100644 --- a/src/gallium/auxiliary/draw/draw_pt_decompose.h +++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h @@ -105,40 +105,20 @@ static void FUNC( ARGS, case PIPE_PRIM_QUADS: - if (flatfirst) { - for (i = 0; i+3 < count; i += 4) { - QUAD( (i + 1), - (i + 2), - (i + 3), - (i + 0) ); - } - } - else { - for (i = 0; i+3 < count; i += 4) { - QUAD( (i + 0), - (i + 1), - (i + 2), - (i + 3)); - } + for (i = 0; i+3 < count; i += 4) { + QUAD( (i + 0), + (i + 1), + (i + 2), + (i + 3)); } break; case PIPE_PRIM_QUAD_STRIP: - if (flatfirst) { - for (i = 0; i+3 < count; i += 2) { - QUAD( (i + 1), - (i + 3), - (i + 2), - (i + 0) ); - } - } - else { - for (i = 0; i+3 < count; i += 2) { - QUAD( (i + 2), - (i + 0), - (i + 1), - (i + 3)); - } + for (i = 0; i+3 < count; i += 2) { + QUAD( (i + 2), + (i + 0), + (i + 1), + (i + 3)); } break; diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h index 62822a3d562..7cba8547f15 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h @@ -118,39 +118,21 @@ static void FUNC( struct draw_pt_front_end *frontend, case PIPE_PRIM_QUADS: for (i = 0; i+3 < count; i += 4) { - if (flatfirst) { - QUAD( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, i + 3) ); - } - else { - QUAD( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, i + 3) ); - } + QUAD( vcache, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, i + 3) ); } break; case PIPE_PRIM_QUAD_STRIP: for (i = 0; i+3 < count; i += 2) { - if (flatfirst) { - QUAD( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 3), - get_elt(elts, i + 2) ); - } - else { - QUAD( vcache, - get_elt(elts, i + 2), - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 3) ); - } + QUAD( vcache, + get_elt(elts, i + 2), + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 3) ); } break; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h index 634575670db..fe3cedcc48c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h @@ -35,7 +35,7 @@ #define LP_BLD_ALPHA_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct pipe_alpha_state; struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 32f9e5201c5..90c84c6a4ee 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -232,6 +232,37 @@ lp_build_add(struct lp_build_context *bld, } +/** Return the sum of the elements of a */ +LLVMValueRef +lp_build_sum_vector(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMValueRef index, res; + int i; + + if (a == bld->zero) + return bld->zero; + if (a == bld->undef) + return bld->undef; + assert(type.length > 1); + + assert(!bld->type.norm); + + index = LLVMConstInt(LLVMInt32Type(), 0, 0); + res = LLVMBuildExtractElement(bld->builder, a, index, ""); + + for (i = 1; i < type.length; i++) { + index = LLVMConstInt(LLVMInt32Type(), i, 0); + res = LLVMBuildAdd(bld->builder, res, + LLVMBuildExtractElement(bld->builder, a, index, ""), + ""); + } + + return res; +} + + /** * Generate a - b */ @@ -644,13 +675,26 @@ lp_build_abs(struct lp_build_context *bld, if(type.floating) { /* Mask out the sign bit */ - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - unsigned long long absMask = ~(1ULL << (type.width - 1)); - LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask)); - a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - a = LLVMBuildAnd(bld->builder, a, mask, ""); - a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); - return a; + if (type.length == 1) { + LLVMTypeRef int_type = LLVMIntType(type.width); + LLVMTypeRef float_type = LLVMFloatType(); + unsigned long long absMask = ~(1ULL << (type.width - 1)); + LLVMValueRef mask = LLVMConstInt(int_type, absMask, 0); + a = LLVMBuildBitCast(bld->builder, a, int_type, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildBitCast(bld->builder, a, float_type, ""); + return a; + } + else { + /* vector of floats */ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + unsigned long long absMask = ~(1ULL << (type.width - 1)); + LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask)); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); + return a; + } } if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) { @@ -753,7 +797,7 @@ lp_build_set_sign(struct lp_build_context *bld, /** - * Convert vector of int to vector of float. + * Convert vector of (or scalar) int to vector of (or scalar) float. */ LLVMValueRef lp_build_int_to_float(struct lp_build_context *bld, @@ -764,7 +808,11 @@ lp_build_int_to_float(struct lp_build_context *bld, assert(type.floating); /*assert(lp_check_value(type, a));*/ - { + if (type.length == 1) { + LLVMTypeRef float_type = LLVMFloatType(); + return LLVMBuildSIToFP(bld->builder, a, float_type, ""); + } + else { LLVMTypeRef vec_type = lp_build_vec_type(type); /*LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);*/ LLVMValueRef res; @@ -866,6 +914,10 @@ lp_build_floor(struct lp_build_context *bld, assert(type.floating); + if (type.length == 1) { + return LLVMBuildFPTrunc(bld->builder, a, LLVMFloatType(), ""); + } + if(util_cpu_caps.has_sse4_1) return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); else { @@ -921,15 +973,24 @@ lp_build_itrunc(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); assert(type.floating); - assert(lp_check_value(type, a)); - return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); + if (type.length == 1) { + LLVMTypeRef int_type = LLVMIntType(type.width); + return LLVMBuildFPTrunc(bld->builder, a, int_type, ""); + } + else { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + assert(lp_check_value(type, a)); + return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); + } } +/** + * Convert float[] to int[] with round(). + */ LLVMValueRef lp_build_iround(struct lp_build_context *bld, LLVMValueRef a) @@ -939,6 +1000,15 @@ lp_build_iround(struct lp_build_context *bld, LLVMValueRef res; assert(type.floating); + + if (type.length == 1) { + /* scalar float to int */ + LLVMTypeRef int_type = LLVMIntType(type.width); + /* XXX we want rounding here! */ + res = LLVMBuildFPToSI(bld->builder, a, int_type, ""); + return res; + } + assert(lp_check_value(type, a)); if(util_cpu_caps.has_sse4_1) { @@ -981,6 +1051,14 @@ lp_build_ifloor(struct lp_build_context *bld, LLVMValueRef res; assert(type.floating); + + if (type.length == 1) { + /* scalar float to int */ + LLVMTypeRef int_type = LLVMIntType(type.width); + res = LLVMBuildFPToSI(bld->builder, a, int_type, ""); + return res; + } + assert(lp_check_value(type, a)); if(util_cpu_caps.has_sse4_1) { @@ -1207,6 +1285,7 @@ lp_build_polynomial(struct lp_build_context *bld, unsigned num_coeffs) { const struct lp_type type = bld->type; + LLVMTypeRef float_type = LLVMFloatType(); LLVMValueRef res = NULL; unsigned i; @@ -1216,7 +1295,13 @@ lp_build_polynomial(struct lp_build_context *bld, __FUNCTION__); for (i = num_coeffs; i--; ) { - LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]); + LLVMValueRef coeff; + + if (type.length == 1) + coeff = LLVMConstReal(float_type, coeffs[i]); + else + coeff = lp_build_const_scalar(type, coeffs[i]); + if(res) res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res)); else @@ -1410,11 +1495,87 @@ lp_build_log2_approx(struct lp_build_context *bld, } +/** scalar version of above function */ +static void +lp_build_float_log2_approx(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef *p_exp, + LLVMValueRef *p_floor_log2, + LLVMValueRef *p_log2) +{ + const struct lp_type type = bld->type; + LLVMTypeRef float_type = LLVMFloatType(); + LLVMTypeRef int_type = LLVMIntType(type.width); + + LLVMValueRef expmask = LLVMConstInt(int_type, 0x7f800000, 0); + LLVMValueRef mantmask = LLVMConstInt(int_type, 0x007fffff, 0); + LLVMValueRef one = LLVMConstBitCast(bld->one, int_type); + + LLVMValueRef i = NULL; + LLVMValueRef exp = NULL; + LLVMValueRef mant = NULL; + LLVMValueRef logexp = NULL; + LLVMValueRef logmant = NULL; + LLVMValueRef res = NULL; + + if(p_exp || p_floor_log2 || p_log2) { + /* TODO: optimize the constant case */ + if(LLVMIsConstant(x)) + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); + + assert(type.floating && type.width == 32); + + i = LLVMBuildBitCast(bld->builder, x, int_type, ""); + + /* exp = (float) exponent(x) */ + exp = LLVMBuildAnd(bld->builder, i, expmask, ""); + } + + if(p_floor_log2 || p_log2) { + LLVMValueRef c23 = LLVMConstInt(int_type, 23, 0); + LLVMValueRef c127 = LLVMConstInt(int_type, 127, 0); + logexp = LLVMBuildLShr(bld->builder, exp, c23, ""); + logexp = LLVMBuildSub(bld->builder, logexp, c127, ""); + logexp = LLVMBuildSIToFP(bld->builder, logexp, float_type, ""); + } + + if(p_log2) { + /* mant = (float) mantissa(x) */ + mant = LLVMBuildAnd(bld->builder, i, mantmask, ""); + mant = LLVMBuildOr(bld->builder, mant, one, ""); + mant = LLVMBuildBitCast(bld->builder, mant, float_type, ""); + + logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial, + Elements(lp_build_log2_polynomial)); + + /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ + logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), ""); + + res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); + } + + if(p_exp) + *p_exp = exp; + + if(p_floor_log2) + *p_floor_log2 = logexp; + + if(p_log2) + *p_log2 = res; +} + + LLVMValueRef lp_build_log2(struct lp_build_context *bld, LLVMValueRef x) { LLVMValueRef res; - lp_build_log2_approx(bld, x, NULL, NULL, &res); + if (bld->type.length == 1) { + lp_build_float_log2_approx(bld, x, NULL, NULL, &res); + } + else { + lp_build_log2_approx(bld, x, NULL, NULL, &res); + } return res; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 55385e3a66a..7a10fe12209 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -37,7 +37,7 @@ #define LP_BLD_ARIT_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct lp_type; @@ -57,6 +57,10 @@ lp_build_add(struct lp_build_context *bld, LLVMValueRef b); LLVMValueRef +lp_build_sum_vector(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef lp_build_sub(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_blend.h b/src/gallium/auxiliary/gallivm/lp_bld_blend.h index da272e549f3..5a9e1c1fb2f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_blend.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_blend.h @@ -40,7 +40,7 @@ * for a standalone example. */ -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include "pipe/p_format.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c index c8eaa8c3940..8a275fa72f3 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c @@ -221,8 +221,16 @@ lp_build_undef(struct lp_type type) LLVMValueRef lp_build_zero(struct lp_type type) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - return LLVMConstNull(vec_type); + if (type.length == 1) { + if (type.floating) + return LLVMConstReal(LLVMFloatType(), 0.0); + else + return LLVMConstInt(LLVMIntType(type.width), 0, 0); + } + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + return LLVMConstNull(vec_type); + } } @@ -264,10 +272,16 @@ lp_build_one(struct lp_type type) for(i = 1; i < type.length; ++i) elems[i] = elems[0]; - return LLVMConstVector(elems, type.length); + if (type.length == 1) + return elems[0]; + else + return LLVMConstVector(elems, type.length); } +/** + * Build constant-valued vector from a scalar value. + */ LLVMValueRef lp_build_const_scalar(struct lp_type type, double val) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h index cb8e1c7b006..40786361031 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h @@ -37,7 +37,7 @@ #define LP_BLD_CONST_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include <pipe/p_compiler.h> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.h b/src/gallium/auxiliary/gallivm/lp_bld_conv.h index 948e68fae4f..78e8155ff73 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.h @@ -37,7 +37,7 @@ #define LP_BLD_CONV_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h index 583e6132b4b..441ad94786f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h @@ -30,7 +30,7 @@ #define LP_BLD_DEBUG_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include "pipe/p_compiler.h" #include "util/u_string.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.h b/src/gallium/auxiliary/gallivm/lp_bld_depth.h index 79d6981bb51..8be80024ae8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_depth.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.h @@ -36,7 +36,7 @@ #define LP_BLD_DEPTH_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct pipe_depth_state; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index bc831389085..c2f35419ec1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -308,7 +308,7 @@ lp_build_flow_scope_end(struct lp_build_flow_context *flow) * Note: this function has no dependencies on the flow code and could * be used elsewhere. */ -static LLVMBasicBlockRef +LLVMBasicBlockRef lp_build_insert_new_block(LLVMBuilderRef builder, const char *name) { LLVMBasicBlockRef current_block; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index 4c225a0d4f9..e1588365491 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -35,7 +35,7 @@ #define LP_BLD_FLOW_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct lp_type; @@ -145,7 +145,9 @@ lp_build_else(struct lp_build_if_state *ctx); void lp_build_endif(struct lp_build_if_state *ctx); - + +LLVMBasicBlockRef +lp_build_insert_new_block(LLVMBuilderRef builder, const char *name); #endif /* !LP_BLD_FLOW_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index 970bee379f5..8972c0dc178 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -34,7 +34,7 @@ * Pixel format helpers. */ -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include "pipe/p_format.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_interp.h b/src/gallium/auxiliary/gallivm/lp_bld_interp.h index ca958cdf343..177b5e943ee 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_interp.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_interp.h @@ -41,7 +41,7 @@ #define LP_BLD_INTERP_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include "tgsi/tgsi_exec.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h index f813f27074b..7d5506c7338 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h @@ -37,7 +37,7 @@ #define LP_BLD_INTR_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" /** diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index 2726747eaea..f3df3dd1388 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -42,6 +42,26 @@ #include "lp_bld_logic.h" +/* + * XXX + * + * Selection with vector conditional like + * + * select <4 x i1> %C, %A, %B + * + * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not + * supported on any backend. + * + * Expanding the boolean vector to full SIMD register width, as in + * + * sext <4 x i1> %C to <4 x i32> + * + * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but + * it causes assertion failures in LLVM 2.6. It appears to work correctly on + * LLVM 2.7. + */ + + /** * Build code to compare two values 'a' and 'b' of 'type' using the given func. * \param func one of PIPE_FUNC_x @@ -54,13 +74,11 @@ lp_build_compare(LLVMBuilderRef builder, LLVMValueRef a, LLVMValueRef b) { - LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef zeros = LLVMConstNull(int_vec_type); LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); LLVMValueRef cond; LLVMValueRef res; - unsigned i; assert(func >= PIPE_FUNC_NEVER); assert(func <= PIPE_FUNC_ALWAYS); @@ -74,10 +92,12 @@ lp_build_compare(LLVMBuilderRef builder, /* XXX: It is not clear if we should use the ordered or unordered operators */ +#if HAVE_LLVM < 0x0207 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(type.width * type.length == 128) { if(type.floating && util_cpu_caps.has_sse) { /* float[4] comparison */ + LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMValueRef args[3]; unsigned cc; boolean swap; @@ -147,6 +167,7 @@ lp_build_compare(LLVMBuilderRef builder, const char *pcmpgt; LLVMValueRef args[2]; LLVMValueRef res; + LLVMTypeRef vec_type = lp_build_vec_type(type); switch (type.width) { case 8: @@ -198,8 +219,9 @@ lp_build_compare(LLVMBuilderRef builder, return res; } - } + } /* if (type.width * type.length == 128) */ #endif +#endif /* HAVE_LLVM < 0x0207 */ if(type.floating) { LLVMRealPredicate op; @@ -233,25 +255,33 @@ lp_build_compare(LLVMBuilderRef builder, return lp_build_undef(type); } -#if 0 - /* XXX: Although valid IR, no LLVM target currently support this */ +#if HAVE_LLVM >= 0x0207 cond = LLVMBuildFCmp(builder, op, a, b, ""); - res = LLVMBuildSelect(builder, cond, ones, zeros, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); #else - debug_printf("%s: warning: using slow element-wise vector comparison\n", - __FUNCTION__); - res = LLVMGetUndef(int_vec_type); - for(i = 0; i < type.length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - cond = LLVMBuildFCmp(builder, op, - LLVMBuildExtractElement(builder, a, index, ""), - LLVMBuildExtractElement(builder, b, index, ""), - ""); - cond = LLVMBuildSelect(builder, cond, - LLVMConstExtractElement(ones, index), - LLVMConstExtractElement(zeros, index), - ""); - res = LLVMBuildInsertElement(builder, res, cond, index, ""); + if (type.length == 1) { + cond = LLVMBuildFCmp(builder, op, a, b, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); + } + else { + unsigned i; + + res = LLVMGetUndef(int_vec_type); + + debug_printf("%s: warning: using slow element-wise float" + " vector comparison\n", __FUNCTION__); + for (i = 0; i < type.length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + cond = LLVMBuildFCmp(builder, op, + LLVMBuildExtractElement(builder, a, index, ""), + LLVMBuildExtractElement(builder, b, index, ""), + ""); + cond = LLVMBuildSelect(builder, cond, + LLVMConstExtractElement(ones, index), + LLVMConstExtractElement(zeros, index), + ""); + res = LLVMBuildInsertElement(builder, res, cond, index, ""); + } } #endif } @@ -281,25 +311,34 @@ lp_build_compare(LLVMBuilderRef builder, return lp_build_undef(type); } -#if 0 - /* XXX: Although valid IR, no LLVM target currently support this */ +#if HAVE_LLVM >= 0x0207 cond = LLVMBuildICmp(builder, op, a, b, ""); - res = LLVMBuildSelect(builder, cond, ones, zeros, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); #else - debug_printf("%s: warning: using slow element-wise int vector comparison\n", - __FUNCTION__); - res = LLVMGetUndef(int_vec_type); - for(i = 0; i < type.length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - cond = LLVMBuildICmp(builder, op, - LLVMBuildExtractElement(builder, a, index, ""), - LLVMBuildExtractElement(builder, b, index, ""), - ""); - cond = LLVMBuildSelect(builder, cond, - LLVMConstExtractElement(ones, index), - LLVMConstExtractElement(zeros, index), - ""); - res = LLVMBuildInsertElement(builder, res, cond, index, ""); + if (type.length == 1) { + cond = LLVMBuildICmp(builder, op, a, b, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); + } + else { + unsigned i; + + res = LLVMGetUndef(int_vec_type); + + debug_printf("%s: warning: using slow element-wise int" + " vector comparison\n", __FUNCTION__); + + for(i = 0; i < type.length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + cond = LLVMBuildICmp(builder, op, + LLVMBuildExtractElement(builder, a, index, ""), + LLVMBuildExtractElement(builder, b, index, ""), + ""); + cond = LLVMBuildSelect(builder, cond, + LLVMConstExtractElement(ones, index), + LLVMConstExtractElement(zeros, index), + ""); + res = LLVMBuildInsertElement(builder, res, cond, index, ""); + } } #endif } @@ -326,6 +365,8 @@ lp_build_cmp(struct lp_build_context *bld, /** * Return mask ? a : b; + * + * mask is a bitwise mask, composed of 0 or ~0 for each element. */ LLVMValueRef lp_build_select(struct lp_build_context *bld, @@ -339,26 +380,32 @@ lp_build_select(struct lp_build_context *bld, if(a == b) return a; - if(type.floating) { - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); + if (type.length == 1) { + mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), ""); + res = LLVMBuildSelect(bld->builder, mask, a, b, ""); } + else { + if(type.floating) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); + } - a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); - /* This often gets translated to PANDN, but sometimes the NOT is - * pre-computed and stored in another constant. The best strategy depends - * on available registers, so it is not a big deal -- hopefully LLVM does - * the right decision attending the rest of the program. - */ - b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); + /* This often gets translated to PANDN, but sometimes the NOT is + * pre-computed and stored in another constant. The best strategy depends + * on available registers, so it is not a big deal -- hopefully LLVM does + * the right decision attending the rest of the program. + */ + b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); - res = LLVMBuildOr(bld->builder, a, b, ""); + res = LLVMBuildOr(bld->builder, a, b, ""); - if(type.floating) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + if(type.floating) { + LLVMTypeRef vec_type = lp_build_vec_type(type); + res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + } } return res; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h index a399ebf39ef..b54ec13b701 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h @@ -37,7 +37,7 @@ #define LP_BLD_LOGIC_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index bc360ad77ad..23398f41f99 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -256,7 +256,9 @@ lp_build_pack2(LLVMBuilderRef builder, LLVMValueRef lo, LLVMValueRef hi) { +#if HAVE_LLVM < 0x0207 LLVMTypeRef src_vec_type = lp_build_vec_type(src_type); +#endif LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type); LLVMValueRef shuffle; LLVMValueRef res; @@ -272,11 +274,14 @@ lp_build_pack2(LLVMBuilderRef builder, switch(src_type.width) { case 32: if(dst_type.sign) { +#if HAVE_LLVM >= 0x0207 + res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", dst_vec_type, lo, hi); +#else res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi); +#endif } else { if (util_cpu_caps.has_sse4_1) { - /* PACKUSDW is the only instrinsic with a consistent signature */ return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi); } else { @@ -288,9 +293,17 @@ lp_build_pack2(LLVMBuilderRef builder, case 16: if(dst_type.sign) +#if HAVE_LLVM >= 0x0207 + res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", dst_vec_type, lo, hi); +#else res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi); +#endif else +#if HAVE_LLVM >= 0x0207 + res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", dst_vec_type, lo, hi); +#else res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi); +#endif break; default: diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h index fb2a34984a4..346a17d5803 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h @@ -37,7 +37,7 @@ #define LP_BLD_PACK_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 6a026e468e1..543fd5fea3f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -62,6 +62,18 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, if(!sampler) return; + /* + * We don't copy sampler state over unless it is actually enabled, to avoid + * spurious recompiles, as the sampler static state is part of the shader + * key. + * + * Ideally the state tracker or cso_cache module would make all state + * canonical, but until that happens it's better to be safe than sorry here. + * + * XXX: Actually there's much more than can be done here, especially + * regarding 1D/2D/3D/CUBE textures, wrap modes, etc. + */ + state->format = texture->format; state->target = texture->target; state->pot_width = util_is_pot(texture->width0); @@ -74,8 +86,12 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, state->min_img_filter = sampler->min_img_filter; state->min_mip_filter = sampler->min_mip_filter; state->mag_img_filter = sampler->mag_img_filter; + state->compare_mode = sampler->compare_mode; - state->compare_func = sampler->compare_func; + if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) { + state->compare_func = sampler->compare_func; + } + state->normalized_coords = sampler->normalized_coords; state->lod_bias = sampler->lod_bias; state->min_lod = sampler->min_lod; @@ -139,15 +155,16 @@ lp_build_gather(LLVMBuilderRef builder, /** * Compute the offset of a pixel. * - * x, y, y_stride are vectors + * x, y, z, y_stride, z_stride are vectors */ LLVMValueRef lp_build_sample_offset(struct lp_build_context *bld, const struct util_format_description *format_desc, LLVMValueRef x, LLVMValueRef y, + LLVMValueRef z, LLVMValueRef y_stride, - LLVMValueRef data_ptr) + LLVMValueRef z_stride) { LLVMValueRef x_stride; LLVMValueRef offset; @@ -163,6 +180,10 @@ lp_build_sample_offset(struct lp_build_context *bld, LLVMValueRef y_offset_lo, y_offset_hi; LLVMValueRef offset_lo, offset_hi; + /* XXX 1D & 3D addressing not done yet */ + assert(!z); + assert(!z_stride); + x_lo = LLVMBuildAnd(bld->builder, x, bld->one, ""); y_lo = LLVMBuildAnd(bld->builder, y, bld->one, ""); @@ -186,13 +207,17 @@ lp_build_sample_offset(struct lp_build_context *bld, offset = lp_build_add(bld, offset_hi, offset_lo); } else { - LLVMValueRef x_offset; - LLVMValueRef y_offset; + offset = lp_build_mul(bld, x, x_stride); - x_offset = lp_build_mul(bld, x, x_stride); - y_offset = lp_build_mul(bld, y, y_stride); + if (y && y_stride) { + LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride); + offset = lp_build_add(bld, offset, y_offset); + } - offset = lp_build_add(bld, x_offset, y_offset); + if (z && z_stride) { + LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride); + offset = lp_build_add(bld, offset, z_offset); + } } return offset; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index 5ba0925bb69..7f08bfaac1f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -36,7 +36,7 @@ #define LP_BLD_SAMPLE_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct pipe_texture; struct pipe_sampler_state; @@ -113,9 +113,9 @@ struct lp_sampler_dynamic_state unsigned unit); LLVMValueRef - (*stride)( struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, - unsigned unit); + (*row_stride)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); LLVMValueRef (*data_ptr)( struct lp_sampler_dynamic_state *state, @@ -148,8 +148,9 @@ lp_build_sample_offset(struct lp_build_context *bld, const struct util_format_description *format_desc, LLVMValueRef x, LLVMValueRef y, + LLVMValueRef z, LLVMValueRef y_stride, - LLVMValueRef data_ptr); + LLVMValueRef z_stride); void diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 9058f76c1df..a4106883503 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -48,6 +48,7 @@ #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_pack.h" +#include "lp_bld_flow.h" #include "lp_bld_format.h" #include "lp_bld_sample.h" @@ -65,6 +66,14 @@ struct lp_build_sample_context const struct util_format_description *format_desc; + /** regular scalar float type */ + struct lp_type float_type; + struct lp_build_context float_bld; + + /** regular scalar float type */ + struct lp_type int_type; + struct lp_build_context int_bld; + /** Incoming coordinates type and build context */ struct lp_type coord_type; struct lp_build_context coord_bld; @@ -108,9 +117,78 @@ wrap_mode_uses_border_color(unsigned mode) } +static LLVMValueRef +lp_build_get_mipmap_level(struct lp_build_sample_context *bld, + LLVMValueRef data_array, LLVMValueRef level) +{ + LLVMValueRef indexes[2], data_ptr; + indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indexes[1] = level; + data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, ""); + data_ptr = LLVMBuildLoad(bld->builder, data_ptr, ""); + return data_ptr; +} + + +static LLVMValueRef +lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, + LLVMValueRef data_array, int level) +{ + LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); + return lp_build_get_mipmap_level(bld, data_array, lvl); +} + /** - * Gen code to fetch a texel from a texture at int coords (x, y). + * Dereference stride_array[mipmap_level] array to get a stride. + * Return stride as a vector. + */ +static LLVMValueRef +lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, + LLVMValueRef stride_array, LLVMValueRef level) +{ + LLVMValueRef indexes[2], stride; + indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indexes[1] = level; + stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, ""); + stride = LLVMBuildLoad(bld->builder, stride, ""); + stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride); + return stride; +} + + +/** Dereference stride_array[0] array to get a stride (as vector). */ +static LLVMValueRef +lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld, + LLVMValueRef stride_array, int level) +{ + LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); + return lp_build_get_level_stride_vec(bld, stride_array, lvl); +} + + +static int +texture_dims(enum pipe_texture_target tex) +{ + switch (tex) { + case PIPE_TEXTURE_1D: + return 1; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_CUBE: + return 2; + case PIPE_TEXTURE_3D: + return 3; + default: + assert(0 && "bad texture target in texture_dims()"); + return 2; + } +} + + + +/** + * Generate code to fetch a texel from a texture at int coords (x, y, z). + * The computation depends on whether the texture is 1D, 2D or 3D. * The result, texel, will be: * texel[0] = red values * texel[1] = green values @@ -121,12 +199,16 @@ static void lp_build_sample_texel_soa(struct lp_build_sample_context *bld, LLVMValueRef width, LLVMValueRef height, + LLVMValueRef depth, LLVMValueRef x, LLVMValueRef y, + LLVMValueRef z, LLVMValueRef y_stride, + LLVMValueRef z_stride, LLVMValueRef data_ptr, LLVMValueRef *texel) { + const int dims = texture_dims(bld->static_state->target); struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef offset; LLVMValueRef packed; @@ -140,7 +222,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2"); } - if (wrap_mode_uses_border_color(bld->static_state->wrap_t)) { + if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) { LLVMValueRef b1, b2; b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero); b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height); @@ -153,6 +235,19 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, } } + if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) { + LLVMValueRef b1, b2; + b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero); + b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); + if (use_border) { + use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1"); + use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2"); + } + else { + use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2"); + } + } + /* * Note: if we find an app which frequently samples the texture border * we might want to implement a true conditional here to avoid sampling @@ -168,11 +263,10 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, * the texel color results with the border color. */ - /* convert x,y coords to linear offset from start of texture, in bytes */ + /* convert x,y,z coords to linear offset from start of texture, in bytes */ offset = lp_build_sample_offset(&bld->uint_coord_bld, bld->format_desc, - x, y, y_stride, - data_ptr); + x, y, z, y_stride, z_stride); assert(bld->format_desc->block.width == 1); assert(bld->format_desc->block.height == 1); @@ -185,6 +279,8 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, bld->texel_type.width, data_ptr, offset); + texel[0] = texel[1] = texel[2] = texel[3] = NULL; + /* convert texels to float rgba */ lp_build_unpack_rgba_soa(bld->builder, bld->format_desc, @@ -210,19 +306,22 @@ lp_build_sample_packed(struct lp_build_sample_context *bld, LLVMValueRef x, LLVMValueRef y, LLVMValueRef y_stride, - LLVMValueRef data_ptr) + LLVMValueRef data_array) { LLVMValueRef offset; + LLVMValueRef data_ptr; offset = lp_build_sample_offset(&bld->uint_coord_bld, bld->format_desc, - x, y, y_stride, - data_ptr); + x, y, NULL, y_stride, NULL); assert(bld->format_desc->block.width == 1); assert(bld->format_desc->block.height == 1); assert(bld->format_desc->block.bits <= bld->texel_type.width); + /* get pointer to mipmap level 0 data */ + data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0); + return lp_build_gather(bld->builder, bld->texel_type.length, bld->format_desc->block.bits, @@ -711,76 +810,663 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, /** - * Sample 2D texture with nearest filtering. + * Codegen equivalent for u_minify(). + * Return max(1, base_size >> level); + */ +static LLVMValueRef +lp_build_minify(struct lp_build_sample_context *bld, + LLVMValueRef base_size, + LLVMValueRef level) +{ + LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify"); + size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one); + return size; +} + + +/** + * Generate code to compute texture level of detail (lambda). + * \param s vector of texcoord s values + * \param t vector of texcoord t values + * \param r vector of texcoord r values + * \param width scalar int texture width + * \param height scalar int texture height + * \param depth scalar int texture depth + */ +static LLVMValueRef +lp_build_lod_selector(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef depth) + +{ + const int dims = texture_dims(bld->static_state->target); + struct lp_build_context *float_bld = &bld->float_bld; + LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), bld->static_state->lod_bias); + LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); + LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->max_lod); + + LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + + LLVMValueRef s0, s1, s2; + LLVMValueRef t0, t1, t2; + LLVMValueRef r0, r1, r2; + LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; + LLVMValueRef rho, lod; + + /* + * dsdx = abs(s[1] - s[0]); + * dsdy = abs(s[2] - s[0]); + * dtdx = abs(t[1] - t[0]); + * dtdy = abs(t[2] - t[0]); + * drdx = abs(r[1] - r[0]); + * drdy = abs(r[2] - r[0]); + * XXX we're assuming a four-element quad in 2x2 layout here. + */ + s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0"); + s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1"); + s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2"); + dsdx = LLVMBuildSub(bld->builder, s1, s0, ""); + dsdx = lp_build_abs(float_bld, dsdx); + dsdy = LLVMBuildSub(bld->builder, s2, s0, ""); + dsdy = lp_build_abs(float_bld, dsdy); + if (dims > 1) { + t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0"); + t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1"); + t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2"); + dtdx = LLVMBuildSub(bld->builder, t1, t0, ""); + dtdx = lp_build_abs(float_bld, dtdx); + dtdy = LLVMBuildSub(bld->builder, t2, t0, ""); + dtdy = lp_build_abs(float_bld, dtdy); + if (dims > 2) { + r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0"); + r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1"); + r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2"); + drdx = LLVMBuildSub(bld->builder, r1, r0, ""); + drdx = lp_build_abs(float_bld, drdx); + drdy = LLVMBuildSub(bld->builder, r2, r0, ""); + drdy = lp_build_abs(float_bld, drdy); + } + } + + /* Compute rho = max of all partial derivatives scaled by texture size. + * XXX this could be vectorized somewhat + */ + rho = LLVMBuildMul(bld->builder, + lp_build_max(float_bld, dsdx, dsdy), + lp_build_int_to_float(float_bld, width), ""); + if (dims > 1) { + LLVMValueRef max; + max = LLVMBuildMul(bld->builder, + lp_build_max(float_bld, dtdx, dtdy), + lp_build_int_to_float(float_bld, height), ""); + rho = lp_build_max(float_bld, rho, max); + if (dims > 2) { + max = LLVMBuildMul(bld->builder, + lp_build_max(float_bld, drdx, drdy), + lp_build_int_to_float(float_bld, depth), ""); + rho = lp_build_max(float_bld, rho, max); + } + } + + /* compute lod = log2(rho) */ + lod = lp_build_log2(float_bld, rho); + + /* add lod bias */ + lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias"); + + /* clamp lod */ + lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); + + return lod; +} + + +/** + * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer + * mipmap level index. + * Note: this is all scalar code. + * \param lod scalar float texture level of detail + * \param level_out returns integer */ static void -lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef stride, - LLVMValueRef data_ptr, - LLVMValueRef *texel) +lp_build_nearest_mip_level(struct lp_build_sample_context *bld, + unsigned unit, + LLVMValueRef lod, + LLVMValueRef *level_out) { - LLVMValueRef x, y; + struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *int_bld = &bld->int_bld; + LLVMValueRef last_level, level; - x = lp_build_sample_wrap_nearest(bld, s, width, - bld->static_state->pot_width, - bld->static_state->wrap_s); - y = lp_build_sample_wrap_nearest(bld, t, height, - bld->static_state->pot_height, - bld->static_state->wrap_t); + LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0); - lp_build_name(x, "tex.x.wrapped"); - lp_build_name(y, "tex.y.wrapped"); + last_level = bld->dynamic_state->last_level(bld->dynamic_state, + bld->builder, unit); - lp_build_sample_texel_soa(bld, width, height, x, y, stride, data_ptr, texel); + /* convert float lod to integer */ + level = lp_build_iround(float_bld, lod); + + /* clamp level to legal range of levels */ + *level_out = lp_build_clamp(int_bld, level, zero, last_level); } /** - * Sample 2D texture with bilinear filtering. + * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to + * two (adjacent) mipmap level indexes. Later, we'll sample from those + * two mipmap levels and interpolate between them. + */ +static void +lp_build_linear_mip_levels(struct lp_build_sample_context *bld, + unsigned unit, + LLVMValueRef lod, + LLVMValueRef *level0_out, + LLVMValueRef *level1_out, + LLVMValueRef *weight_out) +{ + struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *int_bld = &bld->int_bld; + LLVMValueRef last_level, level; + + last_level = bld->dynamic_state->last_level(bld->dynamic_state, + bld->builder, unit); + + /* convert float lod to integer */ + level = lp_build_ifloor(float_bld, lod); + + /* compute level 0 and clamp to legal range of levels */ + *level0_out = lp_build_clamp(int_bld, level, + int_bld->zero, + last_level); + /* compute level 1 and clamp to legal range of levels */ + *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one); + *level1_out = lp_build_min(int_bld, *level1_out, int_bld->zero); + + *weight_out = lp_build_fract(float_bld, lod); +} + + +/** + * Generate code to sample a mipmap level with nearest filtering. */ static void -lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, +lp_build_sample_image_nearest(struct lp_build_sample_context *bld, + LLVMValueRef width_vec, + LLVMValueRef height_vec, + LLVMValueRef depth_vec, + LLVMValueRef row_stride_vec, + LLVMValueRef img_stride_vec, + LLVMValueRef data_ptr, LLVMValueRef s, LLVMValueRef t, - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef stride, - LLVMValueRef data_ptr, - LLVMValueRef *texel) + LLVMValueRef r, + LLVMValueRef colors_out[4]) { - LLVMValueRef s_fpart; - LLVMValueRef t_fpart; - LLVMValueRef x0, x1; - LLVMValueRef y0, y1; + const int dims = texture_dims(bld->static_state->target); + LLVMValueRef x, y, z; + + /* + * Compute integer texcoords. + */ + x = lp_build_sample_wrap_nearest(bld, s, width_vec, + bld->static_state->pot_width, + bld->static_state->wrap_s); + lp_build_name(x, "tex.x.wrapped"); + + if (dims >= 2) { + y = lp_build_sample_wrap_nearest(bld, t, height_vec, + bld->static_state->pot_height, + bld->static_state->wrap_t); + lp_build_name(y, "tex.y.wrapped"); + + if (dims == 3) { + z = lp_build_sample_wrap_nearest(bld, r, depth_vec, + bld->static_state->pot_height, + bld->static_state->wrap_r); + lp_build_name(z, "tex.z.wrapped"); + } + else { + z = NULL; + } + } + else { + y = z = NULL; + } + + /* + * Get texture colors. + */ + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x, y, z, + row_stride_vec, img_stride_vec, + data_ptr, colors_out); +} + + +/** + * Generate code to sample a mipmap level with linear filtering. + * 1D, 2D and 3D images are suppored. + */ +static void +lp_build_sample_image_linear(struct lp_build_sample_context *bld, + LLVMValueRef width_vec, + LLVMValueRef height_vec, + LLVMValueRef depth_vec, + LLVMValueRef row_stride_vec, + LLVMValueRef img_stride_vec, + LLVMValueRef data_ptr, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef colors_out[4]) +{ + const int dims = texture_dims(bld->static_state->target); + LLVMValueRef x0, y0, z0, x1, y1, z1; + LLVMValueRef s_fpart, t_fpart, r_fpart; LLVMValueRef neighbors[2][2][4]; - unsigned chan; + int chan; + + /* + * Compute integer texcoords. + */ + lp_build_sample_wrap_linear(bld, s, width_vec, + bld->static_state->pot_width, + bld->static_state->wrap_s, + &x0, &x1, &s_fpart); + lp_build_name(x0, "tex.x0.wrapped"); + lp_build_name(x1, "tex.x1.wrapped"); + + if (dims >= 2) { + lp_build_sample_wrap_linear(bld, t, height_vec, + bld->static_state->pot_height, + bld->static_state->wrap_t, + &y0, &y1, &t_fpart); + lp_build_name(y0, "tex.y0.wrapped"); + lp_build_name(y1, "tex.y1.wrapped"); + + if (dims == 3) { + lp_build_sample_wrap_linear(bld, r, depth_vec, + bld->static_state->pot_depth, + bld->static_state->wrap_r, + &z0, &z1, &r_fpart); + lp_build_name(z0, "tex.z0.wrapped"); + lp_build_name(z1, "tex.z1.wrapped"); + } + else { + z0 = z1 = r_fpart = NULL; + } + } + else { + y0 = y1 = t_fpart = NULL; + z0 = z1 = r_fpart = NULL; + } - lp_build_sample_wrap_linear(bld, s, width, bld->static_state->pot_width, - bld->static_state->wrap_s, &x0, &x1, &s_fpart); - lp_build_sample_wrap_linear(bld, t, height, bld->static_state->pot_height, - bld->static_state->wrap_t, &y0, &y1, &t_fpart); + /* + * Get texture colors. + */ + /* get x0/x1 texels */ + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x0, y0, z0, + row_stride_vec, img_stride_vec, + data_ptr, neighbors[0][0]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x1, y0, z0, + row_stride_vec, img_stride_vec, + data_ptr, neighbors[0][1]); + + if (dims == 1) { + /* Interpolate two samples from 1D image to produce one color */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart, + neighbors[0][0][chan], + neighbors[0][1][chan]); + } + } + else { + /* 2D/3D texture */ + LLVMValueRef colors0[4]; + + /* get x0/x1 texels at y1 */ + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x0, y1, z0, + row_stride_vec, img_stride_vec, + data_ptr, neighbors[1][0]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x1, y1, z0, + row_stride_vec, img_stride_vec, + data_ptr, neighbors[1][1]); + + /* Bilinear interpolate the four samples from the 2D image / 3D slice */ + for (chan = 0; chan < 4; chan++) { + colors0[chan] = lp_build_lerp_2d(&bld->texel_bld, + s_fpart, t_fpart, + neighbors[0][0][chan], + neighbors[0][1][chan], + neighbors[1][0][chan], + neighbors[1][1][chan]); + } - lp_build_sample_texel_soa(bld, width, height, x0, y0, stride, data_ptr, neighbors[0][0]); - lp_build_sample_texel_soa(bld, width, height, x1, y0, stride, data_ptr, neighbors[0][1]); - lp_build_sample_texel_soa(bld, width, height, x0, y1, stride, data_ptr, neighbors[1][0]); - lp_build_sample_texel_soa(bld, width, height, x1, y1, stride, data_ptr, neighbors[1][1]); + if (dims == 3) { + LLVMValueRef neighbors1[2][2][4]; + LLVMValueRef colors1[4]; + + /* get x0/x1/y0/y1 texels at z1 */ + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x0, y0, z1, + row_stride_vec, img_stride_vec, + data_ptr, neighbors1[0][0]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x1, y0, z1, + row_stride_vec, img_stride_vec, + data_ptr, neighbors1[0][1]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x0, y1, z1, + row_stride_vec, img_stride_vec, + data_ptr, neighbors1[1][0]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x1, y1, z1, + row_stride_vec, img_stride_vec, + data_ptr, neighbors1[1][1]); + + /* Bilinear interpolate the four samples from the second Z slice */ + for (chan = 0; chan < 4; chan++) { + colors1[chan] = lp_build_lerp_2d(&bld->texel_bld, + s_fpart, t_fpart, + neighbors1[0][0][chan], + neighbors1[0][1][chan], + neighbors1[1][0][chan], + neighbors1[1][1][chan]); + } - /* TODO: Don't interpolate missing channels */ - for(chan = 0; chan < 4; ++chan) { - texel[chan] = lp_build_lerp_2d(&bld->texel_bld, - s_fpart, t_fpart, - neighbors[0][0][chan], - neighbors[0][1][chan], - neighbors[1][0][chan], - neighbors[1][1][chan]); + /* Linearly interpolate the two samples from the two 3D slices */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = lp_build_lerp(&bld->texel_bld, + r_fpart, + colors0[chan], colors1[chan]); + } + } + else { + /* 2D tex */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = colors0[chan]; + } + } } } +/** + * Generate code to do cube face selection and per-face texcoords. + */ +static void +lp_build_cube_lookup(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef *face, + LLVMValueRef *face_s, + LLVMValueRef *face_t) +{ +#if 0 + struct lp_build_context *float_bld = &bld->float_bld; + LLVMValueRef rx, ry, rz; + LLVMValueRef arx, ary, arz; + LLVMValueRef sc, tc, ma; + LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25); + LLVMValueRef arx_ge_ary, arx_ge_arz; + LLVMValueRef ary_ge_arx, ary_ge_arz; + LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz; + LLVMValueRef rx_pos, ry_pos, rz_pos; + + assert(bld->coord_bld.type.length == 4); + + /* + * Use the average of the four pixel's texcoords to choose the face. + */ + rx = lp_build_mul(float_bld, c25, + lp_build_sum_vector(&bld->coord_bld, s)); + ry = lp_build_mul(float_bld, c25, + lp_build_sum_vector(&bld->coord_bld, t)); + rz = lp_build_mul(float_bld, c25, + lp_build_sum_vector(&bld->coord_bld, r)); + + arx = lp_build_abs(float_bld, rx); + ary = lp_build_abs(float_bld, ry); + arz = lp_build_abs(float_bld, rz); + + /* + * Compare sign/magnitude of rx,ry,rz to determine face + */ + arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, ""); + arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, ""); + ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, ""); + ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, ""); + + arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, ""); + ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); + + rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, ""); + ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, ""); + rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, ""); + + { + struct lp_build_flow_context *flow_ctx; + struct lp_build_if_state if_ctx, if2_ctx; + + flow_ctx = lp_build_flow_create(bld->builder); + + lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz); + { +#if 0 + lp_build_if(&if2_ctx, flow_ctx, bld->builder, rx_pos); + { + /* Positive X face */ + } + lp_build_else(&if2_ctx); + { + /* Negative X face */ + } + lp_build_endif(&if2_ctx); +#endif + } + lp_build_else(&if_ctx); + { + + + } + lp_build_endif(&if_ctx); + + lp_build_flow_destroy(flow_ctx); + } +#endif +} + + + +/** + * General texture sampling codegen. + * This function handles texture sampling for all texture targets (1D, + * 2D, 3D, cube) and all filtering modes. + */ +static void +lp_build_sample_general(struct lp_build_sample_context *bld, + unsigned unit, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef depth, + LLVMValueRef width_vec, + LLVMValueRef height_vec, + LLVMValueRef depth_vec, + LLVMValueRef row_stride_array, + LLVMValueRef img_stride_vec, + LLVMValueRef data_array, + LLVMValueRef *colors_out) +{ + const unsigned mip_filter = bld->static_state->min_mip_filter; + const unsigned min_filter = bld->static_state->min_img_filter; + const unsigned mag_filter = bld->static_state->mag_img_filter; + const int dims = texture_dims(bld->static_state->target); + LLVMValueRef lod, lod_fpart; + LLVMValueRef ilevel0, ilevel1, ilevel0_vec, ilevel1_vec; + LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL; + LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL; + LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; + LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL; + LLVMValueRef data_ptr0, data_ptr1; + int chan; + + /* + printf("%s mip %d min %d mag %d\n", __FUNCTION__, + mip_filter, min_filter, mag_filter); + */ + + /* + * Compute the level of detail (mipmap level index(es)). + */ + if (mip_filter == PIPE_TEX_MIPFILTER_NONE) { + /* always use mip level 0 */ + ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + } + else { + /* compute float LOD */ + lod = lp_build_lod_selector(bld, s, t, r, width, height, depth); + + if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { + lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + } + else { + assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR); + lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1, + &lod_fpart); + lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart); + } + } + + /* + * Convert scalar integer mipmap levels into vectors. + */ + ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0); + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) + ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1); + + /* + * Compute width, height at mipmap level 'ilevel0' + */ + width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec); + if (dims >= 2) { + height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec); + row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array, + ilevel0); + if (dims == 3) { + depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec); + img_stride0_vec = lp_build_mul(&bld->int_coord_bld, + row_stride0_vec, height0_vec); + } + } + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* compute width, height, depth for second mipmap level at ilevel1 */ + width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec); + if (dims >= 2) { + height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec); + row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array, + ilevel1); + if (dims == 3) { + depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec); + img_stride1_vec = lp_build_mul(&bld->int_coord_bld, + row_stride1_vec, height1_vec); + } + } + } + + /* + * Choose cube face, recompute texcoords. + */ + if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + LLVMValueRef face, face_s, face_t; + lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t); + } + + /* + * Get pointer(s) to image data for mipmap level(s). + */ + data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0); + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1); + } + + /* + * Get/interpolate texture colors. + */ + /* XXX temporarily force this path: */ + if (1 /*min_filter == mag_filter*/) { + /* same filter for minification or magnification */ + LLVMValueRef colors0[4], colors1[4]; + + if (min_filter == PIPE_TEX_FILTER_NEAREST) { + lp_build_sample_image_nearest(bld, + width0_vec, height0_vec, depth0_vec, + row_stride0_vec, img_stride0_vec, + data_ptr0, s, t, r, colors0); + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* sample the second mipmap level, and interp */ + lp_build_sample_image_nearest(bld, + width1_vec, height1_vec, depth1_vec, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, colors1); + } + } + else { + assert(min_filter == PIPE_TEX_FILTER_LINEAR); + + lp_build_sample_image_linear(bld, + width0_vec, height0_vec, depth0_vec, + row_stride0_vec, img_stride0_vec, + data_ptr0, s, t, r, colors0); + + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* sample the second mipmap level, and interp */ + lp_build_sample_image_linear(bld, + width1_vec, height1_vec, depth1_vec, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, colors1); + } + } + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* interpolate samples from the two mipmap levels */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, + colors0[chan], colors1[chan]); + } + } + else { + /* use first/only level's colors */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = colors0[chan]; + } + } + } + else { + /* emit conditional to choose min image filter or mag image filter + * depending on the lod being >0 or <= 0, respectively. + */ + abort(); + } +} + + + static void lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, struct lp_type dst_type, @@ -817,8 +1503,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef t, LLVMValueRef width, LLVMValueRef height, - LLVMValueRef stride, - LLVMValueRef data_ptr, + LLVMValueRef stride_array, + LLVMValueRef data_array, LLVMValueRef *texel) { LLVMBuilderRef builder = bld->builder; @@ -834,8 +1520,9 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef neighbors_hi[2][2]; LLVMValueRef packed, packed_lo, packed_hi; LLVMValueRef unswizzled[4]; + LLVMValueRef stride; - lp_build_context_init(&i32, builder, lp_type_int(32)); + lp_build_context_init(&i32, builder, lp_type_int_vec(32)); lp_build_context_init(&h16, builder, lp_type_ufixed(16)); lp_build_context_init(&u8n, builder, lp_type_unorm(8)); @@ -941,6 +1628,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, ""); } + stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0); + /* * Fetch the pixels as 4 x 32bit (rgba order might differ): * @@ -958,10 +1647,10 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, * The higher 8 bits of the resulting elements will be zero. */ - neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr); - neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr); - neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr); - neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr); + neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array); + neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array); + neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array); + neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array); neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, ""); neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, ""); @@ -1044,194 +1733,11 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, } -static int -texture_dims(enum pipe_texture_target tex) -{ - switch (tex) { - case PIPE_TEXTURE_1D: - return 1; - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_CUBE: - return 2; - case PIPE_TEXTURE_3D: - return 3; - default: - assert(0 && "bad texture target in texture_dims()"); - return 2; - } -} - - -/** - * Generate code to compute texture level of detail (lambda). - * \param s vector of texcoord s values - * \param t vector of texcoord t values - * \param r vector of texcoord r values - * \param width scalar int texture width - * \param height scalar int texture height - * \param depth scalar int texture depth - */ -static LLVMValueRef -lp_build_lod_selector(struct lp_build_sample_context *bld, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef r, - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef depth) - -{ - const int dims = texture_dims(bld->static_state->target); - struct lp_build_context *coord_bld = &bld->coord_bld; - - LLVMValueRef lod_bias = lp_build_const_scalar(bld->coord_bld.type, - bld->static_state->lod_bias); - LLVMValueRef min_lod = lp_build_const_scalar(bld->coord_bld.type, - bld->static_state->min_lod); - LLVMValueRef max_lod = lp_build_const_scalar(bld->coord_bld.type, - bld->static_state->max_lod); - - LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0); - LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0); - - LLVMValueRef s0, s1, s2; - LLVMValueRef t0, t1, t2; - LLVMValueRef r0, r1, r2; - LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; - LLVMValueRef rho, lod; - - /* - * dsdx = abs(s[1] - s[0]); - * dsdy = abs(s[2] - s[0]); - * dtdx = abs(t[1] - t[0]); - * dtdy = abs(t[2] - t[0]); - * drdx = abs(r[1] - r[0]); - * drdy = abs(r[2] - r[0]); - * XXX we're assuming a four-element quad in 2x2 layout here. - */ - s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0"); - s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1"); - s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2"); - dsdx = lp_build_abs(coord_bld, lp_build_sub(coord_bld, s1, s0)); - dsdy = lp_build_abs(coord_bld, lp_build_sub(coord_bld, s2, s0)); - if (dims > 1) { - t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0"); - t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1"); - t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2"); - dtdx = lp_build_abs(coord_bld, lp_build_sub(coord_bld, t1, t0)); - dtdy = lp_build_abs(coord_bld, lp_build_sub(coord_bld, t2, t0)); - if (dims > 2) { - r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0"); - r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1"); - r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2"); - drdx = lp_build_abs(coord_bld, lp_build_sub(coord_bld, r1, r0)); - drdy = lp_build_abs(coord_bld, lp_build_sub(coord_bld, r2, r0)); - } - } - - /* Compute rho = max of all partial derivatives scaled by texture size. - * XXX this can be vectorized somewhat - */ - rho = lp_build_mul(coord_bld, - lp_build_max(coord_bld, dsdx, dsdy), - lp_build_int_to_float(coord_bld, width)); - if (dims > 1) { - LLVMValueRef max; - max = lp_build_mul(coord_bld, - lp_build_max(coord_bld, dtdx, dtdy), - lp_build_int_to_float(coord_bld, height)); - rho = lp_build_max(coord_bld, rho, max); - if (dims > 2) { - max = lp_build_mul(coord_bld, - lp_build_max(coord_bld, drdx, drdy), - lp_build_int_to_float(coord_bld, depth)); - rho = lp_build_max(coord_bld, rho, max); - } - } - - /* compute lod = log2(rho) */ - lod = lp_build_log2(coord_bld, rho); - - /* add lod bias */ - lod = lp_build_add(coord_bld, lod, lod_bias); - - /* clamp lod */ - lod = lp_build_clamp(coord_bld, lod, min_lod, max_lod); - - return lod; -} - - -/** - * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer - * mipmap level index. - * \param lod scalar float texture level of detail - * \param level_out returns integer - */ -static void -lp_build_nearest_mip_level(struct lp_build_sample_context *bld, - unsigned unit, - LLVMValueRef lod, - LLVMValueRef *level_out) -{ - struct lp_build_context *coord_bld = &bld->coord_bld; - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - LLVMValueRef last_level, level; - - last_level = bld->dynamic_state->last_level(bld->dynamic_state, - bld->builder, unit); - - /* convert float lod to integer */ - level = lp_build_iround(coord_bld, lod); - - /* clamp level to legal range of levels */ - *level_out = lp_build_clamp(int_coord_bld, level, - int_coord_bld->zero, - last_level); -} - - -/** - * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to - * two (adjacent) mipmap level indexes. Later, we'll sample from those - * two mipmap levels and interpolate between them. - */ -static void -lp_build_linear_mip_levels(struct lp_build_sample_context *bld, - unsigned unit, - LLVMValueRef lod, - LLVMValueRef *level0_out, - LLVMValueRef *level1_out, - LLVMValueRef *weight_out) -{ - struct lp_build_context *coord_bld = &bld->coord_bld; - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - LLVMValueRef last_level, level; - - last_level = bld->dynamic_state->last_level(bld->dynamic_state, - bld->builder, unit); - - /* convert float lod to integer */ - level = lp_build_ifloor(coord_bld, lod); - - /* compute level 0 and clamp to legal range of levels */ - *level0_out = lp_build_clamp(int_coord_bld, level, - int_coord_bld->zero, - last_level); - /* compute level 1 and clamp to legal range of levels */ - *level1_out = lp_build_add(int_coord_bld, *level0_out, int_coord_bld->one); - *level1_out = lp_build_min(int_coord_bld, *level1_out, int_coord_bld->zero); - - *weight_out = lp_build_fract(coord_bld, lod); -} - - - /** * Build texture sampling code. * 'texel' will return a vector of four LLVMValueRefs corresponding to * R, G, B, A. + * \param type vector float type to use for coords, etc. */ void lp_build_sample_soa(LLVMBuilderRef builder, @@ -1245,10 +1751,11 @@ lp_build_sample_soa(LLVMBuilderRef builder, LLVMValueRef *texel) { struct lp_build_sample_context bld; - LLVMValueRef width; - LLVMValueRef height; - LLVMValueRef stride; - LLVMValueRef data_ptr; + LLVMValueRef width, width_vec; + LLVMValueRef height, height_vec; + LLVMValueRef depth, depth_vec; + LLVMValueRef stride_array; + LLVMValueRef data_array; LLVMValueRef s; LLVMValueRef t; LLVMValueRef r; @@ -1256,6 +1763,7 @@ lp_build_sample_soa(LLVMBuilderRef builder, (void) lp_build_lod_selector; /* temporary to silence warning */ (void) lp_build_nearest_mip_level; (void) lp_build_linear_mip_levels; + (void) lp_build_minify; /* Setup our build context */ memset(&bld, 0, sizeof bld); @@ -1263,10 +1771,16 @@ lp_build_sample_soa(LLVMBuilderRef builder, bld.static_state = static_state; bld.dynamic_state = dynamic_state; bld.format_desc = util_format_description(static_state->format); + + bld.float_type = lp_type_float(32); + bld.int_type = lp_type_int(32); bld.coord_type = type; bld.uint_coord_type = lp_uint_type(type); bld.int_coord_type = lp_int_type(type); bld.texel_type = type; + + lp_build_context_init(&bld.float_bld, builder, bld.float_type); + lp_build_context_init(&bld.int_bld, builder, bld.int_type); lp_build_context_init(&bld.coord_bld, builder, bld.coord_type); lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type); lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type); @@ -1275,41 +1789,37 @@ lp_build_sample_soa(LLVMBuilderRef builder, /* Get the dynamic state */ width = dynamic_state->width(dynamic_state, builder, unit); height = dynamic_state->height(dynamic_state, builder, unit); - stride = dynamic_state->stride(dynamic_state, builder, unit); - data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit); + depth = dynamic_state->depth(dynamic_state, builder, unit); + stride_array = dynamic_state->row_stride(dynamic_state, builder, unit); + data_array = dynamic_state->data_ptr(dynamic_state, builder, unit); + /* Note that data_array is an array[level] of pointers to texture images */ s = coords[0]; t = coords[1]; r = coords[2]; - width = lp_build_broadcast_scalar(&bld.uint_coord_bld, width); - height = lp_build_broadcast_scalar(&bld.uint_coord_bld, height); - stride = lp_build_broadcast_scalar(&bld.uint_coord_bld, stride); - - if(static_state->target == PIPE_TEXTURE_1D) - t = bld.coord_bld.zero; - - switch (static_state->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, - stride, data_ptr, texel); - break; - case PIPE_TEX_FILTER_LINEAR: - if(lp_format_is_rgba8(bld.format_desc) && - is_simple_wrap_mode(static_state->wrap_s) && - is_simple_wrap_mode(static_state->wrap_t)) - lp_build_sample_2d_linear_aos(&bld, s, t, width, height, - stride, data_ptr, texel); - else - lp_build_sample_2d_linear_soa(&bld, s, t, width, height, - stride, data_ptr, texel); - break; - default: - assert(0); + width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width); + height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height); + depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth); + + if (lp_format_is_rgba8(bld.format_desc) && + static_state->target == PIPE_TEXTURE_2D && + static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && + static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && + static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && + is_simple_wrap_mode(static_state->wrap_s) && + is_simple_wrap_mode(static_state->wrap_t)) { + /* special case */ + lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec, + stride_array, data_array, texel); + } + else { + lp_build_sample_general(&bld, unit, s, t, r, + width, height, depth, + width_vec, height_vec, depth_vec, + stride_array, NULL, data_array, + texel); } - - /* FIXME: respect static_state->min_mip_filter */; - /* FIXME: respect static_state->mag_img_filter */; lp_build_sample_compare(&bld, r, texel); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.h b/src/gallium/auxiliary/gallivm/lp_bld_struct.h index 740392f5611..34478c10f51 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_struct.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.h @@ -37,7 +37,7 @@ #define LP_BLD_STRUCT_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include <llvm-c/Target.h> #include "util/u_debug.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h index b9472127a63..57b5cc079f2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h @@ -37,7 +37,7 @@ #define LP_BLD_SWIZZLE_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index eddb7a83fa2..0f2f8a65b10 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -35,7 +35,7 @@ #ifndef LP_BLD_TGSI_H #define LP_BLD_TGSI_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct tgsi_token; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 5f2c2a54ee9..5ec59d636cb 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -41,6 +41,7 @@ #include "util/u_debug.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" @@ -95,6 +96,19 @@ struct lp_exec_mask { int cond_stack_size; LLVMValueRef cond_mask; + LLVMValueRef break_stack[LP_TGSI_MAX_NESTING]; + int break_stack_size; + LLVMValueRef break_mask; + + LLVMValueRef cont_stack[LP_TGSI_MAX_NESTING]; + int cont_stack_size; + LLVMValueRef cont_mask; + + LLVMBasicBlockRef loop_stack[LP_TGSI_MAX_NESTING]; + int loop_stack_size; + LLVMBasicBlockRef loop_block; + + LLVMValueRef exec_mask; }; @@ -145,15 +159,33 @@ static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context mask->bld = bld; mask->has_mask = FALSE; mask->cond_stack_size = 0; + mask->loop_stack_size = 0; + mask->break_stack_size = 0; + mask->cont_stack_size = 0; mask->int_vec_type = lp_build_int_vec_type(mask->bld->type); } static void lp_exec_mask_update(struct lp_exec_mask *mask) { - mask->exec_mask = mask->cond_mask; - if (mask->cond_stack_size > 0) - mask->has_mask = TRUE; + if (mask->loop_stack_size) { + /*for loops we need to update the entire mask at + * runtime */ + LLVMValueRef tmp; + tmp = LLVMBuildAnd(mask->bld->builder, + mask->cont_mask, + mask->break_mask, + "maskcb"); + mask->exec_mask = LLVMBuildAnd(mask->bld->builder, + mask->cond_mask, + tmp, + "maskfull"); + } else + mask->exec_mask = mask->cond_mask; + + + mask->has_mask = (mask->cond_stack_size > 0 || + mask->loop_stack_size > 0); } static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, @@ -190,6 +222,89 @@ static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) lp_exec_mask_update(mask); } +static void lp_exec_bgnloop(struct lp_exec_mask *mask) +{ + + if (mask->cont_stack_size == 0) + mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type); + if (mask->cont_stack_size == 0) + mask->break_mask = LLVMConstAllOnes(mask->int_vec_type); + if (mask->cond_stack_size == 0) + mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type); + mask->loop_stack[mask->loop_stack_size++] = mask->loop_block; + mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop"); + LLVMBuildBr(mask->bld->builder, mask->loop_block); + LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block); + + lp_exec_mask_update(mask); +} + +static void lp_exec_break(struct lp_exec_mask *mask) +{ + LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, + mask->exec_mask, + "break"); + + mask->break_stack[mask->break_stack_size++] = mask->break_mask; + if (mask->break_stack_size > 1) { + mask->break_mask = LLVMBuildAnd(mask->bld->builder, + mask->break_mask, + exec_mask, "break_full"); + } else + mask->break_mask = exec_mask; + + lp_exec_mask_update(mask); +} + +static void lp_exec_continue(struct lp_exec_mask *mask) +{ + LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, + mask->exec_mask, + ""); + + mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask; + if (mask->cont_stack_size > 1) { + mask->cont_mask = LLVMBuildAnd(mask->bld->builder, + mask->cont_mask, + exec_mask, ""); + } else + mask->cont_mask = exec_mask; + + lp_exec_mask_update(mask); +} + + +static void lp_exec_endloop(struct lp_exec_mask *mask) +{ + LLVMBasicBlockRef endloop; + LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width* + mask->bld->type.length); + /* i1cond = (mask == 0) */ + LLVMValueRef i1cond = LLVMBuildICmp( + mask->bld->builder, + LLVMIntNE, + LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""), + LLVMConstNull(reg_type), ""); + + endloop = lp_build_insert_new_block(mask->bld->builder, "endloop"); + + LLVMBuildCondBr(mask->bld->builder, + i1cond, mask->loop_block, endloop); + + LLVMPositionBuilderAtEnd(mask->bld->builder, endloop); + + mask->loop_block = mask->loop_stack[--mask->loop_stack_size]; + /* pop the break mask */ + if (mask->cont_stack_size) { + mask->cont_mask = mask->cont_stack[--mask->cont_stack_size]; + } + if (mask->break_stack_size) { + mask->break_mask = mask->cont_stack[--mask->break_stack_size]; + } + + lp_exec_mask_update(mask); +} + static void lp_exec_mask_store(struct lp_exec_mask *mask, LLVMValueRef val, LLVMValueRef dst) @@ -384,6 +499,11 @@ emit_store( assert(0); break; + case TGSI_FILE_PREDICATE: + /* FIXME */ + assert(0); + break; + default: assert( 0 ); } @@ -581,6 +701,17 @@ emit_instruction( if (indirect_temp_reference(inst)) return FALSE; + /* + * Stores and write masks are handled in a general fashion after the long + * instruction opcode switch statement. + * + * Although not stricitly necessary, we avoid generating instructions for + * channels which won't be stored, in cases where's that easy. For some + * complex instructions, like texture sampling, it is more convenient to + * assume a full writemask and then let LLVM optimization passes eliminate + * redundant code. + */ + assert(info->num_dst <= 1); if(info->num_dst) { FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { @@ -1126,7 +1257,6 @@ emit_instruction( break; case TGSI_OPCODE_TEX: - /* XXX what about dst0 writemask? */ emit_tex( bld, inst, FALSE, FALSE, dst0 ); break; @@ -1349,14 +1479,15 @@ emit_instruction( case TGSI_OPCODE_TXP: emit_tex( bld, inst, FALSE, TRUE, dst0 ); break; - + case TGSI_OPCODE_BRK: - /* FIXME */ - return 0; + lp_exec_break(&bld->exec_mask); break; case TGSI_OPCODE_IF: tmp0 = emit_fetch(bld, inst, 0, CHAN_X); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, + tmp0, bld->base.zero); lp_exec_mask_cond_push(&bld->exec_mask, tmp0); break; @@ -1366,6 +1497,10 @@ emit_instruction( return 0; break; + case TGSI_OPCODE_BGNLOOP: + lp_exec_bgnloop(&bld->exec_mask); + break; + case TGSI_OPCODE_REP: /* deprecated */ assert(0); @@ -1386,6 +1521,10 @@ emit_instruction( return 0; break; + case TGSI_OPCODE_ENDLOOP: + lp_exec_endloop(&bld->exec_mask); + break; + case TGSI_OPCODE_ENDREP: /* deprecated */ assert(0); @@ -1485,8 +1624,7 @@ emit_instruction( break; case TGSI_OPCODE_CONT: - /* FIXME */ - return 0; + lp_exec_continue(&bld->exec_mask); break; case TGSI_OPCODE_EMIT: @@ -1589,7 +1727,14 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, assert( 0 ); } } - + if (0) { + LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(block); + debug_printf("11111111111111111111111111111 \n"); + tgsi_dump(tokens, 0); + LLVMDumpValue(function); + debug_printf("2222222222222222222222222222 \n"); + } tgsi_parse_free( &parse ); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c index c327ba045a6..796af88caad 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c @@ -58,7 +58,10 @@ LLVMTypeRef lp_build_vec_type(struct lp_type type) { LLVMTypeRef elem_type = lp_build_elem_type(type); - return LLVMVectorType(elem_type, type.length); + if (type.length == 1) + return elem_type; + else + return LLVMVectorType(elem_type, type.length); } @@ -115,6 +118,9 @@ lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type) if(!vec_type) return FALSE; + if (type.length == 1) + return lp_check_elem_type(type, vec_type); + if(LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) return FALSE; @@ -153,7 +159,10 @@ LLVMTypeRef lp_build_int_vec_type(struct lp_type type) { LLVMTypeRef elem_type = lp_build_int_elem_type(type); - return LLVMVectorType(elem_type, type.length); + if (type.length == 1) + return elem_type; + else + return LLVMVectorType(elem_type, type.length); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h index 16946cc28a2..5b351476ac2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h @@ -37,7 +37,7 @@ #define LP_BLD_TYPE_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include <pipe/p_compiler.h> @@ -103,7 +103,7 @@ struct lp_type { unsigned width:14; /** - * Vector length. + * Vector length. If length==1, this is a scalar (float/int) type. * * width*length should be a power of two greater or equal to eight. * @@ -139,6 +139,7 @@ struct lp_build_context }; +/** Create scalar float type */ static INLINE struct lp_type lp_type_float(unsigned width) { @@ -148,12 +149,29 @@ lp_type_float(unsigned width) res_type.floating = TRUE; res_type.sign = TRUE; res_type.width = width; + res_type.length = 1; + + return res_type; +} + + +/** Create vector of float type */ +static INLINE struct lp_type +lp_type_float_vec(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.floating = TRUE; + res_type.sign = TRUE; + res_type.width = width; res_type.length = LP_NATIVE_VECTOR_WIDTH / width; return res_type; } +/** Create scalar int type */ static INLINE struct lp_type lp_type_int(unsigned width) { @@ -162,12 +180,28 @@ lp_type_int(unsigned width) memset(&res_type, 0, sizeof res_type); res_type.sign = TRUE; res_type.width = width; + res_type.length = 1; + + return res_type; +} + + +/** Create vector int type */ +static INLINE struct lp_type +lp_type_int_vec(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.sign = TRUE; + res_type.width = width; res_type.length = LP_NATIVE_VECTOR_WIDTH / width; return res_type; } +/** Create scalar uint type */ static INLINE struct lp_type lp_type_uint(unsigned width) { @@ -175,6 +209,20 @@ lp_type_uint(unsigned width) memset(&res_type, 0, sizeof res_type); res_type.width = width; + res_type.length = 1; + + return res_type; +} + + +/** Create vector uint type */ +static INLINE struct lp_type +lp_type_uint_vec(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.width = width; res_type.length = LP_NATIVE_VECTOR_WIDTH / width; return res_type; diff --git a/src/gallium/auxiliary/os/os_llvm.h b/src/gallium/auxiliary/os/os_llvm.h new file mode 100644 index 00000000000..d5edfbfe923 --- /dev/null +++ b/src/gallium/auxiliary/os/os_llvm.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Wrapper for LLVM header file #includes. + */ + + +#ifndef OS_LLVM_H +#define OS_LLVM_H + + +#include <llvm-c/Core.h> + + +/** Set version to 0 if missing to avoid #ifdef HAVE_LLVM everywhere */ +#ifndef HAVE_LLVM +#define HAVE_LLVM 0x0 +#endif + + +#endif /* OS_LLVM_H */ diff --git a/src/gallium/auxiliary/target-helpers/wrap_screen.c b/src/gallium/auxiliary/target-helpers/wrap_screen.c new file mode 100644 index 00000000000..5fe30139386 --- /dev/null +++ b/src/gallium/auxiliary/target-helpers/wrap_screen.c @@ -0,0 +1,65 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/* + * Authors: + * Keith Whitwell + */ + +#include "target-helpers/wrap_screen.h" +#include "trace/tr_public.h" +#include "identity/id_public.h" +#include "util/u_debug.h" + + +/* Centralized code to inject common wrapping layers: + */ +struct pipe_screen * +gallium_wrap_screen( struct pipe_screen *screen ) +{ + /* Screen wrapping functions are required not to fail. If it is + * impossible to wrap a screen, the unwrapped screen should be + * returned instead. Any failure condition should be returned in + * an OUT argument. + * + * Otherwise it is really messy trying to clean up in this code. + */ + if (debug_get_bool_option("GALLIUM_WRAP", FALSE)) { + screen = identity_screen_create(screen); + } + + if (debug_get_bool_option("GALLIUM_TRACE", FALSE)) { + screen = trace_screen_create( screen ); + } + + return screen; +} + + + + diff --git a/src/gallium/auxiliary/target-helpers/wrap_screen.h b/src/gallium/auxiliary/target-helpers/wrap_screen.h new file mode 100644 index 00000000000..7e76beb7c5a --- /dev/null +++ b/src/gallium/auxiliary/target-helpers/wrap_screen.h @@ -0,0 +1,16 @@ +#ifndef WRAP_SCREEN_HELPER_H +#define WRAP_SCREEN_HELPER_H + +#include "pipe/p_compiler.h" + +struct pipe_screen; + +/* Centralized code to inject common wrapping layers. Other layers + * can be introduced by specific targets, but these are the generally + * helpful ones we probably want everywhere. + */ +struct pipe_screen * +gallium_wrap_screen( struct pipe_screen *screen ); + + +#endif diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index c9ec2b32bfe..c3ec9ae3f4b 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -393,10 +393,10 @@ static fetch_func get_fetch_func( enum pipe_format format ) return &fetch_R8G8B8A8_SSCALED; case PIPE_FORMAT_B8G8R8A8_UNORM: - return &fetch_A8R8G8B8_UNORM; + return &fetch_B8G8R8A8_UNORM; case PIPE_FORMAT_A8R8G8B8_UNORM: - return &fetch_B8G8R8A8_UNORM; + return &fetch_A8R8G8B8_UNORM; case PIPE_FORMAT_R32_FIXED: return &fetch_R32_FIXED; @@ -552,10 +552,10 @@ static emit_func get_emit_func( enum pipe_format format ) return &emit_R8G8B8A8_SSCALED; case PIPE_FORMAT_B8G8R8A8_UNORM: - return &emit_A8R8G8B8_UNORM; + return &emit_B8G8R8A8_UNORM; case PIPE_FORMAT_A8R8G8B8_UNORM: - return &emit_B8G8R8A8_UNORM; + return &emit_A8R8G8B8_UNORM; default: assert(0); diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index 03e093c11ea..c13e7427387 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -336,7 +336,7 @@ static boolean translate_attr( struct translate_sse *p, case PIPE_FORMAT_R32G32B32A32_FLOAT: emit_load_R32G32B32A32(p, dataXMM, srcECX); break; - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX); emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W)); break; @@ -360,7 +360,7 @@ static boolean translate_attr( struct translate_sse *p, case PIPE_FORMAT_R32G32B32A32_FLOAT: emit_store_R32G32B32A32(p, dstEAX, dataXMM); break; - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W)); emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM); break; diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 0b263a9db5c..4d0737ccd3d 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -63,6 +63,7 @@ struct blit_state struct pipe_sampler_state sampler; struct pipe_viewport_state viewport; struct pipe_clip_state clip; + struct pipe_vertex_element velem[2]; void *vs; void *fs[TGSI_WRITEMASK_XYZW + 1]; @@ -114,6 +115,15 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) ctx->sampler.mag_img_filter = 0; /* set later */ ctx->sampler.normalized_coords = 1; + /* vertex elements state */ + memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2); + for (i = 0; i < 2; i++) { + ctx->velem[i].src_offset = i * 4 * sizeof(float); + ctx->velem[i].instance_divisor = 0; + ctx->velem[i].vertex_buffer_index = 0; + ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + } + /* vertex shader - still required to provide the linkage between * fragment shader input semantics and vertex_element/buffers. */ @@ -410,12 +420,14 @@ util_blit_pixels_writemask(struct blit_state *ctx, cso_save_fragment_shader(ctx->cso); cso_save_vertex_shader(ctx->cso); cso_save_clip(ctx->cso); + cso_save_vertex_elements(ctx->cso); /* set misc state we care about */ cso_set_blend(ctx->cso, &ctx->blend); cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); cso_set_rasterizer(ctx->cso, &ctx->rasterizer); cso_set_clip(ctx->cso, &ctx->clip); + cso_set_vertex_elements(ctx->cso, 2, ctx->velem); /* sampler */ ctx->sampler.min_img_filter = filter; @@ -480,6 +492,7 @@ util_blit_pixels_writemask(struct blit_state *ctx, cso_restore_fragment_shader(ctx->cso); cso_restore_vertex_shader(ctx->cso); cso_restore_clip(ctx->cso); + cso_restore_vertex_elements(ctx->cso); pipe_texture_reference(&tex, NULL); } @@ -564,12 +577,14 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_save_fragment_shader(ctx->cso); cso_save_vertex_shader(ctx->cso); cso_save_clip(ctx->cso); + cso_save_vertex_elements(ctx->cso); /* set misc state we care about */ cso_set_blend(ctx->cso, &ctx->blend); cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); cso_set_rasterizer(ctx->cso, &ctx->rasterizer); cso_set_clip(ctx->cso, &ctx->clip); + cso_set_vertex_elements(ctx->cso, 2, ctx->velem); /* sampler */ ctx->sampler.min_img_filter = filter; @@ -628,4 +643,5 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_restore_fragment_shader(ctx->cso); cso_restore_vertex_shader(ctx->cso); cso_restore_clip(ctx->cso); + cso_restore_vertex_elements(ctx->cso); } diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 0ba09d33bfc..36d582491f3 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -88,6 +88,8 @@ struct blitter_context_priv void *dsa_write_depth_keep_stencil; void *dsa_keep_depth_stencil; + void *velem_state; + /* Sampler state for clamping to a miplevel. */ void *sampler_state[PIPE_MAX_TEXTURE_LEVELS]; @@ -104,10 +106,11 @@ struct blitter_context_priv struct blitter_context *util_blitter_create(struct pipe_context *pipe) { struct blitter_context_priv *ctx; - struct pipe_blend_state blend = { 0 }; - struct pipe_depth_stencil_alpha_state dsa = { { 0 } }; - struct pipe_rasterizer_state rs_state = { 0 }; + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_rasterizer_state rs_state; struct pipe_sampler_state *sampler_state; + struct pipe_vertex_element velem[2]; unsigned i; ctx = CALLOC_STRUCT(blitter_context_priv); @@ -122,17 +125,20 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->blitter.saved_rs_state = INVALID_PTR; ctx->blitter.saved_fs = INVALID_PTR; ctx->blitter.saved_vs = INVALID_PTR; + ctx->blitter.saved_velem_state = INVALID_PTR; ctx->blitter.saved_fb_state.nr_cbufs = ~0; ctx->blitter.saved_num_textures = ~0; ctx->blitter.saved_num_sampler_states = ~0; /* blend state objects */ + memset(&blend, 0, sizeof(blend)); ctx->blend_keep_color = pipe->create_blend_state(pipe, &blend); blend.rt[0].colormask = PIPE_MASK_RGBA; ctx->blend_write_color = pipe->create_blend_state(pipe, &blend); /* depth stencil alpha state objects */ + memset(&dsa, 0, sizeof(dsa)); ctx->dsa_keep_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); @@ -170,6 +176,16 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) rs_state.flatshade = 1; ctx->rs_state = pipe->create_rasterizer_state(pipe, &rs_state); + /* vertex elements state */ + memset(&velem[0], 0, sizeof(velem[0]) * 2); + for (i = 0; i < 2; i++) { + velem[i].src_offset = i * 4 * sizeof(float); + velem[i].instance_divisor = 0; + velem[i].vertex_buffer_index = 0; + velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + } + ctx->velem_state = pipe->create_vertex_elements_state(pipe, 2, &velem[0]); + /* fragment shaders are created on-demand */ /* vertex shaders */ @@ -219,6 +235,7 @@ void util_blitter_destroy(struct blitter_context *blitter) pipe->delete_rasterizer_state(pipe, ctx->rs_state); pipe->delete_vs_state(pipe, ctx->vs_col); pipe->delete_vs_state(pipe, ctx->vs_tex); + pipe->delete_vertex_elements_state(pipe, ctx->velem_state); for (i = 0; i < PIPE_MAX_TEXTURE_TYPES; i++) { if (ctx->fs_texfetch_col[i]) @@ -246,7 +263,8 @@ static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx) ctx->blitter.saved_dsa_state != INVALID_PTR && ctx->blitter.saved_rs_state != INVALID_PTR && ctx->blitter.saved_fs != INVALID_PTR && - ctx->blitter.saved_vs != INVALID_PTR); + ctx->blitter.saved_vs != INVALID_PTR && + ctx->blitter.saved_velem_state != INVALID_PTR); } static void blitter_restore_CSOs(struct blitter_context_priv *ctx) @@ -259,12 +277,14 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx) pipe->bind_rasterizer_state(pipe, ctx->blitter.saved_rs_state); pipe->bind_fs_state(pipe, ctx->blitter.saved_fs); pipe->bind_vs_state(pipe, ctx->blitter.saved_vs); + pipe->bind_vertex_elements_state(pipe, ctx->blitter.saved_velem_state); ctx->blitter.saved_blend_state = INVALID_PTR; ctx->blitter.saved_dsa_state = INVALID_PTR; ctx->blitter.saved_rs_state = INVALID_PTR; ctx->blitter.saved_fs = INVALID_PTR; ctx->blitter.saved_vs = INVALID_PTR; + ctx->blitter.saved_velem_state = INVALID_PTR; pipe->set_stencil_ref(pipe, &ctx->blitter.saved_stencil_ref); @@ -569,6 +589,7 @@ void util_blitter_clear(struct blitter_context *blitter, pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); pipe->bind_rasterizer_state(pipe, ctx->rs_state); + pipe->bind_vertex_elements_state(pipe, ctx->velem_state); pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs)); pipe->bind_vs_state(pipe, ctx->vs_col); @@ -634,6 +655,7 @@ static void util_blitter_do_copy(struct blitter_context *blitter, pipe->bind_vs_state(pipe, ctx->vs_tex); pipe->bind_fragment_sampler_states(pipe, 1, blitter_get_sampler_state(ctx, src->level)); + pipe->bind_vertex_elements_state(pipe, ctx->velem_state); pipe->set_fragment_sampler_textures(pipe, 1, &src->texture); pipe->set_framebuffer_state(pipe, &fb_state); @@ -807,6 +829,7 @@ void util_blitter_fill(struct blitter_context *blitter, pipe->bind_rasterizer_state(pipe, ctx->rs_state); pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1)); pipe->bind_vs_state(pipe, ctx->vs_col); + pipe->bind_vertex_elements_state(pipe, ctx->velem_state); /* set a framebuffer state */ fb_state.width = dst->width; diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index 92008fce992..ecafdabafae 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -43,6 +43,7 @@ struct blitter_context /* Private members, really. */ void *saved_blend_state; /**< blend state */ void *saved_dsa_state; /**< depth stencil alpha state */ + void *saved_velem_state; /**< vertex elements state */ void *saved_rs_state; /**< rasterizer state */ void *saved_fs, *saved_vs; /**< fragment shader, vertex shader */ @@ -173,6 +174,13 @@ void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter, } static INLINE +void util_blitter_save_vertex_elements(struct blitter_context *blitter, + void *state) +{ + blitter->saved_velem_state = state; +} + +static INLINE void util_blitter_save_stencil_ref(struct blitter_context *blitter, const struct pipe_stencil_ref *state) { diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 94be682c4b1..e997cfa8a38 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -421,26 +421,31 @@ void debug_dump_image(const char *prefix, #endif } -void debug_dump_surface(const char *prefix, +void debug_dump_surface(struct pipe_context *pipe, + const char *prefix, struct pipe_surface *surface) { struct pipe_texture *texture; - struct pipe_screen *screen; struct pipe_transfer *transfer; void *data; if (!surface) return; + /* XXX: this doesn't necessarily work, as the driver may be using + * temporary storage for the surface which hasn't been propagated + * back into the texture. Need to nail down the semantics of views + * and transfers a bit better before we can say if extra work needs + * to be done here: + */ texture = surface->texture; - screen = texture->screen; - transfer = screen->get_tex_transfer(screen, texture, surface->face, - surface->level, surface->zslice, - PIPE_TRANSFER_READ, 0, 0, surface->width, - surface->height); + transfer = pipe->get_tex_transfer(pipe, texture, surface->face, + surface->level, surface->zslice, + PIPE_TRANSFER_READ, 0, 0, surface->width, + surface->height); - data = screen->transfer_map(screen, transfer); + data = pipe->transfer_map(pipe, transfer); if(!data) goto error; @@ -452,13 +457,14 @@ void debug_dump_surface(const char *prefix, transfer->stride, data); - screen->transfer_unmap(screen, transfer); + pipe->transfer_unmap(pipe, transfer); error: - screen->tex_transfer_destroy(transfer); + pipe->tex_transfer_destroy(pipe, transfer); } -void debug_dump_texture(const char *prefix, +void debug_dump_texture(struct pipe_context *pipe, + const char *prefix, struct pipe_texture *texture) { struct pipe_surface *surface; @@ -473,7 +479,7 @@ void debug_dump_texture(const char *prefix, surface = screen->get_tex_surface(screen, texture, 0, 0, 0, PIPE_TEXTURE_USAGE_SAMPLER); if (surface) { - debug_dump_surface(prefix, surface); + debug_dump_surface(pipe, prefix, surface); screen->tex_surface_destroy(surface); } } @@ -511,27 +517,28 @@ struct bmp_rgb_quad { }; void -debug_dump_surface_bmp(const char *filename, +debug_dump_surface_bmp(struct pipe_context *pipe, + const char *filename, struct pipe_surface *surface) { #ifndef PIPE_SUBSYSTEM_WINDOWS_MINIPORT struct pipe_transfer *transfer; struct pipe_texture *texture = surface->texture; - struct pipe_screen *screen = texture->screen; - transfer = screen->get_tex_transfer(screen, texture, surface->face, - surface->level, surface->zslice, - PIPE_TRANSFER_READ, 0, 0, surface->width, - surface->height); + transfer = pipe->get_tex_transfer(pipe, texture, surface->face, + surface->level, surface->zslice, + PIPE_TRANSFER_READ, 0, 0, surface->width, + surface->height); - debug_dump_transfer_bmp(filename, transfer); + debug_dump_transfer_bmp(pipe, filename, transfer); - screen->tex_transfer_destroy(transfer); + pipe->tex_transfer_destroy(pipe, transfer); #endif } void -debug_dump_transfer_bmp(const char *filename, +debug_dump_transfer_bmp(struct pipe_context *pipe, + const char *filename, struct pipe_transfer *transfer) { #ifndef PIPE_SUBSYSTEM_WINDOWS_MINIPORT @@ -544,7 +551,7 @@ debug_dump_transfer_bmp(const char *filename, if(!rgba) goto error1; - pipe_get_tile_rgba(transfer, 0, 0, + pipe_get_tile_rgba(pipe, transfer, 0, 0, transfer->width, transfer->height, rgba); diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index 0f4768f3444..98addeb372e 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -312,6 +312,7 @@ debug_memory_end(unsigned long beginning); #ifdef DEBUG +struct pipe_context; struct pipe_surface; struct pipe_transfer; struct pipe_texture; @@ -321,21 +322,25 @@ void debug_dump_image(const char *prefix, unsigned width, unsigned height, unsigned stride, const void *data); -void debug_dump_surface(const char *prefix, +void debug_dump_surface(struct pipe_context *pipe, + const char *prefix, struct pipe_surface *surface); -void debug_dump_texture(const char *prefix, +void debug_dump_texture(struct pipe_context *pipe, + const char *prefix, struct pipe_texture *texture); -void debug_dump_surface_bmp(const char *filename, +void debug_dump_surface_bmp(struct pipe_context *pipe, + const char *filename, struct pipe_surface *surface); -void debug_dump_transfer_bmp(const char *filename, +void debug_dump_transfer_bmp(struct pipe_context *pipe, + const char *filename, struct pipe_transfer *transfer); void debug_dump_float_rgba_bmp(const char *filename, unsigned width, unsigned height, float *rgba, unsigned stride); #else #define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0) -#define debug_dump_surface(prefix, surface) ((void)0) -#define debug_dump_surface_bmp(filename, surface) ((void)0) +#define debug_dump_surface(pipe, prefix, surface) ((void)0) +#define debug_dump_surface_bmp(pipe, filename, surface) ((void)0) #define debug_dump_transfer_bmp(filename, transfer) ((void)0) #define debug_dump_float_rgba_bmp(filename, width, height, rgba, stride) ((void)0) #endif diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index 14506e84519..8c194102bfc 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -45,8 +45,6 @@ util_draw_vertex_buffer(struct pipe_context *pipe, uint num_attribs) { struct pipe_vertex_buffer vbuffer; - struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; - uint i; assert(num_attribs <= PIPE_MAX_ATTRIBS); @@ -58,15 +56,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe, vbuffer.max_index = num_verts - 1; pipe->set_vertex_buffers(pipe, 1, &vbuffer); - /* tell pipe about the vertex attributes */ - for (i = 0; i < num_attribs; i++) { - velements[i].src_offset = i * 4 * sizeof(float); - velements[i].instance_divisor = 0; - velements[i].vertex_buffer_index = 0; - velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - velements[i].nr_components = 4; - } - pipe->set_vertex_elements(pipe, num_attribs, velements); + /* note: vertex elements already set by caller */ /* draw */ pipe->draw_arrays(pipe, prim_type, 0, num_verts); diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c index ae7afd7311e..52cf3ef4ce0 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -700,7 +700,6 @@ util_dump_vertex_element(struct os_stream *stream, const struct pipe_vertex_elem util_dump_member(stream, uint, state, src_offset); util_dump_member(stream, uint, state, vertex_buffer_index); - util_dump_member(stream, uint, state, nr_components); util_dump_member(stream, format, state, src_format); diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index e8fa0022b5b..c08fdcafcc8 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -120,9 +120,15 @@ struct util_format_channel_description struct util_format_description { enum pipe_format format; + const char *name; /** + * Short name, striped of the prefix, lower case. + */ + const char *short_name; + + /** * Pixel block dimensions. */ struct util_format_block block; @@ -140,6 +146,15 @@ struct util_format_description unsigned is_array:1; /** + * Whether the pixel format can be described as a bitfield structure. + * + * In particular: + * - pixel depth must be 8, 16, or 32 bits; + * - all channels must be unsigned, signed, or void + */ + unsigned is_bitmask:1; + + /** * Whether channels have mixed types (ignoring UTIL_FORMAT_TYPE_VOID). */ unsigned is_mixed:1; @@ -400,6 +415,16 @@ util_format_has_alpha(enum pipe_format format) } } +/** + * Return the number of components stored. + * Formats with block size != 1x1 will always have 1 component (the block). + */ +static INLINE unsigned +util_format_get_nr_components(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + return desc->nr_channels; +} /* * Format access functions. diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py index 3f33f7cc021..409d024c637 100644 --- a/src/gallium/auxiliary/util/u_format_pack.py +++ b/src/gallium/auxiliary/util/u_format_pack.py @@ -252,9 +252,6 @@ def conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=True if src_channel.type == FLOAT and dst_channel.type == FLOAT: return '(%s)%s' % (dst_native_type, value) - if not src_channel.norm and not dst_channel.norm: - return '(%s)%s' % (dst_native_type, value) - if clamp: value = clamp_expr(src_channel, dst_channel, dst_native_type, value) @@ -280,15 +277,15 @@ def conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=True value = '(%s * %s)' % (value, scale) return '(%s)%s' % (dst_native_type, value) - if not src_channel.norm and not dst_channel.norm: - # neither is normalized -- just cast - return '(%s)%s' % (dst_native_type, value) - if src_channel.type in (SIGNED, UNSIGNED) and dst_channel.type in (SIGNED, UNSIGNED): + if not src_channel.norm and not dst_channel.norm: + # neither is normalized -- just cast + return '(%s)%s' % (dst_native_type, value) + src_one = get_one(src_channel) dst_one = get_one(dst_channel) - if src_one > dst_one and src_channel.norm: + if src_one > dst_one and src_channel.norm and dst_channel.norm: # We can just bitshift src_shift = get_one_shift(src_channel) dst_shift = get_one_shift(dst_channel) @@ -296,7 +293,7 @@ def conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=True else: # We need to rescale using an intermediate type big enough to hold the multiplication of both tmp_native_type = intermediate_native_type(src_channel.size + dst_channel.size, src_channel.sign and dst_channel.sign) - value = '(%s)%s' % (tmp_native_type, value) + value = '((%s)%s)' % (tmp_native_type, value) value = '(%s * 0x%x / 0x%x)' % (value, dst_one, src_one) value = '(%s)%s' % (dst_native_type, value) return value @@ -307,6 +304,8 @@ def conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=True def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix): '''Generate the function to unpack pixels from a particular format''' + assert format.layout == PLAIN + name = format.short_name() src_native_type = native_type(format) @@ -314,32 +313,99 @@ def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix): print 'static INLINE void' print 'util_format_%s_unpack_%s(%s *dst, const void *src)' % (name, dst_suffix, dst_native_type) print '{' - print ' union util_format_%s pixel;' % format.short_name() - print ' memcpy(&pixel, src, sizeof pixel);' - bswap_format(format) + + if format.is_bitmask(): + depth = format.block_size() + print ' uint%u_t value = *(uint%u_t *)src;' % (depth, depth) - assert format.layout == PLAIN + # Declare the intermediate variables + for i in range(format.nr_channels()): + src_channel = format.channels[i] + if src_channel.type == UNSIGNED: + print ' uint%u_t %s;' % (depth, src_channel.name) + elif src_channel.type == SIGNED: + print ' int%u_t %s;' % (depth, src_channel.name) - for i in range(4): - swizzle = format.swizzles[i] - if swizzle < 4: - src_channel = format.channels[swizzle] - value = 'pixel.chan.%s' % src_channel.name - value = conversion_expr(src_channel, dst_channel, dst_native_type, value) - elif swizzle == SWIZZLE_0: - value = '0' - elif swizzle == SWIZZLE_1: - value = get_one(dst_channel) - elif swizzle == SWIZZLE_NONE: - value = '0' - else: - assert False - if format.colorspace == ZS: - if i == 3: + print '#ifdef PIPE_ARCH_BIG_ENDIAN' + print ' value = util_bswap%u(value);' % depth + print '#endif' + + # Compute the intermediate unshifted values + shift = 0 + for i in range(format.nr_channels()): + src_channel = format.channels[i] + value = 'value' + if src_channel.type == UNSIGNED: + if shift: + value = '%s >> %u' % (value, shift) + if shift + src_channel.size < depth: + value = '(%s) & 0x%x' % (value, (1 << src_channel.size) - 1) + elif src_channel.type == SIGNED: + if shift + src_channel.size < depth: + # Align the sign bit + lshift = depth - (shift + src_channel.size) + value = '%s << %u' % (value, lshift) + # Cast to signed + value = '(int%u_t)(%s) ' % (depth, value) + if src_channel.size < depth: + # Align the LSB bit + rshift = depth - src_channel.size + value = '(%s) >> %u' % (value, rshift) + else: + value = None + + if value is not None: + print ' %s = %s;' % (src_channel.name, value) + + shift += src_channel.size + + # Convert, swizzle, and store final values + for i in range(4): + swizzle = format.swizzles[i] + if swizzle < 4: + src_channel = format.channels[swizzle] + value = src_channel.name + value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + elif swizzle == SWIZZLE_0: + value = '0' + elif swizzle == SWIZZLE_1: + value = get_one(dst_channel) + elif swizzle == SWIZZLE_NONE: + value = '0' + else: + assert False + if format.colorspace == ZS: + if i == 3: + value = get_one(dst_channel) + elif i >= 1: + value = 'dst[0]' + print ' dst[%u] = %s; /* %s */' % (i, value, 'rgba'[i]) + + else: + print ' union util_format_%s pixel;' % format.short_name() + print ' memcpy(&pixel, src, sizeof pixel);' + bswap_format(format) + + for i in range(4): + swizzle = format.swizzles[i] + if swizzle < 4: + src_channel = format.channels[swizzle] + value = 'pixel.chan.%s' % src_channel.name + value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + elif swizzle == SWIZZLE_0: + value = '0' + elif swizzle == SWIZZLE_1: value = get_one(dst_channel) - elif i >= 1: - value = 'dst[0]' - print ' dst[%u] = %s; /* %s */' % (i, value, 'rgba'[i]) + elif swizzle == SWIZZLE_NONE: + value = '0' + else: + assert False + if format.colorspace == ZS: + if i == 3: + value = get_one(dst_channel) + elif i >= 1: + value = 'dst[0]' + print ' dst[%u] = %s; /* %s */' % (i, value, 'rgba'[i]) print '}' print @@ -352,31 +418,70 @@ def generate_format_pack(format, src_channel, src_native_type, src_suffix): dst_native_type = native_type(format) - print 'static INLINE void' - print 'util_format_%s_pack_%s(void *dst, %s r, %s g, %s b, %s a)' % (name, src_suffix, src_native_type, src_native_type, src_native_type, src_native_type) - print '{' - print ' union util_format_%s pixel;' % format.short_name() - assert format.layout == PLAIN inv_swizzle = format.inv_swizzles() + + print 'static INLINE void' + print 'util_format_%s_pack_%s(void *dst, %s r, %s g, %s b, %s a)' % (name, src_suffix, src_native_type, src_native_type, src_native_type, src_native_type) + print '{' + + if format.is_bitmask(): + depth = format.block_size() + print ' uint%u_t value = 0;' % depth + + shift = 0 + for i in range(4): + dst_channel = format.channels[i] + if inv_swizzle[i] is not None: + value = 'rgba'[inv_swizzle[i]] + value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + if format.colorspace == ZS: + if i == 3: + value = get_one(dst_channel) + elif i >= 1: + value = '0' + if dst_channel.type in (UNSIGNED, SIGNED): + if shift + dst_channel.size < depth: + value = '(%s) & 0x%x' % (value, (1 << dst_channel.size) - 1) + if shift: + value = '(%s) << %u' % (value, shift) + if dst_channel.type == SIGNED: + # Cast to unsigned + value = '(uint%u_t)(%s) ' % (depth, value) + else: + value = None + if value is not None: + print ' value |= %s;' % (value) + + shift += dst_channel.size - for i in range(4): - dst_channel = format.channels[i] - width = dst_channel.size - if inv_swizzle[i] is None: - continue - value = 'rgba'[inv_swizzle[i]] - value = conversion_expr(src_channel, dst_channel, dst_native_type, value) - if format.colorspace == ZS: - if i == 3: - value = get_one(dst_channel) - elif i >= 1: - value = '0' - print ' pixel.chan.%s = %s;' % (dst_channel.name, value) + print '#ifdef PIPE_ARCH_BIG_ENDIAN' + print ' value = util_bswap%u(value);' % depth + print '#endif' + + print ' *(uint%u_t *)dst = value;' % depth - bswap_format(format) - print ' memcpy(dst, &pixel, sizeof pixel);' + else: + print ' union util_format_%s pixel;' % format.short_name() + + for i in range(4): + dst_channel = format.channels[i] + width = dst_channel.size + if inv_swizzle[i] is None: + continue + value = 'rgba'[inv_swizzle[i]] + value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + if format.colorspace == ZS: + if i == 3: + value = get_one(dst_channel) + elif i >= 1: + value = '0' + print ' pixel.chan.%s = %s;' % (dst_channel.name, value) + + bswap_format(format) + print ' memcpy(dst, &pixel, sizeof pixel);' + print '}' print diff --git a/src/gallium/auxiliary/util/u_format_parse.py b/src/gallium/auxiliary/util/u_format_parse.py index 250926418ec..f74dc5e88a4 100755 --- a/src/gallium/auxiliary/util/u_format_parse.py +++ b/src/gallium/auxiliary/util/u_format_parse.py @@ -78,7 +78,7 @@ class Channel: if self.type == UNSIGNED: return (1 << self.size) - 1 if self.type == SIGNED: - return self.size - 1 + return (1 << (self.size - 1)) - 1 assert False def min(self): @@ -166,17 +166,11 @@ class Format: return True def is_bitmask(self): - if self.block_size() > 32: - return False - if not self.is_pot(): + if self.block_size() not in (8, 16, 32): return False for channel in self.channels: - if not is_pot(channel.size): - return True if channel.type not in (VOID, UNSIGNED, SIGNED): return False - if channel.size >= 32: - return False return True def inv_swizzles(self): diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py index 4e29d15f3bb..fb68852a530 100755 --- a/src/gallium/auxiliary/util/u_format_table.py +++ b/src/gallium/auxiliary/util/u_format_table.py @@ -90,11 +90,13 @@ def write_format_table(formats): print 'util_format_none_description = {' print " PIPE_FORMAT_NONE," print " \"PIPE_FORMAT_NONE\"," + print " \"none\"," print " {0, 0, 0}," print " 0," print " 0," print " 0," print " 0," + print " 0," print " {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}}," print " {0, 0, 0, 0}," print " 0" @@ -105,10 +107,12 @@ def write_format_table(formats): print 'util_format_%s_description = {' % (format.short_name(),) print " %s," % (format.name,) print " \"%s\"," % (format.name,) + print " \"%s\"," % (format.short_name(),) print " {%u, %u, %u},\t/* block */" % (format.block_width, format.block_height, format.block_size()) print " %s," % (layout_map(format.layout),) print " %u,\t/* nr_channels */" % (format.nr_channels(),) print " %s,\t/* is_array */" % (bool_map(format.is_array()),) + print " %s,\t/* is_bitmask */" % (bool_map(format.is_bitmask()),) print " %s,\t/* is_mixed */" % (bool_map(format.is_mixed()),) print " {" for i in range(4): diff --git a/src/gallium/auxiliary/util/u_format_tests.c b/src/gallium/auxiliary/util/u_format_tests.c new file mode 100644 index 00000000000..182a4740448 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_tests.c @@ -0,0 +1,544 @@ +/************************************************************************** + * + * Copyright 2009-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "u_memory.h" +#include "u_format_tests.h" + + +/* + * Helper macros to create the packed bytes for longer words. + */ + +#define PACKED_1x8(x) {x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} +#define PACKED_2x8(x, y) {x, y, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} +#define PACKED_3x8(x, y, z) {x, y, z, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} +#define PACKED_4x8(x, y, z, w) {x, y, z, w, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + +#define PACKED_1x16(x) {(x) & 0xff, (x) >> 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} +#define PACKED_2x16(x, y) {(x) & 0xff, (x) >> 8, (y) & 0xff, (y) >> 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} +#define PACKED_3x16(x, y, z) {(x) & 0xff, (x) >> 8, (y) & 0xff, (y) >> 8, (z) & 0xff, (z) >> 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} +#define PACKED_4x16(x, y, z, w) {(x) & 0xff, (x) >> 8, (y) & 0xff, (y) >> 8, (z) & 0xff, (z) >> 8, (w) & 0xff, (w) >> 8, 0, 0, 0, 0, 0, 0, 0, 0} + +#define PACKED_1x32(x) {(x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, (x) >> 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} +#define PACKED_2x32(x, y) {(x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, (x) >> 24, (y) & 0xff, ((y) >> 8) & 0xff, ((y) >> 16) & 0xff, (y) >> 24, 0, 0, 0, 0, 0, 0, 0, 0} +#define PACKED_3x32(x, y, z) {(x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, (x) >> 24, (y) & 0xff, ((y) >> 8) & 0xff, ((y) >> 16) & 0xff, (y) >> 24, (z) & 0xff, ((z) >> 8) & 0xff, ((z) >> 16) & 0xff, (z) >> 24, 0, 0, 0, 0} +#define PACKED_4x32(x, y, z, w) {(x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, (x) >> 24, (y) & 0xff, ((y) >> 8) & 0xff, ((y) >> 16) & 0xff, (y) >> 24, (z) & 0xff, ((z) >> 8) & 0xff, ((z) >> 16) & 0xff, (z) >> 24, (w) & 0xff, ((w) >> 8) & 0xff, ((w) >> 16) & 0xff, (w) >> 24} + + +/** + * Test cases. + * + * These were manually entered. We could generate these + * + * To keep this to a we cover only the corner cases, which should produce + * good enough coverage since that pixel format transformations are afine for + * non SRGB formats. + */ +const struct util_format_test_case +util_format_test_cases[] = +{ + + /* + * 32-bit rendertarget formats + */ + + {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), {0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), {0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000ff00), {0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ff0000), {1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00000000), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x000000ff), {0.0, 0.0, 1.0, 1.0}}, + {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x0000ff00), {0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00ff0000), {1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0xff000000), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0xffffffff), {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), {0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000ff00), {1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ff0000), {0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), {0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x00000000), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x000000ff), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x0000ff00), {1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x00ff0000), {0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0xff000000), {0.0, 0.0, 1.0, 1.0}}, + {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0xffffffff), {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), {0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000ff00), {0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ff0000), {0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), {1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x00000000), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x000000ff), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x0000ff00), {0.0, 0.0, 1.0, 1.0}}, + {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x00ff0000), {0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0xff000000), {1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0xffffffff), {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), {0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000003ff), {1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000ffc00), {0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x3ff00000), {0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xc0000000), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), {1.0, 1.0, 1.0, 1.0}}, + + /* + * 16-bit rendertarget formats + */ + + {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), {0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x001f), {0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x03e0), {0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x7c00), {1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x8000), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), {0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x000f), {0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x00f0), {0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0f00), {1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xf000), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x001f), {0.0, 0.0, 1.0, 1.0}}, + {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x07e0), {0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xf800), {1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), {1.0, 1.0, 1.0, 1.0}}, + + /* + * Luminance/intensity/alpha formats + */ + + {PIPE_FORMAT_L8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_L8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xff), {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_A8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), {0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_A8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xff), {0.0, 0.0, 0.0, 1.0}}, + + {PIPE_FORMAT_I8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), {0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_I8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xff), {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_L8A8_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), {0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_L8A8_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x00ff), {1.0, 1.0, 1.0, 0.0}}, + {PIPE_FORMAT_L8A8_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xff00), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_L8A8_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_L16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_L16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), {1.0, 1.0, 1.0, 1.0}}, + + /* + * TODO: SRGB formats + */ + + /* + * Mixed-signed formats + */ + + {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x00, 0x00, 0x00, 0x00), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x7f, 0x00, 0x00, 0x00), { 1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x81, 0x00, 0x00, 0x00), {-1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x00, 0x7f, 0x00, 0x00), { 0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x00, 0x81, 0x00, 0x00), { 0.0, -1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x00, 0x00, 0xff, 0x00), { 0.0, 0.0, 1.0, 1.0}}, + + {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x000f), { 1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x0011), {-1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x01e0), { 0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x0220), { 0.0, -1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0xfc00), { 0.0, 0.0, 1.0, 1.0}}, + + /* + * TODO: Depth-stencil formats + */ + + /* + * TODO: YUV formats + */ + + /* + * TODO: Compressed formats + */ + + /* + * Standard 8-bit integer formats + */ + + {PIPE_FORMAT_R8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xff), {1.0, 0.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R8G8_UNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x00), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8_UNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0xff, 0x00), {1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8_UNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0xff), {0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8_UNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0xff, 0xff), {1.0, 1.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x00), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0x00, 0x00), {1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0xff, 0x00), {0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0xff), {0.0, 0.0, 1.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0xff, 0xff), {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), {0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0x00, 0x00, 0x00), {1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0xff, 0x00, 0x00), {0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0xff, 0x00), {0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0xff), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0xff, 0xff, 0xff), {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_R8_USCALED, PACKED_1x8(0xff), PACKED_1x8(0x00), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8_USCALED, PACKED_1x8(0xff), PACKED_1x8(0xff), {255.0, 0.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R8G8_USCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x00), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8_USCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0xff, 0x00), {255.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8_USCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0xff), { 0.0, 255.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8_USCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0xff, 0xff), {255.0, 255.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x00), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0x00, 0x00), {255.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0xff, 0x00), { 0.0, 255.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0xff), { 0.0, 0.0, 255.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0xff, 0xff), {255.0, 255.0, 255.0, 1.0}}, + + {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), { 0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0x00, 0x00, 0x00), {255.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0xff, 0x00, 0x00), { 0.0, 255.0, 0.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0xff, 0x00), { 0.0, 0.0, 255.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0xff), { 0.0, 0.0, 0.0, 255.0}}, + {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0xff, 0xff, 0xff), {255.0, 255.0, 255.0, 255.0}}, + + {PIPE_FORMAT_R8_SNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8_SNORM, PACKED_1x8(0xff), PACKED_1x8(0x7f), { 1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8_SNORM, PACKED_1x8(0xff), PACKED_1x8(0x81), {-1.0, 0.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x00), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x7f, 0x00), { 1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x81, 0x00), {-1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x7f), { 0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x81), { 0.0, -1.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x00), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x7f, 0x00, 0x00), { 1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x81, 0x00, 0x00), {-1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x7f, 0x00), { 0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x81, 0x00), { 0.0, -1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x7f), { 0.0, 0.0, 1.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x81), { 0.0, 0.0, -1.0, 1.0}}, + + {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), { 0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x7f, 0x00, 0x00, 0x00), { 1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x81, 0x00, 0x00, 0x00), {-1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x7f, 0x00, 0x00), { 0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x81, 0x00, 0x00), { 0.0, -1.0, 0.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x7f, 0x00), { 0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x81, 0x00), { 0.0, 0.0, -1.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x7f), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x81), { 0.0, 0.0, 0.0, -1.0}}, + + {PIPE_FORMAT_R8_SSCALED, PACKED_1x8(0xff), PACKED_1x8(0x00), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8_SSCALED, PACKED_1x8(0xff), PACKED_1x8(0x7f), { 127.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8_SSCALED, PACKED_1x8(0xff), PACKED_1x8(0x80), {-128.0, 0.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x00), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x7f, 0x00), { 127.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x80, 0x00), {-128.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x7f), { 0.0, 127.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x80), { 0.0, -128.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x00), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x7f, 0x00, 0x00), { 127.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x80, 0x00, 0x00), {-128.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x7f, 0x00), { 0.0, 127.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x80, 0x00), { 0.0, -128.0, 0.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x7f), { 0.0, 0.0, 127.0, 1.0}}, + {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x80), { 0.0, 0.0, -128.0, 1.0}}, + + {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), { 0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x7f, 0x00, 0x00, 0x00), { 127.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x80, 0x00, 0x00, 0x00), {-128.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x7f, 0x00, 0x00), { 0.0, 127.0, 0.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x80, 0x00, 0x00), { 0.0, -128.0, 0.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x7f, 0x00), { 0.0, 0.0, 127.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x80, 0x00), { 0.0, 0.0, -128.0, 0.0}}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x7f), { 0.0, 0.0, 0.0, 127.0}}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x80), { 0.0, 0.0, 0.0, -128.0}}, + + /* + * Standard 16-bit integer formats + */ + + {PIPE_FORMAT_R16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), {1.0, 0.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R16G16_UNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x0000), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16_UNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0xffff, 0x0000), {1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16_UNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0xffff), {0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16_UNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0xffff, 0xffff), {1.0, 1.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x0000), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0xffff, 0x0000, 0x0000), {1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0xffff, 0x0000), {0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0xffff), {0.0, 0.0, 1.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0xffff, 0xffff, 0xffff), {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x0000), {0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0xffff, 0x0000, 0x0000, 0x0000), {1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0xffff, 0x0000, 0x0000), {0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0xffff, 0x0000), {0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0xffff), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_R16_USCALED, PACKED_1x16(0xffff), PACKED_1x16(0x0000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16_USCALED, PACKED_1x16(0xffff), PACKED_1x16(0xffff), {65535.0, 0.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R16G16_USCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x0000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16_USCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0xffff, 0x0000), {65535.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16_USCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0xffff), { 0.0, 65535.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16_USCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0xffff, 0xffff), {65535.0, 65535.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x0000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0xffff, 0x0000, 0x0000), {65535.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0xffff, 0x0000), { 0.0, 65535.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0xffff), { 0.0, 0.0, 65535.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0xffff, 0xffff, 0xffff), {65535.0, 65535.0, 65535.0, 1.0}}, + + {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x0000), { 0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0xffff, 0x0000, 0x0000, 0x0000), {65535.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0xffff, 0x0000, 0x0000), { 0.0, 65535.0, 0.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0xffff, 0x0000), { 0.0, 0.0, 65535.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0xffff), { 0.0, 0.0, 0.0, 65535.0}}, + {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), {65535.0, 65535.0, 65535.0, 65535.0}}, + + {PIPE_FORMAT_R16_SNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16_SNORM, PACKED_1x16(0xffff), PACKED_1x16(0x7fff), { 1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16_SNORM, PACKED_1x16(0xffff), PACKED_1x16(0x8001), { -1.0, 0.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x0000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x7fff, 0x0000), { 1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x8001, 0x0000), { -1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x7fff), { 0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x8001), { 0.0, -1.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x0000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x7fff, 0x0000, 0x0000), { 1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x8001, 0x0000, 0x0000), { -1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x7fff, 0x0000), { 0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x8001, 0x0000), { 0.0, -1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x7fff), { 0.0, 0.0, 1.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x8001), { 0.0, 0.0, -1.0, 1.0}}, + + {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x0000), { 0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x7fff, 0x0000, 0x0000, 0x0000), { 1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x8001, 0x0000, 0x0000, 0x0000), { -1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x7fff, 0x0000, 0x0000), { 0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x8001, 0x0000, 0x0000), { 0.0, -1.0, 0.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x7fff, 0x0000), { 0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x8001, 0x0000), { 0.0, 0.0, -1.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x7fff), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x8001), { 0.0, 0.0, 0.0, -1.0}}, + + {PIPE_FORMAT_R16_SSCALED, PACKED_1x16(0xffff), PACKED_1x16(0x0000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16_SSCALED, PACKED_1x16(0xffff), PACKED_1x16(0x7fff), { 32767.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16_SSCALED, PACKED_1x16(0xffff), PACKED_1x16(0x8000), {-32768.0, 0.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x0000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x7fff, 0x0000), { 32767.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x8000, 0x0000), {-32768.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x7fff), { 0.0, 32767.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x8000), { 0.0, -32768.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x0000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x7fff, 0x0000, 0x0000), { 32767.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x8000, 0x0000, 0x0000), {-32768.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x7fff, 0x0000), { 0.0, 32767.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x8000, 0x0000), { 0.0, -32768.0, 0.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x7fff), { 0.0, 0.0, 32767.0, 1.0}}, + {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x8000), { 0.0, 0.0, -32768.0, 1.0}}, + + {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x0000), { 0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x7fff, 0x0000, 0x0000, 0x0000), { 32767.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x8000, 0x0000, 0x0000, 0x0000), {-32768.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x7fff, 0x0000, 0x0000), { 0.0, 32767.0, 0.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x8000, 0x0000, 0x0000), { 0.0, -32768.0, 0.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x7fff, 0x0000), { 0.0, 0.0, 32767.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x8000, 0x0000), { 0.0, 0.0, -32768.0, 0.0}}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x7fff), { 0.0, 0.0, 0.0, 32767.0}}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x8000), { 0.0, 0.0, 0.0, -32768.0}}, + + /* + * Standard 32-bit integer formats + * + * NOTE: We can't accurately represent integers larger than +/-0x1000000 + * with single precision floats, so that's as far as we test. + */ + + {PIPE_FORMAT_R32_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), {1.0, 0.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R32G32_UNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_UNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0xffffffff, 0x00000000), {1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_UNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0xffffffff), {0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_UNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0xffffffff, 0xffffffff), {1.0, 1.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0xffffffff, 0x00000000, 0x00000000), {1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0xffffffff, 0x00000000), {0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0xffffffff), {0.0, 0.0, 1.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), {0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0xffffffff, 0x00000000, 0x00000000, 0x00000000), {1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0xffffffff, 0x00000000, 0x00000000), {0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0xffffffff, 0x00000000), {0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0xffffffff), {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_R32_USCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32_USCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x01000000), {16777216.0, 0.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R32G32_USCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_USCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x01000000, 0x00000000), {16777216.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_USCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x01000000), { 0.0, 16777216.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_USCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x01000000, 0x01000000), {16777216.0, 16777216.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x01000000, 0x00000000, 0x00000000), {16777216.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x01000000, 0x00000000), { 0.0, 16777216.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x01000000), { 0.0, 0.0, 16777216.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x01000000, 0x01000000, 0x01000000), {16777216.0, 16777216.0, 16777216.0, 1.0}}, + + {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), { 0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x01000000, 0x00000000, 0x00000000, 0x00000000), {16777216.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x01000000, 0x00000000, 0x00000000), { 0.0, 16777216.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x01000000, 0x00000000), { 0.0, 0.0, 16777216.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x01000000), { 0.0, 0.0, 0.0, 16777216.0}}, + {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x01000000, 0x01000000, 0x01000000, 0x01000000), {16777216.0, 16777216.0, 16777216.0, 16777216.0}}, + + {PIPE_FORMAT_R32_SNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32_SNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x7fffffff), { 1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32_SNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x80000001), { -1.0, 0.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x7fffffff, 0x00000000), { 1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x80000001, 0x00000000), { -1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x7fffffff), { 0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x80000001), { 0.0, -1.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x7fffffff, 0x00000000, 0x00000000), { 1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x80000001, 0x00000000, 0x00000000), { -1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x7fffffff, 0x00000000), { 0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x80000001, 0x00000000), { 0.0, -1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x7fffffff), { 0.0, 0.0, 1.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x80000001), { 0.0, 0.0, -1.0, 1.0}}, + + {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), { 0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x7fffffff, 0x00000000, 0x00000000, 0x00000000), { 1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x80000001, 0x00000000, 0x00000000, 0x00000000), { -1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x7fffffff, 0x00000000, 0x00000000), { 0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x80000001, 0x00000000, 0x00000000), { 0.0, -1.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x7fffffff, 0x00000000), { 0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x80000001, 0x00000000), { 0.0, 0.0, -1.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x7fffffff), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x80000001), { 0.0, 0.0, 0.0, -1.0}}, + + {PIPE_FORMAT_R32_SSCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32_SSCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x01000000), { 16777216.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32_SSCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), {-16777216.0, 0.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x01000000, 0x00000000), { 16777216.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0xff000000, 0x00000000), {-16777216.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x01000000), { 0.0, 16777216.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0xff000000), { 0.0, -16777216.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x01000000, 0x00000000, 0x00000000), { 16777216.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0xff000000, 0x00000000, 0x00000000), {-16777216.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x01000000, 0x00000000), { 0.0, 16777216.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0xff000000, 0x00000000), { 0.0, -16777216.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x01000000), { 0.0, 0.0, 16777216.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0xff000000), { 0.0, 0.0, -16777216.0, 1.0}}, + + {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), { 0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x01000000, 0x00000000, 0x00000000, 0x00000000), { 16777216.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0xff000000, 0x00000000, 0x00000000, 0x00000000), {-16777216.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x01000000, 0x00000000, 0x00000000), { 0.0, 16777216.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0xff000000, 0x00000000, 0x00000000), { 0.0, -16777216.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x01000000, 0x00000000), { 0.0, 0.0, 16777216.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0xff000000, 0x00000000), { 0.0, 0.0, -16777216.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x01000000), { 0.0, 0.0, 0.0, 16777216.0}}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0xff000000), { 0.0, 0.0, 0.0, -16777216.0}}, + + /* + * Standard 32-bit float formats + */ + + {PIPE_FORMAT_R32_FLOAT, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32_FLOAT, PACKED_1x32(0xffffffff), PACKED_1x32(0x3f800000), { 1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32_FLOAT, PACKED_1x32(0xffffffff), PACKED_1x32(0xbf800000), { -1.0, 0.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x3f800000, 0x00000000), { 1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0xbf800000, 0x00000000), {-1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x3f800000), { 0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0xbf800000), { 0.0, -1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x3f800000, 0x3f800000), { 1.0, 1.0, 0.0, 1.0}}, + + {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x3f800000, 0x00000000, 0x00000000), { 1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0xbf800000, 0x00000000, 0x00000000), {-1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x3f800000, 0x00000000), { 0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0xbf800000, 0x00000000), { 0.0, -1.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x3f800000), { 0.0, 0.0, 1.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0xbf800000), { 0.0, 0.0, -1.0, 1.0}}, + {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x3f800000, 0x3f800000, 0x3f800000), { 1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), { 0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x3f800000, 0x00000000, 0x00000000, 0x00000000), { 1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0xbf800000, 0x00000000, 0x00000000, 0x00000000), {-1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x3f800000, 0x00000000, 0x00000000), { 0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0xbf800000, 0x00000000, 0x00000000), { 0.0, -1.0, 0.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x3f800000, 0x00000000), { 0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0xbf800000, 0x00000000), { 0.0, 0.0, -1.0, 0.0}}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x3f800000), { 0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0xbf800000), { 0.0, 0.0, 0.0, -1.0}}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000), { 1.0, 1.0, 1.0, 1.0}}, +}; + + +const unsigned util_format_nr_test_cases = Elements(util_format_test_cases); diff --git a/src/gallium/auxiliary/util/u_format_tests.h b/src/gallium/auxiliary/util/u_format_tests.h new file mode 100644 index 00000000000..2d4d9d5fa9d --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_tests.h @@ -0,0 +1,69 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#ifndef U_FORMAT_TESTS_H_ +#define U_FORMAT_TESTS_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" + + +#define UTIL_FORMAT_MAX_PACKED_BYTES 16 + + +/** + * A (packed, unpacked) color pair. + */ +struct util_format_test_case +{ + enum pipe_format format; + + /** + * Mask of the bits that actually meaningful data. Used to mask out the + * "X" channels. + */ + uint8_t mask[UTIL_FORMAT_MAX_PACKED_BYTES]; + + uint8_t packed[UTIL_FORMAT_MAX_PACKED_BYTES]; + + /** + * RGBA. + */ + double unpacked[4]; +}; + + +extern const struct util_format_test_case +util_format_test_cases[]; + + +extern const unsigned util_format_nr_test_cases; + + +#endif /* U_FORMAT_TESTS_H_ */ diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index fc027e48e4e..5c51b53d7bd 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -62,6 +62,7 @@ struct gen_mipmap_state struct pipe_rasterizer_state rasterizer; struct pipe_sampler_state sampler; struct pipe_clip_state clip; + struct pipe_vertex_element velem[2]; void *vs; void *fs2d, *fsCube; @@ -1118,7 +1119,6 @@ make_1d_mipmap(struct gen_mipmap_state *ctx, uint face, uint baseLevel, uint lastLevel) { struct pipe_context *pipe = ctx->pipe; - struct pipe_screen *screen = pipe->screen; const uint zslice = 0; uint dstLevel; @@ -1127,27 +1127,27 @@ make_1d_mipmap(struct gen_mipmap_state *ctx, struct pipe_transfer *srcTrans, *dstTrans; void *srcMap, *dstMap; - srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, + srcTrans = pipe->get_tex_transfer(pipe, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, u_minify(pt->width0, srcLevel), u_minify(pt->height0, srcLevel)); - dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, + dstTrans = pipe->get_tex_transfer(pipe, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, u_minify(pt->width0, dstLevel), u_minify(pt->height0, dstLevel)); - srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); - dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); + srcMap = (ubyte *) pipe->transfer_map(pipe, srcTrans); + dstMap = (ubyte *) pipe->transfer_map(pipe, dstTrans); reduce_1d(pt->format, srcTrans->width, srcMap, dstTrans->width, dstMap); - screen->transfer_unmap(screen, srcTrans); - screen->transfer_unmap(screen, dstTrans); + pipe->transfer_unmap(pipe, srcTrans); + pipe->transfer_unmap(pipe, dstTrans); - screen->tex_transfer_destroy(srcTrans); - screen->tex_transfer_destroy(dstTrans); + pipe->tex_transfer_destroy(pipe, srcTrans); + pipe->tex_transfer_destroy(pipe, dstTrans); } } @@ -1158,7 +1158,6 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, uint face, uint baseLevel, uint lastLevel) { struct pipe_context *pipe = ctx->pipe; - struct pipe_screen *screen = pipe->screen; const uint zslice = 0; uint dstLevel; @@ -1170,17 +1169,17 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, struct pipe_transfer *srcTrans, *dstTrans; ubyte *srcMap, *dstMap; - srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, + srcTrans = pipe->get_tex_transfer(pipe, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, u_minify(pt->width0, srcLevel), u_minify(pt->height0, srcLevel)); - dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, + dstTrans = pipe->get_tex_transfer(pipe, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, u_minify(pt->width0, dstLevel), u_minify(pt->height0, dstLevel)); - srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); - dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); + srcMap = (ubyte *) pipe->transfer_map(pipe, srcTrans); + dstMap = (ubyte *) pipe->transfer_map(pipe, dstTrans); reduce_2d(pt->format, srcTrans->width, srcTrans->height, @@ -1188,11 +1187,11 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, dstTrans->width, dstTrans->height, dstTrans->stride, dstMap); - screen->transfer_unmap(screen, srcTrans); - screen->transfer_unmap(screen, dstTrans); + pipe->transfer_unmap(pipe, srcTrans); + pipe->transfer_unmap(pipe, dstTrans); - screen->tex_transfer_destroy(srcTrans); - screen->tex_transfer_destroy(dstTrans); + pipe->tex_transfer_destroy(pipe, srcTrans); + pipe->tex_transfer_destroy(pipe, dstTrans); } } @@ -1215,17 +1214,17 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, struct pipe_transfer *srcTrans, *dstTrans; ubyte *srcMap, *dstMap; - srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, + srcTrans = pipe->get_tex_transfer(pipe, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, u_minify(pt->width0, srcLevel), u_minify(pt->height0, srcLevel)); - dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, + dstTrans = pipe->get_tex_transfer(pipe, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, u_minify(pt->width0, dstLevel), u_minify(pt->height0, dstLevel)); - srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); - dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); + srcMap = (ubyte *) pipe->transfer_map(pipe, srcTrans); + dstMap = (ubyte *) pipe->transfer_map(pipe, dstTrans); reduce_3d(pt->format, srcTrans->width, srcTrans->height, @@ -1233,11 +1232,11 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, dstTrans->width, dstTrans->height, dstTrans->stride, dstMap); - screen->transfer_unmap(screen, srcTrans); - screen->transfer_unmap(screen, dstTrans); + pipe->transfer_unmap(pipe, srcTrans); + pipe->transfer_unmap(pipe, dstTrans); - screen->tex_transfer_destroy(srcTrans); - screen->tex_transfer_destroy(dstTrans); + pipe->tex_transfer_destroy(pipe, srcTrans); + pipe->tex_transfer_destroy(pipe, dstTrans); } #else (void) reduce_3d; @@ -1307,6 +1306,15 @@ util_create_gen_mipmap(struct pipe_context *pipe, ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; ctx->sampler.normalized_coords = 1; + /* vertex elements state */ + memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2); + for (i = 0; i < 2; i++) { + ctx->velem[i].src_offset = i * 4 * sizeof(float); + ctx->velem[i].instance_divisor = 0; + ctx->velem[i].vertex_buffer_index = 0; + ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + } + /* vertex shader - still needed to specify mapping from fragment * shader input semantics to vertex elements */ @@ -1501,12 +1509,14 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_save_vertex_shader(ctx->cso); cso_save_viewport(ctx->cso); cso_save_clip(ctx->cso); + cso_save_vertex_elements(ctx->cso); /* bind our state */ cso_set_blend(ctx->cso, &ctx->blend); cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); cso_set_rasterizer(ctx->cso, &ctx->rasterizer); cso_set_clip(ctx->cso, &ctx->clip); + cso_set_vertex_elements(ctx->cso, 2, ctx->velem); cso_set_fragment_shader_handle(ctx->cso, fs); cso_set_vertex_shader_handle(ctx->cso, ctx->vs); @@ -1593,4 +1603,5 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_restore_vertex_shader(ctx->cso); cso_restore_viewport(ctx->cso); cso_restore_clip(ctx->cso); + cso_restore_vertex_elements(ctx->cso); } diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index 0cb3432c6e4..e7255e3baa8 100644 --- a/src/gallium/auxiliary/util/u_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -264,24 +264,24 @@ pipe_buffer_read(struct pipe_screen *screen, } static INLINE void * -pipe_transfer_map( struct pipe_transfer *transf ) +pipe_transfer_map( struct pipe_context *context, + struct pipe_transfer *transf ) { - struct pipe_screen *screen = transf->texture->screen; - return screen->transfer_map(screen, transf); + return context->transfer_map(context, transf); } static INLINE void -pipe_transfer_unmap( struct pipe_transfer *transf ) +pipe_transfer_unmap( struct pipe_context *context, + struct pipe_transfer *transf ) { - struct pipe_screen *screen = transf->texture->screen; - screen->transfer_unmap(screen, transf); + context->transfer_unmap(context, transf); } static INLINE void -pipe_transfer_destroy( struct pipe_transfer *transf ) +pipe_transfer_destroy( struct pipe_context *context, + struct pipe_transfer *transfer ) { - struct pipe_screen *screen = transf->texture->screen; - screen->tex_transfer_destroy(transf); + context->tex_transfer_destroy(context, transfer); } static INLINE unsigned diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index 8479161c744..e73797f1b7e 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -169,7 +169,6 @@ util_surface_copy(struct pipe_context *pipe, unsigned src_x, unsigned src_y, unsigned w, unsigned h) { - struct pipe_screen *screen = pipe->screen; struct pipe_transfer *src_trans, *dst_trans; void *dst_map; const void *src_map; @@ -182,7 +181,7 @@ util_surface_copy(struct pipe_context *pipe, src_format = src->texture->format; dst_format = dst->texture->format; - src_trans = screen->get_tex_transfer(screen, + src_trans = pipe->get_tex_transfer(pipe, src->texture, src->face, src->level, @@ -190,7 +189,7 @@ util_surface_copy(struct pipe_context *pipe, PIPE_TRANSFER_READ, src_x, src_y, w, h); - dst_trans = screen->get_tex_transfer(screen, + dst_trans = pipe->get_tex_transfer(pipe, dst->texture, dst->face, dst->level, @@ -202,8 +201,8 @@ util_surface_copy(struct pipe_context *pipe, assert(util_format_get_blockwidth(dst_format) == util_format_get_blockwidth(src_format)); assert(util_format_get_blockheight(dst_format) == util_format_get_blockheight(src_format)); - src_map = pipe->screen->transfer_map(screen, src_trans); - dst_map = pipe->screen->transfer_map(screen, dst_trans); + src_map = pipe->transfer_map(pipe, src_trans); + dst_map = pipe->transfer_map(pipe, dst_trans); assert(src_map); assert(dst_map); @@ -221,11 +220,11 @@ util_surface_copy(struct pipe_context *pipe, do_flip ? h - 1 : 0); } - pipe->screen->transfer_unmap(pipe->screen, src_trans); - pipe->screen->transfer_unmap(pipe->screen, dst_trans); + pipe->transfer_unmap(pipe, src_trans); + pipe->transfer_unmap(pipe, dst_trans); - screen->tex_transfer_destroy(src_trans); - screen->tex_transfer_destroy(dst_trans); + pipe->tex_transfer_destroy(pipe, src_trans); + pipe->tex_transfer_destroy(pipe, dst_trans); } @@ -243,14 +242,13 @@ util_surface_fill(struct pipe_context *pipe, unsigned dstx, unsigned dsty, unsigned width, unsigned height, unsigned value) { - struct pipe_screen *screen = pipe->screen; struct pipe_transfer *dst_trans; void *dst_map; assert(dst->texture); if (!dst->texture) return; - dst_trans = screen->get_tex_transfer(screen, + dst_trans = pipe->get_tex_transfer(pipe, dst->texture, dst->face, dst->level, @@ -258,7 +256,7 @@ util_surface_fill(struct pipe_context *pipe, PIPE_TRANSFER_WRITE, dstx, dsty, width, height); - dst_map = pipe->screen->transfer_map(screen, dst_trans); + dst_map = pipe->transfer_map(pipe, dst_trans); assert(dst_map); @@ -302,6 +300,6 @@ util_surface_fill(struct pipe_context *pipe, } } - pipe->screen->transfer_unmap(pipe->screen, dst_trans); - screen->tex_transfer_destroy(dst_trans); + pipe->transfer_unmap(pipe, dst_trans); + pipe->tex_transfer_destroy(pipe, dst_trans); } diff --git a/src/gallium/auxiliary/util/u_simple_screen.c b/src/gallium/auxiliary/util/u_simple_screen.c index 53f3c16dbcc..9203cb6580c 100644 --- a/src/gallium/auxiliary/util/u_simple_screen.c +++ b/src/gallium/auxiliary/util/u_simple_screen.c @@ -59,22 +59,7 @@ pass_user_buffer_create(struct pipe_screen *screen, return buffer; } -static struct pipe_buffer * -pass_surface_buffer_create(struct pipe_screen *screen, - unsigned width, unsigned height, - enum pipe_format format, - unsigned usage, - unsigned tex_usage, - unsigned *stride) -{ - struct pipe_buffer *buffer = - screen->winsys->surface_buffer_create(screen->winsys, width, height, - format, usage, tex_usage, stride); - buffer->screen = screen; - - return buffer; -} static void * pass_buffer_map(struct pipe_screen *screen, @@ -135,7 +120,6 @@ u_simple_screen_init(struct pipe_screen *screen) { screen->buffer_create = pass_buffer_create; screen->user_buffer_create = pass_user_buffer_create; - screen->surface_buffer_create = pass_surface_buffer_create; screen->buffer_map = pass_buffer_map; screen->buffer_unmap = pass_buffer_unmap; diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 79481b710bf..e445895efc5 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -45,11 +45,11 @@ * Move raw block of pixels from transfer object to user memory. */ void -pipe_get_tile_raw(struct pipe_transfer *pt, +pipe_get_tile_raw(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, void *dst, int dst_stride) { - struct pipe_screen *screen = pt->texture->screen; const void *src; if (dst_stride == 0) @@ -58,14 +58,14 @@ pipe_get_tile_raw(struct pipe_transfer *pt, if (pipe_clip_tile(x, y, &w, &h, pt)) return; - src = screen->transfer_map(screen, pt); + src = pipe->transfer_map(pipe, pt); assert(src); if(!src) return; util_copy_rect(dst, pt->texture->format, dst_stride, 0, 0, w, h, src, pt->stride, x, y); - screen->transfer_unmap(screen, pt); + pipe->transfer_unmap(pipe, pt); } @@ -73,11 +73,11 @@ pipe_get_tile_raw(struct pipe_transfer *pt, * Move raw block of pixels from user memory to transfer object. */ void -pipe_put_tile_raw(struct pipe_transfer *pt, +pipe_put_tile_raw(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const void *src, int src_stride) { - struct pipe_screen *screen = pt->texture->screen; void *dst; enum pipe_format format = pt->texture->format; @@ -87,14 +87,14 @@ pipe_put_tile_raw(struct pipe_transfer *pt, if (pipe_clip_tile(x, y, &w, &h, pt)) return; - dst = screen->transfer_map(screen, pt); + dst = pipe->transfer_map(pipe, pt); assert(dst); if(!dst) return; util_copy_rect(dst, format, pt->stride, x, y, w, h, src, src_stride, 0, 0); - screen->transfer_unmap(screen, pt); + pipe->transfer_unmap(pipe, pt); } @@ -1246,7 +1246,8 @@ pipe_tile_raw_to_rgba(enum pipe_format format, void -pipe_get_tile_rgba(struct pipe_transfer *pt, +pipe_get_tile_rgba(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, float *p) { @@ -1265,7 +1266,7 @@ pipe_get_tile_rgba(struct pipe_transfer *pt, if(format == PIPE_FORMAT_UYVY || format == PIPE_FORMAT_YUYV) assert((x & 1) == 0); - pipe_get_tile_raw(pt, x, y, w, h, packed, 0); + pipe_get_tile_raw(pipe, pt, x, y, w, h, packed, 0); pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride); @@ -1274,7 +1275,8 @@ pipe_get_tile_rgba(struct pipe_transfer *pt, void -pipe_put_tile_rgba(struct pipe_transfer *pt, +pipe_put_tile_rgba(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const float *p) { @@ -1363,7 +1365,7 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, 0, 0, w, h); } - pipe_put_tile_raw(pt, x, y, w, h, packed, 0); + pipe_put_tile_raw(pipe, pt, x, y, w, h, packed, 0); FREE(packed); } @@ -1373,11 +1375,11 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, * Get a block of Z values, converted to 32-bit range. */ void -pipe_get_tile_z(struct pipe_transfer *pt, +pipe_get_tile_z(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, uint *z) { - struct pipe_screen *screen = pt->texture->screen; const uint dstStride = w; ubyte *map; uint *pDest = z; @@ -1387,7 +1389,7 @@ pipe_get_tile_z(struct pipe_transfer *pt, if (pipe_clip_tile(x, y, &w, &h, pt)) return; - map = (ubyte *)screen->transfer_map(screen, pt); + map = (ubyte *)pipe->transfer_map(pipe, pt); if (!map) { assert(0); return; @@ -1453,16 +1455,16 @@ pipe_get_tile_z(struct pipe_transfer *pt, assert(0); } - screen->transfer_unmap(screen, pt); + pipe->transfer_unmap(pipe, pt); } void -pipe_put_tile_z(struct pipe_transfer *pt, +pipe_put_tile_z(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const uint *zSrc) { - struct pipe_screen *screen = pt->texture->screen; const uint srcStride = w; const uint *ptrc = zSrc; ubyte *map; @@ -1472,7 +1474,7 @@ pipe_put_tile_z(struct pipe_transfer *pt, if (pipe_clip_tile(x, y, &w, &h, pt)) return; - map = (ubyte *)screen->transfer_map(screen, pt); + map = (ubyte *)pipe->transfer_map(pipe, pt); if (!map) { assert(0); return; @@ -1560,7 +1562,7 @@ pipe_put_tile_z(struct pipe_transfer *pt, assert(0); } - screen->transfer_unmap(screen, pt); + pipe->transfer_unmap(pipe, pt); } diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h index 1453af38b8a..8329087cfa6 100644 --- a/src/gallium/auxiliary/util/u_tile.h +++ b/src/gallium/auxiliary/util/u_tile.h @@ -56,34 +56,40 @@ extern "C" { #endif void -pipe_get_tile_raw(struct pipe_transfer *pt, +pipe_get_tile_raw(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, void *p, int dst_stride); void -pipe_put_tile_raw(struct pipe_transfer *pt, +pipe_put_tile_raw(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const void *p, int src_stride); void -pipe_get_tile_rgba(struct pipe_transfer *pt, +pipe_get_tile_rgba(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, float *p); void -pipe_put_tile_rgba(struct pipe_transfer *pt, +pipe_put_tile_rgba(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const float *p); void -pipe_get_tile_z(struct pipe_transfer *pt, +pipe_get_tile_z(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, uint *z); void -pipe_put_tile_z(struct pipe_transfer *pt, +pipe_put_tile_z(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const uint *z); diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index ba23435f698..6d461cb8800 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -230,6 +230,7 @@ static bool init_pipe_state(struct vl_compositor *c) { struct pipe_sampler_state sampler; + struct pipe_vertex_element vertex_elems[2]; assert(c); @@ -251,15 +252,27 @@ init_pipe_state(struct vl_compositor *c) /*sampler.border_color[i] = ;*/ /*sampler.max_anisotropy = ;*/ c->sampler = c->pipe->create_sampler_state(c->pipe, &sampler); - + + vertex_elems[0].src_offset = 0; + vertex_elems[0].instance_divisor = 0; + vertex_elems[0].vertex_buffer_index = 0; + vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + vertex_elems[1].src_offset = 0; + vertex_elems[1].instance_divisor = 0; + vertex_elems[1].vertex_buffer_index = 1; + vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + c->vertex_elems = c->pipe->create_vertex_elements_state(c->pipe, 2, vertex_elems); + + return true; } static void cleanup_pipe_state(struct vl_compositor *c) { assert(c); - + c->pipe->delete_sampler_state(c->pipe, c->sampler); + c->pipe->delete_vertex_elements_state(c->pipe, c->vertex_elems); } static bool @@ -314,12 +327,6 @@ init_buffers(struct vl_compositor *c) pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer); - c->vertex_elems[0].src_offset = 0; - c->vertex_elems[0].instance_divisor = 0; - c->vertex_elems[0].vertex_buffer_index = 0; - c->vertex_elems[0].nr_components = 2; - c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; - /* * Create our texcoord buffer and texcoord buffer element * Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices @@ -344,12 +351,6 @@ init_buffers(struct vl_compositor *c) pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer); - c->vertex_elems[1].src_offset = 0; - c->vertex_elems[1].instance_divisor = 0; - c->vertex_elems[1].vertex_buffer_index = 1; - c->vertex_elems[1].nr_components = 2; - c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; - /* * Create our vertex shader's constant buffer * Const buffer contains scaling and translation vectors @@ -483,7 +484,7 @@ void vl_compositor_render(struct vl_compositor *compositor, compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader); compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader); compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs); - compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems); + compositor->pipe->bind_vertex_elements_state(compositor->pipe, compositor->vertex_elems); compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, compositor->vs_const_buf); compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, compositor->fs_const_buf); diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h index 6a9a3fd7af1..51755554da1 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.h +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -43,10 +43,10 @@ struct vl_compositor void *sampler; void *vertex_shader; void *fragment_shader; + void *vertex_elems; struct pipe_viewport_state viewport; struct pipe_scissor_state scissor; struct pipe_vertex_buffer vertex_bufs[2]; - struct pipe_vertex_element vertex_elems[2]; struct pipe_buffer *vs_const_buf, *fs_const_buf; }; diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index f323de0ea55..beb4722901e 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -680,14 +680,14 @@ xfer_buffers_map(struct vl_mpeg12_mc_renderer *r) assert(r); for (i = 0; i < 3; ++i) { - r->tex_transfer[i] = r->pipe->screen->get_tex_transfer + r->tex_transfer[i] = r->pipe->get_tex_transfer ( - r->pipe->screen, r->textures.all[i], + r->pipe, r->textures.all[i], 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0, r->textures.all[i]->width0, r->textures.all[i]->height0 ); - r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]); + r->texels[i] = r->pipe->transfer_map(r->pipe, r->tex_transfer[i]); } } @@ -699,8 +699,8 @@ xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r) assert(r); for (i = 0; i < 3; ++i) { - r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]); - r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]); + r->pipe->transfer_unmap(r->pipe, r->tex_transfer[i]); + r->pipe->tex_transfer_destroy(r->pipe, r->tex_transfer[i]); } } @@ -708,6 +708,7 @@ static bool init_pipe_state(struct vl_mpeg12_mc_renderer *r) { struct pipe_sampler_state sampler; + struct pipe_vertex_element vertex_elems[8]; unsigned filters[5]; unsigned i; @@ -771,6 +772,59 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r) r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler); } + /* Position element */ + vertex_elems[0].src_offset = 0; + vertex_elems[0].instance_divisor = 0; + vertex_elems[0].vertex_buffer_index = 0; + vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Luma, texcoord element */ + vertex_elems[1].src_offset = sizeof(struct vertex2f); + vertex_elems[1].instance_divisor = 0; + vertex_elems[1].vertex_buffer_index = 0; + vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Chroma Cr texcoord element */ + vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2; + vertex_elems[2].instance_divisor = 0; + vertex_elems[2].vertex_buffer_index = 0; + vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Chroma Cb texcoord element */ + vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3; + vertex_elems[3].instance_divisor = 0; + vertex_elems[3].vertex_buffer_index = 0; + vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* First ref surface top field texcoord element */ + vertex_elems[4].src_offset = 0; + vertex_elems[4].instance_divisor = 0; + vertex_elems[4].vertex_buffer_index = 1; + vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* First ref surface bottom field texcoord element */ + vertex_elems[5].src_offset = sizeof(struct vertex2f); + vertex_elems[5].instance_divisor = 0; + vertex_elems[5].vertex_buffer_index = 1; + vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Second ref surface top field texcoord element */ + vertex_elems[6].src_offset = 0; + vertex_elems[6].instance_divisor = 0; + vertex_elems[6].vertex_buffer_index = 2; + vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Second ref surface bottom field texcoord element */ + vertex_elems[7].src_offset = sizeof(struct vertex2f); + vertex_elems[7].instance_divisor = 0; + vertex_elems[7].vertex_buffer_index = 2; + vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* need versions with 4,6 and 8 vertex elems */ + r->vertex_elems[0] = r->pipe->create_vertex_elements_state(r->pipe, 4, vertex_elems); + r->vertex_elems[1] = r->pipe->create_vertex_elements_state(r->pipe, 6, vertex_elems); + r->vertex_elems[2] = r->pipe->create_vertex_elements_state(r->pipe, 8, vertex_elems); + return true; } @@ -783,6 +837,8 @@ cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r) for (i = 0; i < 5; ++i) r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]); + for (i = 0; i < 3; i++) + r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems[i]); } static bool @@ -888,62 +944,6 @@ init_buffers(struct vl_mpeg12_mc_renderer *r) ); } - /* Position element */ - r->vertex_elems[0].src_offset = 0; - r->vertex_elems[0].instance_divisor = 0; - r->vertex_elems[0].vertex_buffer_index = 0; - r->vertex_elems[0].nr_components = 2; - r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* Luma, texcoord element */ - r->vertex_elems[1].src_offset = sizeof(struct vertex2f); - r->vertex_elems[1].instance_divisor = 0; - r->vertex_elems[1].vertex_buffer_index = 0; - r->vertex_elems[1].nr_components = 2; - r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* Chroma Cr texcoord element */ - r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2; - r->vertex_elems[2].instance_divisor = 0; - r->vertex_elems[2].vertex_buffer_index = 0; - r->vertex_elems[2].nr_components = 2; - r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* Chroma Cb texcoord element */ - r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3; - r->vertex_elems[3].instance_divisor = 0; - r->vertex_elems[3].vertex_buffer_index = 0; - r->vertex_elems[3].nr_components = 2; - r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* First ref surface top field texcoord element */ - r->vertex_elems[4].src_offset = 0; - r->vertex_elems[4].instance_divisor = 0; - r->vertex_elems[4].vertex_buffer_index = 1; - r->vertex_elems[4].nr_components = 2; - r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* First ref surface bottom field texcoord element */ - r->vertex_elems[5].src_offset = sizeof(struct vertex2f); - r->vertex_elems[5].instance_divisor = 0; - r->vertex_elems[5].vertex_buffer_index = 1; - r->vertex_elems[5].nr_components = 2; - r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* Second ref surface top field texcoord element */ - r->vertex_elems[6].src_offset = 0; - r->vertex_elems[6].instance_divisor = 0; - r->vertex_elems[6].vertex_buffer_index = 2; - r->vertex_elems[6].nr_components = 2; - r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* Second ref surface bottom field texcoord element */ - r->vertex_elems[7].src_offset = sizeof(struct vertex2f); - r->vertex_elems[7].instance_divisor = 0; - r->vertex_elems[7].vertex_buffer_index = 2; - r->vertex_elems[7].nr_components = 2; - r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT; - r->vs_const_buf = pipe_buffer_create ( r->pipe->screen, @@ -1307,7 +1307,7 @@ flush(struct vl_mpeg12_mc_renderer *r) if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) { r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all); - r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems); + r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[0]); r->pipe->set_fragment_sampler_textures(r->pipe, 3, r->textures.all); r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all); r->pipe->bind_vs_state(r->pipe, r->i_vs); @@ -1320,7 +1320,7 @@ flush(struct vl_mpeg12_mc_renderer *r) if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) { r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); - r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); + r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]); r->textures.individual.ref[0] = r->past; r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all); r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all); @@ -1334,7 +1334,7 @@ flush(struct vl_mpeg12_mc_renderer *r) if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) { r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); - r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); + r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]); r->textures.individual.ref[0] = r->past; r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all); r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all); @@ -1348,7 +1348,7 @@ flush(struct vl_mpeg12_mc_renderer *r) if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) { r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); - r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); + r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]); r->textures.individual.ref[0] = r->future; r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all); r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all); @@ -1362,7 +1362,7 @@ flush(struct vl_mpeg12_mc_renderer *r) if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) { r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); - r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); + r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]); r->textures.individual.ref[0] = r->future; r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all); r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all); @@ -1376,7 +1376,7 @@ flush(struct vl_mpeg12_mc_renderer *r) if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) { r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all); - r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems); + r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[2]); r->textures.individual.ref[0] = r->past; r->textures.individual.ref[1] = r->future; r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all); @@ -1391,7 +1391,7 @@ flush(struct vl_mpeg12_mc_renderer *r) if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) { r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all); - r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems); + r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[2]); r->textures.individual.ref[0] = r->past; r->textures.individual.ref[1] = r->future; r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h index f00b8c7b8b1..a11a3e7307b 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h @@ -66,8 +66,8 @@ struct vl_mpeg12_mc_renderer struct pipe_buffer *vs_const_buf; struct pipe_buffer *fs_const_buf; struct pipe_framebuffer_state fb_state; - struct pipe_vertex_element vertex_elems[8]; - + void *vertex_elems[3]; + union { void *all[5]; |