diff options
Diffstat (limited to 'src/gallium')
111 files changed, 3480 insertions, 2032 deletions
diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 5d9d2db7866..91a9b54b362 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -21,9 +21,6 @@ INCLUDES = \ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers \ - -I$(GALLIUM)/src/gallium/include \ - -I$(GALLIUM)/src/gallium/auxiliary \ - -I$(GALLIUM)/src/gallium/drivers \ $(LIBRARY_INCLUDES) @@ -34,10 +31,10 @@ default: depend lib$(LIBNAME).a lib$(LIBNAME).a: $(OBJECTS) $(EXTRA_OBJECTS) Makefile $(TOP)/src/gallium/Makefile.template $(MKLIB) -o $(LIBNAME) -static $(OBJECTS) $(EXTRA_OBJECTS) -depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) +depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) $(GENERATED_SOURCES) rm -f depend touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) 2> /dev/null + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(GENERATED_SOURCES) 2> /dev/null # Emacs tags tags: @@ -45,7 +42,7 @@ tags: # Remove .o and backup files clean: - rm -f $(OBJECTS) lib$(LIBNAME).a depend depend.bak + rm -f $(OBJECTS) $(GENERATED_SOURCES) lib$(LIBNAME).a depend depend.bak # Dummy target install: @@ -54,16 +51,16 @@ install: ##### RULES ##### %.s: %.c - $(CC) -S $(INCLUDES) $(DEFINES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + $(CC) -S $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ %.o: %.c - $(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ %.o: %.cpp - $(CXX) -c $(INCLUDES) $(DEFINES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@ + $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@ %.o: %.S - $(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ sinclude depend diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 
916f5f6c91c..60f9c2ae3c3 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -105,7 +105,6 @@ C_SOURCES = \ util/u_cpu_detect.c \ util/u_dl.c \ util/u_draw_quad.c \ - util/u_format.c \ util/u_format_access.c \ util/u_format_table.c \ util/u_gen_mipmap.c \ @@ -161,6 +160,13 @@ GALLIVM_SOURCES = \ GALLIVM_CPP_SOURCES = \ gallivm/lp_bld_misc.cpp +GENERATED_SOURCES = \ + indices/u_indices_gen.c \ + indices/u_unfilled_gen.c \ + util/u_format_access.c \ + util/u_format_pack.h \ + util/u_format_table.c + ifeq ($(MESA_LLVM),1) C_SOURCES += \ @@ -185,5 +191,9 @@ indices/u_unfilled_gen.c: indices/u_unfilled_gen.py util/u_format_table.c: util/u_format_table.py util/u_format_parse.py util/u_format.csv python util/u_format_table.py util/u_format.csv > $@ +util/u_format_pack.h: util/u_format_pack.py util/u_format_parse.py util/u_format.csv + python util/u_format_pack.py util/u_format.csv > $@ + util/u_format_access.c: util/u_format_access.py util/u_format_parse.py util/u_format.csv python util/u_format_access.py util/u_format.csv > $@ + diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index b531ad2dbd9..47de50bf3ec 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -29,6 +29,13 @@ env.CodeGenerate( ) env.CodeGenerate( + target = File('util/u_format_pack.h').srcnode(), + script = 'util/u_format_pack.py', + source = ['util/u_format.csv'], + command = 'python $SCRIPT $SOURCE > $TARGET' +) + +env.CodeGenerate( target = 'util/u_format_access.c', script = 'util/u_format_access.py', source = ['util/u_format.csv'], @@ -140,7 +147,6 @@ source = [ 'util/u_dump_state.c', 'util/u_dl.c', 'util/u_draw_quad.c', - 'util/u_format.c', 'util/u_format_access.c', 'util/u_format_table.c', 'util/u_gen_mipmap.c', diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index e829492423e..8e321946ced 100644 --- 
a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -161,7 +161,7 @@ struct draw_stage *draw_offset_stage( struct draw_context *draw ) { struct offset_stage *offset = CALLOC_STRUCT(offset_stage); if (offset == NULL) - goto fail; + return NULL; draw_alloc_temp_verts( &offset->stage, 3 ); @@ -176,10 +176,4 @@ struct draw_stage *draw_offset_stage( struct draw_context *draw ) offset->stage.destroy = offset_destroy; return &offset->stage; - - fail: - if (offset) - offset->stage.destroy( &offset->stage ); - - return NULL; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 54b31befe6d..bbce31f9eb2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -614,6 +614,22 @@ lp_build_max(struct lp_build_context *bld, /** + * Generate clamp(a, min, max) + * Do checks for special cases. + */ +LLVMValueRef +lp_build_clamp(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef min, + LLVMValueRef max) +{ + a = lp_build_min(bld, a, max); + a = lp_build_max(bld, a, min); + return a; +} + + +/** * Generate abs(a) */ LLVMValueRef @@ -693,6 +709,29 @@ lp_build_sgn(struct lp_build_context *bld, } +/** + * Convert vector of int to vector of float. + */ +LLVMValueRef +lp_build_int_to_float(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + /*assert(lp_check_value(type, a));*/ + + { + LLVMTypeRef vec_type = lp_build_vec_type(type); + /*LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);*/ + LLVMValueRef res; + res = LLVMBuildSIToFP(bld->builder, a, vec_type, ""); + return res; + } +} + + + enum lp_build_round_sse41_mode { LP_BUILD_ROUND_SSE41_NEAREST = 0, @@ -819,7 +858,7 @@ lp_build_ceil(struct lp_build_context *bld, /** * Convert to integer, through whichever rounding method that's fastest, - * typically truncating to zero. 
+ * typically truncating toward zero. */ LLVMValueRef lp_build_itrunc(struct lp_build_context *bld, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 62be4b9aee1..da84b7ca02d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -107,6 +107,12 @@ lp_build_max(struct lp_build_context *bld, LLVMValueRef b); LLVMValueRef +lp_build_clamp(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef min, + LLVMValueRef max); + +LLVMValueRef lp_build_abs(struct lp_build_context *bld, LLVMValueRef a); @@ -115,6 +121,10 @@ lp_build_sgn(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef +lp_build_int_to_float(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef lp_build_round(struct lp_build_context *bld, LLVMValueRef a); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.c b/src/gallium/auxiliary/gallivm/lp_bld_depth.c index d438c0e63d7..f08f8eb6d8b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_depth.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.c @@ -171,7 +171,7 @@ lp_build_depth_test(LLVMBuilderRef builder, unsigned padding_right; unsigned chan; - assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); assert(format_desc->channel[z_swizzle].size <= format_desc->block.bits); assert(format_desc->channel[z_swizzle].normalized); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index dfa080b8533..a07f7418f2c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -70,7 +70,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, unsigned i; /* FIXME: Support more formats */ - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(desc->layout == 
UTIL_FORMAT_LAYOUT_PLAIN); assert(desc->block.width == 1); assert(desc->block.height == 1); assert(desc->block.bits <= 32); @@ -189,7 +189,7 @@ lp_build_unpack_rgba8_aos(LLVMBuilderRef builder, lp_build_context_init(&bld, builder, type); /* FIXME: Support more formats */ - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(desc->block.width == 1); assert(desc->block.height == 1); assert(desc->block.bits <= 32); @@ -303,7 +303,7 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, unsigned shift; unsigned i, j; - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(desc->block.width == 1); assert(desc->block.height == 1); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index 64151d169da..abb27e4c328 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -92,9 +92,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, unsigned chan; /* FIXME: Support more formats */ - assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH || - (format_desc->layout == UTIL_FORMAT_LAYOUT_ARRAY && - format_desc->block.bits == format_desc->channel[0].size)); + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(format_desc->block.width == 1); assert(format_desc->block.height == 1); assert(format_desc->block.bits <= 32); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 4cf28a9f934..81b0ab760ec 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -52,6 +52,7 @@ #include "lp_bld_swizzle.h" #include "lp_bld_flow.h" #include "lp_bld_tgsi.h" +#include "lp_bld_debug.h" #define LP_MAX_TEMPS 256 @@ -81,6 +82,23 @@ #define QUAD_BOTTOM_LEFT 2 #define QUAD_BOTTOM_RIGHT 3 +#define LP_TGSI_MAX_NESTING 16 + +struct 
lp_exec_mask { + struct lp_build_context *bld; + + boolean has_mask; + + LLVMTypeRef int_vec_type; + + LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING]; + int cond_stack_size; + LLVMValueRef cond_mask; + + LLVMValueRef exec_mask; + + LLVMValueRef inv_mask; +}; struct lp_build_tgsi_soa_context { @@ -97,9 +115,9 @@ struct lp_build_tgsi_soa_context LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; struct lp_build_mask_context *mask; + struct lp_exec_mask exec_mask; }; - static const unsigned char swizzle_left[4] = { QUAD_TOP_LEFT, QUAD_TOP_LEFT, @@ -124,6 +142,70 @@ swizzle_bottom[4] = { QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT }; +static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) +{ + mask->bld = bld; + mask->has_mask = FALSE; + mask->cond_stack_size = 0; + + mask->int_vec_type = lp_build_int_vec_type(mask->bld->type); + mask->inv_mask = + LLVMConstSub(LLVMConstNull(mask->int_vec_type), + LLVMConstAllOnes(mask->int_vec_type)); +} + +static void lp_exec_mask_update(struct lp_exec_mask *mask) +{ + mask->exec_mask = mask->cond_mask; + if (mask->cond_stack_size > 0) + mask->has_mask = TRUE; +} + +static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, + LLVMValueRef val) +{ + mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; + mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val, + mask->int_vec_type, ""); + + lp_exec_mask_update(mask); +} + +static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) +{ + LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1]; + LLVMValueRef inv_mask = LLVMBuildXor(mask->bld->builder, + mask->cond_mask, + mask->inv_mask, ""); + mask->cond_mask = LLVMBuildAnd(mask->bld->builder, + inv_mask, + prev_mask, ""); + lp_exec_mask_update(mask); +} + +static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) +{ + mask->cond_mask = mask->cond_stack[--mask->cond_stack_size]; + lp_exec_mask_update(mask); +} + +static void lp_exec_mask_store(struct lp_exec_mask *mask, + 
LLVMValueRef val, + LLVMValueRef dst) +{ + if (mask->has_mask) { + LLVMValueRef real_val, dst_val; + + dst_val = LLVMBuildLoad(mask->bld->builder, dst, ""); + real_val = lp_build_select(mask->bld, + mask->exec_mask, + val, dst_val); + + LLVMBuildStore(mask->bld->builder, real_val, dst); + } else + LLVMBuildStore(mask->bld->builder, val, dst); +} + static LLVMValueRef emit_ddx(struct lp_build_tgsi_soa_context *bld, @@ -287,13 +369,13 @@ emit_store( switch( reg->Register.File ) { case TGSI_FILE_OUTPUT: - LLVMBuildStore(bld->base.builder, value, - bld->outputs[reg->Register.Index][chan_index]); + lp_exec_mask_store(&bld->exec_mask, value, + bld->outputs[reg->Register.Index][chan_index]); break; case TGSI_FILE_TEMPORARY: - LLVMBuildStore(bld->base.builder, value, - bld->temps[reg->Register.Index][chan_index]); + lp_exec_mask_store(&bld->exec_mask, value, + bld->temps[reg->Register.Index][chan_index]); break; case TGSI_FILE_ADDRESS: @@ -1272,8 +1354,8 @@ emit_instruction( break; case TGSI_OPCODE_IF: - /* FIXME */ - return 0; + tmp0 = emit_fetch(bld, inst, 0, CHAN_X); + lp_exec_mask_cond_push(&bld->exec_mask, tmp0); break; case TGSI_OPCODE_BGNFOR: @@ -1289,13 +1371,11 @@ emit_instruction( break; case TGSI_OPCODE_ELSE: - /* FIXME */ - return 0; + lp_exec_mask_cond_invert(&bld->exec_mask); break; case TGSI_OPCODE_ENDIF: - /* FIXME */ - return 0; + lp_exec_mask_cond_pop(&bld->exec_mask); break; case TGSI_OPCODE_ENDFOR: @@ -1458,6 +1538,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, bld.consts_ptr = consts_ptr; bld.sampler = sampler; + lp_exec_mask_init(&bld.exec_mask, &bld.base); + tgsi_parse_init( &parse, tokens ); while( !tgsi_parse_end_of_tokens( &parse ) ) { diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 95eb5f65635..d97f749b6ed 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -696,7 +696,7 @@ fenced_buffer_map(struct 
pb_buffer *buf, * Don't wait for the GPU to finish accessing it, if blocking is forbidden. */ if((flags & PIPE_BUFFER_USAGE_DONTBLOCK) && - ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { + ops->fence_signalled(ops, fenced_buf->fence, 0) != 0) { goto done; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 53bc019a204..86f9266c95f 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -294,7 +294,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, LIST_DEL(&buf->head); pipe_mutex_unlock(mgr->mutex); /* Increase refcount */ - pipe_reference(NULL, &buf->base.base.reference); + pipe_reference_init(&buf->base.base.reference, 1); return &buf->base; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index c2593cf1653..a5dbded2bce 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -179,7 +179,9 @@ pb_debug_buffer_check(struct pb_debug_buffer *buf) { uint8_t *map; - map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map = pb_map(buf->buffer, + PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_UNSYNCHRONIZED); assert(map); if(map) { boolean underflow, overflow; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index c445cb578b0..24e2820f881 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -483,11 +483,15 @@ pb_slab_range_manager_create_buffer(struct pb_manager *_mgr, { struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr); pb_size bufSize; + pb_size reqSize = size; unsigned i; + if(desc->alignment > reqSize) + reqSize = desc->alignment; + bufSize = mgr->minBufSize; for (i = 0; i < mgr->numBuckets; ++i) { - if(bufSize >= size) + 
if(bufSize >= reqSize) return mgr->buckets[i]->create_buffer(mgr->buckets[i], size, desc); bufSize *= 2; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 593c3cbfb38..f853ea2820e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -110,6 +110,42 @@ micro_ceil(union tgsi_exec_channel *dst, } static void +micro_clamp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0]; + dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1]; + dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2]; + dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3]; +} + +static void +micro_cmp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0]; + dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1]; + dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2]; + dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3]; +} + +static void +micro_cnd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + dst->f[0] = src2->f[0] > 0.5f ? src0->f[0] : src1->f[0]; + dst->f[1] = src2->f[1] > 0.5f ? src0->f[1] : src1->f[1]; + dst->f[2] = src2->f[2] > 0.5f ? src0->f[2] : src1->f[2]; + dst->f[3] = src2->f[3] > 0.5f ? 
src0->f[3] : src1->f[3]; +} + +static void micro_cos(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { @@ -960,18 +996,6 @@ micro_pow( #endif } -#if 0 -static void -micro_sqrt( union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = sqrtf( src->f[0] ); - dst->f[1] = sqrtf( src->f[1] ); - dst->f[2] = sqrtf( src->f[2] ); - dst->f[3] = sqrtf( src->f[3] ); -} -#endif - static void micro_sub(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, @@ -2665,15 +2689,7 @@ exec_instruction( break; case TGSI_OPCODE_CND: - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DP2A: @@ -2685,16 +2701,7 @@ exec_instruction( break; case TGSI_OPCODE_CLAMP: - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - micro_max(&r[0], &r[0], &r[1]); - FETCH(&r[1], 2, chan_index); - micro_min(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_FLR: @@ -3088,15 +3095,7 @@ exec_instruction( break; case TGSI_OPCODE_CMP: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, 
TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SCS: diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 91e1b27da12..371f690b295 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -321,6 +321,9 @@ iter_instruction( reg, "destination", FALSE ); + if (!inst->Dst[i].Register.WriteMask) { + report_error(ctx, "Destination register has empty writemask"); + } } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { scan_register *reg = create_scan_register_src(&inst->Src[i]); diff --git a/src/gallium/auxiliary/util/.gitignore b/src/gallium/auxiliary/util/.gitignore index 29c586c9b51..448d2f304fb 100644 --- a/src/gallium/auxiliary/util/.gitignore +++ b/src/gallium/auxiliary/util/.gitignore @@ -1,2 +1,3 @@ u_format_access.c u_format_table.c +u_format_pack.h diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 01f7931aed1..a7bd6abf819 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -1,109 +1,199 @@ -PIPE_FORMAT_A8R8G8B8_UNORM , arith , 1, 1, un8 , un8 , un8 , un8 , zyxw, rgb -PIPE_FORMAT_X8R8G8B8_UNORM , arith , 1, 1, un8 , un8 , un8 , un8 , zyx1, rgb -PIPE_FORMAT_B8G8R8A8_UNORM , arith , 1, 1, un8 , un8 , un8 , un8 , yzwx, rgb -PIPE_FORMAT_B8G8R8X8_UNORM , arith , 1, 1, un8 , un8 , un8 , un8 , yzw1, rgb -PIPE_FORMAT_A1R5G5B5_UNORM , arith , 1, 1, un5 , un5 , un5 , un1 , zyxw, rgb -PIPE_FORMAT_A4R4G4B4_UNORM , arith , 1, 1, un4 , un4 , un4 , un4 , zyxw, rgb -PIPE_FORMAT_R5G6B5_UNORM , arith , 1, 1, un5 , un6 , un5 , , zyx1, rgb -PIPE_FORMAT_A2B10G10R10_UNORM , arith , 1, 1, un10, un10, un10, un2 , xyzw, rgb -PIPE_FORMAT_L8_UNORM , arith , 1, 1, un8 , , , , xxx1, rgb -PIPE_FORMAT_A8_UNORM , arith , 1, 1, un8 , , , , 000x, rgb -PIPE_FORMAT_I8_UNORM , arith , 1, 1, un8 , , , , xxxx, rgb -PIPE_FORMAT_A8L8_UNORM , arith , 1, 1, un8 , un8 , , , xxxy, rgb -PIPE_FORMAT_L16_UNORM , 
arith , 1, 1, un16, , , , xxx1, rgb -PIPE_FORMAT_YCBCR , yuv , 2, 1, x32 , , , , xyz1, yuv -PIPE_FORMAT_YCBCR_REV , yuv , 2, 1, x32 , , , , xyz1, yuv -PIPE_FORMAT_Z16_UNORM , array , 1, 1, un16, , , , x___, zs -PIPE_FORMAT_Z32_UNORM , array , 1, 1, un32, , , , x___, zs -PIPE_FORMAT_Z32_FLOAT , array , 1, 1, f32 , , , , x___, zs -PIPE_FORMAT_S8Z24_UNORM , arith , 1, 1, un24, un8 , , , xy__, zs -PIPE_FORMAT_Z24S8_UNORM , arith , 1, 1, un8 , un24, , , yx__, zs -PIPE_FORMAT_X8Z24_UNORM , arith , 1, 1, un24, un8 , , , x___, zs -PIPE_FORMAT_Z24X8_UNORM , arith , 1, 1, un8 , un24, , , y___, zs -PIPE_FORMAT_S8_UNORM , array , 1, 1, un8 , , , , _x__, zs -PIPE_FORMAT_R64_FLOAT , array , 1, 1, f64 , , , , x001, rgb -PIPE_FORMAT_R64G64_FLOAT , array , 1, 1, f64 , f64 , , , xy01, rgb -PIPE_FORMAT_R64G64B64_FLOAT , array , 1, 1, f64 , f64 , f64 , , xyz1, rgb -PIPE_FORMAT_R64G64B64A64_FLOAT , array , 1, 1, f64 , f64 , f64 , f64 , xyzw, rgb -PIPE_FORMAT_R32_FLOAT , array , 1, 1, f32 , , , , x001, rgb -PIPE_FORMAT_R32G32_FLOAT , array , 1, 1, f32 , f32 , , , xy01, rgb -PIPE_FORMAT_R32G32B32_FLOAT , array , 1, 1, f32 , f32 , f32 , , xyz1, rgb -PIPE_FORMAT_R32G32B32A32_FLOAT , array , 1, 1, f32 , f32 , f32 , f32 , xyzw, rgb -PIPE_FORMAT_R32_UNORM , array , 1, 1, un32, , , , x001, rgb -PIPE_FORMAT_R32G32_UNORM , array , 1, 1, un32, un32, , , xy01, rgb -PIPE_FORMAT_R32G32B32_UNORM , array , 1, 1, un32, un32, un32, , xyz1, rgb -PIPE_FORMAT_R32G32B32A32_UNORM , array , 1, 1, un32, un32, un32, un32, xyzw, rgb -PIPE_FORMAT_R32_USCALED , array , 1, 1, u32 , , , , x001, rgb -PIPE_FORMAT_R32G32_USCALED , array , 1, 1, u32 , u32 , , , xy01, rgb -PIPE_FORMAT_R32G32B32_USCALED , array , 1, 1, u32 , u32 , u32 , , xyz1, rgb -PIPE_FORMAT_R32G32B32A32_USCALED , array , 1, 1, u32 , u32 , u32 , u32 , xyzw, rgb -PIPE_FORMAT_R32_SNORM , array , 1, 1, sn32, , , , x001, rgb -PIPE_FORMAT_R32G32_SNORM , array , 1, 1, sn32, sn32, , , xy01, rgb -PIPE_FORMAT_R32G32B32_SNORM , array , 1, 1, sn32, sn32, sn32, , 
xyz1, rgb -PIPE_FORMAT_R32G32B32A32_SNORM , array , 1, 1, sn32, sn32, sn32, sn32, xyzw, rgb -PIPE_FORMAT_R32_SSCALED , array , 1, 1, s32 , , , , x001, rgb -PIPE_FORMAT_R32G32_SSCALED , array , 1, 1, s32 , s32 , , , xy01, rgb -PIPE_FORMAT_R32G32B32_SSCALED , array , 1, 1, s32 , s32 , s32 , , xyz1, rgb -PIPE_FORMAT_R32G32B32A32_SSCALED , array , 1, 1, s32 , s32 , s32 , s32 , xyzw, rgb -PIPE_FORMAT_R16_UNORM , array , 1, 1, un16, , , , x001, rgb -PIPE_FORMAT_R16G16_UNORM , array , 1, 1, un16, un16, , , xy01, rgb -PIPE_FORMAT_R16G16B16_UNORM , array , 1, 1, un16, un16, un16, , xyz1, rgb -PIPE_FORMAT_R16G16B16A16_UNORM , array , 1, 1, un16, un16, un16, un16, xyzw, rgb -PIPE_FORMAT_R16_USCALED , array , 1, 1, u16 , , , , x001, rgb -PIPE_FORMAT_R16G16_USCALED , array , 1, 1, u16 , u16 , , , xy01, rgb -PIPE_FORMAT_R16G16B16_USCALED , array , 1, 1, u16 , u16 , u16 , , xyz1, rgb -PIPE_FORMAT_R16G16B16A16_USCALED , array , 1, 1, u16 , u16 , u16 , u16 , xyzw, rgb -PIPE_FORMAT_R16_SNORM , array , 1, 1, sn16, , , , x001, rgb -PIPE_FORMAT_R16G16_SNORM , array , 1, 1, sn16, sn16, , , xy01, rgb -PIPE_FORMAT_R16G16B16_SNORM , array , 1, 1, sn16, sn16, sn16, , xyz1, rgb -PIPE_FORMAT_R16G16B16A16_SNORM , array , 1, 1, sn16, sn16, sn16, sn16, xyzw, rgb -PIPE_FORMAT_R16_SSCALED , array , 1, 1, s16 , , , , x001, rgb -PIPE_FORMAT_R16G16_SSCALED , array , 1, 1, s16 , s16 , , , xy01, rgb -PIPE_FORMAT_R16G16B16_SSCALED , array , 1, 1, s16 , s16 , s16 , , xyz1, rgb -PIPE_FORMAT_R16G16B16A16_SSCALED , array , 1, 1, s16 , s16 , s16 , s16 , xyzw, rgb -PIPE_FORMAT_R8_UNORM , array , 1, 1, un8 , , , , x001, rgb -PIPE_FORMAT_R8G8_UNORM , array , 1, 1, un8 , un8 , , , yx01, rgb -PIPE_FORMAT_R8G8B8_UNORM , array , 1, 1, un8 , un8 , un8 , , zyx1, rgb -PIPE_FORMAT_R8G8B8A8_UNORM , array , 1, 1, un8 , un8 , un8 , un8 , wzyx, rgb -PIPE_FORMAT_R8G8B8X8_UNORM , array , 1, 1, un8 , un8 , un8 , un8 , wzy1, rgb -PIPE_FORMAT_R8_USCALED , array , 1, 1, u8 , , , , x001, rgb -PIPE_FORMAT_R8G8_USCALED , array , 1, 
1, u8 , u8 , , , xy01, rgb -PIPE_FORMAT_R8G8B8_USCALED , array , 1, 1, u8 , u8 , u8 , , xyz1, rgb -PIPE_FORMAT_R8G8B8A8_USCALED , array , 1, 1, u8 , u8 , u8 , u8 , xyzw, rgb -PIPE_FORMAT_R8G8B8X8_USCALED , array , 1, 1, u8 , u8 , u8 , u8 , xyz1, rgb -PIPE_FORMAT_R8_SNORM , array , 1, 1, sn8 , , , , x001, rgb -PIPE_FORMAT_R8G8_SNORM , array , 1, 1, sn8 , sn8 , , , xy01, rgb -PIPE_FORMAT_R8G8B8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , , xyz1, rgb -PIPE_FORMAT_R8G8B8A8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , sn8 , xyzw, rgb -PIPE_FORMAT_R8G8B8X8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , sn8 , xyz1, rgb -PIPE_FORMAT_B6G5R5_SNORM , arith , 1, 1, sn5 , sn5 , sn6 , , xyz1, rgb -PIPE_FORMAT_A8B8G8R8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , sn8 , wzyx, rgb -PIPE_FORMAT_X8B8G8R8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , sn8 , wzy1, rgb -PIPE_FORMAT_R8_SSCALED , array , 1, 1, s8 , , , , x001, rgb -PIPE_FORMAT_R8G8_SSCALED , array , 1, 1, s8 , s8 , , , xy01, rgb -PIPE_FORMAT_R8G8B8_SSCALED , array , 1, 1, s8 , s8 , s8 , , xyz1, rgb -PIPE_FORMAT_R8G8B8A8_SSCALED , array , 1, 1, s8 , s8 , s8 , s8 , xyzw, rgb -PIPE_FORMAT_R8G8B8X8_SSCALED , array , 1, 1, s8 , s8 , s8 , s8 , xyz1, rgb -PIPE_FORMAT_R32_FIXED , array , 1, 1, h32 , , , , x001, rgb -PIPE_FORMAT_R32G32_FIXED , array , 1, 1, h32 , h32 , , , xy01, rgb -PIPE_FORMAT_R32G32B32_FIXED , array , 1, 1, h32 , h32 , h32 , , xyz1, rgb -PIPE_FORMAT_R32G32B32A32_FIXED , array , 1, 1, h32 , h32 , h32 , h32 , xyzw, rgb -PIPE_FORMAT_L8_SRGB , arith , 1, 1, u8 , , , , xxx1, srgb -PIPE_FORMAT_A8L8_SRGB , arith , 1, 1, u8 , u8 , , , xxxy, srgb -PIPE_FORMAT_R8G8B8_SRGB , array , 1, 1, u8 , u8 , u8 , , xyz1, srgb -PIPE_FORMAT_R8G8B8A8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , xyzw, srgb -PIPE_FORMAT_R8G8B8X8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , xyz1, srgb -PIPE_FORMAT_A8R8G8B8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , yzwx, srgb -PIPE_FORMAT_X8R8G8B8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , yzw1, srgb -PIPE_FORMAT_B8G8R8A8_SRGB , array , 1, 1, 
u8 , u8 , u8 , u8 , zyxw, srgb -PIPE_FORMAT_B8G8R8X8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , zyx1, srgb -PIPE_FORMAT_X8UB8UG8SR8S_NORM , array , 1, 1, sn8 , sn8 , un8 , x8 , wzy1, rgb -PIPE_FORMAT_B6UG5SR5S_NORM , arith , 1, 1, sn5 , sn5 , un6 , , xyz1, rgb -PIPE_FORMAT_DXT1_RGB , dxt , 4, 4, x64 , , , , xyz1, rgb -PIPE_FORMAT_DXT1_RGBA , dxt , 4, 4, x64 , , , , xyzw, rgb -PIPE_FORMAT_DXT3_RGBA , dxt , 4, 4, x128, , , , xyzw, rgb -PIPE_FORMAT_DXT5_RGBA , dxt , 4, 4, x128, , , , xyzw, rgb -PIPE_FORMAT_DXT1_SRGB , dxt , 4, 4, x64 , , , , xyz1, srgb -PIPE_FORMAT_DXT1_SRGBA , dxt , 4, 4, x64 , , , , xyzw, srgb -PIPE_FORMAT_DXT3_SRGBA , dxt , 4, 4, x128, , , , xyzw, srgb -PIPE_FORMAT_DXT5_SRGBA , dxt , 4, 4, x128, , , , xyzw, srgb +########################################################################### +# +# Copyright 2009-2010 VMware, Inc. +# All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sub license, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice (including the +# next paragraph) shall be included in all copies or substantial portions +# of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR +# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# +########################################################################### + +# This CSV file has the input data for u_format.h's struct +# util_format_description. +# +# Each format entry contains: +# - name, per enum pipe_format +# - layout, per enum util_format_layout, in shortened lowercase +# - pixel block's width +# - pixel block's height +# - channel encoding (only meaningful for plain layout), containing for each +# channel the following information: +# - type, one of +# - 'x': void +# - 'u': unsigned +# - 's': signed +# - 'h': fixed +# - 'f': float +# - optionally followed by 'n' if it is normalized +# - number of bits +# - channel swizzle +# - color space: rgb, srgb, yuv, zs +# +# See also: +# - http://msdn.microsoft.com/en-us/library/ee416489.aspx +# - http://msdn.microsoft.com/en-us/library/ee415668.aspx +# +# Note that GL doesn't really specify the layout of internal formats. See +# OpenGL 2.1 specification, Table 3.16, on the "Correspondence of sized +# internal formats to base internal formats, and desired component +# resolutions for each sized internal format." 
+ +# Typical rendertarget formats +PIPE_FORMAT_A8R8G8B8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , zyxw, rgb +PIPE_FORMAT_X8R8G8B8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , zyx1, rgb +PIPE_FORMAT_B8G8R8A8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , yzwx, rgb +PIPE_FORMAT_B8G8R8X8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , yzw1, rgb +PIPE_FORMAT_R8G8B8X8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , wzy1, rgb +# XXX: insert PIPE_FORMAT_R8G8B8A8_UNORM here later +# XXX: insert PIPE_FORMAT_R8G8B8_UNORM here later +PIPE_FORMAT_A1R5G5B5_UNORM , plain, 1, 1, un5 , un5 , un5 , un1 , zyxw, rgb +PIPE_FORMAT_A4R4G4B4_UNORM , plain, 1, 1, un4 , un4 , un4 , un4 , zyxw, rgb +PIPE_FORMAT_R5G6B5_UNORM , plain, 1, 1, un5 , un6 , un5 , , zyx1, rgb +PIPE_FORMAT_A2B10G10R10_UNORM , plain, 1, 1, un10, un10, un10, un2 , xyzw, rgb + +# Luminance/Intensity/Alpha formats +PIPE_FORMAT_L8_UNORM , plain, 1, 1, un8 , , , , xxx1, rgb +PIPE_FORMAT_A8_UNORM , plain, 1, 1, un8 , , , , 000x, rgb +PIPE_FORMAT_I8_UNORM , plain, 1, 1, un8 , , , , xxxx, rgb +PIPE_FORMAT_A8L8_UNORM , plain, 1, 1, un8 , un8 , , , xxxy, rgb +PIPE_FORMAT_L16_UNORM , plain, 1, 1, un16, , , , xxx1, rgb + +# SRGB formats +PIPE_FORMAT_L8_SRGB , plain, 1, 1, un8 , , , , xxx1, srgb +PIPE_FORMAT_A8L8_SRGB , plain, 1, 1, un8 , un8 , , , xxxy, srgb +PIPE_FORMAT_R8G8B8_SRGB , plain, 1, 1, un8 , un8 , un8 , , zyx1, srgb +PIPE_FORMAT_R8G8B8A8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , wzyx, srgb +PIPE_FORMAT_R8G8B8X8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , wzy1, srgb +PIPE_FORMAT_A8R8G8B8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , zyxw, srgb +PIPE_FORMAT_X8R8G8B8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , zyx1, srgb +PIPE_FORMAT_B8G8R8A8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , yzwx, srgb +PIPE_FORMAT_B8G8R8X8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , yzw1, srgb + +# Signed formats (typically used for bump map textures) +PIPE_FORMAT_A8B8G8R8_SNORM , plain, 1, 1, sn8 , sn8 , sn8 , sn8 , xyzw, rgb 
+PIPE_FORMAT_X8B8G8R8_SNORM , plain, 1, 1, sn8 , sn8 , sn8 , sn8 , xyz1, rgb +PIPE_FORMAT_X8UB8UG8SR8S_NORM , plain, 1, 1, sn8 , sn8 , un8 , x8 , xyz1, rgb +PIPE_FORMAT_B6UG5SR5S_NORM , plain, 1, 1, sn5 , sn5 , un6 , , xyz1, rgb + +# Depth-stencil formats +PIPE_FORMAT_S8_UNORM , plain, 1, 1, un8 , , , , _x__, zs +PIPE_FORMAT_Z16_UNORM , plain, 1, 1, un16, , , , x___, zs +PIPE_FORMAT_Z32_UNORM , plain, 1, 1, un32, , , , x___, zs +PIPE_FORMAT_Z32_FLOAT , plain, 1, 1, f32 , , , , x___, zs +PIPE_FORMAT_S8Z24_UNORM , plain, 1, 1, un24, un8 , , , xy__, zs +PIPE_FORMAT_Z24S8_UNORM , plain, 1, 1, un8 , un24, , , yx__, zs +PIPE_FORMAT_X8Z24_UNORM , plain, 1, 1, un24, un8 , , , x___, zs +PIPE_FORMAT_Z24X8_UNORM , plain, 1, 1, un8 , un24, , , y___, zs + +# YUV formats +# http://www.fourcc.org/yuv.php#UYVY +PIPE_FORMAT_YCBCR , subsampled, 2, 1, x32 , , , , xyz1, yuv +# http://www.fourcc.org/yuv.php#YUYV (a.k.a http://www.fourcc.org/yuv.php#YUY2) +# XXX: u_tile.c's ycbcr_get_tile_rgba actually interprets it as VYUY but the +# intent should be to match D3DFMT_YUY2 +PIPE_FORMAT_YCBCR_REV , subsampled, 2, 1, x32 , , , , xyz1, yuv + +# Compressed formats +PIPE_FORMAT_DXT1_RGB , compressed, 4, 4, x64 , , , , xyz1, rgb +PIPE_FORMAT_DXT1_RGBA , compressed, 4, 4, x64 , , , , xyzw, rgb +PIPE_FORMAT_DXT3_RGBA , compressed, 4, 4, x128, , , , xyzw, rgb +PIPE_FORMAT_DXT5_RGBA , compressed, 4, 4, x128, , , , xyzw, rgb +PIPE_FORMAT_DXT1_SRGB , compressed, 4, 4, x64 , , , , xyz1, srgb +PIPE_FORMAT_DXT1_SRGBA , compressed, 4, 4, x64 , , , , xyzw, srgb +PIPE_FORMAT_DXT3_SRGBA , compressed, 4, 4, x128, , , , xyzw, srgb +PIPE_FORMAT_DXT5_SRGBA , compressed, 4, 4, x128, , , , xyzw, srgb + +# Old vector formats +# XXX: Swizzle notation is reversed for these! 
+# See also: +# - src/gallium/auxiliary/translate/translate_generic.c +# - src/mesa/state_tracker/st_draw.c +PIPE_FORMAT_R64_FLOAT , plain, 1, 1, f64 , , , , x001, rgb +PIPE_FORMAT_R64G64_FLOAT , plain, 1, 1, f64 , f64 , , , xy01, rgb +PIPE_FORMAT_R64G64B64_FLOAT , plain, 1, 1, f64 , f64 , f64 , , xyz1, rgb +PIPE_FORMAT_R64G64B64A64_FLOAT , plain, 1, 1, f64 , f64 , f64 , f64 , xyzw, rgb +PIPE_FORMAT_R32_FLOAT , plain, 1, 1, f32 , , , , x001, rgb +PIPE_FORMAT_R32G32_FLOAT , plain, 1, 1, f32 , f32 , , , xy01, rgb +PIPE_FORMAT_R32G32B32_FLOAT , plain, 1, 1, f32 , f32 , f32 , , xyz1, rgb +PIPE_FORMAT_R32G32B32A32_FLOAT , plain, 1, 1, f32 , f32 , f32 , f32 , xyzw, rgb +PIPE_FORMAT_R32_UNORM , plain, 1, 1, un32, , , , x001, rgb +PIPE_FORMAT_R32G32_UNORM , plain, 1, 1, un32, un32, , , xy01, rgb +PIPE_FORMAT_R32G32B32_UNORM , plain, 1, 1, un32, un32, un32, , xyz1, rgb +PIPE_FORMAT_R32G32B32A32_UNORM , plain, 1, 1, un32, un32, un32, un32, xyzw, rgb +PIPE_FORMAT_R32_USCALED , plain, 1, 1, u32 , , , , x001, rgb +PIPE_FORMAT_R32G32_USCALED , plain, 1, 1, u32 , u32 , , , xy01, rgb +PIPE_FORMAT_R32G32B32_USCALED , plain, 1, 1, u32 , u32 , u32 , , xyz1, rgb +PIPE_FORMAT_R32G32B32A32_USCALED , plain, 1, 1, u32 , u32 , u32 , u32 , xyzw, rgb +PIPE_FORMAT_R32_SNORM , plain, 1, 1, sn32, , , , x001, rgb +PIPE_FORMAT_R32G32_SNORM , plain, 1, 1, sn32, sn32, , , xy01, rgb +PIPE_FORMAT_R32G32B32_SNORM , plain, 1, 1, sn32, sn32, sn32, , xyz1, rgb +PIPE_FORMAT_R32G32B32A32_SNORM , plain, 1, 1, sn32, sn32, sn32, sn32, xyzw, rgb +PIPE_FORMAT_R32_SSCALED , plain, 1, 1, s32 , , , , x001, rgb +PIPE_FORMAT_R32G32_SSCALED , plain, 1, 1, s32 , s32 , , , xy01, rgb +PIPE_FORMAT_R32G32B32_SSCALED , plain, 1, 1, s32 , s32 , s32 , , xyz1, rgb +PIPE_FORMAT_R32G32B32A32_SSCALED , plain, 1, 1, s32 , s32 , s32 , s32 , xyzw, rgb +PIPE_FORMAT_R32_FIXED , plain, 1, 1, h32 , , , , x001, rgb +PIPE_FORMAT_R32G32_FIXED , plain, 1, 1, h32 , h32 , , , xy01, rgb +PIPE_FORMAT_R32G32B32_FIXED , plain, 1, 1, h32 , h32 , 
h32 , , xyz1, rgb +PIPE_FORMAT_R32G32B32A32_FIXED , plain, 1, 1, h32 , h32 , h32 , h32 , xyzw, rgb +PIPE_FORMAT_R16_UNORM , plain, 1, 1, un16, , , , x001, rgb +PIPE_FORMAT_R16G16_UNORM , plain, 1, 1, un16, un16, , , xy01, rgb +PIPE_FORMAT_R16G16B16_UNORM , plain, 1, 1, un16, un16, un16, , xyz1, rgb +PIPE_FORMAT_R16G16B16A16_UNORM , plain, 1, 1, un16, un16, un16, un16, xyzw, rgb +PIPE_FORMAT_R16_USCALED , plain, 1, 1, u16 , , , , x001, rgb +PIPE_FORMAT_R16G16_USCALED , plain, 1, 1, u16 , u16 , , , xy01, rgb +PIPE_FORMAT_R16G16B16_USCALED , plain, 1, 1, u16 , u16 , u16 , , xyz1, rgb +PIPE_FORMAT_R16G16B16A16_USCALED , plain, 1, 1, u16 , u16 , u16 , u16 , xyzw, rgb +PIPE_FORMAT_R16_SNORM , plain, 1, 1, sn16, , , , x001, rgb +PIPE_FORMAT_R16G16_SNORM , plain, 1, 1, sn16, sn16, , , xy01, rgb +PIPE_FORMAT_R16G16B16_SNORM , plain, 1, 1, sn16, sn16, sn16, , xyz1, rgb +PIPE_FORMAT_R16G16B16A16_SNORM , plain, 1, 1, sn16, sn16, sn16, sn16, xyzw, rgb +PIPE_FORMAT_R16_SSCALED , plain, 1, 1, s16 , , , , x001, rgb +PIPE_FORMAT_R16G16_SSCALED , plain, 1, 1, s16 , s16 , , , xy01, rgb +PIPE_FORMAT_R16G16B16_SSCALED , plain, 1, 1, s16 , s16 , s16 , , xyz1, rgb +PIPE_FORMAT_R16G16B16A16_SSCALED , plain, 1, 1, s16 , s16 , s16 , s16 , xyzw, rgb +PIPE_FORMAT_R8_UNORM , plain, 1, 1, un8 , , , , x001, rgb +PIPE_FORMAT_R8G8_UNORM , plain, 1, 1, un8 , un8 , , , xy01, rgb +# XXX: insert PIPE_FORMAT_R8G8B8_UNORM here later +# XXX: insert PIPE_FORMAT_R8G8B8A8_UNORM here later +PIPE_FORMAT_R8_USCALED , plain, 1, 1, u8 , , , , x001, rgb +PIPE_FORMAT_R8G8_USCALED , plain, 1, 1, u8 , u8 , , , xy01, rgb +PIPE_FORMAT_R8G8B8_USCALED , plain, 1, 1, u8 , u8 , u8 , , xyz1, rgb +PIPE_FORMAT_R8G8B8A8_USCALED , plain, 1, 1, u8 , u8 , u8 , u8 , xyzw, rgb +PIPE_FORMAT_R8_SNORM , plain, 1, 1, sn8 , , , , x001, rgb +PIPE_FORMAT_R8G8_SNORM , plain, 1, 1, sn8 , sn8 , , , xy01, rgb +PIPE_FORMAT_R8G8B8_SNORM , plain, 1, 1, sn8 , sn8 , sn8 , , xyz1, rgb +PIPE_FORMAT_R8G8B8A8_SNORM , plain, 1, 1, sn8 , sn8 , sn8 , 
sn8 , xyzw, rgb +PIPE_FORMAT_R8_SSCALED , plain, 1, 1, s8 , , , , x001, rgb +PIPE_FORMAT_R8G8_SSCALED , plain, 1, 1, s8 , s8 , , , xy01, rgb +PIPE_FORMAT_R8G8B8_SSCALED , plain, 1, 1, s8 , s8 , s8 , , xyz1, rgb +PIPE_FORMAT_R8G8B8A8_SSCALED , plain, 1, 1, s8 , s8 , s8 , s8 , xyzw, rgb + +# Ambiguous formats +# FIXME: They are used with different meanings in different places!!! +PIPE_FORMAT_R8G8B8_UNORM , plain, 1, 1, un8 , un8 , un8 , , zyx1, rgb +PIPE_FORMAT_R8G8B8A8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , wzyx, rgb + +# Unused formats +# XXX: Couldn't find any state tracker using them!! +PIPE_FORMAT_B6G5R5_SNORM , plain, 1, 1, sn5 , sn5 , sn6 , , xyz1, rgb +PIPE_FORMAT_R8G8B8X8_SNORM , plain, 1, 1, sn8 , sn8 , sn8 , sn8 , wzy1, rgb +PIPE_FORMAT_R8G8B8X8_USCALED , plain, 1, 1, u8 , u8 , u8 , u8 , wzy1, rgb +PIPE_FORMAT_R8G8B8X8_SSCALED , plain, 1, 1, s8 , s8 , s8 , s8 , wzy1, rgb diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 2fbbb83d4ba..e8fa0022b5b 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 Vmware, Inc. + * Copyright 2009-2010 Vmware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -39,47 +39,32 @@ extern "C" { /** - * Describe how to best pack/unpack pixels into/from the prescribed format. + * Describe how to pack/unpack pixels into/from the prescribed format. * - * These are used for automatic code generation of pixel packing and unpacking - * routines (in compile time, e.g., u_format_access.py, or in runtime, like - * llvmpipe does). - * - * Thumb rule is: if you're not code generating pixel packing/unpacking then - * these are irrelevant for you. - * - * Note that this can be deduced from other values in util_format_description - * structure. 
This is by design, to make code generation of pixel - * packing/unpacking/sampling routines simple and efficient. - * - * XXX: This should be renamed to something like util_format_pack. + * XXX: This could be renamed to something like util_format_pack, or broke down + * in flags inside util_format_block that said exactly what we want. */ enum util_format_layout { /** - * Single scalar component. + * Formats with util_format_block::width == util_format_block::height == 1 + * that can be described as an ordinary data structure. */ - UTIL_FORMAT_LAYOUT_SCALAR = 0, + UTIL_FORMAT_LAYOUT_PLAIN = 0, /** - * One or more components of mixed integer formats, arithmetically encoded - * in a word up to 32bits. + * Formats with sub-sampled channels. + * + * This is for formats like YV12 where there is less than one sample per + * pixel. + * + * XXX: This could actually b */ - UTIL_FORMAT_LAYOUT_ARITH = 1, + UTIL_FORMAT_LAYOUT_SUBSAMPLED = 3, /** - * One or more components, no mixed formats, each with equal power of two - * number of bytes. + * An unspecified compression algorithm. */ - UTIL_FORMAT_LAYOUT_ARRAY = 2, - - /** - * XXX: Not used yet. These might go away and be replaced by a single entry, - * for formats where multiple pixels have to be - * read in order to determine a single pixel value (i.e., block.width > 1 - * || block.height > 1) - */ - UTIL_FORMAT_LAYOUT_YUV = 3, - UTIL_FORMAT_LAYOUT_DXT = 4 + UTIL_FORMAT_LAYOUT_COMPRESSED = 4 }; @@ -136,10 +121,50 @@ struct util_format_description { enum pipe_format format; const char *name; + + /** + * Pixel block dimensions. + */ struct util_format_block block; + enum util_format_layout layout; + + /** + * The number of channels. + */ + unsigned nr_channels:3; + + /** + * Whether all channels have the same number of (whole) bytes. + */ + unsigned is_array:1; + + /** + * Whether channels have mixed types (ignoring UTIL_FORMAT_TYPE_VOID). + */ + unsigned is_mixed:1; + + /** + * Input channel description. 
+ * + * Only valid for UTIL_FORMAT_LAYOUT_PLAIN formats. + */ struct util_format_channel_description channel[4]; + + /** + * Output channel swizzle. + * + * The order is either: + * - RGBA + * - YUV(A) + * - ZS + * depending on the colorspace. + */ unsigned char swizzle[4]; + + /** + * Colorspace transformation. + */ enum util_format_colorspace colorspace; }; @@ -179,7 +204,7 @@ util_format_is_compressed(enum pipe_format format) return FALSE; } - return desc->layout == UTIL_FORMAT_LAYOUT_DXT ? TRUE : FALSE; + return desc->layout == UTIL_FORMAT_LAYOUT_COMPRESSED ? TRUE : FALSE; } static INLINE boolean @@ -253,14 +278,7 @@ util_format_get_blockwidth(enum pipe_format format) return 1; } - switch (desc->layout) { - case UTIL_FORMAT_LAYOUT_YUV: - return 2; - case UTIL_FORMAT_LAYOUT_DXT: - return 4; - default: - return 1; - } + return desc->block.width; } static INLINE uint @@ -273,12 +291,7 @@ util_format_get_blockheight(enum pipe_format format) return 1; } - switch (desc->layout) { - case UTIL_FORMAT_LAYOUT_DXT: - return 4; - default: - return 1; - } + return desc->block.height; } static INLINE unsigned @@ -373,31 +386,14 @@ util_format_has_alpha(enum pipe_format format) return FALSE; } - switch (desc->layout) { - case UTIL_FORMAT_LAYOUT_SCALAR: - case UTIL_FORMAT_LAYOUT_ARITH: - case UTIL_FORMAT_LAYOUT_ARRAY: - /* FIXME: pf_get_component_bits( PIPE_FORMAT_A8L8_UNORM, PIPE_FORMAT_COMP_A ) should not return 0 right? 
*/ - if (format == PIPE_FORMAT_A8_UNORM || - format == PIPE_FORMAT_A8L8_UNORM || - format == PIPE_FORMAT_A8L8_SRGB) { - return TRUE; - } - return util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 3) != 0; - case UTIL_FORMAT_LAYOUT_YUV: + switch (desc->colorspace) { + case UTIL_FORMAT_COLORSPACE_RGB: + case UTIL_FORMAT_COLORSPACE_SRGB: + return desc->swizzle[3] != UTIL_FORMAT_SWIZZLE_1; + case UTIL_FORMAT_COLORSPACE_YUV: + return FALSE; + case UTIL_FORMAT_COLORSPACE_ZS: return FALSE; - case UTIL_FORMAT_LAYOUT_DXT: - switch (format) { - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - case PIPE_FORMAT_DXT1_SRGBA: - case PIPE_FORMAT_DXT3_SRGBA: - case PIPE_FORMAT_DXT5_SRGBA: - return TRUE; - default: - return FALSE; - } default: assert(0); return FALSE; diff --git a/src/gallium/auxiliary/util/u_format_access.py b/src/gallium/auxiliary/util/u_format_access.py index 0b05ddb9312..1c9be1b538e 100644 --- a/src/gallium/auxiliary/util/u_format_access.py +++ b/src/gallium/auxiliary/util/u_format_access.py @@ -39,18 +39,7 @@ import sys -from u_format_parse import * - - -def short_name(format): - '''Make up a short norm for a format, suitable to be used as suffix in - function names.''' - - name = format.name - if name.startswith('PIPE_FORMAT_'): - name = name[len('PIPE_FORMAT_'):] - name = name.lower() - return name +from u_format_pack import * def is_format_supported(format): @@ -63,16 +52,16 @@ def is_format_supported(format): if format.colorspace not in ('rgb', 'zs'): return False - if format.layout not in (ARITH, ARRAY): + if format.layout != PLAIN: return False for i in range(4): - type = format.in_types[i] - if type.kind not in (VOID, UNSIGNED, FLOAT): + channel = format.channels[i] + if channel.type not in (VOID, UNSIGNED, FLOAT): return False # We can only read a color from a depth/stencil format if the depth channel is present - if format.colorspace == 'zs' and format.out_swizzle[0] == SWIZZLE_NONE: + if 
format.colorspace == 'zs' and format.swizzles[0] == SWIZZLE_NONE: return False return True @@ -81,187 +70,34 @@ def is_format_supported(format): def native_type(format): '''Get the native appropriate for a format.''' - if format.layout == ARITH: - # For arithmetic pixel formats return the integer type that matches the whole pixel - return 'uint%u_t' % format.block_size() - elif format.layout == ARRAY: - # For array pixel formats return the integer type that matches the color channel - type = format.in_types[0] - if type.kind == UNSIGNED: - return 'uint%u_t' % type.size - elif type.kind == SIGNED: - return 'int%u_t' % type.size - elif type.kind == FLOAT: - if type.size == 32: - return 'float' - elif type.size == 64: - return 'double' + if format.layout == PLAIN: + if not format.is_array(): + # For arithmetic pixel formats return the integer type that matches the whole pixel + return 'uint%u_t' % format.block_size() + else: + # For array pixel formats return the integer type that matches the color channel + channel = format.channels[0] + if channel.type == UNSIGNED: + return 'uint%u_t' % channel.size + elif channel.type == SIGNED: + return 'int%u_t' % channel.size + elif channel.type == FLOAT: + if channel.size == 32: + return 'float' + elif channel.size == 64: + return 'double' + else: + assert False else: assert False - else: - assert False - else: - assert False - - -def intermediate_native_type(bits, sign): - '''Find a native type adequate to hold intermediate results of the request bit size.''' - - bytes = 4 # don't use anything smaller than 32bits - while bytes * 8 < bits: - bytes *= 2 - bits = bytes*8 - - if sign: - return 'int%u_t' % bits - else: - return 'uint%u_t' % bits - - -def get_one_shift(type): - '''Get the number of the bit that matches unity for this type.''' - if type.kind == 'FLOAT': - assert False - if not type.norm: - return 0 - if type.kind == UNSIGNED: - return type.size - if type.kind == SIGNED: - return type.size - 1 - if type.kind == FIXED: 
- return type.size / 2 - assert False - - -def get_one(type): - '''Get the value of unity for this type.''' - if type.kind == 'FLOAT' or not type.norm: - return 1 - else: - return (1 << get_one_shift(type)) - 1 - - -def generate_clamp(): - '''Code generate the clamping functions for each type. - - We don't use a macro so that arguments with side effects, - like *src_pixel++ are correctly handled. - ''' - - for suffix, native_type in [ - ('', 'double'), - ('f', 'float'), - ('ui', 'unsigned int'), - ('si', 'int'), - ]: - print 'static INLINE %s' % native_type - print 'clamp%s(%s value, %s lbound, %s ubound)' % (suffix, native_type, native_type, native_type) - print '{' - print ' if(value < lbound)' - print ' return lbound;' - print ' if(value > ubound)' - print ' return ubound;' - print ' return value;' - print '}' - print - - -def clamp_expr(src_type, dst_type, dst_native_type, value): - '''Generate the expression to clamp the value in the source type to the - destination type range.''' - - if src_type == dst_type: - return value - - # Pick the approriate clamp function - if src_type.kind == FLOAT: - if src_type.size == 32: - func = 'clampf' - elif src_type.size == 64: - func = 'clamp' - else: - assert False - elif src_type.kind == UNSIGNED: - func = 'clampui' - elif src_type.kind == SIGNED: - func = 'clampsi' else: assert False - # Clamp floats to [-1, 1] or [0, 1] range - if src_type.kind == FLOAT and dst_type.norm: - max = 1 - if src_type.sign and dst_type.sign: - min = -1 - else: - min = 0 - return '%s(%s, %s, %s)' % (func, value, min, max) - - # FIXME: Also clamp scaled values - - return value - - -def conversion_expr(src_type, dst_type, dst_native_type, value): - '''Generate the expression to convert a value between two types.''' - - if src_type == dst_type: - return value - if src_type.kind == FLOAT and dst_type.kind == FLOAT: - return '(%s)%s' % (dst_native_type, value) - - if not src_type.norm and not dst_type.norm: - return '(%s)%s' % (dst_native_type, 
value) - - value = clamp_expr(src_type, dst_type, dst_native_type, value) - - if dst_type.kind == FLOAT: - if src_type.norm: - one = get_one(src_type) - if src_type.size <= 23: - scale = '(1.0f/0x%x)' % one - else: - # bigger than single precision mantissa, use double - scale = '(1.0/0x%x)' % one - value = '(%s * %s)' % (value, scale) - return '(%s)%s' % (dst_native_type, value) - - if src_type.kind == FLOAT: - if dst_type.norm: - dst_one = get_one(dst_type) - if dst_type.size <= 23: - scale = '0x%x' % dst_one - else: - # bigger than single precision mantissa, use double - scale = '(double)0x%x' % dst_one - value = '(%s * %s)' % (value, scale) - return '(%s)%s' % (dst_native_type, value) - - if src_type.kind == dst_type.kind: - src_one = get_one(src_type) - dst_one = get_one(dst_type) - - if src_one > dst_one and src_type.norm and dst_type.norm: - # We can just bitshift - src_shift = get_one_shift(src_type) - dst_shift = get_one_shift(dst_type) - value = '(%s >> %s)' % (value, src_shift - dst_shift) - else: - # We need to rescale using an intermediate type big enough to hold the multiplication of both - tmp_native_type = intermediate_native_type(src_type.size + dst_type.size, src_type.sign and dst_type.sign) - value = '(%s)%s' % (tmp_native_type, value) - value = '%s * 0x%x / 0x%x' % (value, dst_one, src_one) - value = '(%s)%s' % (dst_native_type, value) - return value - - assert False - - -def generate_format_read(format, dst_type, dst_native_type, dst_suffix): +def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): '''Generate the function to read pixels from a particular format''' - name = short_name(format) + name = format.short_name() src_native_type = native_type(format) @@ -279,11 +115,11 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix): names = ['']*4 if format.colorspace == 'rgb': for i in range(4): - swizzle = format.out_swizzle[i] + swizzle = format.swizzles[i] if swizzle < 4: names[swizzle] += 'rgba'[i] 
elif format.colorspace == 'zs': - swizzle = format.out_swizzle[0] + swizzle = format.swizzles[0] if swizzle < 4: names[swizzle] = 'z' else: @@ -291,64 +127,66 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix): else: assert False - if format.layout == ARITH: - print ' %s pixel = *src_pixel++;' % src_native_type - shift = 0; - for i in range(4): - src_type = format.in_types[i] - width = src_type.size - if names[i]: - value = 'pixel' - mask = (1 << width) - 1 - if shift: - value = '(%s >> %u)' % (value, shift) - if shift + width < format.block_size(): - value = '(%s & 0x%x)' % (value, mask) - value = conversion_expr(src_type, dst_type, dst_native_type, value) - print ' %s %s = %s;' % (dst_native_type, names[i], value) - shift += width - elif format.layout == ARRAY: - for i in range(4): - src_type = format.in_types[i] - if names[i]: - value = '(*src_pixel++)' - value = conversion_expr(src_type, dst_type, dst_native_type, value) - print ' %s %s = %s;' % (dst_native_type, names[i], value) + if format.layout == PLAIN: + if not format.is_array(): + print ' %s pixel = *src_pixel++;' % src_native_type + shift = 0; + for i in range(4): + src_channel = format.channels[i] + width = src_channel.size + if names[i]: + value = 'pixel' + mask = (1 << width) - 1 + if shift: + value = '(%s >> %u)' % (value, shift) + if shift + width < format.block_size(): + value = '(%s & 0x%x)' % (value, mask) + value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + print ' %s %s = %s;' % (dst_native_type, names[i], value) + shift += width + else: + for i in range(4): + src_channel = format.channels[i] + if names[i]: + value = 'src_pixel[%u]' % i + value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + print ' %s %s = %s;' % (dst_native_type, names[i], value) + print ' src_pixel += %u;' % (format.nr_channels()) else: assert False for i in range(4): if format.colorspace == 'rgb': - swizzle = format.out_swizzle[i] + swizzle = 
format.swizzles[i] if swizzle < 4: value = names[swizzle] elif swizzle == SWIZZLE_0: value = '0' elif swizzle == SWIZZLE_1: - value = get_one(dst_type) + value = get_one(dst_channel) else: assert False elif format.colorspace == 'zs': if i < 3: value = 'z' else: - value = get_one(dst_type) + value = get_one(dst_channel) else: assert False print ' *dst_pixel++ = %s; /* %s */' % (value, 'rgba'[i]) print ' }' print ' src_row += src_stride;' - print ' dst_row += dst_stride/sizeof(%s);' % dst_native_type + print ' dst_row += dst_stride/sizeof(*dst_row);' print ' }' print '}' print -def generate_format_write(format, src_type, src_native_type, src_suffix): +def generate_format_write(format, src_channel, src_native_type, src_suffix): '''Generate the function to write pixels to a particular format''' - name = short_name(format) + name = format.short_name() dst_native_type = native_type(format) @@ -363,58 +201,48 @@ def generate_format_write(format, src_type, src_native_type, src_suffix): print ' const %s *src_pixel = src_row;' %src_native_type print ' for (x = 0; x < w; ++x) {' - inv_swizzle = [None]*4 - if format.colorspace == 'rgb': - for i in range(4): - swizzle = format.out_swizzle[i] - if swizzle < 4: - inv_swizzle[swizzle] = i - elif format.colorspace == 'zs': - swizzle = format.out_swizzle[0] - if swizzle < 4: - inv_swizzle[swizzle] = 0 - else: - assert False - - if format.layout == ARITH: - print ' %s pixel = 0;' % dst_native_type - shift = 0; - for i in range(4): - dst_type = format.in_types[i] - width = dst_type.size - if inv_swizzle[i] is not None: - value = 'src_pixel[%u]' % inv_swizzle[i] - value = conversion_expr(src_type, dst_type, dst_native_type, value) - if shift: - value = '(%s << %u)' % (value, shift) - print ' pixel |= %s;' % value - shift += width - print ' *dst_pixel++ = pixel;' - elif format.layout == ARRAY: - for i in range(4): - dst_type = format.in_types[i] - if inv_swizzle[i] is not None: - value = 'src_pixel[%u]' % inv_swizzle[i] - value = 
conversion_expr(src_type, dst_type, dst_native_type, value) - print ' *dst_pixel++ = %s;' % value + inv_swizzle = format.inv_swizzles() + + if format.layout == PLAIN: + if not format.is_array(): + print ' %s pixel = 0;' % dst_native_type + shift = 0; + for i in range(4): + dst_channel = format.channels[i] + width = dst_channel.size + if inv_swizzle[i] is not None: + value = 'src_pixel[%u]' % inv_swizzle[i] + value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + if shift: + value = '(%s << %u)' % (value, shift) + print ' pixel |= %s;' % value + shift += width + print ' *dst_pixel++ = pixel;' + else: + for i in range(4): + dst_channel = format.channels[i] + if inv_swizzle[i] is not None: + value = 'src_pixel[%u]' % inv_swizzle[i] + value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + print ' *dst_pixel++ = %s;' % value else: assert False print ' src_pixel += 4;' print ' }' print ' dst_row += dst_stride;' - print ' src_row += src_stride/sizeof(%s);' % src_native_type + print ' src_row += src_stride/sizeof(*src_row);' print ' }' print '}' print -def generate_read(formats, dst_type, dst_native_type, dst_suffix): +def generate_read(formats, dst_channel, dst_native_type, dst_suffix): '''Generate the dispatch function to read pixels from any format''' for format in formats: if is_format_supported(format): - generate_format_read(format, dst_type, dst_native_type, dst_suffix) + generate_format_read(format, dst_channel, dst_native_type, dst_suffix) print 'void' print 'util_format_read_%s(enum pipe_format format, %s *dst, unsigned dst_stride, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type) @@ -424,7 +252,7 @@ def generate_read(formats, dst_type, dst_native_type, dst_suffix): for format in formats: if is_format_supported(format): print ' case %s:' % format.name - print ' func = &util_format_%s_read_%s;' % (short_name(format), dst_suffix) + print ' func = 
&util_format_%s_read_%s;' % (format.short_name(), dst_suffix) print ' break;' print ' default:' print ' debug_printf("unsupported format\\n");' @@ -435,12 +263,12 @@ def generate_read(formats, dst_type, dst_native_type, dst_suffix): print -def generate_write(formats, src_type, src_native_type, src_suffix): +def generate_write(formats, src_channel, src_native_type, src_suffix): '''Generate the dispatch function to write pixels to any format''' for format in formats: if is_format_supported(format): - generate_format_write(format, src_type, src_native_type, src_suffix) + generate_format_write(format, src_channel, src_native_type, src_suffix) print 'void' print 'util_format_write_%s(enum pipe_format format, const %s *src, unsigned src_stride, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type) @@ -451,7 +279,7 @@ def generate_write(formats, src_type, src_native_type, src_suffix): for format in formats: if is_format_supported(format): print ' case %s:' % format.name - print ' func = &util_format_%s_write_%s;' % (short_name(format), src_suffix) + print ' func = &util_format_%s_write_%s;' % (format.short_name(), src_suffix) print ' break;' print ' default:' print ' debug_printf("unsupported format\\n");' @@ -473,20 +301,18 @@ def main(): print __doc__.strip() print print '#include "pipe/p_compiler.h"' - print '#include "u_format.h"' print '#include "u_math.h"' + print '#include "u_format_pack.h"' print - generate_clamp() - - type = Type(FLOAT, False, 32) + type = Channel(FLOAT, False, 32) native_type = 'float' suffix = '4f' generate_read(formats, type, native_type, suffix) generate_write(formats, type, native_type, suffix) - type = Type(UNSIGNED, True, 8) + type = Channel(UNSIGNED, True, 8) native_type = 'uint8_t' suffix = '4ub' diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py new file mode 100644 index 00000000000..3f33f7cc021 --- /dev/null +++ 
b/src/gallium/auxiliary/util/u_format_pack.py @@ -0,0 +1,484 @@ +#!/usr/bin/env python + +''' +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Pixel format packing and unpacking functions. 
+ * + * @author Jose Fonseca <[email protected]> + */ +''' + + +import sys + +from u_format_parse import * + + +def generate_format_type(format): + '''Generate a structure that describes the format.''' + + print 'union util_format_%s {' % format.short_name() + if format.is_bitmask(): + print ' uint%u_t value;' % (format.block_size(),) + print ' struct {' + for channel in format.channels: + if format.is_bitmask() and not format.is_array(): + if channel.type == VOID: + if channel.size: + print ' unsigned %s:%u;' % (channel.name, channel.size) + elif channel.type == UNSIGNED: + print ' unsigned %s:%u;' % (channel.name, channel.size) + elif channel.type == SIGNED: + print ' int %s:%u;' % (channel.name, channel.size) + else: + assert 0 + else: + assert channel.size % 8 == 0 and is_pot(channel.size) + if channel.type == VOID: + if channel.size: + print ' uint%u_t %s;' % (channel.size, channel.name) + elif channel.type == UNSIGNED: + print ' uint%u_t %s;' % (channel.size, channel.name) + elif channel.type in (SIGNED, FIXED): + print ' int%u_t %s;' % (channel.size, channel.name) + elif channel.type == FLOAT: + if channel.size == 64: + print ' double %s;' % (channel.name) + elif channel.size == 32: + print ' float %s;' % (channel.name) + elif channel.size == 16: + print ' uint16_t %s;' % (channel.name) + else: + assert 0 + else: + assert 0 + print ' } chan;' + print '};' + print + + +def bswap_format(format): + '''Generate a structure that describes the format.''' + + if format.is_bitmask() and not format.is_array(): + print '#ifdef PIPE_ARCH_BIG_ENDIAN' + print ' pixel.value = util_bswap%u(pixel.value);' % format.block_size() + print '#endif' + + +def is_format_supported(format): + '''Determines whether we actually have the plumbing necessary to generate the + to read/write to/from this format.''' + + # FIXME: Ideally we would support any format combination here. 
+ + if format.layout != PLAIN: + return False + + for i in range(4): + channel = format.channels[i] + if channel.type not in (VOID, UNSIGNED, SIGNED, FLOAT): + return False + + # We can only read a color from a depth/stencil format if the depth channel is present + if format.colorspace == 'zs' and format.swizzles[0] == SWIZZLE_NONE: + return False + + return True + + +def native_type(format): + '''Get the native appropriate for a format.''' + + if format.layout == PLAIN: + if not format.is_array(): + # For arithmetic pixel formats return the integer type that matches the whole pixel + return 'uint%u_t' % format.block_size() + else: + # For array pixel formats return the integer type that matches the color channel + type = format.channels[0] + if type.type == UNSIGNED: + return 'uint%u_t' % type.size + elif type.type == SIGNED: + return 'int%u_t' % type.size + elif type.type == FLOAT: + if type.size == 32: + return 'float' + elif type.size == 64: + return 'double' + else: + assert False + else: + assert False + else: + assert False + + +def intermediate_native_type(bits, sign): + '''Find a native type adequate to hold intermediate results of the request bit size.''' + + bytes = 4 # don't use anything smaller than 32bits + while bytes * 8 < bits: + bytes *= 2 + bits = bytes*8 + + if sign: + return 'int%u_t' % bits + else: + return 'uint%u_t' % bits + + +def get_one_shift(type): + '''Get the number of the bit that matches unity for this type.''' + if type.type == 'FLOAT': + assert False + if not type.norm: + return 0 + if type.type == UNSIGNED: + return type.size + if type.type == SIGNED: + return type.size - 1 + if type.type == FIXED: + return type.size / 2 + assert False + + +def get_one(type): + '''Get the value of unity for this type.''' + if type.type == 'FLOAT' or not type.norm: + return 1 + else: + return (1 << get_one_shift(type)) - 1 + + +def generate_clamp(): + '''Code generate the clamping functions for each type. 
+ + We don't use a macro so that arguments with side effects, + like *src_pixel++ are correctly handled. + ''' + + for suffix, native_type in [ + ('', 'double'), + ('f', 'float'), + ('ui', 'unsigned int'), + ('si', 'int'), + ]: + print 'static INLINE %s' % native_type + print 'clamp%s(%s value, %s lbound, %s ubound)' % (suffix, native_type, native_type, native_type) + print '{' + print ' if(value < lbound)' + print ' return lbound;' + print ' if(value > ubound)' + print ' return ubound;' + print ' return value;' + print '}' + print + + +def clamp_expr(src_channel, dst_channel, dst_native_type, value): + '''Generate the expression to clamp the value in the source type to the + destination type range.''' + + if src_channel == dst_channel: + return value + + # Pick the approriate clamp function + if src_channel.type == FLOAT: + if src_channel.size == 32: + func = 'clampf' + elif src_channel.size == 64: + func = 'clamp' + else: + assert False + elif src_channel.type == UNSIGNED: + func = 'clampui' + elif src_channel.type == SIGNED: + func = 'clampsi' + else: + assert False + + src_min = src_channel.min() + src_max = src_channel.max() + dst_min = dst_channel.min() + dst_max = dst_channel.max() + + if src_min < dst_min and src_max > dst_max: + return 'CLAMP(%s, %s, %s)' % (value, dst_min, dst_max) + + if src_max > dst_max: + return 'MIN2(%s, %s)' % (value, dst_max) + + if src_min < dst_min: + return 'MAX2(%s, %s)' % (value, dst_min) + + return value + + +def conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=True): + '''Generate the expression to convert a value between two types.''' + + if src_channel == dst_channel: + return value + + if src_channel.type == FLOAT and dst_channel.type == FLOAT: + return '(%s)%s' % (dst_native_type, value) + + if not src_channel.norm and not dst_channel.norm: + return '(%s)%s' % (dst_native_type, value) + + if clamp: + value = clamp_expr(src_channel, dst_channel, dst_native_type, value) + + if dst_channel.type == 
FLOAT: + if src_channel.norm: + one = get_one(src_channel) + if src_channel.size <= 23: + scale = '(1.0f/0x%x)' % one + else: + # bigger than single precision mantissa, use double + scale = '(1.0/0x%x)' % one + value = '(%s * %s)' % (value, scale) + return '(%s)%s' % (dst_native_type, value) + + if src_channel.type == FLOAT: + if dst_channel.norm: + dst_one = get_one(dst_channel) + if dst_channel.size <= 23: + scale = '0x%x' % dst_one + else: + # bigger than single precision mantissa, use double + scale = '(double)0x%x' % dst_one + value = '(%s * %s)' % (value, scale) + return '(%s)%s' % (dst_native_type, value) + + if not src_channel.norm and not dst_channel.norm: + # neither is normalized -- just cast + return '(%s)%s' % (dst_native_type, value) + + if src_channel.type in (SIGNED, UNSIGNED) and dst_channel.type in (SIGNED, UNSIGNED): + src_one = get_one(src_channel) + dst_one = get_one(dst_channel) + + if src_one > dst_one and src_channel.norm: + # We can just bitshift + src_shift = get_one_shift(src_channel) + dst_shift = get_one_shift(dst_channel) + value = '(%s >> %s)' % (value, src_shift - dst_shift) + else: + # We need to rescale using an intermediate type big enough to hold the multiplication of both + tmp_native_type = intermediate_native_type(src_channel.size + dst_channel.size, src_channel.sign and dst_channel.sign) + value = '(%s)%s' % (tmp_native_type, value) + value = '(%s * 0x%x / 0x%x)' % (value, dst_one, src_one) + value = '(%s)%s' % (dst_native_type, value) + return value + + assert False + + +def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix): + '''Generate the function to unpack pixels from a particular format''' + + name = format.short_name() + + src_native_type = native_type(format) + + print 'static INLINE void' + print 'util_format_%s_unpack_%s(%s *dst, const void *src)' % (name, dst_suffix, dst_native_type) + print '{' + print ' union util_format_%s pixel;' % format.short_name() + print ' memcpy(&pixel, src, sizeof 
pixel);' + bswap_format(format) + + assert format.layout == PLAIN + + for i in range(4): + swizzle = format.swizzles[i] + if swizzle < 4: + src_channel = format.channels[swizzle] + value = 'pixel.chan.%s' % src_channel.name + value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + elif swizzle == SWIZZLE_0: + value = '0' + elif swizzle == SWIZZLE_1: + value = get_one(dst_channel) + elif swizzle == SWIZZLE_NONE: + value = '0' + else: + assert False + if format.colorspace == ZS: + if i == 3: + value = get_one(dst_channel) + elif i >= 1: + value = 'dst[0]' + print ' dst[%u] = %s; /* %s */' % (i, value, 'rgba'[i]) + + print '}' + print + + +def generate_format_pack(format, src_channel, src_native_type, src_suffix): + '''Generate the function to pack pixels to a particular format''' + + name = format.short_name() + + dst_native_type = native_type(format) + + print 'static INLINE void' + print 'util_format_%s_pack_%s(void *dst, %s r, %s g, %s b, %s a)' % (name, src_suffix, src_native_type, src_native_type, src_native_type, src_native_type) + print '{' + print ' union util_format_%s pixel;' % format.short_name() + + assert format.layout == PLAIN + + inv_swizzle = format.inv_swizzles() + + for i in range(4): + dst_channel = format.channels[i] + width = dst_channel.size + if inv_swizzle[i] is None: + continue + value = 'rgba'[inv_swizzle[i]] + value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + if format.colorspace == ZS: + if i == 3: + value = get_one(dst_channel) + elif i >= 1: + value = '0' + print ' pixel.chan.%s = %s;' % (dst_channel.name, value) + + bswap_format(format) + print ' memcpy(dst, &pixel, sizeof pixel);' + print '}' + print + + +def generate_unpack(formats, dst_channel, dst_native_type, dst_suffix): + '''Generate the dispatch function to unpack pixels from any format''' + + for format in formats: + if is_format_supported(format): + generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix) + + print 
'static INLINE void' + print 'util_format_unpack_%s(enum pipe_format format, %s *dst, const void *src)' % (dst_suffix, dst_native_type) + print '{' + print ' void (*func)(%s *dst, const void *src);' % dst_native_type + print ' switch(format) {' + for format in formats: + if is_format_supported(format): + print ' case %s:' % format.name + print ' func = &util_format_%s_unpack_%s;' % (format.short_name(), dst_suffix) + print ' break;' + print ' default:' + print ' debug_printf("unsupported format\\n");' + print ' return;' + print ' }' + print ' func(dst, src);' + print '}' + print + + +def generate_pack(formats, src_channel, src_native_type, src_suffix): + '''Generate the dispatch function to pack pixels to any format''' + + for format in formats: + if is_format_supported(format): + generate_format_pack(format, src_channel, src_native_type, src_suffix) + + print 'static INLINE void' + print 'util_format_pack_%s(enum pipe_format format, void *dst, %s r, %s g, %s b, %s a)' % (src_suffix, src_native_type, src_native_type, src_native_type, src_native_type) + print '{' + print ' void (*func)(void *dst, %s r, %s g, %s b, %s a);' % (src_native_type, src_native_type, src_native_type, src_native_type) + print ' switch(format) {' + for format in formats: + if is_format_supported(format): + print ' case %s:' % format.name + print ' func = &util_format_%s_pack_%s;' % (format.short_name(), src_suffix) + print ' break;' + print ' default:' + print ' debug_printf("%s: unsupported format\\n", __FUNCTION__);' + print ' return;' + print ' }' + print ' func(dst, r, g, b, a);' + print '}' + print + + +def main(): + formats = [] + for arg in sys.argv[1:]: + formats.extend(parse(arg)) + + print '/* This file is autogenerated by u_format_pack.py from u_format.csv. Do not edit directly. 
*/' + print + # This will print the copyright message on the top of this file + print __doc__.strip() + + print + print '#ifndef U_FORMAT_PACK_H' + print '#define U_FORMAT_PACK_H' + print + print '#include "pipe/p_compiler.h"' + print '#include "u_math.h"' + print '#include "u_format.h"' + print + + generate_clamp() + + for format in formats: + if format.layout == PLAIN: + generate_format_type(format) + + channel = Channel(FLOAT, False, 32) + native_type = 'float' + suffix = '4f' + + generate_unpack(formats, channel, native_type, suffix) + generate_pack(formats, channel, native_type, suffix) + + channel = Channel(UNSIGNED, True, 8) + native_type = 'uint8_t' + suffix = '4ub' + + generate_unpack(formats, channel, native_type, suffix) + generate_pack(formats, channel, native_type, suffix) + + print + print '#ifdef __cplusplus' + print '}' + print '#endif' + print + print '#endif /* ! U_FORMAT_PACK_H */' + + +if __name__ == '__main__': + main() diff --git a/src/gallium/auxiliary/util/u_format_parse.py b/src/gallium/auxiliary/util/u_format_parse.py index 493aff71127..250926418ec 100755 --- a/src/gallium/auxiliary/util/u_format_parse.py +++ b/src/gallium/auxiliary/util/u_format_parse.py @@ -30,64 +30,169 @@ ''' -import sys - - VOID, UNSIGNED, SIGNED, FIXED, FLOAT = range(5) SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_0, SWIZZLE_1, SWIZZLE_NONE, = range(7) -ARITH = 'arith' -ARRAY = 'array' +PLAIN = 'plain' + +RGB = 'rgb' +SRGB = 'srgb' +YUV = 'yuv' +ZS = 'zs' + +def is_pot(x): + return (x & (x - 1)) == 0; -class Type: - '''Describe the type of a color channel.''' + +VERY_LARGE = 99999999999999999999999 + + +class Channel: + '''Describe the channel of a color channel.''' - def __init__(self, kind, norm, size): - self.kind = kind + def __init__(self, type, norm, size, name = ''): + self.type = type self.norm = norm self.size = size - self.sign = kind in (SIGNED, FIXED, FLOAT) + self.sign = type in (SIGNED, FIXED, FLOAT) + self.name = name def __str__(self): - s = 
str(self.kind) + s = str(self.type) if self.norm: s += 'n' s += str(self.size) return s def __eq__(self, other): - return self.kind == other.kind and self.norm == other.norm and self.size == other.size + return self.type == other.type and self.norm == other.norm and self.size == other.size + + def max(self): + '''Maximum representable number.''' + if self.type == FLOAT: + return VERY_LARGE + if self.norm: + return 1 + if self.type == UNSIGNED: + return (1 << self.size) - 1 + if self.type == SIGNED: + return self.size - 1 + assert False + + def min(self): + '''Minimum representable number.''' + if self.type == FLOAT: + return -VERY_LARGE + if self.type == UNSIGNED: + return 0 + if self.norm: + return -1 + if self.type == SIGNED: + return -(1 << (self.size - 1)) + assert False class Format: '''Describe a pixel format.''' - def __init__(self, name, layout, block_width, block_height, in_types, out_swizzle, colorspace): + def __init__(self, name, layout, block_width, block_height, channels, swizzles, colorspace): self.name = name self.layout = layout self.block_width = block_width self.block_height = block_height - self.in_types = in_types - self.out_swizzle = out_swizzle + self.channels = channels + self.swizzles = swizzles self.name = name self.colorspace = colorspace def __str__(self): return self.name + def short_name(self): + '''Make up a short norm for a format, suitable to be used as suffix in + function names.''' + + name = self.name + if name.startswith('PIPE_FORMAT_'): + name = name[len('PIPE_FORMAT_'):] + name = name.lower() + return name + def block_size(self): size = 0 - for type in self.in_types: - size += type.size + for channel in self.channels: + size += channel.size return size + def nr_channels(self): + nr_channels = 0 + for channel in self.channels: + if channel.size: + nr_channels += 1 + return nr_channels + + def is_array(self): + ref_channel = self.channels[0] + for channel in self.channels[1:]: + if channel.size and (channel.size != 
ref_channel.size or channel.size % 8): + return False + return True + + def is_mixed(self): + ref_channel = self.channels[0] + for channel in self.channels[1:]: + if channel.type != VOID: + if channel.type != ref_channel.type: + return True + if channel.norm != ref_channel.norm: + return True + return False + + def is_pot(self): + return is_pot(self.block_size()) + + def is_int(self): + for channel in self.channels: + if channel.type not in (VOID, UNSIGNED, SIGNED): + return False + return True + + def is_float(self): + for channel in self.channels: + if channel.type not in (VOID, FLOAT): + return False + return True + + def is_bitmask(self): + if self.block_size() > 32: + return False + if not self.is_pot(): + return False + for channel in self.channels: + if not is_pot(channel.size): + return True + if channel.type not in (VOID, UNSIGNED, SIGNED): + return False + if channel.size >= 32: + return False + return True + + def inv_swizzles(self): + '''Return an array[4] of inverse swizzle terms''' + inv_swizzle = [None]*4 + for i in range(4): + swizzle = self.swizzles[i] + if swizzle < 4: + inv_swizzle[swizzle] = i + return inv_swizzle + def stride(self): return self.block_size()/8 -_kind_parse_map = { +_type_parse_map = { '': VOID, 'x': VOID, 'u': UNSIGNED, @@ -108,20 +213,55 @@ _swizzle_parse_map = { def parse(filename): '''Parse the format descrition in CSV format in terms of the - Type and Format classes above.''' + Channel and Format classes above.''' stream = open(filename) formats = [] for line in stream: - line = line.rstrip() + try: + comment = line.index('#') + except ValueError: + pass + else: + line = line[:comment] + line = line.strip() + if not line: + continue + fields = [field.strip() for field in line.split(',')] + name = fields[0] layout = fields[1] block_width, block_height = map(int, fields[2:4]) - in_types = [] - for field in fields[4:8]: + + swizzles = [_swizzle_parse_map[swizzle] for swizzle in fields[8]] + colorspace = fields[9] + + if layout 
== PLAIN: + names = ['']*4 + if colorspace in (RGB, SRGB): + for i in range(4): + swizzle = swizzles[i] + if swizzle < 4: + names[swizzle] += 'rgba'[i] + elif colorspace == ZS: + for i in range(4): + swizzle = swizzles[i] + if swizzle < 4: + names[swizzle] += 'zs'[i] + else: + assert False + for i in range(4): + if names[i] == '': + names[i] = 'x' + else: + names = ['x', 'y', 'z', 'w'] + + channels = [] + for i in range(0, 4): + field = fields[4 + i] if field: - kind = _kind_parse_map[field[0]] + type = _type_parse_map[field[0]] if field[1] == 'n': norm = True size = int(field[2:]) @@ -129,13 +269,13 @@ def parse(filename): norm = False size = int(field[1:]) else: - kind = VOID + type = VOID norm = False size = 0 - in_type = Type(kind, norm, size) - in_types.append(in_type) - out_swizzle = [_swizzle_parse_map[swizzle] for swizzle in fields[8]] - colorspace = fields[9] - formats.append(Format(name, layout, block_width, block_height, in_types, out_swizzle, colorspace)) + channel = Channel(type, norm, size, names[i]) + channels.append(channel) + + format = Format(name, layout, block_width, block_height, channels, swizzles, colorspace) + formats.append(format) return formats diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py index 571cab55dc8..4e29d15f3bb 100755 --- a/src/gallium/auxiliary/util/u_format_table.py +++ b/src/gallium/auxiliary/util/u_format_table.py @@ -51,7 +51,7 @@ colorspace_channels_map = { } -kind_map = { +type_map = { VOID: "UTIL_FORMAT_TYPE_VOID", UNSIGNED: "UTIL_FORMAT_TYPE_UNSIGNED", SIGNED: "UTIL_FORMAT_TYPE_SIGNED", @@ -87,35 +87,44 @@ def write_format_table(formats): print '#include "u_format.h"' print print 'const struct util_format_description' - print 'util_format_description_table[] = ' - print "{" - print " {" - print " PIPE_FORMAT_NONE," - print " \"PIPE_FORMAT_NONE\"," - print " {0, 0, 0}," - print " 0," - print " {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}}," - print " {0, 0, 0, 0}," 
- print " 0" - print " }," + print 'util_format_none_description = {' + print " PIPE_FORMAT_NONE," + print " \"PIPE_FORMAT_NONE\"," + print " {0, 0, 0}," + print " 0," + print " 0," + print " 0," + print " 0," + print " {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}}," + print " {0, 0, 0, 0}," + print " 0" + print "};" + print for format in formats: + print 'const struct util_format_description' + print 'util_format_%s_description = {' % (format.short_name(),) + print " %s," % (format.name,) + print " \"%s\"," % (format.name,) + print " {%u, %u, %u},\t/* block */" % (format.block_width, format.block_height, format.block_size()) + print " %s," % (layout_map(format.layout),) + print " %u,\t/* nr_channels */" % (format.nr_channels(),) + print " %s,\t/* is_array */" % (bool_map(format.is_array()),) + print " %s,\t/* is_mixed */" % (bool_map(format.is_mixed()),) print " {" - print " %s," % (format.name,) - print " \"%s\"," % (format.name,) - print " {%u, %u, %u},\t/* block */" % (format.block_width, format.block_height, format.block_size()) - print " %s," % (layout_map(format.layout),) - print " {" for i in range(4): - type = format.in_types[i] + channel = format.channels[i] if i < 3: sep = "," else: sep = "" - print " {%s, %s, %u}%s\t/* %s */" % (kind_map[type.kind], bool_map(type.norm), type.size, sep, "xyzw"[i]) - print " }," - print " {" + if channel.size: + print " {%s, %s, %u}%s\t/* %s = %s */" % (type_map[channel.type], bool_map(channel.norm), channel.size, sep, "xyzw"[i], channel.name) + else: + print " {0, 0, 0}%s" % (sep,) + print " }," + print " {" for i in range(4): - swizzle = format.out_swizzle[i] + swizzle = format.swizzles[i] if i < 3: sep = "," else: @@ -124,11 +133,30 @@ def write_format_table(formats): comment = colorspace_channels_map[format.colorspace][i] except (KeyError, IndexError): comment = 'ignored' - print " %s%s\t/* %s */" % (swizzle_map[swizzle], sep, comment) - print " }," - print " %s," % (colorspace_map(format.colorspace),) + print " 
%s%s\t/* %s */" % (swizzle_map[swizzle], sep, comment) print " }," - print "};" + print " %s," % (colorspace_map(format.colorspace),) + print "};" + print + print "const struct util_format_description *" + print "util_format_description(enum pipe_format format)" + print "{" + print " if (format >= PIPE_FORMAT_COUNT) {" + print " return NULL;" + print " }" + print + print " switch (format) {" + print " case PIPE_FORMAT_NONE:" + print " return &util_format_none_description;" + for format in formats: + print " case %s:" % format.name + print " return &util_format_%s_description;" % (format.short_name(),) + print " default:" + print " assert(0);" + print " return NULL;" + print " }" + print "}" + print def main(): diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index e95d58ea863..0cb3432c6e4 100644 --- a/src/gallium/auxiliary/util/u_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -90,7 +90,10 @@ pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference) static INLINE void pipe_buffer_reference(struct pipe_buffer **ptr, struct pipe_buffer *buf) { - struct pipe_buffer *old_buf = *ptr; + struct pipe_buffer *old_buf; + + assert(ptr); + old_buf = *ptr; if (pipe_reference(&(*ptr)->reference, &buf->reference)) old_buf->screen->buffer_destroy(old_buf); diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index b2969a210a7..d1ec13def30 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -532,6 +532,17 @@ util_bswap32(uint32_t n) /** + * Reverse byte order of a 16 bit word. + */ +static INLINE uint16_t +util_bswap16(uint16_t n) +{ + return (n >> 8) | + (n << 8); +} + + +/** * Clamp X to [MIN, MAX]. * This is a macro to allow float, int, uint, etc. types. 
*/ diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 27f65522b69..e78634e59e9 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -147,16 +147,30 @@ These flags determine the possible roles a texture may be used for during its lifetime. Texture usage flags are cumulative and may be combined to create a texture that can be used as multiple things. -* ``RENDER_TARGET``: A colorbuffer or pixelbuffer. +* ``RENDER_TARGET``: A color buffer or pixel buffer which will be rendered to. * ``DISPLAY_TARGET``: A sharable buffer that can be given to another process. -* ``PRIMARY``: A frontbuffer or scanout buffer. -* ``DEPTH_STENCIL``: A depthbuffer, stencilbuffer, or Z buffer. Gallium does - not explicitly provide for stencil-only buffers, so any stencilbuffer - validated here is implicitly also a depthbuffer. +* ``PRIMARY``: A front color buffer or scanout buffer. +* ``DEPTH_STENCIL``: A depth (Z) buffer or stencil buffer. Gallium does + not explicitly provide for stencil-only buffers, so any stencil buffer + validated here is implicitly also a depth buffer. * ``SAMPLER``: A texture that may be sampled from in a fragment or vertex shader. * ``DYNAMIC``: A texture that will be mapped frequently. + +PIPE_TEXTURE_GEOM +^^^^^^^^^^^^^^^^^ + +These flags are used when querying whether a particular pipe_format is +supported by the driver (with the `is_format_supported` function). +Some formats may only be supported for certain kinds of textures. +For example, a compressed format might only be used for POT textures. + +* ``PIPE_TEXTURE_GEOM_NON_SQUARE``: The texture may not be square +* ``PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO``: The texture dimensions may not be + powers of two. 
+ + Methods ------- diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index a3d4e3b04e5..ecbcabb4403 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -244,6 +244,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + assert(tex); OUT_BATCH(BUF_3D_ID_DEPTH | BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ ztile); diff --git a/src/gallium/drivers/i915/i915_texture.c b/src/gallium/drivers/i915/i915_texture.c index e101c8683ec..7ba222c78b7 100644 --- a/src/gallium/drivers/i915/i915_texture.c +++ b/src/gallium/drivers/i915/i915_texture.c @@ -223,7 +223,7 @@ i915_miptree_layout_2d(struct i915_texture *tex) if (i915_scanout_layout(tex)) return; - /* for shared buffers we use some very like scanout */ + /* for shared buffers we use something very like scanout */ if (pt->tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) if (i915_display_target_layout(tex)) return; diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index d59261557b5..9f136eec71c 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -359,9 +359,9 @@ static int brw_emit_vertex_elements(struct brw_context *brw) uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; switch (input->nr_components) { - case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; - case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; - case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; + case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */ + case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */ + case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */ case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT; break; } diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index 8bdd43cf140..38e99613981 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ 
b/src/gallium/drivers/i965/brw_screen_texture.c @@ -250,7 +250,7 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; tex->ss.ss0.surface_type = translate_tex_target(tex->base.target); - format = translate_tex_target(tex->base.format); + format = translate_tex_format(tex->base.format); assert(format != BRW_SURFACEFORMAT_INVALID); tex->ss.ss0.surface_format = format; diff --git a/src/gallium/drivers/identity/id_drm.c b/src/gallium/drivers/identity/id_drm.c index b89724e4f32..f258c38cd71 100644 --- a/src/gallium/drivers/identity/id_drm.c +++ b/src/gallium/drivers/identity/id_drm.c @@ -28,11 +28,11 @@ #include "state_tracker/drm_api.h" #include "util/u_memory.h" -#include "identity/id_drm.h" -#include "identity/id_screen.h" -#include "identity/id_public.h" -#include "identity/id_screen.h" -#include "identity/id_objects.h" +#include "id_drm.h" +#include "id_screen.h" +#include "id_public.h" +#include "id_screen.h" +#include "id_objects.h" struct identity_drm_api { diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 31732514379..41ac1cee72d 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -37,6 +37,7 @@ C_SOURCES = \ lp_surface.c \ lp_tex_sample_llvm.c \ lp_texture.c \ + lp_tile_image.c \ lp_tile_soa.c CPP_SOURCES = \ diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 72d9f39658f..ae2c1ba9437 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -86,7 +86,7 @@ Building To build everything on Linux invoke scons as: - scons debug=yes statetrackers=mesa drivers=trace,llvmpipe winsys=xlib dri=false + scons debug=yes statetrackers=mesa drivers=llvmpipe winsys=xlib dri=false Alternatively, you can build it with GNU make, if you prefer, by invoking it as @@ -96,7 +96,7 @@ but the rest of these instructions assume 
that scons is used. For windows is everything the except except the winsys: - scons debug=yes statetrackers=mesa drivers=trace,llvmpipe winsys=gdi dri=false + scons debug=yes statetrackers=mesa drivers=llvmpipe winsys=gdi dri=false Using ===== diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index a39283e5e86..13c1a13e87a 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -18,6 +18,13 @@ env.CodeGenerate( command = 'python $SCRIPT $SOURCE > $TARGET' ) +# XXX: Our dependency scanner only finds depended modules in relative dirs. +env.Depends('lp_tile_soa.c', [ + '#src/gallium/auxiliary/util/u_format_parse.py', + '#src/gallium/auxiliary/util/u_format_pack.py', + '#src/gallium/auxiliary/util/u_format_access.py', +]) + llvmpipe = env.ConvenienceLibrary( target = 'llvmpipe', source = [ @@ -52,6 +59,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_surface.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', + 'lp_tile_image.c', 'lp_tile_soa.c', ]) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 6dbcb3c9b31..82c006d78b0 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -156,14 +156,13 @@ lp_rast_end( struct lp_rasterizer *rast ) * \param y window Y position of the tile, in pixels */ static void -lp_rast_start_tile( struct lp_rasterizer *rast, - unsigned thread_index, - unsigned x, unsigned y ) +lp_rast_start_tile(struct lp_rasterizer_task *task, + unsigned x, unsigned y) { LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); - rast->tasks[thread_index].x = x; - rast->tasks[thread_index].y = y; + task->x = x; + task->y = y; } @@ -171,12 +170,13 @@ lp_rast_start_tile( struct lp_rasterizer *rast, * Clear the rasterizer's current color tile. * This is a bin command called during bin processing. 
*/ -void lp_rast_clear_color( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +void +lp_rast_clear_color(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { + struct lp_rasterizer *rast = task->rast; const uint8_t *clear_color = arg.clear_color; - uint8_t **color_tile = rast->tasks[thread_index].tile.color; + uint8_t **color_tile = task->tile.color; unsigned i; LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, @@ -225,11 +225,11 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, * Clear the rasterizer's current z/stencil tile. * This is a bin command called during bin processing. */ -void lp_rast_clear_zstencil( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg) +void +lp_rast_clear_zstencil(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + struct lp_rasterizer *rast = task->rast; const unsigned tile_x = task->x; const unsigned tile_y = task->y; const unsigned height = TILE_SIZE/TILE_VECTOR_HEIGHT; @@ -288,13 +288,12 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, * Load tile color from the framebuffer surface. * This is a bin command called during bin processing. 
*/ -void lp_rast_load_color( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg) +void +lp_rast_load_color(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; - const unsigned x = task->x; - const unsigned y = task->y; + struct lp_rasterizer *rast = task->rast; + const unsigned x = task->x, y = task->y; unsigned i; LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y); @@ -304,10 +303,7 @@ void lp_rast_load_color( struct lp_rasterizer *rast, int w = TILE_SIZE; int h = TILE_SIZE; - if (x >= transfer->width) - continue; - - if (y >= transfer->height) + if (x >= transfer->width || y >= transfer->height) continue; assert(w >= 0); @@ -327,16 +323,16 @@ void lp_rast_load_color( struct lp_rasterizer *rast, } -void lp_rast_set_state( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +void +lp_rast_set_state(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { const struct lp_rast_state *state = arg.set_state; LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state); /* just set the current state pointer for this rasterizer */ - rast->tasks[thread_index].current_state = state; + task->current_state = state; } @@ -346,16 +342,15 @@ void lp_rast_set_state( struct lp_rasterizer *rast, * completely contained inside a triangle. * This is a bin command called during bin processing. 
*/ -void lp_rast_shade_tile( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +void +lp_rast_shade_tile(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + struct lp_rasterizer *rast = task->rast; const struct lp_rast_state *state = task->current_state; struct lp_rast_tile *tile = &task->tile; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - const unsigned tile_x = task->x; - const unsigned tile_y = task->y; + const unsigned tile_x = task->x, tile_y = task->y; unsigned x, y; LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -396,14 +391,13 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, * Compute shading for a 4x4 block of pixels. * This is a bin command called during bin processing. */ -void lp_rast_shade_quads( struct lp_rasterizer *rast, - unsigned thread_index, +void lp_rast_shade_quads( struct lp_rasterizer_task *task, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, int32_t c1, int32_t c2, int32_t c3) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; const struct lp_rast_state *state = task->current_state; + struct lp_rasterizer *rast = task->rast; struct lp_rast_tile *tile = &task->tile; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; @@ -515,12 +509,11 @@ outline_subtiles(uint8_t *tile) /** * Write the rasterizer's color tile to the framebuffer. 
*/ -static void lp_rast_store_color( struct lp_rasterizer *rast, - unsigned thread_index) +static void +lp_rast_store_color(struct lp_rasterizer_task *task) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; - const unsigned x = task->x; - const unsigned y = task->y; + struct lp_rasterizer *rast = task->rast; + const unsigned x = task->x, y = task->y; unsigned i; for (i = 0; i < rast->state.fb.nr_cbufs; i++) { @@ -535,7 +528,7 @@ static void lp_rast_store_color( struct lp_rasterizer *rast, continue; LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__, - thread_index, x, y, w, h); + task->thread_index, x, y, w, h); if (LP_DEBUG & DEBUG_SHOW_SUBTILES) outline_subtiles(task->tile.color[i]); @@ -558,13 +551,14 @@ static void lp_rast_store_color( struct lp_rasterizer *rast, * Write the rasterizer's tiles to the framebuffer. */ static void -lp_rast_end_tile( struct lp_rasterizer *rast, - unsigned thread_index ) +lp_rast_end_tile(struct lp_rasterizer_task *task) { + struct lp_rasterizer *rast = task->rast; + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); if (rast->state.write_color) - lp_rast_store_color(rast, thread_index); + lp_rast_store_color(task); } @@ -572,9 +566,9 @@ lp_rast_end_tile( struct lp_rasterizer *rast, * Signal on a fence. This is called during bin execution/rasterization. * Called per thread. */ -void lp_rast_fence( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +void +lp_rast_fence(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { struct lp_fence *fence = arg.fence; @@ -603,6 +597,9 @@ release_scene( struct lp_rasterizer *rast, util_unreference_framebuffer_state( &scene->fb ); lp_scene_reset( scene ); + + assert(lp_scene_is_empty(scene)); + lp_scene_enqueue( rast->empty_scenes, scene ); rast->curr_scene = NULL; } @@ -615,25 +612,24 @@ release_scene( struct lp_rasterizer *rast, * Called per thread. 
*/ static void -rasterize_bin( struct lp_rasterizer *rast, - unsigned thread_index, - const struct cmd_bin *bin, - int x, int y) +rasterize_bin(struct lp_rasterizer_task *task, + const struct cmd_bin *bin, + int x, int y) { const struct cmd_block_list *commands = &bin->commands; struct cmd_block *block; unsigned k; - lp_rast_start_tile( rast, thread_index, x, y ); + lp_rast_start_tile( task, x, y ); /* simply execute each of the commands in the block list */ for (block = commands->head; block; block = block->next) { for (k = 0; k < block->count; k++) { - block->cmd[k]( rast, thread_index, block->arg[k] ); + block->cmd[k]( task, block->arg[k] ); } } - lp_rast_end_tile( rast, thread_index ); + lp_rast_end_tile( task ); } @@ -717,10 +713,9 @@ is_empty_bin( const struct cmd_bin *bin ) * Called per thread. */ static void -rasterize_scene( struct lp_rasterizer *rast, - unsigned thread_index, +rasterize_scene(struct lp_rasterizer_task *task, struct lp_scene *scene, - bool write_depth ) + bool write_depth) { /* loop over scene bins, rasterize each */ #if 0 @@ -728,9 +723,8 @@ rasterize_scene( struct lp_rasterizer *rast, unsigned i, j; for (i = 0; i < scene->tiles_x; i++) { for (j = 0; j < scene->tiles_y; j++) { - struct cmd_bin *bin = lp_get_bin(scene, i, j); - rasterize_bin( rast, thread_index, - bin, i * TILE_SIZE, j * TILE_SIZE ); + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + rasterize_bin(task, bin, i * TILE_SIZE, j * TILE_SIZE); } } } @@ -742,7 +736,7 @@ rasterize_scene( struct lp_rasterizer *rast, assert(scene); while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) { if (!is_empty_bin( bin )) - rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE); + rasterize_bin(task, bin, x * TILE_SIZE, y * TILE_SIZE); } } #endif @@ -786,7 +780,7 @@ lp_rasterize_scene( struct lp_rasterizer *rast, fb->zsbuf != NULL && write_depth ); lp_scene_bin_iter_begin( scene ); - rasterize_scene( rast, 0, scene, write_depth ); + rasterize_scene( &rast->tasks[0], 
scene, write_depth ); release_scene( rast, scene ); @@ -832,6 +826,9 @@ static PIPE_THREAD_ROUTINE( thread_func, init_data ) debug_printf("thread %d waiting for work\n", task->thread_index); pipe_semaphore_wait(&task->work_ready); + if (rast->exit_flag) + break; + if (task->thread_index == 0) { /* thread[0]: * - get next scene to rasterize @@ -860,10 +857,9 @@ static PIPE_THREAD_ROUTINE( thread_func, init_data ) /* do work */ if (debug) debug_printf("thread %d doing work\n", task->thread_index); - rasterize_scene(rast, - task->thread_index, - rast->curr_scene, - rast->curr_scene->write_depth); + rasterize_scene(task, + rast->curr_scene, + rast->curr_scene->write_depth); /* wait for all threads to finish with this scene */ pipe_barrier_wait( &rast->barrier ); @@ -968,6 +964,20 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) align_free(rast->tasks[i].tile.color[cbuf]); } + /* Set exit_flag and signal each thread's work_ready semaphore. + * Each thread will be woken up, notice that the exit_flag is set and + * break out of its main loop. The thread will then exit. + */ + rast->exit_flag = TRUE; + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_signal(&rast->tasks[i].work_ready); + } + + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_destroy(&rast->tasks[i].work_ready); + pipe_semaphore_destroy(&rast->tasks[i].work_done); + } + /* for synchronizing rasterization threads */ pipe_barrier_destroy( &rast->barrier ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 875f18e0c0c..1ed27001914 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -53,6 +53,9 @@ struct pipe_screen; #define FIXED_ONE (1<<FIXED_ORDER) +struct lp_rasterizer_task; + + /** * Rasterization state. * Objects of this type are put into the shared data bin and pointed @@ -201,32 +204,25 @@ lp_rast_arg_null( void ) * the bins are executed. 
*/ -void lp_rast_clear_color( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_clear_color( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_clear_zstencil( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_clear_zstencil( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_load_color( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_load_color( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_set_state( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_set_state( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_triangle( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_triangle( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_shade_tile( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_shade_tile( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_fence( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_fence( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 5c5497e0929..abc5a9ad899 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -84,6 +84,7 @@ struct lp_rasterizer { boolean clipped_tile; boolean check_for_clipped_tiles; + boolean exit_flag; /* Framebuffer stuff */ @@ -121,8 +122,7 @@ struct lp_rasterizer }; -void lp_rast_shade_quads( struct lp_rasterizer *rast, - unsigned thread_index, +void lp_rast_shade_quads( struct lp_rasterizer_task *task, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, int32_t c1, int32_t c2, int32_t c3); @@ -159,13 +159,13 @@ lp_rast_depth_pointer( struct lp_rasterizer *rast, * \param x, y location of 4x4 block in window coords */ static INLINE void -lp_rast_shade_quads_all( struct lp_rasterizer 
*rast, - unsigned thread_index, +lp_rast_shade_quads_all( struct lp_rasterizer_task *task, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y ) { - const struct lp_rast_state *state = rast->tasks[thread_index].current_state; - struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; + struct lp_rasterizer *rast = task->rast; + const struct lp_rast_state *state = task->current_state; + struct lp_rast_tile *tile = &task->tile; const unsigned ix = x % TILE_SIZE, iy = y % TILE_SIZE; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 0334705ef79..a5f0d14c95d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -89,14 +89,11 @@ static const int pos_table16[16][2] = { * Shade all pixels in a 4x4 block. */ static void -block_full_4( struct lp_rasterizer_task *rast_task, - const struct lp_rast_triangle *tri, - int x, int y ) +block_full_4(struct lp_rasterizer_task *task, + const struct lp_rast_triangle *tri, + int x, int y) { - lp_rast_shade_quads_all(rast_task->rast, - rast_task->thread_index, - &tri->inputs, - x, y); + lp_rast_shade_quads_all(task, &tri->inputs, x, y); } @@ -104,16 +101,16 @@ block_full_4( struct lp_rasterizer_task *rast_task, * Shade all pixels in a 16x16 block. */ static void -block_full_16( struct lp_rasterizer_task *rast_task, - const struct lp_rast_triangle *tri, - int x, int y ) +block_full_16(struct lp_rasterizer_task *task, + const struct lp_rast_triangle *tri, + int x, int y) { unsigned ix, iy; assert(x % 16 == 0); assert(y % 16 == 0); for (iy = 0; iy < 16; iy += 4) for (ix = 0; ix < 16; ix += 4) - block_full_4(rast_task, tri, x + ix, y + iy); + block_full_4(task, tri, x + ix, y + iy); } @@ -123,18 +120,15 @@ block_full_16( struct lp_rasterizer_task *rast_task, * will be done as part of the fragment shader. 
*/ static void -do_block_4( struct lp_rasterizer_task *rast_task, - const struct lp_rast_triangle *tri, - int x, int y, - int c1, - int c2, - int c3 ) +do_block_4(struct lp_rasterizer_task *task, + const struct lp_rast_triangle *tri, + int x, int y, + int c1, int c2, int c3) { - lp_rast_shade_quads(rast_task->rast, - rast_task->thread_index, - &tri->inputs, - x, y, - -c1, -c2, -c3); + assert(x >= 0); + assert(y >= 0); + + lp_rast_shade_quads(task, &tri->inputs, x, y, -c1, -c2, -c3); } @@ -143,18 +137,18 @@ do_block_4( struct lp_rasterizer_task *rast_task, * of the triangle's bounds. */ static void -do_block_16( struct lp_rasterizer_task *rast_task, - const struct lp_rast_triangle *tri, - int x, int y, - int c0, - int c1, - int c2 ) +do_block_16(struct lp_rasterizer_task *task, + const struct lp_rast_triangle *tri, + int x, int y, + int c0, int c1, int c2) { unsigned mask = 0; int eo[3]; int c[3]; int i, j; + assert(x >= 0); + assert(y >= 0); assert(x % 16 == 0); assert(y % 16 == 0); @@ -193,7 +187,7 @@ do_block_16( struct lp_rasterizer_task *rast_task, * the triangle. It's a little faster to do it in the jit code. */ LP_COUNT(nr_non_empty_4); - do_block_4(rast_task, tri, px, py, cx1, cx2, cx3); + do_block_4(task, tri, px, py, cx1, cx2, cx3); } } @@ -203,15 +197,11 @@ do_block_16( struct lp_rasterizer_task *rast_task, * for this triangle. 
*/ void -lp_rast_triangle( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +lp_rast_triangle(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { - struct lp_rasterizer_task *rast_task = &rast->tasks[thread_index]; const struct lp_rast_triangle *tri = arg.triangle; - - int x = rast_task->x; - int y = rast_task->y; + const int x = task->x, y = task->y; int ei[3], eo[3], c[3]; unsigned outmask, inmask, partial_mask; unsigned i, j; @@ -272,7 +262,7 @@ lp_rast_triangle( struct lp_rasterizer *rast, partial_mask &= ~(1 << i); LP_COUNT(nr_partially_covered_16); - do_block_16(rast_task, tri, px, py, cx1, cx2, cx3); + do_block_16(task, tri, px, py, cx1, cx2, cx3); } /* Iterate over fulls: @@ -285,6 +275,6 @@ lp_rast_triangle( struct lp_rasterizer *rast, inmask &= ~(1 << i); LP_COUNT(nr_fully_covered_16); - block_full_16(rast_task, tri, px, py); + block_full_16(task, tri, px, py); } } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index b7116297ece..cba0e212985 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -100,6 +100,9 @@ lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y) struct cmd_block *block; struct cmd_block *tmp; + assert(x < TILES_X); + assert(y < TILES_Y); + for (block = list->head; block != list->tail; block = tmp) { tmp = block->next; FREE(block); diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index fb478cc2eb5..8d725cd4375 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -56,8 +56,7 @@ /* switch to a non-pointer value for this: */ -typedef void (*lp_rast_cmd)( struct lp_rasterizer *, - unsigned thread_index, +typedef void (*lp_rast_cmd)( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); struct cmd_block { diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
b/src/gallium/drivers/llvmpipe/lp_screen.c index 1cd3ea9a840..f84ede675b3 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -83,7 +83,7 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return PIPE_MAX_SAMPLERS; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return PIPE_MAX_VERTEX_SAMPLERS; + return 0; case PIPE_CAP_MAX_COMBINED_SAMPLERS: return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS; case PIPE_CAP_NPOT_TEXTURES: @@ -194,9 +194,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, format_desc->block.height != 1) return FALSE; - if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR && - format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH && - format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) + if(format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) return FALSE; if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && @@ -224,15 +222,16 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, format_desc->block.height != 1) return FALSE; - if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR && - format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH && - format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) + if(format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) return FALSE; if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && - format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB && format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) return FALSE; + + /* not supported yet */ + if (format == PIPE_FORMAT_Z16_UNORM) + return FALSE; } return TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index cb873667a20..3aec9de3732 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -64,6 +64,8 @@ lp_setup_get_current_scene(struct setup_context *setup) */ setup->scene = lp_scene_dequeue(setup->empty_scenes, TRUE); + assert(lp_scene_is_empty(setup->scene)); + 
if(0)lp_scene_reset( setup->scene ); /* XXX temporary? */ lp_scene_set_framebuffer_size(setup->scene, diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 022bf92cb46..7f456355428 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -124,12 +124,6 @@ llvmpipe_texture_create(struct pipe_screen *_screen, pipe_reference_init(&lpt->base.reference, 1); lpt->base.screen = &screen->base; - /* XXX: The xlib state tracker is brain-dead and will request - * PIPE_FORMAT_Z16_UNORM no matter how much we tell it we don't support it. - */ - if (lpt->base.format == PIPE_FORMAT_Z16_UNORM) - lpt->base.format = PIPE_FORMAT_Z32_UNORM; - if (lpt->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | PIPE_TEXTURE_USAGE_PRIMARY)) { if (!llvmpipe_displaytarget_layout(screen, lpt)) diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c new file mode 100644 index 00000000000..c1980b316d5 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c @@ -0,0 +1,126 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "lp_tile_soa.h" +#include "lp_tile_image.h" + + +#define BYTES_PER_TILE (TILE_SIZE * TILE_SIZE * 4) + + +/** + * Convert a tiled image into a linear image. + * \param src_stride source row stride in bytes (bytes per row of tiles) + * \param dst_stride dest row stride in bytes + */ +void +lp_tiled_to_linear(const uint8_t *src, + uint8_t *dst, + unsigned width, unsigned height, + enum pipe_format format, + unsigned src_stride, + unsigned dst_stride) +{ + const unsigned tiles_per_row = src_stride / BYTES_PER_TILE; + unsigned i, j; + + for (j = 0; j < height; j += TILE_SIZE) { + for (i = 0; i < width; i += TILE_SIZE) { + unsigned tile_offset = + ((j / TILE_SIZE) * tiles_per_row + i / TILE_SIZE); + unsigned byte_offset = tile_offset * BYTES_PER_TILE; + const uint8_t *src_tile = src + byte_offset; + + lp_tile_write_4ub(format, + src_tile, + dst, + dst_stride, + i, j, TILE_SIZE, TILE_SIZE); + } + } +} + + +/** + * Convert a linear image into a tiled image. 
+ * \param src_stride source row stride in bytes + * \param dst_stride dest row stride in bytes (bytes per row of tiles) + */ +void +lp_linear_to_tiled(const uint8_t *src, + uint8_t *dst, + unsigned width, unsigned height, + enum pipe_format format, + unsigned src_stride, + unsigned dst_stride) +{ + const unsigned tiles_per_row = dst_stride / BYTES_PER_TILE; + unsigned i, j; + + for (j = 0; j < height; j += TILE_SIZE) { + for (i = 0; i < width; i += TILE_SIZE) { + unsigned tile_offset = + ((j / TILE_SIZE) * tiles_per_row + i / TILE_SIZE); + unsigned byte_offset = tile_offset * BYTES_PER_TILE; + uint8_t *dst_tile = dst + byte_offset; + + lp_tile_read_4ub(format, + dst_tile, + src, + src_stride, + i, j, TILE_SIZE, TILE_SIZE); + } + } +} + + +/** + * For testing only. + */ +void +test_tiled_linear_conversion(uint8_t *data, + enum pipe_format format, + unsigned width, unsigned height, + unsigned stride) +{ + /* size in tiles */ + unsigned wt = (width + TILE_SIZE - 1) / TILE_SIZE; + unsigned ht = (height + TILE_SIZE - 1) / TILE_SIZE; + + uint8_t *tiled = malloc(wt * ht * TILE_SIZE * TILE_SIZE * 4); + + unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4; + + lp_linear_to_tiled(data, tiled, width, height, format, + stride, tiled_stride); + + lp_tiled_to_linear(tiled, data, width, height, format, + tiled_stride, stride); + + free(tiled); +} + diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/drivers/llvmpipe/lp_tile_image.h index e0724a1a8be..60d472e8c5b 100644 --- a/src/gallium/auxiliary/util/u_format.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.h @@ -1,8 +1,7 @@ /************************************************************************** - * - * Copyright 2009 Vmware, Inc. - * All Rights Reserved. - * + * + * Copyright 2010 VMware, Inc. All Rights Reserved. 
+ * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,36 +9,49 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * + * **************************************************************************/ -#include "u_format.h" +#ifndef LP_TILE_IMAGE_H +#define LP_TILE_IMAGE_H -const struct util_format_description * -util_format_description(enum pipe_format format) -{ - const struct util_format_description *desc; +void +lp_tiled_to_linear(const uint8_t *src, + uint8_t *dst, + unsigned width, unsigned height, + enum pipe_format format, + unsigned src_stride, + unsigned dst_stride); - if (format >= PIPE_FORMAT_COUNT) { - return NULL; - } - desc = &util_format_description_table[format]; - assert(desc->format == format); +void +lp_linear_to_tiled(const uint8_t *src, + uint8_t *dst, + unsigned width, unsigned height, + enum pipe_format format, + unsigned src_stride, + unsigned dst_stride); - return desc; -} + +void +test_tiled_linear_conversion(uint8_t *data, + enum pipe_format format, + unsigned width, unsigned height, + unsigned stride); + + +#endif /* LP_TILE_IMAGE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index 5d53689a3db..00b8d4fc382 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -45,10 +45,10 @@ sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '../../auxiliary/u from u_format_access import * -def generate_format_read(format, dst_type, dst_native_type, dst_suffix): +def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): '''Generate the function to read pixels from a particular format''' - name = short_name(format) + name = format.short_name() src_native_type = native_type(format) @@ -64,11 +64,11 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix): names = ['']*4 if format.colorspace == 'rgb': for i in range(4): - swizzle = format.out_swizzle[i] + swizzle = format.swizzles[i] if swizzle < 4: names[swizzle] += 'rgba'[i] elif format.colorspace == 'zs': - swizzle = format.out_swizzle[0] + swizzle = 
format.swizzles[0] if swizzle < 4: names[swizzle] = 'z' else: @@ -76,48 +76,49 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix): else: assert False - if format.layout == ARITH: - print ' %s pixel = *src_pixel++;' % src_native_type - shift = 0; - for i in range(4): - src_type = format.in_types[i] - width = src_type.size - if names[i]: - value = 'pixel' - mask = (1 << width) - 1 - if shift: - value = '(%s >> %u)' % (value, shift) - if shift + width < format.block_size(): - value = '(%s & 0x%x)' % (value, mask) - value = conversion_expr(src_type, dst_type, dst_native_type, value) - print ' %s %s = %s;' % (dst_native_type, names[i], value) - shift += width - elif format.layout == ARRAY: - for i in range(4): - src_type = format.in_types[i] - if names[i]: - value = '(*src_pixel++)' - value = conversion_expr(src_type, dst_type, dst_native_type, value) - print ' %s %s = %s;' % (dst_native_type, names[i], value) + if format.layout == PLAIN: + if not format.is_array(): + print ' %s pixel = *src_pixel++;' % src_native_type + shift = 0; + for i in range(4): + src_channel = format.channels[i] + width = src_channel.size + if names[i]: + value = 'pixel' + mask = (1 << width) - 1 + if shift: + value = '(%s >> %u)' % (value, shift) + if shift + width < format.block_size(): + value = '(%s & 0x%x)' % (value, mask) + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) + print ' %s %s = %s;' % (dst_native_type, names[i], value) + shift += width + else: + for i in range(4): + src_channel = format.channels[i] + if names[i]: + value = '(*src_pixel++)' + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) + print ' %s %s = %s;' % (dst_native_type, names[i], value) else: assert False for i in range(4): if format.colorspace == 'rgb': - swizzle = format.out_swizzle[i] + swizzle = format.swizzles[i] if swizzle < 4: value = names[swizzle] elif swizzle == SWIZZLE_0: value = '0' elif swizzle == 
SWIZZLE_1: - value = '1' + value = get_one(dst_channel) else: assert False elif format.colorspace == 'zs': if i < 3: value = 'z' else: - value = '1' + value = get_one(dst_channel) else: assert False print ' TILE_PIXEL(dst, x, y, %u) = %s; /* %s */' % (i, value, 'rgba'[i]) @@ -129,31 +130,16 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix): print -def compute_inverse_swizzle(format): - '''Return an array[4] of inverse swizzle terms''' - inv_swizzle = [None]*4 - if format.colorspace == 'rgb': - for i in range(4): - swizzle = format.out_swizzle[i] - if swizzle < 4: - inv_swizzle[swizzle] = i - elif format.colorspace == 'zs': - swizzle = format.out_swizzle[0] - if swizzle < 4: - inv_swizzle[swizzle] = 0 - return inv_swizzle - - -def pack_rgba(format, src_type, r, g, b, a): +def pack_rgba(format, src_channel, r, g, b, a): """Return an expression for packing r, g, b, a into a pixel of the given format. Ex: '(b << 24) | (g << 16) | (r << 8) | (a << 0)' """ assert format.colorspace == 'rgb' - inv_swizzle = compute_inverse_swizzle(format) + inv_swizzle = format.inv_swizzles() shift = 0 expr = None for i in range(4): - # choose r, g, b, or a depending on the inverse swizzle term + # choose r, g, b, or a depending on the inverse swizzle term if inv_swizzle[i] == 0: value = r elif inv_swizzle[i] == 1: @@ -166,25 +152,25 @@ def pack_rgba(format, src_type, r, g, b, a): value = None if value: - dst_type = format.in_types[i] + dst_channel = format.channels[i] dst_native_type = native_type(format) - value = conversion_expr(src_type, dst_type, dst_native_type, value) + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) term = "((%s) << %d)" % (value, shift) if expr: expr = expr + " | " + term else: expr = term - width = format.in_types[i].size + width = format.channels[i].size shift = shift + width return expr -def emit_unrolled_write_code(format, src_type): +def emit_unrolled_write_code(format, src_channel): '''Emit code 
for writing a block based on unrolled loops. This is considerably faster than the TILE_PIXEL-based code below. ''' - dst_native_type = native_type(format) + dst_native_type = 'uint%u_t' % format.block_size() print ' const unsigned dstpix_stride = dst_stride / %d;' % format.stride() print ' %s *dstpix = (%s *) dst;' % (dst_native_type, dst_native_type) print ' unsigned int qx, qy, i;' @@ -199,8 +185,8 @@ def emit_unrolled_write_code(format, src_type): print ' const uint8_t *a = src + 3 * TILE_C_STRIDE;' print ' (void) r; (void) g; (void) b; (void) a; /* silence warnings */' print ' for (i = 0; i < TILE_C_STRIDE; i += 2) {' - print ' const uint32_t pixel0 = %s;' % pack_rgba(format, src_type, "r[i+0]", "g[i+0]", "b[i+0]", "a[i+0]") - print ' const uint32_t pixel1 = %s;' % pack_rgba(format, src_type, "r[i+1]", "g[i+1]", "b[i+1]", "a[i+1]") + print ' const uint32_t pixel0 = %s;' % pack_rgba(format, src_channel, "r[i+0]", "g[i+0]", "b[i+0]", "a[i+0]") + print ' const uint32_t pixel1 = %s;' % pack_rgba(format, src_channel, "r[i+1]", "g[i+1]", "b[i+1]", "a[i+1]") print ' const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]);' print ' dstpix[offset + 0] = pixel0;' print ' dstpix[offset + 1] = pixel1;' @@ -210,11 +196,11 @@ def emit_unrolled_write_code(format, src_type): print ' }' -def emit_tile_pixel_write_code(format, src_type): +def emit_tile_pixel_write_code(format, src_channel): '''Emit code for writing a block based on the TILE_PIXEL macro.''' dst_native_type = native_type(format) - inv_swizzle = compute_inverse_swizzle(format) + inv_swizzle = format.inv_swizzles() print ' unsigned x, y;' print ' uint8_t *dst_row = dst + y0*dst_stride;' @@ -222,27 +208,28 @@ def emit_tile_pixel_write_code(format, src_type): print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride()) print ' for (x = 0; x < w; ++x) {' - if format.layout == ARITH: - print ' %s pixel = 0;' % dst_native_type - shift = 0; - for 
i in range(4): - dst_type = format.in_types[i] - width = dst_type.size - if inv_swizzle[i] is not None: - value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] - value = conversion_expr(src_type, dst_type, dst_native_type, value) - if shift: - value = '(%s << %u)' % (value, shift) - print ' pixel |= %s;' % value - shift += width - print ' *dst_pixel++ = pixel;' - elif format.layout == ARRAY: - for i in range(4): - dst_type = format.in_types[i] - if inv_swizzle[i] is not None: - value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] - value = conversion_expr(src_type, dst_type, dst_native_type, value) - print ' *dst_pixel++ = %s;' % value + if format.layout == PLAIN: + if not format.is_array(): + print ' %s pixel = 0;' % dst_native_type + shift = 0; + for i in range(4): + dst_channel = format.channels[i] + width = dst_channel.size + if inv_swizzle[i] is not None: + value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) + if shift: + value = '(%s << %u)' % (value, shift) + print ' pixel |= %s;' % value + shift += width + print ' *dst_pixel++ = pixel;' + else: + for i in range(4): + dst_channel = format.channels[i] + if inv_swizzle[i] is not None: + value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) + print ' *dst_pixel++ = %s;' % value else: assert False @@ -251,28 +238,33 @@ def emit_tile_pixel_write_code(format, src_type): print ' }' -def generate_format_write(format, src_type, src_native_type, src_suffix): +def generate_format_write(format, src_channel, src_native_type, src_suffix): '''Generate the function to write pixels to a particular format''' - name = short_name(format) + name = format.short_name() print 'static void' print 'lp_tile_%s_write_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type) print 
'{' - if format.layout == ARITH and format.colorspace == 'rgb': - emit_unrolled_write_code(format, src_type) + if format.layout == PLAIN \ + and format.colorspace == 'rgb' \ + and format.block_size() <= 32 \ + and format.is_pot() \ + and not format.is_mixed() \ + and format.channels[0].type == UNSIGNED: + emit_unrolled_write_code(format, src_channel) else: - emit_tile_pixel_write_code(format, src_type) + emit_tile_pixel_write_code(format, src_channel) print '}' print -def generate_read(formats, dst_type, dst_native_type, dst_suffix): +def generate_read(formats, dst_channel, dst_native_type, dst_suffix): '''Generate the dispatch function to read pixels from any format''' for format in formats: if is_format_supported(format): - generate_format_read(format, dst_type, dst_native_type, dst_suffix) + generate_format_read(format, dst_channel, dst_native_type, dst_suffix) print 'void' print 'lp_tile_read_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type) @@ -282,7 +274,7 @@ def generate_read(formats, dst_type, dst_native_type, dst_suffix): for format in formats: if is_format_supported(format): print ' case %s:' % format.name - print ' func = &lp_tile_%s_read_%s;' % (short_name(format), dst_suffix) + print ' func = &lp_tile_%s_read_%s;' % (format.short_name(), dst_suffix) print ' break;' print ' default:' print ' debug_printf("unsupported format\\n");' @@ -293,12 +285,12 @@ def generate_read(formats, dst_type, dst_native_type, dst_suffix): print -def generate_write(formats, src_type, src_native_type, src_suffix): +def generate_write(formats, src_channel, src_native_type, src_suffix): '''Generate the dispatch function to write pixels to any format''' for format in formats: if is_format_supported(format): - generate_format_write(format, src_type, src_native_type, src_suffix) + generate_format_write(format, src_channel, src_native_type, src_suffix) print 'void' print 
'lp_tile_write_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type) @@ -309,7 +301,7 @@ def generate_write(formats, src_type, src_native_type, src_suffix): for format in formats: if is_format_supported(format): print ' case %s:' % format.name - print ' func = &lp_tile_%s_write_%s;' % (short_name(format), src_suffix) + print ' func = &lp_tile_%s_write_%s;' % (format.short_name(), src_suffix) print ' break;' print ' default:' print ' debug_printf("unsupported format\\n");' @@ -359,12 +351,12 @@ def main(): generate_clamp() - type = Type(UNSIGNED, True, 8) + channel = Channel(UNSIGNED, True, 8) native_type = 'uint8_t' suffix = '4ub' - generate_read(formats, type, native_type, suffix) - generate_write(formats, type, native_type, suffix) + generate_read(formats, channel, native_type, suffix) + generate_write(formats, channel, native_type, suffix) if __name__ == '__main__': diff --git a/src/gallium/drivers/nouveau/nouveau_context.c b/src/gallium/drivers/nouveau/nouveau_context.c index 23443869e68..15174983e7f 100644 --- a/src/gallium/drivers/nouveau/nouveau_context.c +++ b/src/gallium/drivers/nouveau/nouveau_context.c @@ -1,5 +1,5 @@ -#include <pipe/p_defines.h> -#include <pipe/p_context.h> +#include "pipe/p_defines.h" +#include "pipe/p_context.h" #include "nouveau/nouveau_screen.h" #include "nouveau/nouveau_context.h" diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index 81bc296ab44..3c2f771b51e 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -1,9 +1,9 @@ -#include <pipe/p_defines.h> -#include <pipe/p_screen.h> -#include <pipe/p_state.h> +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" +#include "pipe/p_state.h" -#include <util/u_memory.h> -#include <util/u_inlines.h> +#include "util/u_memory.h" +#include "util/u_inlines.h" #include <stdio.h> 
#include <errno.h> diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c index 554bcbbdd0e..3aeda51ea19 100644 --- a/src/gallium/drivers/nv30/nv30_transfer.c +++ b/src/gallium/drivers/nv30/nv30_transfer.c @@ -1,10 +1,10 @@ -#include <pipe/p_state.h> -#include <pipe/p_defines.h> -#include <util/u_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include <util/u_math.h> -#include <nouveau/nouveau_winsys.h> +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "nouveau/nouveau_winsys.h" #include "nv30_context.h" #include "nv30_screen.h" #include "nv30_state.h" diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c index ee266c6cfb1..0462a042c38 100644 --- a/src/gallium/drivers/nv40/nv40_transfer.c +++ b/src/gallium/drivers/nv40/nv40_transfer.c @@ -1,10 +1,10 @@ -#include <pipe/p_state.h> -#include <pipe/p_defines.h> -#include <util/u_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include <util/u_math.h> -#include <nouveau/nouveau_winsys.h> +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "nouveau/nouveau_winsys.h" #include "nv40_context.h" #include "nv40_screen.h" #include "nv40_state.h" diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index afddcb161fa..1f69daec819 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -32,7 +32,5 @@ EXTRA_OBJECTS = \ include ../../Makefile.template -.PHONY : $(COMPILER_ARCHIVE) - $(COMPILER_ARCHIVE): $(MAKE) -C $(TOP)/src/mesa/drivers/dri/r300/compiler diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index ec7414dc369..8acb1098b96 100644 --- 
a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -33,7 +33,7 @@ static void r300_blitter_save_states(struct r300_context* r300) util_blitter_save_stencil_ref(r300->blitter, &(r300->stencil_ref)); util_blitter_save_rasterizer(r300->blitter, r300->rs_state.state); util_blitter_save_fragment_shader(r300->blitter, r300->fs); - util_blitter_save_vertex_shader(r300->blitter, r300->vs); + util_blitter_save_vertex_shader(r300->blitter, r300->vs_state.state); util_blitter_save_viewport(r300->blitter, &r300->viewport); util_blitter_save_clip(r300->blitter, &r300->clip); } @@ -100,6 +100,8 @@ static void r300_hw_copy(struct pipe_context* pipe, unsigned width, unsigned height) { struct r300_context* r300 = r300_context(pipe); + struct r300_textures_state* state = + (struct r300_textures_state*)r300->textures_state.state; /* Yeah we have to save all those states to ensure this blitter operation * is really transparent. The states will be restored by the blitter once @@ -108,11 +110,11 @@ static void r300_hw_copy(struct pipe_context* pipe, util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); util_blitter_save_fragment_sampler_states( - r300->blitter, r300->sampler_count, (void**)r300->sampler_states); + r300->blitter, state->sampler_count, (void**)state->sampler_states); util_blitter_save_fragment_sampler_textures( - r300->blitter, r300->texture_count, - (struct pipe_texture**)r300->textures); + r300->blitter, state->texture_count, + (struct pipe_texture**)state->textures); /* Do a copy */ util_blitter_copy(r300->blitter, diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index f631b4ed27f..86b98a4ba52 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -59,7 +59,9 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300->fb_state.state); FREE(r300->rs_block_state.state); FREE(r300->scissor_state.state); - 
FREE(r300->vertex_format_state.state); + FREE(r300->textures_state.state); + FREE(r300->vertex_stream_state.state); + FREE(r300->vap_output_state.state); FREE(r300->viewport_state.state); FREE(r300->ztop_state.state); FREE(r300); @@ -96,39 +98,55 @@ static void r300_flush_cb(void *data) } #define R300_INIT_ATOM(atomname, atomsize) \ - r300->atomname##_state.name = #atomname; \ - r300->atomname##_state.state = NULL; \ - r300->atomname##_state.size = atomsize; \ - r300->atomname##_state.emit = r300_emit_##atomname##_state; \ - r300->atomname##_state.dirty = FALSE; \ - insert_at_tail(&r300->atom_list, &r300->atomname##_state); + r300->atomname.name = #atomname; \ + r300->atomname.state = NULL; \ + r300->atomname.size = atomsize; \ + r300->atomname.emit = r300_emit_##atomname; \ + r300->atomname.dirty = FALSE; \ + insert_at_tail(&r300->atom_list, &r300->atomname); static void r300_setup_atoms(struct r300_context* r300) { + boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500; + boolean has_tcl = r300_screen(r300->context.screen)->caps->has_tcl; + /* Create the actual atom list. * * Each atom is examined and emitted in the order it appears here, which * can affect performance and conformance if not handled with care. * - * Some atoms never change size, others change every emit. This is just - * an upper bound on each atom, to keep the emission machinery from - * underallocating space. */ + * Some atoms never change size, others change every emit - those have + * the size of 0 here. 
*/ make_empty_list(&r300->atom_list); - R300_INIT_ATOM(invariant, 71); - R300_INIT_ATOM(ztop, 2); - R300_INIT_ATOM(blend, 8); - R300_INIT_ATOM(blend_color, 3); - R300_INIT_ATOM(clip, 29); - R300_INIT_ATOM(dsa, 8); - R300_INIT_ATOM(fb, 56); - R300_INIT_ATOM(rs, 25); - R300_INIT_ATOM(scissor, 3); - R300_INIT_ATOM(viewport, 9); - R300_INIT_ATOM(rs_block, 21); - R300_INIT_ATOM(vertex_format, 26); + R300_INIT_ATOM(invariant_state, 71); + R300_INIT_ATOM(ztop_state, 2); + R300_INIT_ATOM(blend_state, 8); + R300_INIT_ATOM(blend_color_state, is_r500 ? 3 : 2); + R300_INIT_ATOM(clip_state, has_tcl ? 5 + (6 * 4) : 2); + R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6); + R300_INIT_ATOM(fb_state, 0); + R300_INIT_ATOM(rs_state, 0); + R300_INIT_ATOM(scissor_state, 3); + R300_INIT_ATOM(viewport_state, 9); + R300_INIT_ATOM(rs_block_state, 0); + R300_INIT_ATOM(vertex_stream_state, 0); + R300_INIT_ATOM(vap_output_state, 6); + R300_INIT_ATOM(pvs_flush, 2); + R300_INIT_ATOM(vs_state, 0); + R300_INIT_ATOM(texture_cache_inval, 2); + R300_INIT_ATOM(textures_state, 0); /* Some non-CSO atoms need explicit space to store the state locally. 
*/ + r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); + r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state); + r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); + r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); + r300->textures_state.state = CALLOC_STRUCT(r300_textures_state); + r300->vertex_stream_state.state = CALLOC_STRUCT(r300_vertex_stream_state); + r300->vap_output_state.state = CALLOC_STRUCT(r300_vap_output_state); + r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); + r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); } struct pipe_context* r300_create_context(struct pipe_screen* screen, @@ -178,14 +196,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_setup_atoms(r300); - r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); - r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); - r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); - r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); - r300->vertex_format_state.state = CALLOC_STRUCT(r300_vertex_info); - r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); - r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); - /* Open up the OQ BO. */ r300->oqbo = screen->buffer_create(screen, 4096, PIPE_BUFFER_USAGE_VERTEX, 4096); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 97100c08ccb..0d1518a05bc 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -45,7 +45,7 @@ struct r300_atom { /* Opaque state. */ void* state; /* Emit the state to the context. */ - void (*emit)(struct r300_context*, void*); + void (*emit)(struct r300_context*, unsigned, void*); /* Upper bound on number of dwords to emit. */ unsigned size; /* Whether this atom should be emitted. 
*/ @@ -86,7 +86,6 @@ struct r300_rs_state { uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ uint32_t antialiasing_config; /* R300_GB_AA_CONFIG: 0x4020 */ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ - uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */ float depth_scale; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */ /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */ @@ -119,7 +118,7 @@ struct r300_sampler_state { unsigned min_lod, max_lod; }; -struct r300_texture_state { +struct r300_texture_format_state { uint32_t format0; /* R300_TX_FORMAT0: 0x4480 */ uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */ uint32_t format2; /* R300_TX_FORMAT2: 0x4500 */ @@ -135,6 +134,40 @@ struct r300_texture_fb_state { uint32_t zb_format; /* R300_ZB_FORMAT */ }; +struct r300_textures_state { + /* Textures. */ + struct r300_texture *textures[8]; + int texture_count; + /* Sampler states. */ + struct r300_sampler_state *sampler_states[8]; + int sampler_count; + + /* These is the merge of the texture and sampler states. 
*/ + unsigned count; + uint32_t tx_enable; /* R300_TX_ENABLE: 0x4101 */ + struct r300_texture_sampler_state { + uint32_t format[3]; /* R300_TX_FORMAT[0-2] */ + uint32_t filter[2]; /* R300_TX_FILTER[0-1] */ + uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */ + uint32_t tile_config; /* R300_TX_OFFSET (subset thereof) */ + } regs[8]; +}; + +struct r300_vertex_stream_state { + /* R300_VAP_PROG_STREAK_CNTL_[0-7] */ + uint32_t vap_prog_stream_cntl[8]; + /* R300_VAP_PROG_STREAK_CNTL_EXT_[0-7] */ + uint32_t vap_prog_stream_cntl_ext[8]; + + unsigned count; +}; + +struct r300_vap_output_state { + uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */ + uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */ + uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */ +}; + struct r300_viewport_state { float xscale; /* R300_VAP_VPORT_XSCALE: 0x2098 */ float xoffset; /* R300_VAP_VPORT_XOFFSET: 0x209c */ @@ -151,11 +184,6 @@ struct r300_ztop_state { #define R300_NEW_FRAGMENT_SHADER 0x00000020 #define R300_NEW_FRAGMENT_SHADER_CONSTANTS 0x00000040 -#define R300_NEW_SAMPLER 0x00000200 -#define R300_ANY_NEW_SAMPLERS 0x0001fe00 -#define R300_NEW_TEXTURE 0x00040000 -#define R300_ANY_NEW_TEXTURES 0x03fc0000 -#define R300_NEW_VERTEX_SHADER 0x08000000 #define R300_NEW_VERTEX_SHADER_CONSTANTS 0x10000000 #define R300_NEW_QUERY 0x40000000 #define R300_NEW_KITCHEN_SINK 0x7fffffff @@ -241,23 +269,13 @@ struct r300_texture { struct pipe_buffer* buffer; /* Registers carrying texture format data. 
*/ - struct r300_texture_state state; + struct r300_texture_format_state state; struct r300_texture_fb_state fb_state; /* Buffer tiling */ enum r300_buffer_tiling microtile, macrotile; }; -struct r300_vertex_info { - /* Parent class */ - struct vertex_info vinfo; - - /* R300_VAP_PROG_STREAK_CNTL_[0-7] */ - uint32_t vap_prog_stream_cntl[8]; - /* R300_VAP_PROG_STREAK_CNTL_EXT_[0-7] */ - uint32_t vap_prog_stream_cntl_ext[8]; -}; - extern struct pipe_viewport_state r300_viewport_identity; struct r300_context { @@ -282,9 +300,6 @@ struct r300_context { struct r300_query *query_current; struct r300_query query_list; - /* Vertex formatting information. */ - struct r300_atom vertex_format_state; - /* Various CSO state objects. */ /* Beginning of atom list. */ struct r300_atom atom_list; @@ -306,20 +321,24 @@ struct r300_context { struct r300_atom rs_state; /* RS block state. */ struct r300_atom rs_block_state; - /* Sampler states. */ - struct r300_sampler_state* sampler_states[8]; - int sampler_count; /* Scissor state. */ struct r300_atom scissor_state; - /* Texture states. */ - struct r300_texture* textures[8]; - int texture_count; + /* Textures state. */ + struct r300_atom textures_state; + /* Vertex stream formatting state. */ + struct r300_atom vertex_stream_state; + /* VAP (vertex shader) output mapping state. */ + struct r300_atom vap_output_state; /* Vertex shader. */ - struct r300_vertex_shader* vs; + struct r300_atom vs_state; /* Viewport state. */ struct r300_atom viewport_state; /* ZTOP state. */ struct r300_atom ztop_state; + /* PVS flush. */ + struct r300_atom pvs_flush; + /* Texture cache invalidate. */ + struct r300_atom texture_cache_inval; /* Invariant state. This must be emitted to get the engine started. */ struct r300_atom invariant_state; @@ -327,10 +346,14 @@ struct r300_context { /* Vertex buffers for Gallium. 
*/ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; int vertex_buffer_count; + int vertex_buffer_max_index; /* Vertex elements for Gallium. */ struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; int vertex_element_count; + /* Vertex info for Draw. */ + struct vertex_info vertex_info; + struct pipe_stencil_ref stencil_ref; struct pipe_clip_state clip; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 37ebe6c49df..addb28bded3 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -34,14 +34,15 @@ #include "r300_screen.h" #include "r300_vs.h" -void r300_emit_blend_state(struct r300_context* r300, void* state) +void r300_emit_blend_state(struct r300_context* r300, + unsigned size, void* state) { struct r300_blend_state* blend = (struct r300_blend_state*)state; struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; CS_LOCALS(r300); - BEGIN_CS(8); + BEGIN_CS(size); OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); if (fb->nr_cbufs) { @@ -58,26 +59,28 @@ void r300_emit_blend_state(struct r300_context* r300, void* state) END_CS; } -void r300_emit_blend_color_state(struct r300_context* r300, void* state) +void r300_emit_blend_color_state(struct r300_context* r300, + unsigned size, void* state) { struct r300_blend_color_state* bc = (struct r300_blend_color_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); if (r300screen->caps->is_r500) { - BEGIN_CS(3); + BEGIN_CS(size); OUT_CS_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2); OUT_CS(bc->blend_color_red_alpha); OUT_CS(bc->blend_color_green_blue); END_CS; } else { - BEGIN_CS(2); + BEGIN_CS(size); OUT_CS_REG(R300_RB3D_BLEND_COLOR, bc->blend_color); END_CS; } } -void r300_emit_clip_state(struct r300_context* r300, void* state) +void r300_emit_clip_state(struct r300_context* r300, + unsigned size, void* state) { struct 
pipe_clip_state* clip = (struct pipe_clip_state*)state; int i; @@ -85,7 +88,7 @@ void r300_emit_clip_state(struct r300_context* r300, void* state) CS_LOCALS(r300); if (r300screen->caps->has_tcl) { - BEGIN_CS(5 + (6 * 4)); + BEGIN_CS(size); OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, (r300screen->caps->is_r500 ? R500_PVS_UCP_START : R300_PVS_UCP_START)); @@ -100,14 +103,14 @@ void r300_emit_clip_state(struct r300_context* r300, void* state) R300_PS_UCP_MODE_CLIP_AS_TRIFAN); END_CS; } else { - BEGIN_CS(2); + BEGIN_CS(size); OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); END_CS; } } -void r300_emit_dsa_state(struct r300_context* r300, void* state) +void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state) { struct r300_dsa_state* dsa = (struct r300_dsa_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); @@ -116,7 +119,7 @@ void r300_emit_dsa_state(struct r300_context* r300, void* state) struct pipe_stencil_ref stencil_ref = r300->stencil_ref; CS_LOCALS(r300); - BEGIN_CS(r300screen->caps->is_r500 ? 8 : 6); + BEGIN_CS(size); OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); OUT_CS_REG_SEQ(R300_ZB_CNTL, 3); @@ -143,6 +146,8 @@ static const float * get_shader_constant( { struct r300_viewport_state* viewport = (struct r300_viewport_state*)r300->viewport_state.state; + struct r300_textures_state* texstate = + (struct r300_textures_state*)r300->textures_state.state; static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; struct pipe_texture *tex; @@ -158,7 +163,7 @@ static const float * get_shader_constant( /* Factor for converting rectangle coords to * normalized coords. Should only show up on non-r500. 
*/ case RC_STATE_R300_TEXRECT_FACTOR: - tex = &r300->textures[constant->u.State[1]]->tex; + tex = &texstate->textures[constant->u.State[1]]->tex; vec[0] = 1.0 / tex->width0; vec[1] = 1.0 / tex->height0; break; @@ -370,7 +375,7 @@ void r500_emit_fs_constant_buffer(struct r300_context* r300, END_CS; } -void r300_emit_fb_state(struct r300_context* r300, void* state) +void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) { struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); @@ -379,7 +384,7 @@ void r300_emit_fb_state(struct r300_context* r300, void* state) int i; CS_LOCALS(r300); - BEGIN_CS((10 * fb->nr_cbufs) + (fb->zsbuf ? 10 : 0) + 6); + BEGIN_CS(size); /* Flush and free renderbuffer caches. */ OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, @@ -418,6 +423,9 @@ void r300_emit_fb_state(struct r300_context* r300, void* state) OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), tex->fb_state.us_out_fmt); } + for (; i < 4; i++) { + OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), R300_US_OUT_FMT_UNUSED); + } /* Set up a zbuffer. */ if (fb->zsbuf) { @@ -435,6 +443,8 @@ void r300_emit_fb_state(struct r300_context* r300, void* state) 0, RADEON_GEM_DOMAIN_VRAM, 0); } + OUT_CS_REG(R300_GA_POINT_MINMAX, + (MAX2(fb->width, fb->height) * 6) << R300_GA_POINT_MINMAX_MAX_SHIFT); END_CS; } @@ -564,21 +574,19 @@ void r300_emit_query_end(struct r300_context* r300) r300_emit_query_finish(r300, query); } -void r300_emit_rs_state(struct r300_context* r300, void* state) +void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state) { struct r300_rs_state* rs = (struct r300_rs_state*)state; float scale, offset; CS_LOCALS(r300); - BEGIN_CS(18 + (rs->polygon_offset_enable ? 
5 : 0)); + BEGIN_CS(size); OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status); OUT_CS_REG(R300_GB_AA_CONFIG, rs->antialiasing_config); OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size); - OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2); - OUT_CS(rs->point_minmax); - OUT_CS(rs->line_control); + OUT_CS_REG(R300_GA_LINE_CNTL, rs->line_control); if (rs->polygon_offset_enable) { scale = rs->depth_scale * 12; @@ -609,7 +617,8 @@ void r300_emit_rs_state(struct r300_context* r300, void* state) END_CS; } -void r300_emit_rs_block_state(struct r300_context* r300, void* state) +void r300_emit_rs_block_state(struct r300_context* r300, + unsigned size, void* state) { struct r300_rs_block* rs = (struct r300_rs_block*)state; unsigned i; @@ -620,7 +629,7 @@ void r300_emit_rs_block_state(struct r300_context* r300, void* state) DBG(r300, DBG_DRAW, "r300: RS emit:\n"); - BEGIN_CS(5 + count*2); + BEGIN_CS(size); if (r300screen->caps->is_r500) { OUT_CS_REG_SEQ(R500_RS_IP_0, count); } else { @@ -651,7 +660,8 @@ void r300_emit_rs_block_state(struct r300_context* r300, void* state) END_CS; } -void r300_emit_scissor_state(struct r300_context* r300, void* state) +void r300_emit_scissor_state(struct r300_context* r300, + unsigned size, void* state) { unsigned minx, miny, maxx, maxy; uint32_t top_left, bottom_right; @@ -705,56 +715,42 @@ void r300_emit_scissor_state(struct r300_context* r300, void* state) (((maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); } - BEGIN_CS(3); + BEGIN_CS(size); OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); OUT_CS(top_left); OUT_CS(bottom_right); END_CS; } -void r300_emit_texture(struct r300_context* r300, - struct r300_sampler_state* sampler, - struct r300_texture* tex, - unsigned offset) +void r300_emit_textures_state(struct r300_context *r300, + unsigned size, void *state) { - uint32_t filter0 = sampler->filter0; - uint32_t format0 = tex->state.format0; - unsigned min_level, max_level; + struct r300_textures_state *allstate = (struct r300_textures_state*)state; + struct 
r300_texture_sampler_state *texstate; + unsigned i; CS_LOCALS(r300); - /* to emulate 1D textures through 2D ones correctly */ - if (tex->tex.target == PIPE_TEXTURE_1D) { - filter0 &= ~R300_TX_WRAP_T_MASK; - filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); - } + BEGIN_CS(size); + OUT_CS_REG(R300_TX_ENABLE, allstate->tx_enable); - if (tex->is_npot) { - /* NPOT textures don't support mip filter, unfortunately. - * This prevents incorrect rendering. */ - filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; - } else { - /* determine min/max levels */ - /* the MAX_MIP level is the largest (finest) one */ - max_level = MIN2(sampler->max_lod, tex->tex.last_level); - min_level = MIN2(sampler->min_lod, max_level); - format0 |= R300_TX_NUM_LEVELS(max_level); - filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); - } + for (i = 0; i < allstate->count; i++) { + if ((1 << i) & allstate->tx_enable) { + texstate = &allstate->regs[i]; - BEGIN_CS(16); - OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), filter0 | - (offset << 28)); - OUT_CS_REG(R300_TX_FILTER1_0 + (offset * 4), sampler->filter1); - OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (offset * 4), sampler->border_color); - - OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), format0); - OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1); - OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2); - OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1); - OUT_CS_RELOC(tex->buffer, - R300_TXO_MACRO_TILE(tex->macrotile) | - R300_TXO_MICRO_TILE(tex->microtile), - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); + OUT_CS_REG(R300_TX_FILTER0_0 + (i * 4), texstate->filter[0]); + OUT_CS_REG(R300_TX_FILTER1_0 + (i * 4), texstate->filter[1]); + OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (i * 4), + texstate->border_color); + + OUT_CS_REG(R300_TX_FORMAT0_0 + (i * 4), texstate->format[0]); + OUT_CS_REG(R300_TX_FORMAT1_0 + (i * 4), texstate->format[1]); + OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format[2]); + + OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 
4), 1); + OUT_CS_RELOC(allstate->textures[i]->buffer, texstate->tile_config, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); + } + } END_CS; } @@ -798,58 +794,76 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) END_CS; } -void r300_emit_vertex_format_state(struct r300_context* r300, void* state) +void r300_emit_vertex_stream_state(struct r300_context* r300, + unsigned size, void* state) { - struct r300_vertex_info* vertex_info = (struct r300_vertex_info*)state; + struct r300_vertex_stream_state *streams = + (struct r300_vertex_stream_state*)state; unsigned i; CS_LOCALS(r300); - DBG(r300, DBG_DRAW, "r300: VAP/PSC emit:\n"); - - BEGIN_CS(26); - OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_info->vinfo.size); + DBG(r300, DBG_DRAW, "r300: PSC emit:\n"); - OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); - OUT_CS(vertex_info->vinfo.hwfmt[0]); - OUT_CS(vertex_info->vinfo.hwfmt[1]); - OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); - OUT_CS(vertex_info->vinfo.hwfmt[2]); - OUT_CS(vertex_info->vinfo.hwfmt[3]); - for (i = 0; i < 4; i++) { - DBG(r300, DBG_DRAW, " : hwfmt%d: 0x%08x\n", i, - vertex_info->vinfo.hwfmt[i]); - } - - OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 8); - for (i = 0; i < 8; i++) { - OUT_CS(vertex_info->vap_prog_stream_cntl[i]); + BEGIN_CS(size); + OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count); + for (i = 0; i < streams->count; i++) { + OUT_CS(streams->vap_prog_stream_cntl[i]); DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i, - vertex_info->vap_prog_stream_cntl[i]); + streams->vap_prog_stream_cntl[i]); } - OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, 8); - for (i = 0; i < 8; i++) { - OUT_CS(vertex_info->vap_prog_stream_cntl_ext[i]); + OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count); + for (i = 0; i < streams->count; i++) { + OUT_CS(streams->vap_prog_stream_cntl_ext[i]); DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i, - vertex_info->vap_prog_stream_cntl_ext[i]); + 
streams->vap_prog_stream_cntl_ext[i]); } END_CS; } +void r300_emit_vap_output_state(struct r300_context* r300, + unsigned size, void* state) +{ + struct r300_vap_output_state *vap_out_state = + (struct r300_vap_output_state*)state; + CS_LOCALS(r300); + + DBG(r300, DBG_DRAW, "r300: VAP emit:\n"); + + BEGIN_CS(size); + OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); + OUT_CS(vap_out_state->vap_vtx_state_cntl); + OUT_CS(vap_out_state->vap_vsm_vtx_assm); + OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); + OUT_CS(vap_out_state->vap_out_vtx_fmt[0]); + OUT_CS(vap_out_state->vap_out_vtx_fmt[1]); + END_CS; +} -void r300_emit_vertex_program_code(struct r300_context* r300, - struct r300_vertex_program_code* code) +void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state) { - int i; + CS_LOCALS(r300); + + BEGIN_CS(size); + OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); + END_CS; +} + +void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) +{ + struct r300_vertex_shader* vs = (struct r300_vertex_shader*)state; + struct r300_vertex_program_code* code = &vs->code; struct r300_screen* r300screen = r300_screen(r300->context.screen); unsigned instruction_count = code->length / 4; + unsigned i; - int vtx_mem_size = r300screen->caps->is_r500 ? 128 : 72; - int input_count = MAX2(util_bitcount(code->InputsRead), 1); - int output_count = MAX2(util_bitcount(code->OutputsWritten), 1); - int temp_count = MAX2(code->num_temporaries, 1); - int pvs_num_slots = MIN3(vtx_mem_size / input_count, - vtx_mem_size / output_count, 10); - int pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 6); + unsigned vtx_mem_size = r300screen->caps->is_r500 ? 
128 : 72; + unsigned input_count = MAX2(util_bitcount(code->InputsRead), 1); + unsigned output_count = MAX2(util_bitcount(code->OutputsWritten), 1); + unsigned temp_count = MAX2(code->num_temporaries, 1); + + unsigned pvs_num_slots = MIN3(vtx_mem_size / input_count, + vtx_mem_size / output_count, 10); + unsigned pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 6); CS_LOCALS(r300); @@ -859,7 +873,7 @@ void r300_emit_vertex_program_code(struct r300_context* r300, return; } - BEGIN_CS(9 + code->length); + BEGIN_CS(size); /* R300_VAP_PVS_CODE_CNTL_0 * R300_VAP_PVS_CONST_CNTL * R300_VAP_PVS_CODE_CNTL_1 @@ -873,8 +887,9 @@ void r300_emit_vertex_program_code(struct r300_context* r300, OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->length); - for (i = 0; i < code->length; i++) + for (i = 0; i < code->length; i++) { OUT_CS(code->body.d[i]); + } OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) | R300_PVS_NUM_CNTLRS(pvs_num_controllers) | @@ -884,12 +899,6 @@ void r300_emit_vertex_program_code(struct r300_context* r300, END_CS; } -void r300_emit_vertex_shader(struct r300_context* r300, - struct r300_vertex_shader* vs) -{ - r300_emit_vertex_program_code(r300, &vs->code); -} - void r300_emit_vs_constant_buffer(struct r300_context* r300, struct rc_constant_list* constants) { @@ -923,77 +932,56 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300, END_CS; } -void r300_emit_viewport_state(struct r300_context* r300, void* state) +void r300_emit_viewport_state(struct r300_context* r300, + unsigned size, void* state) { struct r300_viewport_state* viewport = (struct r300_viewport_state*)state; CS_LOCALS(r300); - BEGIN_CS(9); - OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); - OUT_CS_32F(viewport->xscale); - OUT_CS_32F(viewport->xoffset); - OUT_CS_32F(viewport->yscale); - OUT_CS_32F(viewport->yoffset); - OUT_CS_32F(viewport->zscale); - OUT_CS_32F(viewport->zoffset); - OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); - 
END_CS; -} - -void r300_emit_texture_count(struct r300_context* r300) -{ - uint32_t tx_enable = 0; - int i; - CS_LOCALS(r300); - - /* Notice that texture_count and sampler_count are just sizes - * of the respective arrays. We still have to check for the individual - * elements. */ - for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) { - if (r300->textures[i]) { - tx_enable |= 1 << i; - } - } - - BEGIN_CS(2); - OUT_CS_REG(R300_TX_ENABLE, tx_enable); - END_CS; - + BEGIN_CS(size); + OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); + OUT_CS_32F(viewport->xscale); + OUT_CS_32F(viewport->xoffset); + OUT_CS_32F(viewport->yscale); + OUT_CS_32F(viewport->yoffset); + OUT_CS_32F(viewport->zscale); + OUT_CS_32F(viewport->zoffset); + OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); + END_CS; } -void r300_emit_ztop_state(struct r300_context* r300, void* state) +void r300_emit_ztop_state(struct r300_context* r300, + unsigned size, void* state) { struct r300_ztop_state* ztop = (struct r300_ztop_state*)state; CS_LOCALS(r300); - BEGIN_CS(2); + BEGIN_CS(size); OUT_CS_REG(R300_ZB_ZTOP, ztop->z_buffer_top); END_CS; } -void r300_flush_textures(struct r300_context* r300) +void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, void* state) { CS_LOCALS(r300); - BEGIN_CS(2); + BEGIN_CS(size); OUT_CS_REG(R300_TX_INVALTAGS, 0); END_CS; } -static void r300_flush_pvs(struct r300_context* r300) -{ - CS_LOCALS(r300); - - BEGIN_CS(2); - OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); - END_CS; -} - -void r300_emit_buffer_validate(struct r300_context *r300) +void r300_emit_buffer_validate(struct r300_context *r300, + boolean do_validate_vertex_buffers, + struct pipe_buffer *index_buffer) { struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_textures_state *texstate = + (struct r300_textures_state*)r300->textures_state.state; struct r300_texture* tex; + struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; + 
struct pipe_vertex_element *velem = r300->vertex_element; + struct pipe_buffer *pbuf; unsigned i; boolean invalid = FALSE; @@ -1022,9 +1010,9 @@ validate: } } /* ...textures... */ - for (i = 0; i < r300->texture_count; i++) { - tex = r300->textures[i]; - if (!tex) + for (i = 0; i < texstate->count; i++) { + tex = texstate->textures[i]; + if (!tex || !texstate->sampler_states[i]) continue; if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) { @@ -1040,16 +1028,35 @@ validate: goto validate; } } - /* ...and vertex buffer. */ + /* ...vertex buffer for SWTCL path... */ if (r300->vbo) { if (!r300->winsys->add_buffer(r300->winsys, r300->vbo, RADEON_GEM_DOMAIN_GTT, 0)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } - } else { - /* debug_printf("No VBO while emitting dirty state!\n"); */ } + /* ...vertex buffers for HWTCL path... */ + if (do_validate_vertex_buffers) { + for (i = 0; i < r300->vertex_element_count; i++) { + pbuf = vbuf[velem[i].vertex_buffer_index].buffer; + + if (!r300->winsys->add_buffer(r300->winsys, pbuf, + RADEON_GEM_DOMAIN_GTT, 0)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + } + } + /* ...and index buffer for HWTCL path. */ + if (index_buffer) { + if (!r300->winsys->add_buffer(r300->winsys, index_buffer, + RADEON_GEM_DOMAIN_GTT, 0)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + } + if (!r300->winsys->validate(r300->winsys)) { r300->context.flush(&r300->context, 0, NULL); if (invalid) { @@ -1062,16 +1069,10 @@ validate: } } -/* Emit all dirty state. */ -void r300_emit_dirty_state(struct r300_context* r300) +unsigned r300_get_num_dirty_dwords(struct r300_context *r300) { - struct r300_screen* r300screen = r300_screen(r300->context.screen); struct r300_atom* atom; - unsigned i, dwords = 1024; - int dirty_tex = 0; - - /* Check the required number of dwords against the space remaining in the - * current CS object. 
If we need more, then flush. */ + unsigned dwords = 0; foreach(atom, &r300->atom_list) { if (atom->dirty || atom->always_dirty) { @@ -1079,12 +1080,17 @@ void r300_emit_dirty_state(struct r300_context* r300) } } - /* Make sure we have at least 2*1024 spare dwords. */ - /* XXX It would be nice to know the number of dwords we really need to - * XXX emit. */ - while (!r300->winsys->check_cs(r300->winsys, dwords)) { - r300->context.flush(&r300->context, 0, NULL); - } + /* XXX This is the compensation for the non-atomized states. */ + dwords += 1024; + + return dwords; +} + +/* Emit all dirty state. */ +void r300_emit_dirty_state(struct r300_context* r300) +{ + struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct r300_atom* atom; if (r300->dirty_state & R300_NEW_QUERY) { r300_emit_query_start(r300); @@ -1093,7 +1099,7 @@ void r300_emit_dirty_state(struct r300_context* r300) foreach(atom, &r300->atom_list) { if (atom->dirty || atom->always_dirty) { - atom->emit(r300, atom->state); + atom->emit(r300, atom->size, atom->state); atom->dirty = FALSE; } } @@ -1119,43 +1125,9 @@ void r300_emit_dirty_state(struct r300_context* r300) r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER_CONSTANTS; } - /* Samplers and textures are tracked separately but emitted together. 
*/ - if (r300->dirty_state & - (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) { - r300_emit_texture_count(r300); - - for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) { - if (r300->dirty_state & - ((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i))) { - if (r300->textures[i]) { - r300_emit_texture(r300, - r300->sampler_states[i], - r300->textures[i], - i); - dirty_tex |= r300->dirty_state & (R300_NEW_TEXTURE << i); - } - r300->dirty_state &= - ~((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i)); - } - } - r300->dirty_state &= ~(R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES); - } - - if (dirty_tex) { - r300_flush_textures(r300); - } - - if (r300->dirty_state & (R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS)) { - r300_flush_pvs(r300); - } - - if (r300->dirty_state & R300_NEW_VERTEX_SHADER) { - r300_emit_vertex_shader(r300, r300->vs); - r300->dirty_state &= ~R300_NEW_VERTEX_SHADER; - } - if (r300->dirty_state & R300_NEW_VERTEX_SHADER_CONSTANTS) { - r300_emit_vs_constant_buffer(r300, &r300->vs->code.constants); + struct r300_vertex_shader* vs = r300->vs_state.state; + r300_emit_vs_constant_buffer(r300, &vs->code.constants); r300->dirty_state &= ~R300_NEW_VERTEX_SHADER_CONSTANTS; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 6b96d9b57c0..449e640a884 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -31,13 +31,17 @@ struct r300_vertex_program_code; void r300_emit_aos(struct r300_context* r300, unsigned offset); -void r300_emit_blend_state(struct r300_context* r300, void* state); +void r300_emit_blend_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_blend_color_state(struct r300_context* r300, void* state); +void r300_emit_blend_color_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_clip_state(struct r300_context* r300, void* state); +void r300_emit_clip_state(struct r300_context* 
r300, + unsigned size, void* state); -void r300_emit_dsa_state(struct r300_context* r300, void* state); +void r300_emit_dsa_state(struct r300_context* r300, + unsigned size, void* state); void r300_emit_fragment_program_code(struct r300_context* r300, struct rX00_fragment_program_code* generic_code); @@ -51,48 +55,54 @@ void r500_emit_fragment_program_code(struct r300_context* r300, void r500_emit_fs_constant_buffer(struct r300_context* r300, struct rc_constant_list* constants); -void r300_emit_fb_state(struct r300_context* r300, void* state); +void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state); void r300_emit_query_begin(struct r300_context* r300, struct r300_query* query); void r300_emit_query_end(struct r300_context* r300); -void r300_emit_rs_state(struct r300_context* r300, void* state); +void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state); -void r300_emit_rs_block_state(struct r300_context* r300, void* state); +void r300_emit_rs_block_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_scissor_state(struct r300_context* r300, void* state); +void r300_emit_scissor_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_texture(struct r300_context* r300, - struct r300_sampler_state* sampler, - struct r300_texture* tex, - unsigned offset); +void r300_emit_textures_state(struct r300_context *r300, + unsigned size, void *state); void r300_emit_vertex_buffer(struct r300_context* r300); -void r300_emit_vertex_format_state(struct r300_context* r300, void* state); +void r300_emit_vertex_stream_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_vertex_program_code(struct r300_context* r300, - struct r300_vertex_program_code* code); +void r300_emit_vap_output_state(struct r300_context* r300, + unsigned size, void* state); void r300_emit_vs_constant_buffer(struct r300_context* r300, struct rc_constant_list* constants); -void 
r300_emit_vertex_shader(struct r300_context* r300, - struct r300_vertex_shader* vs); +void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state); -void r300_emit_viewport_state(struct r300_context* r300, void* state); +void r300_emit_viewport_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_texture_count(struct r300_context* r300); +void r300_emit_ztop_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_ztop_state(struct r300_context* r300, void* state); +void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state); -void r300_flush_textures(struct r300_context* r300); +void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, void* state); + +unsigned r300_get_num_dirty_dwords(struct r300_context *r300); /* Emit all dirty state. */ void r300_emit_dirty_state(struct r300_context* r300); -void r300_emit_buffer_validate(struct r300_context *r300); +void r300_emit_buffer_validate(struct r300_context *r300, + boolean do_validate_vertex_buffers, + struct pipe_buffer *index_buffer); #endif /* R300_EMIT_H */ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index ae4c62b2f1d..3c2625269b8 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -133,10 +133,13 @@ static void get_compare_state( struct r300_fragment_program_external_state* state, unsigned shadow_samplers) { + struct r300_textures_state *texstate = + (struct r300_textures_state*)r300->textures_state.state; + memset(state, 0, sizeof(*state)); - for (int i = 0; i < r300->sampler_count; i++) { - struct r300_sampler_state* s = r300->sampler_states[i]; + for (int i = 0; i < texstate->sampler_count; i++) { + struct r300_sampler_state* s = texstate->sampler_states[i]; if (s && s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { /* XXX Gallium doesn't provide us with any information regarding diff --git 
a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 754eb4dc769..770a92be74f 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -118,6 +118,18 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, return color_control; } +/* Check if the requested number of dwords is available in the CS and + * if not, flush. Return TRUE if the flush occured. */ +static boolean r300_reserve_cs_space(struct r300_context *r300, + unsigned dwords) +{ + if (!r300->winsys->check_cs(r300->winsys, dwords)) { + r300->context.flush(&r300->context, 0, NULL); + return TRUE; + } + return FALSE; +} + static boolean immd_is_good_idea(struct r300_context *r300, unsigned count) { @@ -132,7 +144,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, struct pipe_vertex_element* velem; struct pipe_vertex_buffer* vbuf; unsigned vertex_element_count = r300->vertex_element_count; - unsigned i, v, vbi, dw, elem_offset; + unsigned i, v, vbi, dw, elem_offset, dwords; /* Size of the vertex, in dwords. */ unsigned vertex_size = 0; @@ -171,9 +183,13 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, } } + dwords = 10 + count * vertex_size; + + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + dwords); + r300_emit_buffer_validate(r300, FALSE, 0); r300_emit_dirty_state(r300); - BEGIN_CS(10 + count * vertex_size); + BEGIN_CS(dwords); OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); @@ -258,11 +274,6 @@ static void r300_emit_draw_elements(struct r300_context *r300, assert((start * indexSize) % 4 == 0); - /* XXX Non-zero offset locks up. 
*/ - if (offset_dwords != 0) { - return; - } - if (alt_num_verts) { assert(count < (1 << 24)); BEGIN_CS(16); @@ -276,13 +287,13 @@ static void r300_emit_draw_elements(struct r300_context *r300, OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); if (indexSize == 4) { - count_dwords = count + start; + count_dwords = count; OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | R300_VAP_VF_CNTL__INDEX_SIZE_32bit | r300_translate_primitive(mode) | (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0)); } else { - count_dwords = (count + start + 1) / 2; + count_dwords = (count + 1) / 2; OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300_translate_primitive(mode) | (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0)); @@ -303,31 +314,6 @@ static void r300_emit_draw_elements(struct r300_context *r300, END_CS; } -static boolean r300_setup_vertex_buffers(struct r300_context *r300) -{ - struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; - struct pipe_vertex_element *velem = r300->vertex_element; - struct pipe_buffer *pbuf; - -validate: - for (int i = 0; i < r300->vertex_element_count; i++) { - pbuf = vbuf[velem[i].vertex_buffer_index].buffer; - - if (!r300->winsys->add_buffer(r300->winsys, pbuf, - RADEON_GEM_DOMAIN_GTT, 0)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } - } - - if (!r300->winsys->validate(r300->winsys)) { - r300->context.flush(&r300->context, 0, NULL); - return r300->winsys->validate(r300->winsys); - } - - return TRUE; -} - static void r300_shorten_ubyte_elts(struct r300_context* r300, struct pipe_buffer** elts, unsigned count) @@ -383,30 +369,17 @@ void r300_draw_range_elements(struct pipe_context* pipe, return; } - r300_update_derived_state(r300); - - r300_emit_buffer_validate(r300); - - if (!r300_setup_vertex_buffers(r300)) { - return; - } - if (indexSize == 1) { r300_shorten_ubyte_elts(r300, &indexBuffer, count); indexSize = 2; } - if 
(!r300->winsys->add_buffer(r300->winsys, indexBuffer, - RADEON_GEM_DOMAIN_GTT, 0)) { - goto cleanup; - } - - if (!r300->winsys->validate(r300->winsys)) { - goto cleanup; - } + r300_update_derived_state(r300); + /* 128 dwords for emit_aos and emit_draw_elements */ + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 128); + r300_emit_buffer_validate(r300, TRUE, indexBuffer); r300_emit_dirty_state(r300); - r300_emit_aos(r300, 0); if (alt_num_verts || count <= 65535) { @@ -420,10 +393,16 @@ void r300_draw_range_elements(struct pipe_context* pipe, start += short_count; count -= short_count; + + /* 16 spare dwords are enough for emit_draw_elements. */ + if (count && r300_reserve_cs_space(r300, 16)) { + r300_emit_buffer_validate(r300, TRUE, indexBuffer); + r300_emit_dirty_state(r300); + r300_emit_aos(r300, 0); + } } while (count); } -cleanup: if (indexBuffer != orgIndexBuffer) { pipe->screen->buffer_destroy(indexBuffer); } @@ -435,8 +414,11 @@ void r300_draw_elements(struct pipe_context* pipe, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, - mode, start, count); + struct r300_context *r300 = r300_context(pipe); + + pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, + r300->vertex_buffer_max_index, + mode, start, count); } void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, @@ -457,15 +439,13 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_update_derived_state(r300); - r300_emit_buffer_validate(r300); - if (immd_is_good_idea(r300, count)) { r300_emit_draw_arrays_immediate(r300, mode, start, count); } else { - if (!r300_setup_vertex_buffers(r300)) { - return; - } - + /* Make sure there are at least 128 spare dwords in the command buffer. 
+ * (most of it being consumed by emit_aos) */ + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 128); + r300_emit_buffer_validate(r300, TRUE, 0); r300_emit_dirty_state(r300); if (alt_num_verts || count <= 65535) { @@ -479,6 +459,13 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, start += short_count; count -= short_count; + + /* Again, we emit both AOS and draw_arrays so there should be + * at least 128 spare dwords. */ + if (count && r300_reserve_cs_space(r300, 128)) { + r300_emit_buffer_validate(r300, TRUE, 0); + r300_emit_dirty_state(r300); + } } while (count); } } @@ -610,7 +597,7 @@ r300_render_get_vertex_info(struct vbuf_render* render) r300_update_derived_state(r300); - return (struct vertex_info*)r300->vertex_format_state.state; + return &r300->vertex_info; } static boolean r300_render_allocate_vertices(struct vbuf_render* render, @@ -695,6 +682,7 @@ static void r300_render_draw_arrays(struct vbuf_render* render, CS_LOCALS(r300); + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 2); r300_emit_dirty_state(r300); DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); @@ -713,12 +701,14 @@ static void r300_render_draw(struct vbuf_render* render, struct r300_render* r300render = r300_render(render); struct r300_context* r300 = r300render->r300; int i; + unsigned dwords = 2 + (count+1)/2; CS_LOCALS(r300); + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + dwords); r300_emit_dirty_state(r300); - BEGIN_CS(2 + (count+1)/2); + BEGIN_CS(dwords); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300render->hwprim); diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index ebb859138fa..b732380a145 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -600,7 +600,8 @@ static void memcpy(r300->fb_state.state, state, sizeof(struct 
pipe_framebuffer_state)); - r300->fb_state.size = (10 * state->nr_cbufs) + (state->zsbuf ? 10 : 0) + 6; + r300->fb_state.size = (10 * state->nr_cbufs) + (2 * (4 - state->nr_cbufs)) + + (state->zsbuf ? 10 : 0) + 8; r300_fb_update_tiling_flags(r300, r300->fb_state.state, state); @@ -660,8 +661,10 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300->fs = fs; r300_pick_fragment_shader(r300); - if (r300->vs && r300_vertex_shader_setup_wpos(r300)) { - r300->vertex_format_state.dirty = TRUE; + r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ + + if (r300->vs_state.state && r300_vertex_shader_setup_wpos(r300)) { + r300->vap_output_state.dirty = TRUE; } r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS; @@ -720,22 +723,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->point_size = pack_float_16_6x(state->point_size) | (pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT); - /* Point minimum and maximum sizes. This register has to be emitted, - * and it'd be a step backwards to put it in invariant state. 
*/ - if (r300screen->caps->is_r500) { - rs->point_minmax = - ((int)(0.0 * 6.0) << R300_GA_POINT_MINMAX_MIN_SHIFT) | - ((int)(4096.0 * 6.0) << R300_GA_POINT_MINMAX_MAX_SHIFT); - } else if (r300screen->caps->is_r400) { - rs->point_minmax = - ((int)(0.0 * 6.0) << R300_GA_POINT_MINMAX_MIN_SHIFT) | - ((int)(4021.0 * 6.0) << R300_GA_POINT_MINMAX_MAX_SHIFT); - } else { - rs->point_minmax = - ((int)(0.0 * 6.0) << R300_GA_POINT_MINMAX_MIN_SHIFT) | - ((int)(2560.0 * 6.0) << R300_GA_POINT_MINMAX_MAX_SHIFT); - } - rs->line_control = pack_float_16_6x(state->line_width) | R300_GA_LINE_CNTL_END_TYPE_COMP; @@ -826,12 +813,13 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) if (rs) { r300->polygon_offset_enabled = rs->rs.offset_cw || rs->rs.offset_ccw; + r300->rs_state.dirty = TRUE; } else { r300->polygon_offset_enabled = FALSE; } r300->rs_state.state = rs; - r300->rs_state.dirty = TRUE; + r300->rs_state.size = 17 + (r300->polygon_offset_enabled ? 5 : 0); /* XXX Why is this still needed, dammit!? */ r300->scissor_state.dirty = TRUE; r300->viewport_state.dirty = TRUE; @@ -870,7 +858,7 @@ static void* state->max_anisotropy > 0); /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */ - /* We must pass these to the emit function to clamp them properly. */ + /* We must pass these to the merge function to clamp them properly. 
*/ sampler->min_lod = MAX2((unsigned)state->min_lod, 0); sampler->max_lod = MAX2((unsigned)ceilf(state->max_lod), 0); @@ -896,23 +884,20 @@ static void r300_bind_sampler_states(struct pipe_context* pipe, void** states) { struct r300_context* r300 = r300_context(pipe); - int i; + struct r300_textures_state* state = + (struct r300_textures_state*)r300->textures_state.state; if (count > 8) { return; } - for (i = 0; i < count; i++) { - if (r300->sampler_states[i] != states[i]) { - r300->sampler_states[i] = (struct r300_sampler_state*)states[i]; - r300->dirty_state |= (R300_NEW_SAMPLER << i); - } - } + memcpy(state->sampler_states, states, sizeof(void*) * count); + state->sampler_count = count; - r300->sampler_count = count; + r300->textures_state.dirty = TRUE; /* Pick a fragment shader based on the texture compare state. */ - if (r300->fs && (r300->dirty_state & R300_ANY_NEW_SAMPLERS)) { + if (r300->fs && count) { if (r300_pick_fragment_shader(r300)) { r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS; @@ -936,22 +921,25 @@ static void r300_set_sampler_textures(struct pipe_context* pipe, struct pipe_texture** texture) { struct r300_context* r300 = r300_context(pipe); + struct r300_textures_state* state = + (struct r300_textures_state*)r300->textures_state.state; + unsigned i; boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500; - int i; + boolean dirty_tex = FALSE; /* XXX magic num */ if (count > 8) { return; } - + for (i = 0; i < count; i++) { - if (r300->textures[i] != (struct r300_texture*)texture[i]) { - pipe_texture_reference((struct pipe_texture**)&r300->textures[i], - texture[i]); - r300->dirty_state |= (R300_NEW_TEXTURE << i); + if (state->textures[i] != (struct r300_texture*)texture[i]) { + pipe_texture_reference((struct pipe_texture**)&state->textures[i], + texture[i]); + dirty_tex = TRUE; - /* R300-specific - set the texrect factor in a fragment shader */ - if (!is_r500 && r300->textures[i]->is_npot) { + /* 
R300-specific - set the texrect factor in the fragment shader */ + if (!is_r500 && state->textures[i]->is_npot) { /* XXX It would be nice to re-emit just 1 constant, * XXX not all of them */ r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; @@ -960,14 +948,19 @@ static void r300_set_sampler_textures(struct pipe_context* pipe, } for (i = count; i < 8; i++) { - if (r300->textures[i]) { - pipe_texture_reference((struct pipe_texture**)&r300->textures[i], + if (state->textures[i]) { + pipe_texture_reference((struct pipe_texture**)&state->textures[i], NULL); - r300->dirty_state |= (R300_NEW_TEXTURE << i); } } - r300->texture_count = count; + state->texture_count = count; + + r300->textures_state.dirty = TRUE; + + if (dirty_tex) { + r300->texture_cache_inval.dirty = TRUE; + } } static void r300_set_scissor_state(struct pipe_context* pipe, @@ -1029,17 +1022,24 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, const struct pipe_vertex_buffer* buffers) { struct r300_context* r300 = r300_context(pipe); + unsigned i, max_index = ~0; memcpy(r300->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); + + for (i = 0; i < count; i++) { + max_index = MIN2(buffers[i].max_index, max_index); + } + r300->vertex_buffer_count = count; + r300->vertex_buffer_max_index = max_index; if (r300->draw) { draw_flush(r300->draw); draw_set_vertex_buffers(r300->draw, count, buffers); + } else { + r300->vertex_stream_state.dirty = TRUE; } - - r300->vertex_format_state.dirty = TRUE; } static boolean r300_validate_aos(struct r300_context *r300) @@ -1108,21 +1108,26 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; if (vs == NULL) { - r300->vs = NULL; + r300->vs_state.state = NULL; return; } else if (!vs->translated) { r300_translate_vertex_shader(r300, vs); } - r300->vs = vs; + r300->vs_state.state = vs; + r300->vs_state.size = vs->code.length + 9; + r300->vs_state.dirty = 
TRUE; + + r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ + r300->vap_output_state.dirty = TRUE; + r300->vertex_stream_state.dirty = TRUE; /* XXX needed for TCL bypass */ + r300->pvs_flush.dirty = TRUE; + if (r300->fs) { r300_vertex_shader_setup_wpos(r300); } - r300->vertex_format_state.dirty = TRUE; - - r300->dirty_state |= - R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS; + r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS; } else { draw_flush(r300->draw); draw_bind_vertex_shader(r300->draw, @@ -1194,8 +1199,10 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, r300->shader_constants[shader].count = buf->size / (4 * sizeof(float)); pipe_buffer_unmap(pipe->screen, buf); - if (shader == PIPE_SHADER_VERTEX) + if (shader == PIPE_SHADER_VERTEX) { r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS; + r300->pvs_flush.dirty = TRUE; + } else if (shader == PIPE_SHADER_FRAGMENT) r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; } diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 778eaaacd99..9c8e907fdf7 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -42,20 +42,20 @@ static void r300_draw_emit_attrib(struct r300_context* r300, enum interp_mode interp, int index) { - struct tgsi_shader_info* info = &r300->vs->info; + struct r300_vertex_shader* vs = r300->vs_state.state; + struct tgsi_shader_info* info = &vs->info; int output; output = draw_find_shader_output(r300->draw, info->output_semantic_name[index], info->output_semantic_index[index]); - draw_emit_vertex_attr( - (struct vertex_info*)r300->vertex_format_state.state, - emit, interp, output); + draw_emit_vertex_attr(&r300->vertex_info, emit, interp, output); } static void r300_draw_emit_all_attribs(struct r300_context* r300) { - struct r300_shader_semantics* vs_outputs = &r300->vs->outputs; + struct r300_vertex_shader* vs = 
r300->vs_state.state; + struct r300_shader_semantics* vs_outputs = &vs->outputs; int i, gen_count; /* Position. */ @@ -104,16 +104,21 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300) } /* Update the PSC tables. */ +/* XXX move this function into r300_state.c after TCL-bypass gets removed + * XXX because this one is dependent only on vertex elements. */ static void r300_vertex_psc(struct r300_context* r300) { - struct r300_vertex_info *vformat = - (struct r300_vertex_info*)r300->vertex_format_state.state; + struct r300_vertex_shader* vs = r300->vs_state.state; + struct r300_vertex_stream_state *vformat = + (struct r300_vertex_stream_state*)r300->vertex_stream_state.state; uint16_t type, swizzle; enum pipe_format format; unsigned i; int identity[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; int* stream_tab; + memset(vformat, 0, sizeof(struct r300_vertex_stream_state)); + stream_tab = identity; /* Vertex shaders have no semantics on their inputs, @@ -121,7 +126,7 @@ static void r300_vertex_psc(struct r300_context* r300) * and not on attrib information. */ DBG(r300, DBG_DRAW, "r300: vs expects %d attribs, routing %d elements" " in psc\n", - r300->vs->info.num_inputs, + vs->info.num_inputs, r300->vertex_element_count); for (i = 0; i < r300->vertex_element_count; i++) { @@ -148,18 +153,24 @@ static void r300_vertex_psc(struct r300_context* r300) } vformat->vap_prog_stream_cntl[i >> 1] |= (R300_LAST_VEC << (i & 1 ? 16 : 0)); + + vformat->count = (i >> 1) + 1; + r300->vertex_stream_state.size = (1 + vformat->count) * 2; } /* Update the PSC tables for SW TCL, using Draw. 
*/ static void r300_swtcl_vertex_psc(struct r300_context* r300) { - struct r300_vertex_info *vformat = - (struct r300_vertex_info*)r300->vertex_format_state.state; - struct vertex_info* vinfo = &vformat->vinfo; + struct r300_vertex_shader* vs = r300->vs_state.state; + struct r300_vertex_stream_state *vformat = + (struct r300_vertex_stream_state*)r300->vertex_stream_state.state; + struct vertex_info* vinfo = &r300->vertex_info; uint16_t type, swizzle; enum pipe_format format; unsigned i, attrib_count; - int* vs_output_tab = r300->vs->stream_loc_notcl; + int* vs_output_tab = vs->stream_loc_notcl; + + memset(vformat, 0, sizeof(struct r300_vertex_stream_state)); /* For each Draw attribute, route it to the fragment shader according * to the vs_output_tab. */ @@ -202,6 +213,9 @@ static void r300_swtcl_vertex_psc(struct r300_context* r300) } vformat->vap_prog_stream_cntl[i >> 1] |= (R300_LAST_VEC << (i & 1 ? 16 : 0)); + + vformat->count = (i >> 1) + 1; + r300->vertex_stream_state.size = (1 + vformat->count) * 2; } static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr, @@ -410,31 +424,29 @@ static void r300_update_rs_block(struct r300_context* r300, /* Now, after all that, see if we actually need to update the state. */ if (memcmp(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block))) { memcpy(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block)); - r300->rs_block_state.size = 5 + count; - r300->rs_block_state.dirty = TRUE; + r300->rs_block_state.size = 5 + count*2; } } /* Update the shader-dependant states. 
*/ static void r300_update_derived_shader_state(struct r300_context* r300) { + struct r300_vertex_shader* vs = r300->vs_state.state; struct r300_screen* r300screen = r300_screen(r300->context.screen); - struct r300_vertex_info *vformat = - (struct r300_vertex_info*)r300->vertex_format_state.state; - struct vertex_info* vinfo = &vformat->vinfo; + struct r300_vap_output_state *vap_out = + (struct r300_vap_output_state*)r300->vap_output_state.state; - /* Mmm, delicious hax */ - memset(r300->vertex_format_state.state, 0, sizeof(struct r300_vertex_info)); - memcpy(vinfo->hwfmt, r300->vs->hwfmt, sizeof(uint)*4); + /* XXX Mmm, delicious hax */ + memset(&r300->vertex_info, 0, sizeof(struct vertex_info)); + memcpy(vap_out, vs->hwfmt, sizeof(uint)*4); - r300_update_rs_block(r300, &r300->vs->outputs, &r300->fs->inputs); + r300_update_rs_block(r300, &vs->outputs, &r300->fs->inputs); if (r300screen->caps->has_tcl) { r300_vertex_psc(r300); } else { r300_draw_emit_all_attribs(r300); - draw_compute_vertex_size( - (struct vertex_info*)r300->vertex_format_state.state); + draw_compute_vertex_size(&r300->vertex_info); r300_swtcl_vertex_psc(r300); } } @@ -510,14 +522,74 @@ static void r300_update_ztop(struct r300_context* r300) r300->ztop_state.dirty = TRUE; } +static void r300_merge_textures_and_samplers(struct r300_context* r300) +{ + struct r300_textures_state *state = + (struct r300_textures_state*)r300->textures_state.state; + struct r300_texture_sampler_state *texstate; + struct r300_sampler_state *sampler; + struct r300_texture *tex; + unsigned min_level, max_level, i, size; + unsigned count = MIN2(state->texture_count, state->sampler_count); + + state->tx_enable = 0; + size = 2; + + for (i = 0; i < count; i++) { + if (state->textures[i] && state->sampler_states[i]) { + state->tx_enable |= 1 << i; + + tex = state->textures[i]; + sampler = state->sampler_states[i]; + + texstate = &state->regs[i]; + memcpy(texstate->format, &tex->state, sizeof(uint32_t)*3); + texstate->filter[0] = 
sampler->filter0; + texstate->filter[1] = sampler->filter1; + texstate->border_color = sampler->border_color; + texstate->tile_config = R300_TXO_MACRO_TILE(tex->macrotile) | + R300_TXO_MICRO_TILE(tex->microtile); + + /* to emulate 1D textures through 2D ones correctly */ + if (tex->tex.target == PIPE_TEXTURE_1D) { + texstate->filter[0] &= ~R300_TX_WRAP_T_MASK; + texstate->filter[0] |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); + } + + if (tex->is_npot) { + /* NPOT textures don't support mip filter, unfortunately. + * This prevents incorrect rendering. */ + texstate->filter[0] &= ~R300_TX_MIN_FILTER_MIP_MASK; + } else { + /* determine min/max levels */ + /* the MAX_MIP level is the largest (finest) one */ + max_level = MIN2(sampler->max_lod, tex->tex.last_level); + min_level = MIN2(sampler->min_lod, max_level); + texstate->format[0] |= R300_TX_NUM_LEVELS(max_level); + texstate->filter[0] |= R300_TX_MAX_MIP_LEVEL(min_level); + } + + texstate->filter[0] |= i << 28; + + size += 16; + state->count = i+1; + } + } + + r300->textures_state.size = size; +} + void r300_update_derived_state(struct r300_context* r300) { - /* XXX */ - if (r300->dirty_state & - (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER) || - r300->vertex_format_state.dirty || r300->rs_state.dirty) { + if (r300->rs_block_state.dirty || + r300->vertex_stream_state.dirty || /* XXX put updating this state out of this file */ + r300->rs_state.dirty) { /* XXX and remove this one (tcl_bypass dependency) */ r300_update_derived_shader_state(r300); } + if (r300->textures_state.dirty) { + r300_merge_textures_and_samplers(r300); + } + r300_update_ztop(r300); } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 0e1cb328d17..2f3a56e1fbc 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -384,8 +384,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { desc = 
util_format_description(format); - if (desc->layout != UTIL_FORMAT_LAYOUT_ARITH && - desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) { + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { debug_printf("r300: Bad format %s in %s:%d\n", util_format_name(format), __FUNCTION__, __LINE__); assert(0); @@ -458,8 +457,7 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) { assert(format); - if (desc->layout != UTIL_FORMAT_LAYOUT_ARITH && - desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) { + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { debug_printf("r300: Bad format %s in %s:%d\n", util_format_name(format), __FUNCTION__, __LINE__); return 0; diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 97927acf1b4..4a2c68269b1 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -38,7 +38,8 @@ struct pipe_viewport_state r300_viewport_identity = { * * Note that eventually this should be empty, but it's useful for development * and general unduplication of code. 
*/ -void r300_emit_invariant_state(struct r300_context* r300, void* state) +void r300_emit_invariant_state(struct r300_context* r300, + unsigned size, void* state) { struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); diff --git a/src/gallium/drivers/r300/r300_state_invariant.h b/src/gallium/drivers/r300/r300_state_invariant.h index 5d1a9636545..83d031c7fe9 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.h +++ b/src/gallium/drivers/r300/r300_state_invariant.h @@ -25,6 +25,7 @@ struct r300_context; -void r300_emit_invariant_state(struct r300_context* r300, void* state); +void r300_emit_invariant_state(struct r300_context* r300, + unsigned size, void* state); #endif /* R300_STATE_INVARIANT_H */ diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index ed2be06254a..2246c75056c 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -70,6 +70,12 @@ static uint32_t r300_translate_texformat(enum pipe_format format) R300_TX_FORMAT_B_SHIFT, R300_TX_FORMAT_A_SHIFT }; + const uint32_t swizzle[4] = { + R300_TX_FORMAT_X, + R300_TX_FORMAT_Y, + R300_TX_FORMAT_Z, + R300_TX_FORMAT_W + }; const uint32_t sign_bit[4] = { R300_TX_FORMAT_SIGNED_X, R300_TX_FORMAT_SIGNED_Y, @@ -119,16 +125,16 @@ static uint32_t r300_translate_texformat(enum pipe_format format) switch (desc->swizzle[i]) { case UTIL_FORMAT_SWIZZLE_X: case UTIL_FORMAT_SWIZZLE_NONE: - result |= R300_TX_FORMAT_X << swizzle_shift[i]; + result |= swizzle[0] << swizzle_shift[i]; break; case UTIL_FORMAT_SWIZZLE_Y: - result |= R300_TX_FORMAT_Y << swizzle_shift[i]; + result |= swizzle[1] << swizzle_shift[i]; break; case UTIL_FORMAT_SWIZZLE_Z: - result |= R300_TX_FORMAT_Z << swizzle_shift[i]; + result |= swizzle[2] << swizzle_shift[i]; break; case UTIL_FORMAT_SWIZZLE_W: - result |= R300_TX_FORMAT_W << swizzle_shift[i]; + result |= swizzle[3] << swizzle_shift[i]; break; case UTIL_FORMAT_SWIZZLE_0: 
result |= R300_TX_FORMAT_ZERO << swizzle_shift[i]; @@ -142,7 +148,7 @@ static uint32_t r300_translate_texformat(enum pipe_format format) } /* Compressed formats. */ - if (desc->layout == UTIL_FORMAT_LAYOUT_DXT) { + if (desc->layout == UTIL_FORMAT_LAYOUT_COMPRESSED) { switch (format) { case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: @@ -499,7 +505,7 @@ boolean r300_is_sampler_format_supported(enum pipe_format format) static void r300_setup_texture_state(struct r300_screen* screen, struct r300_texture* tex) { - struct r300_texture_state* state = &tex->state; + struct r300_texture_format_state* state = &tex->state; struct pipe_texture *pt = &tex->tex; unsigned i; boolean is_r500 = screen->caps->is_r500; diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index a6786c321c6..60a04bbfeda 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -368,8 +368,8 @@ void r300_translate_vertex_shader(struct r300_context* r300, boolean r300_vertex_shader_setup_wpos(struct r300_context* r300) { - struct r300_vertex_shader* vs = r300->vs; - int tex_output = r300->vs->wpos_tex_output; + struct r300_vertex_shader* vs = r300->vs_state.state; + int tex_output = vs->wpos_tex_output; uint32_t tex_fmt = R300_INPUT_CNTL_TC0 << tex_output; uint32_t* hwfmt = vs->hwfmt; diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c index 8e4867a904e..242aaac4665 100644 --- a/src/gallium/drivers/softpipe/sp_video_context.c +++ b/src/gallium/drivers/softpipe/sp_video_context.c @@ -25,10 +25,12 @@ * **************************************************************************/ +#include "util/u_inlines.h" +#include "util/u_memory.h" + #include "sp_video_context.h" -#include <util/u_inlines.h> -#include <util/u_memory.h> -#include "softpipe/sp_texture.h" +#include "sp_texture.h" + static void sp_mpeg12_destroy(struct pipe_video_context *vpipe) diff --git 
a/src/gallium/drivers/softpipe/sp_winsys.c b/src/gallium/drivers/softpipe/sp_winsys.c index f6598927d35..0a6245ed2cd 100644 --- a/src/gallium/drivers/softpipe/sp_winsys.c +++ b/src/gallium/drivers/softpipe/sp_winsys.c @@ -37,13 +37,13 @@ #include "util/u_simple_screen.h"/* port to just p_screen */ -#include "pipe/p_format.h" -#include "pipe/p_context.h" #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_inlines.h" -#include "softpipe/sp_winsys.h" +#include "pipe/p_format.h" +#include "pipe/p_context.h" +#include "sp_winsys.h" struct st_softpipe_buffer diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c index a0da7d7e5d5..04307d17fe0 100644 --- a/src/gallium/drivers/svga/svga_cmd.c +++ b/src/gallium/drivers/svga/svga_cmd.c @@ -478,7 +478,8 @@ SVGA3D_BufferDMA(struct svga_winsys_context *swc, struct svga_winsys_surface *host, SVGA3dTransferType transfer, // IN uint32 size, // IN - uint32 offset, // IN + uint32 guest_offset, // IN + uint32 host_offset, // IN SVGA3dSurfaceDMAFlags flags) // IN { SVGA3dCmdSurfaceDMA *cmd; @@ -517,19 +518,19 @@ SVGA3D_BufferDMA(struct svga_winsys_context *swc, cmd->transfer = transfer; box = (SVGA3dCopyBox *)&cmd[1]; - box->x = offset; + box->x = host_offset; box->y = 0; box->z = 0; box->w = size; box->h = 1; box->d = 1; - box->srcx = offset; + box->srcx = guest_offset; box->srcy = 0; box->srcz = 0; pSuffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t*)cmd + sizeof *cmd + sizeof *box); pSuffix->suffixSize = sizeof *pSuffix; - pSuffix->maximumOffset = offset + size; + pSuffix->maximumOffset = guest_offset + size; pSuffix->flags = flags; swc->commit(swc); diff --git a/src/gallium/drivers/svga/svga_cmd.h b/src/gallium/drivers/svga/svga_cmd.h index 80410547690..da9fc4355fa 100644 --- a/src/gallium/drivers/svga/svga_cmd.h +++ b/src/gallium/drivers/svga/svga_cmd.h @@ -111,7 +111,8 @@ SVGA3D_BufferDMA(struct svga_winsys_context *swc, struct svga_winsys_surface *host, 
SVGA3dTransferType transfer, uint32 size, - uint32 offset, + uint32 guest_offset, + uint32 host_offset, SVGA3dSurfaceDMAFlags flags); /* diff --git a/src/gallium/drivers/svga/svga_pipe_blend.c b/src/gallium/drivers/svga/svga_pipe_blend.c index b60117f090e..594eec7166e 100644 --- a/src/gallium/drivers/svga/svga_pipe_blend.c +++ b/src/gallium/drivers/svga/svga_pipe_blend.c @@ -92,6 +92,7 @@ svga_create_blend_state(struct pipe_context *pipe, if (templ->logicop_enable) { switch (templ->logicop_func) { case PIPE_LOGICOP_XOR: + case PIPE_LOGICOP_INVERT: blend->need_white_fragments = TRUE; blend->rt[i].blend_enable = TRUE; blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE; @@ -125,12 +126,6 @@ svga_create_blend_state(struct pipe_context *pipe, blend->rt[i].dstblend = SVGA3D_BLENDOP_ONE; blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM; break; - case PIPE_LOGICOP_INVERT: - blend->rt[i].blend_enable = TRUE; - blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR; - blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO; - blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD; - break; case PIPE_LOGICOP_AND: /* Approximate with minimum - works for the 0 & anything case: */ blend->rt[i].blend_enable = TRUE; diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c index ffc0f99565b..836b8441da2 100644 --- a/src/gallium/drivers/svga/svga_pipe_vertex.c +++ b/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -49,7 +49,7 @@ static void svga_set_vertex_buffers(struct pipe_context *pipe, /* Adjust refcounts */ for (i = 0; i < count; i++) { pipe_buffer_reference(&svga->curr.vb[i].buffer, buffers[i].buffer); - if (svga_buffer(buffers[i].buffer)->user) + if (svga_buffer_is_user_buffer(buffers[i].buffer)) any_user_buffer = TRUE; } diff --git a/src/gallium/drivers/svga/svga_screen_buffer.c b/src/gallium/drivers/svga/svga_screen_buffer.c index c9e9bef5406..1ff6a3a5b31 100644 --- a/src/gallium/drivers/svga/svga_screen_buffer.c +++ b/src/gallium/drivers/svga/svga_screen_buffer.c @@ 
-83,7 +83,7 @@ svga_buffer_create_host_surface(struct svga_screen *ss, * as svga_screen_surface_create might have passed a recycled host * buffer. */ - sbuf->hw.flags.discard = TRUE; + sbuf->dma.flags.discard = TRUE; SVGA_DBG(DEBUG_DMA, " --> got sid %p sz %d (buffer)\n", sbuf->handle, sbuf->base.size); } @@ -109,10 +109,10 @@ svga_buffer_destroy_hw_storage(struct svga_screen *ss, struct svga_buffer *sbuf) struct svga_winsys_screen *sws = ss->sws; assert(!sbuf->map.count); - assert(sbuf->hw.buf); - if(sbuf->hw.buf) { - sws->buffer_destroy(sws, sbuf->hw.buf); - sbuf->hw.buf = NULL; + assert(sbuf->hwbuf); + if(sbuf->hwbuf) { + sws->buffer_destroy(sws, sbuf->hwbuf); + sbuf->hwbuf = NULL; } } @@ -151,16 +151,18 @@ static INLINE enum pipe_error svga_buffer_create_hw_storage(struct svga_screen *ss, struct svga_buffer *sbuf) { - if(!sbuf->hw.buf) { + assert(!sbuf->user); + + if(!sbuf->hwbuf) { unsigned alignment = sbuf->base.alignment; unsigned usage = 0; unsigned size = sbuf->base.size; - sbuf->hw.buf = svga_winsys_buffer_create(ss, alignment, usage, size); - if(!sbuf->hw.buf) + sbuf->hwbuf = svga_winsys_buffer_create(ss, alignment, usage, size); + if(!sbuf->hwbuf) return PIPE_ERROR_OUT_OF_MEMORY; - assert(!sbuf->needs_flush); + assert(!sbuf->dma.pending); } return PIPE_OK; @@ -175,12 +177,11 @@ svga_buffer_upload_command(struct svga_context *svga, struct svga_buffer *sbuf) { struct svga_winsys_context *swc = svga->swc; - struct svga_winsys_buffer *guest = sbuf->hw.buf; + struct svga_winsys_buffer *guest = sbuf->hwbuf; struct svga_winsys_surface *host = sbuf->handle; SVGA3dTransferType transfer = SVGA3D_WRITE_HOST_VRAM; - SVGA3dSurfaceDMAFlags flags = sbuf->hw.flags; SVGA3dCmdSurfaceDMA *cmd; - uint32 numBoxes = sbuf->hw.num_ranges; + uint32 numBoxes = sbuf->map.num_ranges; SVGA3dCopyBox *boxes; SVGA3dCmdSurfaceDMASuffix *pSuffix; unsigned region_flags; @@ -218,8 +219,8 @@ svga_buffer_upload_command(struct svga_context *svga, cmd->transfer = transfer; - sbuf->hw.boxes = 
(SVGA3dCopyBox *)&cmd[1]; - sbuf->hw.svga = svga; + sbuf->dma.boxes = (SVGA3dCopyBox *)&cmd[1]; + sbuf->dma.svga = svga; /* Increment reference count */ dummy = NULL; @@ -228,9 +229,11 @@ svga_buffer_upload_command(struct svga_context *svga, pSuffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t*)cmd + sizeof *cmd + numBoxes * sizeof *boxes); pSuffix->suffixSize = sizeof *pSuffix; pSuffix->maximumOffset = sbuf->base.size; - pSuffix->flags = flags; + pSuffix->flags = sbuf->dma.flags; + + SVGA_FIFOCommitAll(swc); - swc->commit(swc); + sbuf->dma.flags.discard = FALSE; return PIPE_OK; } @@ -248,10 +251,10 @@ svga_buffer_upload_flush(struct svga_context *svga, unsigned i; assert(sbuf->handle); - assert(sbuf->hw.buf); - assert(sbuf->hw.num_ranges); - assert(sbuf->hw.svga == svga); - assert(sbuf->hw.boxes); + assert(sbuf->hwbuf); + assert(sbuf->map.num_ranges); + assert(sbuf->dma.svga == svga); + assert(sbuf->dma.boxes); /* * Patch the DMA command with the final copy box. @@ -259,36 +262,33 @@ svga_buffer_upload_flush(struct svga_context *svga, SVGA_DBG(DEBUG_DMA, "dma to sid %p\n", sbuf->handle); - boxes = sbuf->hw.boxes; - for(i = 0; i < sbuf->hw.num_ranges; ++i) { + boxes = sbuf->dma.boxes; + for(i = 0; i < sbuf->map.num_ranges; ++i) { SVGA_DBG(DEBUG_DMA, " bytes %u - %u\n", - sbuf->hw.ranges[i].start, sbuf->hw.ranges[i].end); + sbuf->map.ranges[i].start, sbuf->map.ranges[i].end); - boxes[i].x = sbuf->hw.ranges[i].start; + boxes[i].x = sbuf->map.ranges[i].start; boxes[i].y = 0; boxes[i].z = 0; - boxes[i].w = sbuf->hw.ranges[i].end - sbuf->hw.ranges[i].start; + boxes[i].w = sbuf->map.ranges[i].end - sbuf->map.ranges[i].start; boxes[i].h = 1; boxes[i].d = 1; - boxes[i].srcx = sbuf->hw.ranges[i].start; + boxes[i].srcx = sbuf->map.ranges[i].start; boxes[i].srcy = 0; boxes[i].srcz = 0; } - sbuf->hw.num_ranges = 0; - memset(&sbuf->hw.flags, 0, sizeof sbuf->hw.flags); + sbuf->map.num_ranges = 0; assert(sbuf->head.prev && sbuf->head.next); LIST_DEL(&sbuf->head); #ifdef DEBUG 
sbuf->head.next = sbuf->head.prev = NULL; #endif - sbuf->needs_flush = FALSE; - - sbuf->hw.svga = NULL; - sbuf->hw.boxes = NULL; + sbuf->dma.pending = FALSE; - sbuf->host_written = TRUE; + sbuf->dma.svga = NULL; + sbuf->dma.boxes = NULL; /* Decrement reference count */ pipe_reference(&(sbuf->base.reference), NULL); @@ -297,7 +297,7 @@ svga_buffer_upload_flush(struct svga_context *svga, /** - * Queue a DMA upload of a range of this buffer to the host. + * Note a dirty range. * * This function only notes the range down. It doesn't actually emit a DMA * upload command. That only happens when a context tries to refer to this @@ -306,15 +306,24 @@ svga_buffer_upload_flush(struct svga_context *svga, * We try to lump as many contiguous DMA transfers together as possible. */ static void -svga_buffer_upload_queue(struct svga_buffer *sbuf, - unsigned start, - unsigned end) +svga_buffer_add_range(struct svga_buffer *sbuf, + unsigned start, + unsigned end) { unsigned i; + unsigned nearest_range; + unsigned nearest_dist; - assert(sbuf->hw.buf); assert(end > start); + if (sbuf->map.num_ranges < SVGA_BUFFER_MAX_RANGES) { + nearest_range = sbuf->map.num_ranges; + nearest_dist = ~0; + } else { + nearest_range = SVGA_BUFFER_MAX_RANGES - 1; + nearest_dist = 0; + } + /* * Try to grow one of the ranges. * @@ -325,12 +334,34 @@ svga_buffer_upload_queue(struct svga_buffer *sbuf, * buffer should be flushed. */ - for(i = 0; i < sbuf->hw.num_ranges; ++i) { - if(start <= sbuf->hw.ranges[i].end && sbuf->hw.ranges[i].start <= end) { - sbuf->hw.ranges[i].start = MIN2(sbuf->hw.ranges[i].start, start); - sbuf->hw.ranges[i].end = MAX2(sbuf->hw.ranges[i].end, end); + for(i = 0; i < sbuf->map.num_ranges; ++i) { + int left_dist; + int right_dist; + int dist; + + left_dist = start - sbuf->map.ranges[i].end; + right_dist = sbuf->map.ranges[i].start - end; + dist = MAX2(left_dist, right_dist); + + if (dist <= 0) { + /* + * Ranges are contiguous or overlapping -- extend this one and return. 
+ */ + + sbuf->map.ranges[i].start = MIN2(sbuf->map.ranges[i].start, start); + sbuf->map.ranges[i].end = MAX2(sbuf->map.ranges[i].end, end); return; } + else { + /* + * Discontiguous ranges -- keep track of the nearest range. + */ + + if (dist < nearest_dist) { + nearest_range = i; + nearest_dist = dist; + } + } } /* @@ -338,20 +369,34 @@ svga_buffer_upload_queue(struct svga_buffer *sbuf, * pending DMA upload and start clean. */ - if(sbuf->needs_flush) - svga_buffer_upload_flush(sbuf->hw.svga, sbuf); + if(sbuf->dma.pending) + svga_buffer_upload_flush(sbuf->dma.svga, sbuf); - assert(!sbuf->needs_flush); - assert(!sbuf->hw.svga); - assert(!sbuf->hw.boxes); + assert(!sbuf->dma.pending); + assert(!sbuf->dma.svga); + assert(!sbuf->dma.boxes); - /* - * Add a new range. - */ + if (sbuf->map.num_ranges < SVGA_BUFFER_MAX_RANGES) { + /* + * Add a new range. + */ + + sbuf->map.ranges[sbuf->map.num_ranges].start = start; + sbuf->map.ranges[sbuf->map.num_ranges].end = end; + ++sbuf->map.num_ranges; + } else { + /* + * Everything else failed, so just extend the nearest range. + * + * It is OK to do this because we always keep a local copy of the + * host buffer data, for SW TNL, and the host never modifies the buffer. + */ - sbuf->hw.ranges[sbuf->hw.num_ranges].start = start; - sbuf->hw.ranges[sbuf->hw.num_ranges].end = end; - ++sbuf->hw.num_ranges; + assert(nearest_range < SVGA_BUFFER_MAX_RANGES); + assert(nearest_range < sbuf->map.num_ranges); + sbuf->map.ranges[nearest_range].start = MIN2(sbuf->map.ranges[nearest_range].start, start); + sbuf->map.ranges[nearest_range].end = MAX2(sbuf->map.ranges[nearest_range].end, end); + } } @@ -366,55 +411,30 @@ svga_buffer_map_range( struct pipe_screen *screen, struct svga_buffer *sbuf = svga_buffer( buf ); void *map; - if(sbuf->swbuf) { + if (!sbuf->swbuf && !sbuf->hwbuf) { + if (svga_buffer_create_hw_storage(ss, sbuf) != PIPE_OK) { + /* + * We can't create a hardware buffer big enough, so create a malloc + * buffer instead. 
+ */ + + debug_printf("%s: failed to allocate %u KB of DMA, splitting DMA transfers\n", + __FUNCTION__, + (sbuf->base.size + 1023)/1024); + + sbuf->swbuf = align_malloc(sbuf->base.size, sbuf->base.alignment); + } + } + + if (sbuf->swbuf) { /* User/malloc buffer */ map = sbuf->swbuf; } + else if (sbuf->hwbuf) { + map = sws->buffer_map(sws, sbuf->hwbuf, usage); + } else { - if(!sbuf->hw.buf) { - if(svga_buffer_create_hw_storage(ss, sbuf) != PIPE_OK) - return NULL; - - /* Populate the hardware storage if the host surface pre-existed */ - if(sbuf->host_written) { - SVGA3dSurfaceDMAFlags flags; - enum pipe_error ret; - struct pipe_fence_handle *fence = NULL; - - assert(sbuf->handle); - - SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "dma from sid %p (buffer), bytes %u - %u\n", - sbuf->handle, 0, sbuf->base.size); - - memset(&flags, 0, sizeof flags); - - ret = SVGA3D_BufferDMA(ss->swc, - sbuf->hw.buf, - sbuf->handle, - SVGA3D_READ_HOST_VRAM, - sbuf->base.size, - 0, - flags); - if(ret != PIPE_OK) { - ss->swc->flush(ss->swc, NULL); - - ret = SVGA3D_BufferDMA(ss->swc, - sbuf->hw.buf, - sbuf->handle, - SVGA3D_READ_HOST_VRAM, - sbuf->base.size, - 0, - flags); - assert(ret == PIPE_OK); - } - - ss->swc->flush(ss->swc, &fence); - sws->fence_finish(sws, fence, 0); - sws->fence_reference(sws, &fence, NULL); - } - } - - map = sws->buffer_map(sws, sbuf->hw.buf, usage); + map = NULL; } if(map) { @@ -447,8 +467,7 @@ svga_buffer_flush_mapped_range( struct pipe_screen *screen, assert(sbuf->map.writing); if(sbuf->map.writing) { assert(sbuf->map.flush_explicit); - if(sbuf->hw.buf) - svga_buffer_upload_queue(sbuf, offset, offset + length); + svga_buffer_add_range(sbuf, offset, offset + length); } pipe_mutex_unlock(ss->swc_mutex); } @@ -467,16 +486,15 @@ svga_buffer_unmap( struct pipe_screen *screen, if(sbuf->map.count) --sbuf->map.count; - if(sbuf->hw.buf) - sws->buffer_unmap(sws, sbuf->hw.buf); + if(sbuf->hwbuf) + sws->buffer_unmap(sws, sbuf->hwbuf); if(sbuf->map.writing) { if(!sbuf->map.flush_explicit) 
{ /* No mapped range was flushed -- flush the whole buffer */ SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n"); - if(sbuf->hw.buf) - svga_buffer_upload_queue(sbuf, 0, sbuf->base.size); + svga_buffer_add_range(sbuf, 0, sbuf->base.size); } sbuf->map.writing = FALSE; @@ -494,12 +512,15 @@ svga_buffer_destroy( struct pipe_buffer *buf ) assert(!p_atomic_read(&buf->reference.count)); - assert(!sbuf->needs_flush); + assert(!sbuf->dma.pending); if(sbuf->handle) svga_buffer_destroy_host_surface(ss, sbuf); - if(sbuf->hw.buf) + if(sbuf->uploaded.buffer) + pipe_buffer_reference(&sbuf->uploaded.buffer, NULL); + + if(sbuf->hwbuf) svga_buffer_destroy_hw_storage(ss, sbuf); if(sbuf->swbuf && !sbuf->user) @@ -596,13 +617,14 @@ svga_screen_init_buffer_functions(struct pipe_screen *screen) } -/** - * Copy the contents of the user buffer / malloc buffer to a hardware buffer. +/** + * Copy the contents of the malloc buffer to a hardware buffer. */ static INLINE enum pipe_error svga_buffer_update_hw(struct svga_screen *ss, struct svga_buffer *sbuf) { - if(!sbuf->hw.buf) { + assert(!sbuf->user); + if(!sbuf->hwbuf) { enum pipe_error ret; void *map; @@ -611,20 +633,20 @@ svga_buffer_update_hw(struct svga_screen *ss, struct svga_buffer *sbuf) return PIPE_ERROR; ret = svga_buffer_create_hw_storage(ss, sbuf); - assert(ret == PIPE_OK); if(ret != PIPE_OK) return ret; pipe_mutex_lock(ss->swc_mutex); - map = ss->sws->buffer_map(ss->sws, sbuf->hw.buf, PIPE_BUFFER_USAGE_CPU_WRITE); + map = ss->sws->buffer_map(ss->sws, sbuf->hwbuf, PIPE_BUFFER_USAGE_CPU_WRITE); assert(map); if(!map) { pipe_mutex_unlock(ss->swc_mutex); - return PIPE_ERROR_OUT_OF_MEMORY; + svga_buffer_destroy_hw_storage(ss, sbuf); + return PIPE_ERROR; } memcpy(map, sbuf->swbuf, sbuf->base.size); - ss->sws->buffer_unmap(ss->sws, sbuf->hw.buf); + ss->sws->buffer_unmap(ss->sws, sbuf->hwbuf); /* This user/malloc buffer is now indistinguishable from a gpu buffer */ assert(!sbuf->map.count); @@ -636,10 +658,89 @@ 
svga_buffer_update_hw(struct svga_screen *ss, struct svga_buffer *sbuf) sbuf->swbuf = NULL; } - svga_buffer_upload_queue(sbuf, 0, sbuf->base.size); + pipe_mutex_unlock(ss->swc_mutex); } - pipe_mutex_unlock(ss->swc_mutex); + return PIPE_OK; +} + + +/** + * Upload the buffer to the host in a piecewise fashion. + * + * Used when the buffer is too big to fit in the GMR aperture. + */ +static INLINE enum pipe_error +svga_buffer_upload_piecewise(struct svga_screen *ss, + struct svga_context *svga, + struct svga_buffer *sbuf) +{ + struct svga_winsys_screen *sws = ss->sws; + const unsigned alignment = sizeof(void *); + const unsigned usage = 0; + unsigned i; + + assert(sbuf->map.num_ranges); + assert(!sbuf->dma.pending); + + SVGA_DBG(DEBUG_DMA, "dma to sid %p\n", sbuf->handle); + + for (i = 0; i < sbuf->map.num_ranges; ++i) { + struct svga_buffer_range *range = &sbuf->map.ranges[i]; + unsigned offset = range->start; + unsigned size = range->end - range->start; + + while (offset < range->end) { + struct svga_winsys_buffer *hwbuf; + uint8_t *map; + enum pipe_error ret; + + if (offset + size > range->end) + size = range->end - offset; + + hwbuf = svga_winsys_buffer_create(ss, alignment, usage, size); + while (!hwbuf) { + size /= 2; + if (!size) + return PIPE_ERROR_OUT_OF_MEMORY; + hwbuf = svga_winsys_buffer_create(ss, alignment, usage, size); + } + + SVGA_DBG(DEBUG_DMA, " bytes %u - %u\n", + offset, offset + size); + + map = sws->buffer_map(sws, hwbuf, + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_DISCARD); + assert(map); + if (map) { + memcpy(map, sbuf->swbuf, size); + sws->buffer_unmap(sws, hwbuf); + } + + ret = SVGA3D_BufferDMA(svga->swc, + hwbuf, sbuf->handle, + SVGA3D_WRITE_HOST_VRAM, + size, 0, offset, sbuf->dma.flags); + if(ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_BufferDMA(svga->swc, + hwbuf, sbuf->handle, + SVGA3D_WRITE_HOST_VRAM, + size, 0, offset, sbuf->dma.flags); + assert(ret == PIPE_OK); + } + + sbuf->dma.flags.discard = FALSE; + 
+ sws->buffer_destroy(sws, hwbuf); + + offset += size; + } + } + + sbuf->map.num_ranges = 0; + return PIPE_OK; } @@ -659,34 +760,74 @@ svga_buffer_handle(struct svga_context *svga, sbuf = svga_buffer(buf); assert(!sbuf->map.count); + assert(!sbuf->user); if(!sbuf->handle) { ret = svga_buffer_create_host_surface(ss, sbuf); if(ret != PIPE_OK) return NULL; - - ret = svga_buffer_update_hw(ss, sbuf); - if(ret != PIPE_OK) - return NULL; } - if(!sbuf->needs_flush && sbuf->hw.num_ranges) { - /* Queue the buffer for flushing */ - ret = svga_buffer_upload_command(svga, sbuf); - if(ret != PIPE_OK) - /* XXX: Should probably have a richer return value */ - return NULL; - - assert(sbuf->hw.svga == svga); + assert(sbuf->handle); + + if (sbuf->map.num_ranges) { + if (!sbuf->dma.pending) { + /* + * No pending DMA upload yet, so insert a DMA upload command now. + */ + + /* + * Migrate the data from swbuf -> hwbuf if necessary. + */ + ret = svga_buffer_update_hw(ss, sbuf); + if (ret == PIPE_OK) { + /* + * Queue a dma command. + */ + + ret = svga_buffer_upload_command(svga, sbuf); + if (ret == PIPE_ERROR_OUT_OF_MEMORY) { + svga_context_flush(svga, NULL); + ret = svga_buffer_upload_command(svga, sbuf); + assert(ret == PIPE_OK); + } + if (ret == PIPE_OK) { + sbuf->dma.pending = TRUE; + assert(!sbuf->head.prev && !sbuf->head.next); + LIST_ADDTAIL(&sbuf->head, &svga->dirty_buffers); + } + } + else if (ret == PIPE_ERROR_OUT_OF_MEMORY) { + /* + * The buffer is too big to fit in the GMR aperture, so break it in + * smaller pieces. + */ + ret = svga_buffer_upload_piecewise(ss, svga, sbuf); + } - sbuf->needs_flush = TRUE; - assert(!sbuf->head.prev && !sbuf->head.next); - LIST_ADDTAIL(&sbuf->head, &svga->dirty_buffers); + if (ret != PIPE_OK) { + /* + * Something unexpected happened above. There is very little that + * we can do other than proceeding while ignoring the dirty ranges. + */ + assert(0); + sbuf->map.num_ranges = 0; + } + } + else { + /* + * There a pending dma already. 
Make sure it is from this context. + */ + assert(sbuf->dma.svga == svga); + } } + assert(!sbuf->map.num_ranges || sbuf->dma.pending); + return sbuf->handle; } + struct pipe_buffer * svga_screen_buffer_wrap_surface(struct pipe_screen *screen, enum SVGA3dSurfaceFormat format, @@ -739,7 +880,7 @@ svga_context_flush_buffers(struct svga_context *svga) sbuf = LIST_ENTRY(struct svga_buffer, curr, head); assert(p_atomic_read(&sbuf->base.reference.count) != 0); - assert(sbuf->needs_flush); + assert(sbuf->dma.pending); svga_buffer_upload_flush(svga, sbuf); diff --git a/src/gallium/drivers/svga/svga_screen_buffer.h b/src/gallium/drivers/svga/svga_screen_buffer.h index 448ac107c7f..8c862fa62d6 100644 --- a/src/gallium/drivers/svga/svga_screen_buffer.h +++ b/src/gallium/drivers/svga/svga_screen_buffer.h @@ -57,35 +57,6 @@ struct svga_buffer_range /** - * Describe a - * - * This holds the information to emit a SVGA3dCmdSurfaceDMA. - */ -struct svga_buffer_upload -{ - /** - * Guest memory region. - */ - struct svga_winsys_buffer *buf; - - struct svga_buffer_range ranges[SVGA_BUFFER_MAX_RANGES]; - unsigned num_ranges; - - SVGA3dSurfaceDMAFlags flags; - - /** - * Pointer to the DMA copy box *inside* the command buffer. - */ - SVGA3dCopyBox *boxes; - - /** - * Context that has the pending DMA to this buffer. - */ - struct svga_context *svga; -}; - - -/** * SVGA pipe buffer. */ struct svga_buffer @@ -111,14 +82,6 @@ struct svga_buffer boolean user; /** - * DMA'ble memory. - * - * A piece of GMR memory. It is created when mapping the buffer, and will be - * used to upload/download vertex data from the host. - */ - struct svga_buffer_upload hw; - - /** * Creation key for the host surface handle. * * This structure describes all the host surface characteristics so that it @@ -134,19 +97,94 @@ struct svga_buffer * trying to bind */ struct svga_winsys_surface *handle; - + /** - * Whether the host has been ever written. + * Information about ongoing and past map operations. 
*/ - boolean host_written; - struct { + /** + * Number of concurrent mappings. + * + * XXX: It is impossible to guarantee concurrent maps work in all + * circumstances -- pipe_buffers really need transfer objects too. + */ unsigned count; + + /** + * Whether this buffer is currently mapped for writing. + */ boolean writing; + + /** + * Whether the application will tell us explicitly which ranges it touched + * or not. + */ boolean flush_explicit; + + /** + * Dirty ranges. + * + * Ranges that were touched by the application and need to be uploaded to + * the host. + * + * This information will be copied into dma.boxes, when emitting the + * SVGA3dCmdSurfaceDMA command. + */ + struct svga_buffer_range ranges[SVGA_BUFFER_MAX_RANGES]; + unsigned num_ranges; } map; - - boolean needs_flush; + + /** + * Information about uploaded version of user buffers. + */ + struct { + struct pipe_buffer *buffer; + + /** + * We combine multiple user buffers into the same hardware buffer. This + * is the relative offset within that buffer. + */ + unsigned offset; + } uploaded; + + /** + * DMA'ble memory. + * + * A piece of GMR memory, with the same size of the buffer. It is created + * when mapping the buffer, and will be used to upload vertex data to the + * host. + */ + struct svga_winsys_buffer *hwbuf; + + /** + * Information about pending DMA uploads. + * + */ + struct { + /** + * Whether this buffer has an unfinished DMA upload command. + * + * If not set then the rest of the information is null. + */ + boolean pending; + + SVGA3dSurfaceDMAFlags flags; + + /** + * Pointer to the DMA copy box *inside* the command buffer. + */ + SVGA3dCopyBox *boxes; + + /** + * Context that has the pending DMA to this buffer. + */ + struct svga_context *svga; + } dma; + + /** + * Linked list head, used to gather all buffers with pending dma uploads on + * a context. It is only valid if the dma.pending is set above. 
+ */ struct list_head head; }; @@ -176,6 +214,16 @@ svga_buffer_is_user_buffer( struct pipe_buffer *buffer ) void svga_screen_init_buffer_functions(struct pipe_screen *screen); + +/** + * Get the host surface handle for this buffer. + * + * This will ensure the host surface is updated, issuing DMAs as needed. + * + * NOTE: This may insert new commands in the context, so it *must* be called + * before reserving command buffer space. And, in order to insert commands + * it may need to call svga_context_flush(). + */ struct svga_winsys_surface * svga_buffer_handle(struct svga_context *svga, struct pipe_buffer *buf); diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c index d1066ce13b0..ded903170b5 100644 --- a/src/gallium/drivers/svga/svga_state_vdecl.c +++ b/src/gallium/drivers/svga/svga_state_vdecl.c @@ -54,33 +54,30 @@ upload_user_buffers( struct svga_context *svga ) { if (svga_buffer_is_user_buffer(svga->curr.vb[i].buffer)) { - struct pipe_buffer *upload_buffer = NULL; - unsigned offset = /*svga->curr.vb[i].buffer_offset*/ 0; - unsigned size = svga->curr.vb[i].buffer->size /*- offset*/; - unsigned upload_offset; - - ret = u_upload_buffer( svga->upload_vb, - offset, - size, - svga->curr.vb[i].buffer, - &upload_offset, - &upload_buffer ); - if (ret) - return ret; - - if (0) - debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sz %d\n", - __FUNCTION__, - i, - svga->curr.vb[i].buffer, - upload_buffer, upload_offset, size); - - /* Make sure we release the old buffer and end up with the - * correct refcount on the uploaded buffer. 
- */ - pipe_buffer_reference( &svga->curr.vb[i].buffer, NULL ); - svga->curr.vb[i].buffer = upload_buffer; - svga->curr.vb[i].buffer_offset = upload_offset; + struct svga_buffer *buffer = svga_buffer(svga->curr.vb[i].buffer); + + if (!buffer->uploaded.buffer) { + ret = u_upload_buffer( svga->upload_vb, + 0, + buffer->base.size, + &buffer->base, + &buffer->uploaded.offset, + &buffer->uploaded.buffer ); + if (ret) + return ret; + + if (0) + debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sz %d\n", + __FUNCTION__, + i, + buffer, + buffer->uploaded.buffer, + buffer->uploaded.offset, + buffer->base.size); + } + + pipe_buffer_reference( &svga->curr.vb[i].buffer, buffer->uploaded.buffer ); + svga->curr.vb[i].buffer_offset = buffer->uploaded.offset; } } diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h index e8f75485d55..48eced2ecea 100644 --- a/src/gallium/drivers/svga/svga_tgsi_emit.h +++ b/src/gallium/drivers/svga/svga_tgsi_emit.h @@ -138,6 +138,7 @@ static INLINE boolean emit_dst( struct svga_shader_emitter *emit, SVGA3dShaderDestToken dest ) { assert(dest.reserved0); + assert(dest.mask); return svga_shader_emit_dword( emit, dest.value ); } @@ -267,6 +268,7 @@ static INLINE SVGA3dShaderDestToken writemask( SVGA3dShaderDestToken dest, unsigned mask ) { + assert(dest.mask & mask); dest.mask &= mask; return dest; } diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 87aed39f78f..3d4f56a67bd 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -112,6 +112,7 @@ translate_dst_register( struct svga_shader_emitter *emit, } dest.mask = reg->Register.WriteMask; + assert(dest.mask); if (insn->Instruction.Saturate) dest.dstMod = SVGA3DDSTMOD_SATURATE; @@ -1410,34 +1411,42 @@ static boolean emit_tex(struct svga_shader_emitter *emit, if (compare) { - SVGA3dShaderDestToken src0_zdivw = get_temp( emit ); - struct src_register 
tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y); - struct src_register one = - scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_W ); - - /* Divide texcoord R by Q */ - if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ), - src0_zdivw, - scalar(src0, TGSI_SWIZZLE_W) )) - return FALSE; + if (dst.mask & TGSI_WRITEMASK_XYZ) { + SVGA3dShaderDestToken src0_zdivw = get_temp( emit ); + struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y); + + /* Divide texcoord R by Q */ + if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ), + writemask(src0_zdivw, TGSI_WRITEMASK_X), + scalar(src0, TGSI_SWIZZLE_W) )) + return FALSE; - if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), - src0_zdivw, - scalar(src0, TGSI_SWIZZLE_Z), - src(src0_zdivw) )) - return FALSE; + if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), + writemask(src0_zdivw, TGSI_WRITEMASK_X), + scalar(src0, TGSI_SWIZZLE_Z), + scalar(src(src0_zdivw), TGSI_SWIZZLE_X) )) + return FALSE; - if (!emit_select( - emit, - emit->key.fkey.tex[src1.base.num].compare_func, - dst, - src(src0_zdivw), - tex_src_x)) - return FALSE; + if (!emit_select( + emit, + emit->key.fkey.tex[src1.base.num].compare_func, + writemask( dst, TGSI_WRITEMASK_XYZ ), + scalar(src(src0_zdivw), TGSI_SWIZZLE_X), + tex_src_x)) + return FALSE; + } - return submit_op1( emit, inst_token( SVGA3DOP_MOV ), - writemask( dst, TGSI_WRITEMASK_W), - one ); + if (dst.mask & TGSI_WRITEMASK_W) { + struct src_register one = + scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_W ); + + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), + writemask( dst, TGSI_WRITEMASK_W ), + one )) + return FALSE; + } + + return TRUE; } else if (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW) { @@ -1827,13 +1836,13 @@ static boolean emit_exp(struct svga_shader_emitter *emit, */ if (dst.mask & TGSI_WRITEMASK_X) { if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), - writemask( dst, dst.mask & TGSI_WRITEMASK_X ), + writemask( dst, TGSI_WRITEMASK_X ), src0, scalar( negate( src( 
fraction ) ), TGSI_SWIZZLE_Y ) ) ) return FALSE; if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ), - writemask( dst, dst.mask & TGSI_WRITEMASK_X ), + writemask( dst, TGSI_WRITEMASK_X ), scalar( src( dst ), TGSI_SWIZZLE_X ) ) ) return FALSE; @@ -1845,7 +1854,7 @@ static boolean emit_exp(struct svga_shader_emitter *emit, */ if (dst.mask & TGSI_WRITEMASK_Z) { if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ), - writemask( dst, dst.mask & TGSI_WRITEMASK_Z ), + writemask( dst, TGSI_WRITEMASK_Z ), src0 ) ) return FALSE; } diff --git a/src/gallium/drivers/trace/tr_drm.c b/src/gallium/drivers/trace/tr_drm.c index b8adde77f10..2b4915003e2 100644 --- a/src/gallium/drivers/trace/tr_drm.c +++ b/src/gallium/drivers/trace/tr_drm.c @@ -28,11 +28,11 @@ #include "state_tracker/drm_api.h" #include "util/u_memory.h" -#include "trace/tr_drm.h" -#include "trace/tr_screen.h" -#include "trace/tr_context.h" -#include "trace/tr_buffer.h" -#include "trace/tr_texture.h" +#include "tr_drm.h" +#include "tr_screen.h" +#include "tr_context.h" +#include "tr_buffer.h" +#include "tr_texture.h" struct trace_drm_api { diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index 48625bf3127..e4a92228093 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -121,7 +121,9 @@ struct pipe_screen { void (*texture_destroy)(struct pipe_texture *pt); - /** Get a surface which is a "view" into a texture */ + /** Get a 2D surface which is a "view" into a texture + * \param usage bitmask of PIPE_BUFFER_USAGE_* read/write flags + */ struct pipe_surface *(*get_tex_surface)(struct pipe_screen *, struct pipe_texture *texture, unsigned face, unsigned level, diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c index 8843e087a81..4809b9090d5 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.c +++ b/src/gallium/state_trackers/dri/dri_drawable.c @@ -134,12 +134,13 @@ 
dri_get_buffers(__DRIdrawable * dPriv) if ((dri_screen->dri2.loader && (dri_screen->dri2.loader->base.version > 2) - && (dri_screen->dri2.loader->getBuffersWithFormat != NULL))) + && (dri_screen->dri2.loader->getBuffersWithFormat != NULL))) { buffers = (*dri_screen->dri2.loader->getBuffersWithFormat) (dri_drawable, &dri_drawable->w, &dri_drawable->h, drawable->attachments, drawable->num_attachments, &count, dri_drawable->loaderPrivate); - else + } else { + assert(dri_screen->dri2.loader); buffers = (*dri_screen->dri2.loader->getBuffers) (dri_drawable, &dri_drawable->w, &dri_drawable->h, @@ -148,6 +149,7 @@ dri_get_buffers(__DRIdrawable * dPriv) num_attachments, &count, dri_drawable-> loaderPrivate); + } if (buffers == NULL) { return; diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c index d769d253ac5..086e644e211 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d.c @@ -687,131 +687,143 @@ egl_g3d_destroy_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx) return EGL_TRUE; } -static EGLBoolean -init_surface_geometry(_EGLSurface *surf) -{ - struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); - - return gsurf->native->validate(gsurf->native, 0x0, - &gsurf->sequence_number, NULL, - &gsurf->base.Width, &gsurf->base.Height); -} +struct egl_g3d_create_surface_arg { + EGLint type; + union { + EGLNativeWindowType win; + EGLNativePixmapType pix; + } u; +}; static _EGLSurface * -egl_g3d_create_window_surface(_EGLDriver *drv, _EGLDisplay *dpy, - _EGLConfig *conf, EGLNativeWindowType win, - const EGLint *attribs) +egl_g3d_create_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, + struct egl_g3d_create_surface_arg *arg, + const EGLint *attribs) { struct egl_g3d_display *gdpy = egl_g3d_display(dpy); struct egl_g3d_config *gconf = egl_g3d_config(conf); struct egl_g3d_surface *gsurf; + struct native_surface *nsurf; + const char *err; + + 
switch (arg->type) { + case EGL_WINDOW_BIT: + err = "eglCreateWindowSurface"; + break; + case EGL_PIXMAP_BIT: + err = "eglCreatePixmapSurface"; + break; + case EGL_PBUFFER_BIT: + err = "eglCreatePBufferSurface"; + break; +#ifdef EGL_MESA_screen_surface + case EGL_SCREEN_BIT_MESA: + err = "eglCreateScreenSurface"; + break; +#endif + default: + err = "eglCreateUnknownSurface"; + break; + } gsurf = CALLOC_STRUCT(egl_g3d_surface); if (!gsurf) { - _eglError(EGL_BAD_ALLOC, "eglCreateWindowSurface"); + _eglError(EGL_BAD_ALLOC, err); return NULL; } - if (!_eglInitSurface(&gsurf->base, dpy, EGL_WINDOW_BIT, conf, attribs)) { + if (!_eglInitSurface(&gsurf->base, dpy, arg->type, conf, attribs)) { free(gsurf); return NULL; } - gsurf->native = - gdpy->native->create_window_surface(gdpy->native, win, gconf->native); - if (!gsurf->native) { + /* create the native surface */ + switch (arg->type) { + case EGL_WINDOW_BIT: + nsurf = gdpy->native->create_window_surface(gdpy->native, + arg->u.win, gconf->native); + break; + case EGL_PIXMAP_BIT: + nsurf = gdpy->native->create_pixmap_surface(gdpy->native, + arg->u.pix, gconf->native); + break; + case EGL_PBUFFER_BIT: + nsurf = gdpy->native->create_pbuffer_surface(gdpy->native, + gconf->native, gsurf->base.Width, gsurf->base.Height); + break; +#ifdef EGL_MESA_screen_surface + case EGL_SCREEN_BIT_MESA: + /* prefer back buffer (move to _eglInitSurface?) 
*/ + gsurf->base.RenderBuffer = EGL_BACK_BUFFER; + nsurf = gdpy->native->modeset->create_scanout_surface(gdpy->native, + gconf->native, gsurf->base.Width, gsurf->base.Height); + break; +#endif + default: + nsurf = NULL; + break; + } + + if (!nsurf) { free(gsurf); return NULL; } - - if (!init_surface_geometry(&gsurf->base)) { - gsurf->native->destroy(gsurf->native); + /* initialize the geometry */ + if (!nsurf->validate(nsurf, 0x0, &gsurf->sequence_number, NULL, + &gsurf->base.Width, &gsurf->base.Height)) { + nsurf->destroy(nsurf); free(gsurf); return NULL; } - gsurf->render_att = (gsurf->base.RenderBuffer == EGL_SINGLE_BUFFER || - !gconf->native->mode.doubleBufferMode) ? + gsurf->native = nsurf; + + gsurf->render_att = (gsurf->base.RenderBuffer == EGL_SINGLE_BUFFER) ? NATIVE_ATTACHMENT_FRONT_LEFT : NATIVE_ATTACHMENT_BACK_LEFT; + if (!gconf->native->mode.doubleBufferMode) + gsurf->render_att = NATIVE_ATTACHMENT_FRONT_LEFT; return &gsurf->base; } static _EGLSurface * -egl_g3d_create_pixmap_surface(_EGLDriver *drv, _EGLDisplay *dpy, - _EGLConfig *conf, EGLNativePixmapType pix, +egl_g3d_create_window_surface(_EGLDriver *drv, _EGLDisplay *dpy, + _EGLConfig *conf, EGLNativeWindowType win, const EGLint *attribs) { - struct egl_g3d_display *gdpy = egl_g3d_display(dpy); - struct egl_g3d_config *gconf = egl_g3d_config(conf); - struct egl_g3d_surface *gsurf; - - gsurf = CALLOC_STRUCT(egl_g3d_surface); - if (!gsurf) { - _eglError(EGL_BAD_ALLOC, "eglCreatePixmapSurface"); - return NULL; - } + struct egl_g3d_create_surface_arg arg; - if (!_eglInitSurface(&gsurf->base, dpy, EGL_PIXMAP_BIT, conf, attribs)) { - free(gsurf); - return NULL; - } + memset(&arg, 0, sizeof(arg)); + arg.type = EGL_WINDOW_BIT; + arg.u.win = win; - gsurf->native = - gdpy->native->create_pixmap_surface(gdpy->native, pix, gconf->native); - if (!gsurf->native) { - free(gsurf); - return NULL; - } + return egl_g3d_create_surface(drv, dpy, conf, &arg, attribs); +} - if (!init_surface_geometry(&gsurf->base)) { - 
gsurf->native->destroy(gsurf->native); - free(gsurf); - return NULL; - } +static _EGLSurface * +egl_g3d_create_pixmap_surface(_EGLDriver *drv, _EGLDisplay *dpy, + _EGLConfig *conf, EGLNativePixmapType pix, + const EGLint *attribs) +{ + struct egl_g3d_create_surface_arg arg; - gsurf->render_att = NATIVE_ATTACHMENT_FRONT_LEFT; + memset(&arg, 0, sizeof(arg)); + arg.type = EGL_PIXMAP_BIT; + arg.u.pix = pix; - return &gsurf->base; + return egl_g3d_create_surface(drv, dpy, conf, &arg, attribs); } static _EGLSurface * egl_g3d_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, const EGLint *attribs) { - struct egl_g3d_display *gdpy = egl_g3d_display(dpy); - struct egl_g3d_config *gconf = egl_g3d_config(conf); - struct egl_g3d_surface *gsurf; - - gsurf = CALLOC_STRUCT(egl_g3d_surface); - if (!gsurf) { - _eglError(EGL_BAD_ALLOC, "eglCreatePbufferSurface"); - return NULL; - } + struct egl_g3d_create_surface_arg arg; - if (!_eglInitSurface(&gsurf->base, dpy, EGL_PBUFFER_BIT, conf, attribs)) { - free(gsurf); - return NULL; - } - - gsurf->native = - gdpy->native->create_pbuffer_surface(gdpy->native, gconf->native, - gsurf->base.Width, gsurf->base.Height); - if (!gsurf->native) { - free(gsurf); - return NULL; - } - - if (!init_surface_geometry(&gsurf->base)) { - gsurf->native->destroy(gsurf->native); - free(gsurf); - return NULL; - } - - gsurf->render_att = (!gconf->native->mode.doubleBufferMode) ? - NATIVE_ATTACHMENT_FRONT_LEFT : NATIVE_ATTACHMENT_BACK_LEFT; + memset(&arg, 0, sizeof(arg)); + arg.type = EGL_PBUFFER_BIT; - return &gsurf->base; + return egl_g3d_create_surface(drv, dpy, conf, &arg, attribs); } /** @@ -919,25 +931,7 @@ egl_g3d_swap_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf) if (gctx) gctx->stapi->st_notify_swapbuffers(gctx->draw.st_fb); - /* - * We drew on the back buffer, unless there was no back buffer. - * In that case, we drew on the front buffer. Either case, we call - * swap_buffers. 
- */ - if (!gsurf->native->swap_buffers(gsurf->native)) - return EGL_FALSE; - - if (gctx) { - struct egl_g3d_config *gconf = egl_g3d_config(gsurf->base.Config); - - /* force validation if the swap method is not copy */ - if (gconf->native->mode.swapMethod != GLX_SWAP_COPY_OML) { - gctx->force_validate = EGL_TRUE; - egl_g3d_validate_context(dpy, &gctx->base); - } - } - - return EGL_TRUE; + return gsurf->native->swap_buffers(gsurf->native); } /** @@ -1171,34 +1165,12 @@ static _EGLSurface * egl_g3d_create_screen_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, const EGLint *attribs) { - struct egl_g3d_display *gdpy = egl_g3d_display(dpy); - struct egl_g3d_config *gconf = egl_g3d_config(conf); - struct egl_g3d_surface *gsurf; - - gsurf = CALLOC_STRUCT(egl_g3d_surface); - if (!gsurf) { - _eglError(EGL_BAD_ALLOC, "eglCreatePbufferSurface"); - return NULL; - } - - if (!_eglInitSurface(&gsurf->base, dpy, - EGL_SCREEN_BIT_MESA, conf, attribs)) { - free(gsurf); - return NULL; - } + struct egl_g3d_create_surface_arg arg; - gsurf->native = - gdpy->native->modeset->create_scanout_surface(gdpy->native, - gconf->native, gsurf->base.Width, gsurf->base.Height); - if (!gsurf->native) { - free(gsurf); - return NULL; - } + memset(&arg, 0, sizeof(arg)); + arg.type = EGL_SCREEN_BIT_MESA; - gsurf->render_att = (!gconf->native->mode.doubleBufferMode) ? 
- NATIVE_ATTACHMENT_FRONT_LEFT : NATIVE_ATTACHMENT_BACK_LEFT; - - return &gsurf->base; + return egl_g3d_create_surface(drv, dpy, conf, &arg, attribs); } static EGLBoolean diff --git a/src/gallium/state_trackers/egl/x11/native_dri2.c b/src/gallium/state_trackers/egl/x11/native_dri2.c index dbd1a64992d..8df58891a05 100644 --- a/src/gallium/state_trackers/egl/x11/native_dri2.c +++ b/src/gallium/state_trackers/egl/x11/native_dri2.c @@ -26,6 +26,7 @@ #include "util/u_math.h" #include "util/u_format.h" #include "util/u_inlines.h" +#include "util/u_hash_table.h" #include "pipe/p_compiler.h" #include "pipe/p_screen.h" #include "pipe/p_context.h" @@ -50,9 +51,13 @@ struct dri2_display { struct drm_api *api; struct x11_screen *xscr; int xscr_number; + const char *dri_driver; + int dri_major, dri_minor; struct dri2_config *configs; int num_configs; + + struct util_hash_table *surfaces; }; struct dri2_surface { @@ -62,7 +67,8 @@ struct dri2_surface { enum pipe_format color_format; struct dri2_display *dri2dpy; - unsigned int sequence_number; + unsigned int server_stamp; + unsigned int client_stamp; int width, height; struct pipe_texture *textures[NUM_NATIVE_ATTACHMENTS]; uint valid_mask; @@ -96,65 +102,23 @@ dri2_config(const struct native_config *nconf) } /** - * Get the buffers from the server. + * Process the buffers returned by the server. 
*/ static void -dri2_surface_get_buffers(struct native_surface *nsurf, uint buffer_mask) +dri2_surface_process_drawable_buffers(struct native_surface *nsurf, + struct x11_drawable_buffer *xbufs, + int num_xbufs) { struct dri2_surface *dri2surf = dri2_surface(nsurf); struct dri2_display *dri2dpy = dri2surf->dri2dpy; - unsigned int dri2atts[NUM_NATIVE_ATTACHMENTS]; - int num_ins, num_outs, att, i; - struct x11_drawable_buffer *xbufs; struct pipe_texture templ; uint valid_mask; + int i; - /* prepare the attachments */ - num_ins = 0; - for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) { - if (native_attachment_mask_test(buffer_mask, att)) { - unsigned int dri2att; - - switch (att) { - case NATIVE_ATTACHMENT_FRONT_LEFT: - dri2att = DRI2BufferFrontLeft; - break; - case NATIVE_ATTACHMENT_BACK_LEFT: - dri2att = DRI2BufferBackLeft; - break; - case NATIVE_ATTACHMENT_FRONT_RIGHT: - dri2att = DRI2BufferFrontRight; - break; - case NATIVE_ATTACHMENT_BACK_RIGHT: - dri2att = DRI2BufferBackRight; - break; - default: - assert(0); - dri2att = 0; - break; - } - - dri2atts[num_ins] = dri2att; - num_ins++; - } - } - - xbufs = x11_drawable_get_buffers(dri2dpy->xscr, dri2surf->drawable, - &dri2surf->width, &dri2surf->height, - dri2atts, FALSE, num_ins, &num_outs); - - /* we should be able to do better... 
*/ - if (xbufs && dri2surf->last_num_xbufs == num_outs && - memcmp(dri2surf->last_xbufs, xbufs, sizeof(*xbufs) * num_outs) == 0) { - free(xbufs); - return; - } - - /* free the old buffers */ + /* free the old textures */ for (i = 0; i < NUM_NATIVE_ATTACHMENTS; i++) pipe_texture_reference(&dri2surf->textures[i], NULL); dri2surf->valid_mask = 0x0; - dri2surf->sequence_number++; dri2surf->have_back = FALSE; dri2surf->have_fake = FALSE; @@ -172,7 +136,7 @@ dri2_surface_get_buffers(struct native_surface *nsurf, uint buffer_mask) templ.tex_usage = PIPE_TEXTURE_USAGE_RENDER_TARGET; valid_mask = 0x0; - for (i = 0; i < num_outs; i++) { + for (i = 0; i < num_xbufs; i++) { struct x11_drawable_buffer *xbuf = &xbufs[i]; const char *desc; enum native_attachment natt; @@ -212,12 +176,72 @@ dri2_surface_get_buffers(struct native_surface *nsurf, uint buffer_mask) valid_mask |= 1 << natt; } + dri2surf->valid_mask = valid_mask; +} + +/** + * Get the buffers from the server. + */ +static void +dri2_surface_get_buffers(struct native_surface *nsurf, uint buffer_mask) +{ + struct dri2_surface *dri2surf = dri2_surface(nsurf); + struct dri2_display *dri2dpy = dri2surf->dri2dpy; + unsigned int dri2atts[NUM_NATIVE_ATTACHMENTS]; + int num_ins, num_outs, att; + struct x11_drawable_buffer *xbufs; + + /* prepare the attachments */ + num_ins = 0; + for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) { + if (native_attachment_mask_test(buffer_mask, att)) { + unsigned int dri2att; + + switch (att) { + case NATIVE_ATTACHMENT_FRONT_LEFT: + dri2att = DRI2BufferFrontLeft; + break; + case NATIVE_ATTACHMENT_BACK_LEFT: + dri2att = DRI2BufferBackLeft; + break; + case NATIVE_ATTACHMENT_FRONT_RIGHT: + dri2att = DRI2BufferFrontRight; + break; + case NATIVE_ATTACHMENT_BACK_RIGHT: + dri2att = DRI2BufferBackRight; + break; + default: + assert(0); + dri2att = 0; + break; + } + + dri2atts[num_ins] = dri2att; + num_ins++; + } + } + + xbufs = x11_drawable_get_buffers(dri2dpy->xscr, dri2surf->drawable, + 
&dri2surf->width, &dri2surf->height, + dri2atts, FALSE, num_ins, &num_outs); + + /* we should be able to do better... */ + if (xbufs && dri2surf->last_num_xbufs == num_outs && + memcmp(dri2surf->last_xbufs, xbufs, sizeof(*xbufs) * num_outs) == 0) { + free(xbufs); + dri2surf->client_stamp = dri2surf->server_stamp; + return; + } + + dri2_surface_process_drawable_buffers(&dri2surf->base, xbufs, num_outs); + + dri2surf->server_stamp++; + dri2surf->client_stamp = dri2surf->server_stamp; + if (dri2surf->last_xbufs) free(dri2surf->last_xbufs); dri2surf->last_xbufs = xbufs; dri2surf->last_num_xbufs = num_outs; - - dri2surf->valid_mask = valid_mask; } /** @@ -264,7 +288,7 @@ dri2_surface_update_buffers(struct native_surface *nsurf, uint buffer_mask) } } dri2surf->valid_mask |= new_valid; - /* no need to update sequence number */ + /* no need to update the stamps */ } else { dri2_surface_get_buffers(&dri2surf->base, buffer_mask); @@ -273,6 +297,16 @@ dri2_surface_update_buffers(struct native_surface *nsurf, uint buffer_mask) return ((dri2surf->valid_mask & buffer_mask) == buffer_mask); } +/** + * Return TRUE if the surface receives DRI2_InvalidateBuffers events. 
+ */ +static INLINE boolean +dri2_surface_receive_events(struct native_surface *nsurf) +{ + struct dri2_surface *dri2surf = dri2_surface(nsurf); + return (dri2surf->dri2dpy->dri_minor >= 3); +} + static boolean dri2_surface_flush_frontbuffer(struct native_surface *nsurf) { @@ -289,6 +323,10 @@ dri2_surface_flush_frontbuffer(struct native_surface *nsurf) 0, 0, dri2surf->width, dri2surf->height, DRI2BufferFakeFrontLeft, DRI2BufferFrontLeft); + /* force buffers to be updated in next validation call */ + if (!dri2_surface_receive_events(&dri2surf->base)) + dri2surf->server_stamp++; + return TRUE; } @@ -314,6 +352,10 @@ dri2_surface_swap_buffers(struct native_surface *nsurf) 0, 0, dri2surf->width, dri2surf->height, DRI2BufferFrontLeft, DRI2BufferFakeFrontLeft); + /* force buffers to be updated in next validation call */ + if (!dri2_surface_receive_events(&dri2surf->base)) + dri2surf->server_stamp++; + return TRUE; } @@ -324,11 +366,14 @@ dri2_surface_validate(struct native_surface *nsurf, uint attachment_mask, { struct dri2_surface *dri2surf = dri2_surface(nsurf); - if (!dri2_surface_update_buffers(&dri2surf->base, attachment_mask)) - return FALSE; + if (dri2surf->server_stamp != dri2surf->client_stamp || + (dri2surf->valid_mask & attachment_mask) != attachment_mask) { + if (!dri2_surface_update_buffers(&dri2surf->base, attachment_mask)) + return FALSE; + } if (seq_num) - *seq_num = dri2surf->sequence_number; + *seq_num = dri2surf->client_stamp; if (textures) { int att; @@ -377,9 +422,13 @@ dri2_surface_destroy(struct native_surface *nsurf) pipe_texture_reference(&ptex, NULL); } - if (dri2surf->drawable) + if (dri2surf->drawable) { x11_drawable_enable_dri2(dri2surf->dri2dpy->xscr, dri2surf->drawable, FALSE); + + util_hash_table_remove(dri2surf->dri2dpy->surfaces, + (void *) dri2surf->drawable); + } free(dri2surf); } @@ -408,8 +457,14 @@ dri2_display_create_surface(struct native_display *ndpy, dri2surf->base.validate = dri2_surface_validate; dri2surf->base.wait = 
dri2_surface_wait; - if (drawable) + if (drawable) { x11_drawable_enable_dri2(dri2dpy->xscr, drawable, TRUE); + /* initialize the geometry */ + dri2_surface_update_buffers(&dri2surf->base, 0x0); + + util_hash_table_set(dri2surf->dri2dpy->surfaces, + (void *) dri2surf->drawable, (void *) &dri2surf->base); + } return dri2surf; } @@ -655,6 +710,9 @@ dri2_display_destroy(struct native_display *ndpy) if (dri2dpy->base.screen) dri2dpy->base.screen->destroy(dri2dpy->base.screen); + if (dri2dpy->surfaces) + util_hash_table_destroy(dri2dpy->surfaces); + if (dri2dpy->xscr) x11_screen_destroy(dri2dpy->xscr); if (dri2dpy->own_dpy) @@ -664,6 +722,24 @@ dri2_display_destroy(struct native_display *ndpy) free(dri2dpy); } +static void +dri2_display_invalidate_buffers(struct x11_screen *xscr, Drawable drawable, + void *user_data) +{ + struct native_display *ndpy = (struct native_display* ) user_data; + struct dri2_display *dri2dpy = dri2_display(ndpy); + struct native_surface *nsurf; + struct dri2_surface *dri2surf; + + nsurf = (struct native_surface *) + util_hash_table_get(dri2dpy->surfaces, (void *) drawable); + if (!nsurf) + return; + + dri2surf = dri2_surface(nsurf); + dri2surf->server_stamp++; +} + /** * Initialize DRI2 and pipe screen. 
*/ @@ -681,7 +757,17 @@ dri2_display_init_screen(struct native_display *ndpy) return FALSE; } - fd = x11_screen_enable_dri2(dri2dpy->xscr, driver); + dri2dpy->dri_driver = x11_screen_probe_dri2(dri2dpy->xscr, + &dri2dpy->dri_major, &dri2dpy->dri_minor); + if (!dri2dpy->dri_driver || !driver || + strcmp(dri2dpy->dri_driver, driver) != 0) { + _eglLog(_EGL_WARNING, "Driver mismatch: %s != %s", + dri2dpy->dri_driver, dri2dpy->api->name); + return FALSE; + } + + fd = x11_screen_enable_dri2(dri2dpy->xscr, + dri2_display_invalidate_buffers, &dri2dpy->base); if (fd < 0) return FALSE; @@ -696,6 +782,19 @@ dri2_display_init_screen(struct native_display *ndpy) return TRUE; } +static unsigned +dri2_display_hash_table_hash(void *key) +{ + XID drawable = pointer_to_uintptr(key); + return (unsigned) drawable; +} + +static int +dri2_display_hash_table_compare(void *key1, void *key2) +{ + return (key1 - key2); +} + struct native_display * x11_create_dri2_display(EGLNativeDisplayType dpy, struct drm_api *api) { @@ -706,11 +805,6 @@ x11_create_dri2_display(EGLNativeDisplayType dpy, struct drm_api *api) return NULL; dri2dpy->api = api; - if (!dri2dpy->api) { - _eglLog(_EGL_WARNING, "failed to create DRM API"); - free(dri2dpy); - return NULL; - } dri2dpy->dpy = dpy; if (!dri2dpy->dpy) { @@ -734,6 +828,13 @@ x11_create_dri2_display(EGLNativeDisplayType dpy, struct drm_api *api) return NULL; } + dri2dpy->surfaces = util_hash_table_create(dri2_display_hash_table_hash, + dri2_display_hash_table_compare); + if (!dri2dpy->surfaces) { + dri2_display_destroy(&dri2dpy->base); + return NULL; + } + dri2dpy->base.destroy = dri2_display_destroy; dri2dpy->base.get_configs = dri2_display_get_configs; dri2dpy->base.is_pixmap_supported = dri2_display_is_pixmap_supported; diff --git a/src/gallium/state_trackers/egl/x11/native_x11.c b/src/gallium/state_trackers/egl/x11/native_x11.c index 55f0d4d3083..3add95d0aca 100644 --- a/src/gallium/state_trackers/egl/x11/native_x11.c +++ 
b/src/gallium/state_trackers/egl/x11/native_x11.c @@ -70,7 +70,7 @@ native_create_probe(EGLNativeDisplayType dpy) xscr = x11_screen_create(xdpy, scr); if (xscr) { if (x11_screen_support(xscr, X11_SCREEN_EXTENSION_DRI2)) { - driver_name = x11_screen_probe_dri2(xscr); + driver_name = x11_screen_probe_dri2(xscr, NULL, NULL); if (driver_name) nprobe->data = strdup(driver_name); } diff --git a/src/gallium/state_trackers/egl/x11/native_ximage.c b/src/gallium/state_trackers/egl/x11/native_ximage.c index 78675a19985..8ba73f289dd 100644 --- a/src/gallium/state_trackers/egl/x11/native_ximage.c +++ b/src/gallium/state_trackers/egl/x11/native_ximage.c @@ -81,7 +81,8 @@ struct ximage_surface { GC gc; - unsigned int sequence_number; + unsigned int server_stamp; + unsigned int client_stamp; int width, height; struct ximage_buffer buffers[NUM_NATIVE_ATTACHMENTS]; uint valid_mask; @@ -216,18 +217,11 @@ ximage_surface_update_geometry(struct native_surface *nsurf) ok = XGetGeometry(xsurf->xdpy->dpy, xsurf->drawable, &root, &x, &y, &w, &h, &border, &depth); - if (!ok) { - w = xsurf->width; - h = xsurf->height; - } - - /* all buffers become invalid */ - if (xsurf->width != w || xsurf->height != h) { + if (ok && (xsurf->width != w || xsurf->height != h)) { xsurf->width = w; xsurf->height = h; - xsurf->valid_mask = 0x0; - xsurf->sequence_number++; + xsurf->server_stamp++; updated = TRUE; } @@ -247,10 +241,18 @@ ximage_surface_update_buffers(struct native_surface *nsurf, uint buffer_mask) int att; updated = ximage_surface_update_geometry(&xsurf->base); - buffer_mask &= ~xsurf->valid_mask; - /* all requested buffers are valid */ - if (!buffer_mask) - return TRUE; + if (updated) { + /* all buffers become invalid */ + xsurf->valid_mask = 0x0; + } + else { + buffer_mask &= ~xsurf->valid_mask; + /* all requested buffers are valid */ + if (!buffer_mask) { + xsurf->client_stamp = xsurf->server_stamp; + return TRUE; + } + } new_valid = 0x0; for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) { @@ 
-273,11 +275,8 @@ ximage_surface_update_buffers(struct native_surface *nsurf, uint buffer_mask) } } - if (new_valid) { - xsurf->valid_mask |= new_valid; - if (updated) - xsurf->sequence_number++; - } + xsurf->valid_mask |= new_valid; + xsurf->client_stamp = xsurf->server_stamp; return (new_valid == buffer_mask); } @@ -333,7 +332,15 @@ ximage_surface_draw_buffer(struct native_surface *nsurf, static boolean ximage_surface_flush_frontbuffer(struct native_surface *nsurf) { - return ximage_surface_draw_buffer(nsurf, NATIVE_ATTACHMENT_FRONT_LEFT); + struct ximage_surface *xsurf = ximage_surface(nsurf); + boolean ret; + + ret = ximage_surface_draw_buffer(&xsurf->base, + NATIVE_ATTACHMENT_FRONT_LEFT); + /* force buffers to be updated in next validation call */ + xsurf->server_stamp++; + + return ret; } static boolean @@ -345,6 +352,8 @@ ximage_surface_swap_buffers(struct native_surface *nsurf) /* display the back buffer first */ ret = ximage_surface_draw_buffer(nsurf, NATIVE_ATTACHMENT_BACK_LEFT); + /* force buffers to be updated in next validation call */ + xsurf->server_stamp++; xfront = &xsurf->buffers[NATIVE_ATTACHMENT_FRONT_LEFT]; xback = &xsurf->buffers[NATIVE_ATTACHMENT_BACK_LEFT]; @@ -356,7 +365,6 @@ ximage_surface_swap_buffers(struct native_surface *nsurf) xtmp = *xfront; *xfront = *xback; *xback = xtmp; - xsurf->sequence_number++; return ret; } @@ -368,11 +376,14 @@ ximage_surface_validate(struct native_surface *nsurf, uint attachment_mask, { struct ximage_surface *xsurf = ximage_surface(nsurf); - if (!ximage_surface_update_buffers(&xsurf->base, attachment_mask)) - return FALSE; + if (xsurf->client_stamp != xsurf->server_stamp || + (xsurf->valid_mask & attachment_mask) != attachment_mask) { + if (!ximage_surface_update_buffers(&xsurf->base, attachment_mask)) + return FALSE; + } if (seq_num) - *seq_num = xsurf->sequence_number; + *seq_num = xsurf->client_stamp; if (textures) { int att; @@ -453,6 +464,9 @@ ximage_display_create_surface(struct native_display *ndpy, 
return NULL; } + /* initialize the geometry */ + ximage_surface_update_buffers(&xsurf->base, 0x0); + for (i = 0; i < NUM_NATIVE_ATTACHMENTS; i++) { struct ximage_buffer *xbuf = &xsurf->buffers[i]; diff --git a/src/gallium/state_trackers/egl/x11/x11_screen.c b/src/gallium/state_trackers/egl/x11/x11_screen.c index d72bfc99d3e..f4096114844 100644 --- a/src/gallium/state_trackers/egl/x11/x11_screen.c +++ b/src/gallium/state_trackers/egl/x11/x11_screen.c @@ -39,6 +39,9 @@ #include "glxinit.h" struct x11_screen { + /* dummy base class */ + struct __GLXDRIdisplayRec base; + Display *dpy; int number; @@ -53,6 +56,9 @@ struct x11_screen { char *dri_device; int dri_fd; + x11_drawable_invalidate_buffers dri_invalidate_buffers; + void *dri_user_data; + XVisualInfo *visuals; int num_visuals; @@ -98,6 +104,8 @@ x11_screen_destroy(struct x11_screen *xscr) Xfree(xscr->dri_device); /* xscr->glx_dpy will be destroyed with the X display */ + if (xscr->glx_dpy) + xscr->glx_dpy->dri2Display = NULL; if (xscr->visuals) XFree(xscr->visuals); @@ -247,24 +255,25 @@ x11_screen_get_glx_visuals(struct x11_screen *xscr) : NULL; } -static boolean -x11_screen_is_driver_equal(struct x11_screen *xscr, const char *driver) -{ - return (strcmp(xscr->dri_driver, driver) == 0); -} - /** * Probe the screen for the DRI2 driver name. */ const char * -x11_screen_probe_dri2(struct x11_screen *xscr) +x11_screen_probe_dri2(struct x11_screen *xscr, int *major, int *minor) { + if (!x11_screen_init_dri2(xscr)) + return NULL; + /* get the driver name and the device name */ if (!xscr->dri_driver) { if (!DRI2Connect(xscr->dpy, RootWindow(xscr->dpy, xscr->number), &xscr->dri_driver, &xscr->dri_device)) xscr->dri_driver = xscr->dri_device = NULL; } + if (major) + *major = xscr->dri_major; + if (minor) + *minor = xscr->dri_minor; return xscr->dri_driver; } @@ -274,21 +283,17 @@ x11_screen_probe_dri2(struct x11_screen *xscr) * descriptor will be closed automatically when the screen is destoryed. 
*/ int -x11_screen_enable_dri2(struct x11_screen *xscr, const char *driver) +x11_screen_enable_dri2(struct x11_screen *xscr, + x11_drawable_invalidate_buffers invalidate_buffers, + void *user_data) { if (xscr->dri_fd < 0) { int fd; drm_magic_t magic; /* get the driver name and the device name first */ - if (!x11_screen_probe_dri2(xscr)) - return -1; - - if (!x11_screen_is_driver_equal(xscr, driver)) { - _eglLog(_EGL_WARNING, "Driver mismatch: %s != %s", - xscr->dri_driver, driver); + if (!x11_screen_probe_dri2(xscr, NULL, NULL)) return -1; - } fd = open(xscr->dri_device, O_RDWR); if (fd < 0) { @@ -310,6 +315,22 @@ x11_screen_enable_dri2(struct x11_screen *xscr, const char *driver) return -1; } + if (!x11_screen_init_glx(xscr)) { + _eglLog(_EGL_WARNING, "failed to initialize GLX"); + close(fd); + return -1; + } + if (xscr->glx_dpy->dri2Display) { + _eglLog(_EGL_WARNING, + "display is already managed by another x11 screen"); + close(fd); + return -1; + } + + xscr->glx_dpy->dri2Display = (__GLXDRIdisplay *) xscr; + xscr->dri_invalidate_buffers = invalidate_buffers; + xscr->dri_user_data = user_data; + xscr->dri_fd = fd; } @@ -451,3 +472,20 @@ x11_context_modes_count(const __GLcontextModes *modes) count++; return count; } + +/** + * This is called from src/glx/dri2.c. 
+ */ +void +dri2InvalidateBuffers(Display *dpy, XID drawable) +{ + __GLXdisplayPrivate *priv = __glXInitialize(dpy); + struct x11_screen *xscr = NULL; + + if (priv && priv->dri2Display) + xscr = (struct x11_screen *) priv->dri2Display; + if (!xscr || !xscr->dri_invalidate_buffers) + return; + + xscr->dri_invalidate_buffers(xscr, drawable, xscr->dri_user_data); +} diff --git a/src/gallium/state_trackers/egl/x11/x11_screen.h b/src/gallium/state_trackers/egl/x11/x11_screen.h index 5432858ac3e..37e8d5a40e6 100644 --- a/src/gallium/state_trackers/egl/x11/x11_screen.h +++ b/src/gallium/state_trackers/egl/x11/x11_screen.h @@ -48,6 +48,10 @@ struct x11_drawable_buffer { struct x11_screen; +typedef void (*x11_drawable_invalidate_buffers)(struct x11_screen *xscr, + Drawable drawable, + void *user_data); + struct x11_screen * x11_screen_create(Display *dpy, int screen); @@ -71,10 +75,12 @@ const __GLcontextModes * x11_screen_get_glx_visuals(struct x11_screen *xscr); const char * -x11_screen_probe_dri2(struct x11_screen *xscr); +x11_screen_probe_dri2(struct x11_screen *xscr, int *major, int *minor); int -x11_screen_enable_dri2(struct x11_screen *xscr, const char *driver); +x11_screen_enable_dri2(struct x11_screen *xscr, + x11_drawable_invalidate_buffers invalidate_buffers, + void *user_data); __GLcontextModes * x11_context_modes_create(unsigned count); diff --git a/src/gallium/state_trackers/glx/xlib/glx_api.c b/src/gallium/state_trackers/glx/xlib/glx_api.c index 656a69131e9..08bf624b5c1 100644 --- a/src/gallium/state_trackers/glx/xlib/glx_api.c +++ b/src/gallium/state_trackers/glx/xlib/glx_api.c @@ -689,6 +689,8 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) int desiredVisualID = -1; int numAux = 0; + xmesa_init(); + parselist = list; while (*parselist) { @@ -941,9 +943,6 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) /* give the visual some useful GLX attributes */ double_flag = GL_TRUE; rgb_flag = 
GL_TRUE; - depth_size = default_depth_bits(); - stencil_size = STENCIL_BITS; - /* XXX accum??? */ } } else if (level==0) { diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c index 4aac08a1084..188b885f68a 100644 --- a/src/gallium/state_trackers/glx/xlib/xm_api.c +++ b/src/gallium/state_trackers/glx/xlib/xm_api.c @@ -319,6 +319,51 @@ choose_pixel_format(XMesaVisual v) +/** + * Query the default gallium screen for a Z/Stencil format that + * at least matches the given depthBits and stencilBits. + */ +static void +xmesa_choose_z_stencil_format(int depthBits, int stencilBits, + enum pipe_format *depthFormat, + enum pipe_format *stencilFormat) +{ + const enum pipe_texture_target target = PIPE_TEXTURE_2D; + const unsigned tex_usage = PIPE_TEXTURE_USAGE_DEPTH_STENCIL; + const unsigned geom_flags = (PIPE_TEXTURE_GEOM_NON_SQUARE | + PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO); + static enum pipe_format formats[] = { + PIPE_FORMAT_Z24S8_UNORM, + PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_Z16_UNORM, + PIPE_FORMAT_Z32_UNORM + }; + int i; + + assert(screen); + + *depthFormat = *stencilFormat = PIPE_FORMAT_NONE; + + /* search for supported format */ + for (i = 0; i < Elements(formats); i++) { + if (screen->is_format_supported(screen, formats[i], + target, tex_usage, geom_flags)) { + *depthFormat = formats[i]; + break; + } + } + + if (stencilBits) { + *stencilFormat = *depthFormat; + } + + /* XXX we should check that he chosen format has at least as many bits + * as what was requested. 
+ */ +} + + + /**********************************************************************/ /***** Linked list of XMesaBuffers *****/ /**********************************************************************/ @@ -361,34 +406,9 @@ create_xmesa_buffer(Drawable d, BufferType type, /* determine PIPE_FORMATs for buffers */ colorFormat = choose_pixel_format(vis); - if (vis->mesa_visual.depthBits == 0) - depthFormat = PIPE_FORMAT_NONE; -#ifdef GALLIUM_CELL /* XXX temporary for Cell! */ - else - depthFormat = PIPE_FORMAT_S8Z24_UNORM; -#else - else if (vis->mesa_visual.depthBits <= 16) - depthFormat = PIPE_FORMAT_Z16_UNORM; - else if (vis->mesa_visual.depthBits <= 24) - depthFormat = PIPE_FORMAT_S8Z24_UNORM; - else - depthFormat = PIPE_FORMAT_Z32_UNORM; -#endif - - if (vis->mesa_visual.stencilBits == 8) { - if (depthFormat == PIPE_FORMAT_S8Z24_UNORM) - stencilFormat = depthFormat; - else - stencilFormat = PIPE_FORMAT_S8_UNORM; - } - else { - /* no stencil */ - stencilFormat = PIPE_FORMAT_NONE; - if (depthFormat == PIPE_FORMAT_S8Z24_UNORM) { - /* use 24-bit Z, undefined stencil channel */ - depthFormat = PIPE_FORMAT_X8Z24_UNORM; - } - } + xmesa_choose_z_stencil_format(vis->mesa_visual.depthBits, + vis->mesa_visual.stencilBits, + &depthFormat, &stencilFormat); get_drawable_size(vis->display, d, &width, &height); @@ -653,6 +673,8 @@ XMesaVisual XMesaCreateVisual( Display *display, XMesaVisual v; GLint red_bits, green_bits, blue_bits, alpha_bits; + xmesa_init(); + /* For debugging only */ if (_mesa_getenv("MESA_XSYNC")) { /* This makes debugging X easier. @@ -748,6 +770,21 @@ void XMesaDestroyVisual( XMesaVisual v ) } +/** + * Do one-time initializations. + */ +void +xmesa_init(void) +{ + static GLboolean firstTime = GL_TRUE; + if (firstTime) { + pipe_mutex_init(_xmesa_lock); + _screen = driver.create_pipe_screen(); + screen = trace_screen_create( _screen ); + firstTime = GL_FALSE; + } +} + /** * Create a new XMesaContext. 
@@ -759,18 +796,12 @@ void XMesaDestroyVisual( XMesaVisual v ) PUBLIC XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) { - static GLboolean firstTime = GL_TRUE; struct pipe_context *pipe = NULL; XMesaContext c; GLcontext *mesaCtx; uint pf; - if (firstTime) { - pipe_mutex_init(_xmesa_lock); - _screen = driver.create_pipe_screen(); - screen = trace_screen_create( _screen ); - firstTime = GL_FALSE; - } + xmesa_init(); /* Note: the XMesaContext contains a Mesa GLcontext struct (inheritance) */ c = (XMesaContext) CALLOC_STRUCT(xmesa_context); diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.h b/src/gallium/state_trackers/glx/xlib/xm_api.h index 63a329cbe05..004cb260dcd 100644 --- a/src/gallium/state_trackers/glx/xlib/xm_api.h +++ b/src/gallium/state_trackers/glx/xlib/xm_api.h @@ -367,6 +367,9 @@ xmesa_buffer(GLframebuffer *fb) extern void +xmesa_init(void); + +extern void xmesa_delete_framebuffer(struct gl_framebuffer *fb); extern XMesaBuffer diff --git a/src/gallium/state_trackers/python/README b/src/gallium/state_trackers/python/README index 4a060730242..e24a262aba9 100644 --- a/src/gallium/state_trackers/python/README +++ b/src/gallium/state_trackers/python/README @@ -18,7 +18,7 @@ On a Windows machine ensure the swig command is in your PATH. 
Invoke scons on the top dir as - scons debug=yes statetrackers=python drivers=softpipe,trace winsys=none + scons debug=yes statetrackers=python drivers=softpipe winsys=none To use it set PYTHONPATH appropriately, e.g, in Linux do: diff --git a/src/gallium/state_trackers/python/p_state.i b/src/gallium/state_trackers/python/p_state.i index 5afe4d49082..eda77b56f8e 100644 --- a/src/gallium/state_trackers/python/p_state.i +++ b/src/gallium/state_trackers/python/p_state.i @@ -69,7 +69,7 @@ pipe_blend_state(const char *STRING, unsigned LENGTH) { struct pipe_blend_state *state; - state = CALLOC_STRUCT(pipe_framebuffer_state); + state = CALLOC_STRUCT(pipe_blend_state); if (state) { LENGTH = MIN2(sizeof *state, LENGTH); memcpy(state, STRING, LENGTH); diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-abs.png b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-abs.png Binary files differnew file mode 100644 index 00000000000..c947a7b8813 --- /dev/null +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-abs.png diff --git a/src/gallium/state_trackers/wgl/stw_device.c b/src/gallium/state_trackers/wgl/stw_device.c index e5fa6ac8eb4..472a2a5379a 100644 --- a/src/gallium/state_trackers/wgl/stw_device.c +++ b/src/gallium/state_trackers/wgl/stw_device.c @@ -47,7 +47,6 @@ #ifdef WIN32_THREADS extern _glthread_Mutex OneTimeLock; -extern void FreeAllTSD(void); #endif @@ -183,7 +182,8 @@ stw_cleanup(void) #ifdef WIN32_THREADS _glthread_DESTROY_MUTEX(OneTimeLock); - FreeAllTSD(); + + _glapi_destroy_multithread(); #endif #ifdef DEBUG diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c index f53a879a14a..8fb6e5a96dd 100644 --- a/src/gallium/state_trackers/xorg/xorg_driver.c +++ b/src/gallium/state_trackers/xorg/xorg_driver.c @@ -155,7 +155,7 @@ drv_get_rec(ScrnInfoPtr pScrn) if (pScrn->driverPrivate) return TRUE; - pScrn->driverPrivate = 
xnfcalloc(sizeof(modesettingRec), 1); + pScrn->driverPrivate = xnfcalloc(1, sizeof(modesettingRec)); return TRUE; } @@ -183,31 +183,66 @@ drv_probe_ddc(ScrnInfoPtr pScrn, int index) static Bool drv_crtc_resize(ScrnInfoPtr pScrn, int width, int height) { + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); modesettingPtr ms = modesettingPTR(pScrn); - PixmapPtr rootPixmap; ScreenPtr pScreen = pScrn->pScreen; + int old_width, old_height; + PixmapPtr rootPixmap; + int i; if (width == pScrn->virtualX && height == pScrn->virtualY) return TRUE; + old_width = pScrn->virtualX; + old_height = pScrn->virtualY; pScrn->virtualX = width; pScrn->virtualY = height; - /* - * Remove the old framebuffer & texture. - */ - drmModeRmFB(ms->fd, ms->fb_id); - if (!ms->destroy_front_buffer(pScrn)) - FatalError("failed to destroy front buffer\n"); + /* ms->create_front_buffer will remove the old front buffer */ rootPixmap = pScreen->GetScreenPixmap(pScreen); if (!pScreen->ModifyPixmapHeader(rootPixmap, width, height, -1, -1, -1, NULL)) - return FALSE; + goto error_modify; + + pScrn->displayWidth = rootPixmap->devKind / (rootPixmap->drawable.bitsPerPixel / 8); + + if (!ms->create_front_buffer(pScrn) || !ms->bind_front_buffer(pScrn)) + goto error_create; + + /* + * create && bind will turn off all crtc(s) in the kernel so we need to + * re-enable all the crtcs again. For real HW we might want to do this + * before destroying the old framebuffer. + */ + for (i = 0; i < xf86_config->num_crtc; i++) { + xf86CrtcPtr crtc = xf86_config->crtc[i]; + + if (!crtc->enabled) + continue; + + crtc->funcs->set_mode_major(crtc, &crtc->mode, crtc->rotation, crtc->x, crtc->y); + } + + return TRUE; + + /* + * This is the error recovery path. 
+ */ +error_create: + if (!pScreen->ModifyPixmapHeader(rootPixmap, old_width, old_height, -1, -1, -1, NULL)) + FatalError("failed to resize rootPixmap error path\n"); pScrn->displayWidth = rootPixmap->devKind / (rootPixmap->drawable.bitsPerPixel / 8); - /* now create new frontbuffer */ - return ms->create_front_buffer(pScrn) && ms->bind_front_buffer(pScrn); +error_modify: + pScrn->virtualX = old_width; + pScrn->virtualY = old_height; + + if (ms->create_front_buffer(pScrn) && ms->bind_front_buffer(pScrn)) + return FALSE; + + FatalError("failed to setup old framebuffer\n"); + return FALSE; } static const xf86CrtcConfigFuncsRec crtc_config_funcs = { @@ -333,6 +368,7 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags) EntityInfoPtr pEnt; EntPtr msEnt = NULL; int max_width, max_height; + CustomizerPtr cust; if (pScrn->numEntities != 1) return FALSE; @@ -344,6 +380,9 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags) return TRUE; } + cust = (CustomizerPtr) pScrn->driverPrivate; + pScrn->driverPrivate = NULL; + /* Allocate driverPrivate */ if (!drv_get_rec(pScrn)) return FALSE; @@ -351,6 +390,7 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags) ms = modesettingPTR(pScrn); ms->SaveGeneration = -1; ms->pEnt = pEnt; + ms->cust = cust; pScrn->displayWidth = 640; /* default it */ @@ -423,8 +463,8 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags) xf86CrtcConfigInit(pScrn, &crtc_config_funcs); xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); - max_width = 8192; - max_height = 8192; + max_width = 2048; /* A very low default */ + max_height = 2048; /* see screen_init */ xf86CrtcSetSizeRange(pScrn, 320, 200, max_width, max_height); if (xf86ReturnOptValBool(ms->Options, OPTION_SW_CURSOR, FALSE)) { @@ -607,7 +647,9 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) { ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; modesettingPtr ms = modesettingPTR(pScrn); + unsigned max_width, max_height; VisualPtr visual; + CustomizerPtr cust = ms->cust; if (!drv_init_drm(pScrn)) { 
FatalError("Could not init DRM"); @@ -624,6 +666,26 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) return FALSE; } + /* get max width and height */ + { + drmModeResPtr res; + res = drmModeGetResources(ms->fd); + max_width = res->max_width; + max_height = res->max_height; + drmModeFreeResources(res); + } + + if (ms->screen) { + float maxf; + int max; + maxf = ms->screen->get_paramf(ms->screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); + max = (1 << (int)(maxf - 1.0f)); + max_width = max < max_width ? max : max_width; + max_height = max < max_height ? max : max_height; + } + + xf86CrtcSetSizeRange(pScrn, 1, 1, max_width, max_height); + pScrn->pScreen = pScreen; /* HW dependent - FIXME */ @@ -673,7 +735,7 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) xf86SetBlackWhitePixels(pScreen); ms->accelerate_2d = xf86ReturnOptValBool(ms->Options, OPTION_2D_ACCEL, FALSE); - ms->debug_fallback = xf86ReturnOptValBool(ms->Options, OPTION_DEBUG_FALLBACK, TRUE); + ms->debug_fallback = xf86ReturnOptValBool(ms->Options, OPTION_DEBUG_FALLBACK, ms->accelerate_2d); if (ms->screen) { ms->exa = xorg_exa_init(pScrn, ms->accelerate_2d); @@ -684,6 +746,11 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) #endif } + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "##################################\n"); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "# Usefull debugging info follows #\n"); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "##################################\n"); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Using %s backend\n", + ms->screen ? "Gallium3D" : "libkms"); xf86DrvMsg(pScrn->scrnIndex, X_INFO, "2D Acceleration is %s\n", ms->screen && ms->accelerate_2d ? 
"enabled" : "disabled"); xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Fallback debugging is %s\n", @@ -694,6 +761,7 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) #else xf86DrvMsg(pScrn->scrnIndex, X_INFO, "3D Acceleration is disabled\n"); #endif + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "##################################\n"); miInitializeBackingStore(pScreen); xf86SetBackingStore(pScreen); @@ -725,8 +793,8 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) if (serverGeneration == 1) xf86ShowUnusedOptions(pScrn->scrnIndex, pScrn->options); - if (ms->winsys_screen_init) - ms->winsys_screen_init(pScrn); + if (cust && cust->winsys_screen_init) + cust->winsys_screen_init(cust, ms->fd); return drv_enter_vt(scrnIndex, 1); } @@ -759,10 +827,11 @@ drv_leave_vt(int scrnIndex, int flags) ScrnInfoPtr pScrn = xf86Screens[scrnIndex]; modesettingPtr ms = modesettingPTR(pScrn); xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(pScrn); + CustomizerPtr cust = ms->cust; int o; - if (ms->winsys_leave_vt) - ms->winsys_leave_vt(pScrn); + if (cust && cust->winsys_leave_vt) + cust->winsys_leave_vt(cust); for (o = 0; o < config->num_crtc; o++) { xf86CrtcPtr crtc = config->crtc[o]; @@ -778,6 +847,7 @@ drv_leave_vt(int scrnIndex, int flags) } drmModeRmFB(ms->fd, ms->fb_id); + ms->fb_id = -1; drv_restore_hw_state(pScrn); @@ -796,6 +866,7 @@ drv_enter_vt(int scrnIndex, int flags) { ScrnInfoPtr pScrn = xf86Screens[scrnIndex]; modesettingPtr ms = modesettingPTR(pScrn); + CustomizerPtr cust = ms->cust; if (drmSetMaster(ms->fd)) { if (errno == EINVAL) { @@ -826,8 +897,8 @@ drv_enter_vt(int scrnIndex, int flags) if (!xf86SetDesiredModes(pScrn)) return FALSE; - if (ms->winsys_enter_vt) - ms->winsys_enter_vt(pScrn); + if (cust && cust->winsys_enter_vt) + cust->winsys_enter_vt(cust); return TRUE; } @@ -845,13 +916,14 @@ drv_close_screen(int scrnIndex, ScreenPtr pScreen) { ScrnInfoPtr pScrn = xf86Screens[scrnIndex]; modesettingPtr ms = modesettingPTR(pScrn); 
+ CustomizerPtr cust = ms->cust; if (pScrn->vtSema) { drv_leave_vt(scrnIndex, 0); } - if (ms->winsys_screen_close) - ms->winsys_screen_close(pScrn); + if (cust && cust->winsys_screen_close) + cust->winsys_screen_close(cust); #ifdef DRI2 if (ms->screen) @@ -900,6 +972,15 @@ static Bool drv_destroy_front_buffer_ga3d(ScrnInfoPtr pScrn) { modesettingPtr ms = modesettingPTR(pScrn); + + if (!ms->root_texture) + return TRUE; + + if (ms->fb_id != -1) { + drmModeRmFB(ms->fd, ms->fb_id); + ms->fb_id = -1; + } + pipe_texture_reference(&ms->root_texture, NULL); return TRUE; } @@ -908,7 +989,7 @@ static Bool drv_create_front_buffer_ga3d(ScrnInfoPtr pScrn) { modesettingPtr ms = modesettingPTR(pScrn); - unsigned handle, stride; + unsigned handle, stride, fb_id; struct pipe_texture *tex; int ret; @@ -933,19 +1014,23 @@ drv_create_front_buffer_ga3d(ScrnInfoPtr pScrn) pScrn->bitsPerPixel, stride, handle, - &ms->fb_id); + &fb_id); if (ret) { - debug_printf("%s: failed to create framebuffer (%i, %s)", + debug_printf("%s: failed to create framebuffer (%i, %s)\n", __func__, ret, strerror(-ret)); goto err_destroy; } + if (!drv_destroy_front_buffer_ga3d(pScrn)) + FatalError("%s: failed to take down old framebuffer\n", __func__); + pScrn->frameX0 = 0; pScrn->frameY0 = 0; drv_adjust_frame(pScrn->scrnIndex, pScrn->frameX0, pScrn->frameY0, 0); pipe_texture_reference(&ms->root_texture, tex); pipe_texture_reference(&tex, NULL); + ms->fb_id = fb_id; return TRUE; @@ -993,6 +1078,11 @@ drv_destroy_front_buffer_kms(ScrnInfoPtr pScrn) if (!ms->root_bo) return TRUE; + if (ms->fb_id != -1) { + drmModeRmFB(ms->fd, ms->fb_id); + ms->fb_id = -1; + } + kms_bo_unmap(ms->root_bo); kms_bo_destroy(&ms->root_bo); return TRUE; @@ -1005,6 +1095,7 @@ drv_create_front_buffer_kms(ScrnInfoPtr pScrn) unsigned handle, stride; struct kms_bo *bo; unsigned attr[8]; + unsigned fb_id; int ret; attr[0] = KMS_BO_TYPE; @@ -1035,17 +1126,21 @@ drv_create_front_buffer_kms(ScrnInfoPtr pScrn) pScrn->bitsPerPixel, stride, handle, 
- &ms->fb_id); + &fb_id); if (ret) { debug_printf("%s: failed to create framebuffer (%i, %s)", __func__, ret, strerror(-ret)); goto err_destroy; } + if (!drv_destroy_front_buffer_kms(pScrn)) + FatalError("%s: could not takedown old bo", __func__); + pScrn->frameX0 = 0; pScrn->frameY0 = 0; drv_adjust_frame(pScrn->scrnIndex, pScrn->frameX0, pScrn->frameY0, 0); ms->root_bo = bo; + ms->fb_id = fb_id; return TRUE; @@ -1113,4 +1208,14 @@ static Bool drv_init_front_buffer_functions(ScrnInfoPtr pScrn) return TRUE; } +CustomizerPtr xorg_customizer(ScrnInfoPtr pScrn) +{ + return modesettingPTR(pScrn)->cust; +} + +Bool xorg_has_gallium(ScrnInfoPtr pScrn) +{ + return modesettingPTR(pScrn)->screen != NULL; +} + /* vim: set sw=4 ts=8 sts=4: */ diff --git a/src/gallium/state_trackers/xorg/xorg_tracker.h b/src/gallium/state_trackers/xorg/xorg_tracker.h index 58bb60a721d..2f5cc64d9c5 100644 --- a/src/gallium/state_trackers/xorg/xorg_tracker.h +++ b/src/gallium/state_trackers/xorg/xorg_tracker.h @@ -67,6 +67,14 @@ typedef struct #define XORG_NR_FENCES 3 +typedef struct _CustomizerRec +{ + Bool (*winsys_screen_init)(struct _CustomizerRec *cust, int fd); + Bool (*winsys_screen_close)(struct _CustomizerRec *cust); + Bool (*winsys_enter_vt)(struct _CustomizerRec *cust); + Bool (*winsys_leave_vt)(struct _CustomizerRec *cust); +} CustomizerRec, *CustomizerPtr; + typedef struct _modesettingRec { /* drm */ @@ -117,12 +125,7 @@ typedef struct _modesettingRec Bool accelerate_2d; Bool debug_fallback; - /* winsys hocks */ - Bool (*winsys_screen_init)(ScrnInfoPtr pScr); - Bool (*winsys_screen_close)(ScrnInfoPtr pScr); - Bool (*winsys_enter_vt)(ScrnInfoPtr pScr); - Bool (*winsys_leave_vt)(ScrnInfoPtr pScr); - void *winsys_priv; + CustomizerPtr cust; #ifdef DRM_MODE_FEATURE_DIRTYFB DamagePtr damage; @@ -131,6 +134,9 @@ typedef struct _modesettingRec #define modesettingPTR(p) ((modesettingPtr)((p)->driverPrivate)) +CustomizerPtr xorg_customizer(ScrnInfoPtr pScrn); + +Bool 
xorg_has_gallium(ScrnInfoPtr pScrn); /*********************************************************************** * xorg_exa.c diff --git a/src/gallium/winsys/drm/Makefile.template b/src/gallium/winsys/drm/Makefile.template index 960353a73d9..f4cc0def471 100644 --- a/src/gallium/winsys/drm/Makefile.template +++ b/src/gallium/winsys/drm/Makefile.template @@ -19,30 +19,12 @@ COMMON_BM_SOURCES = \ $(TOP)/src/mesa/drivers/dri/common/dri_bufmgr.c \ $(TOP)/src/mesa/drivers/dri/common/dri_drmpool.c - -ifeq ($(WINDOW_SYSTEM),dri) -WINOBJ= -WINLIB= INCLUDES = $(SHARED_INCLUDES) $(EXPAT_INCLUDES) OBJECTS = \ $(C_SOURCES:.c=.o) \ $(ASM_SOURCES:.S=.o) -else -# miniglx -WINOBJ= -WINLIB=-L$(MESA)/src/glx/mini -MINIGLX_INCLUDES = -I$(TOP)/src/glx/mini -INCLUDES = $(MINIGLX_INCLUDES) \ - $(SHARED_INCLUDES) \ - $(PCIACCESS_CFLAGS) - -OBJECTS = $(C_SOURCES:.c=.o) \ - $(MINIGLX_SOURCES:.c=.o) \ - $(ASM_SOURCES:.S=.o) -endif - ### Include directories SHARED_INCLUDES = \ @@ -74,18 +56,19 @@ SHARED_INCLUDES = \ $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ .S.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ ##### TARGETS ##### default: depend symlinks $(TOP)/$(LIB_DIR)/gallium/$(LIBNAME) -$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(PIPE_DRIVERS) $(WINOBJ) Makefile $(TOP)/src/mesa/drivers/dri/Makefile.template +$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(PIPE_DRIVERS) Makefile \ + $(TOP)/src/mesa/drivers/dri/Makefile.template $(MKLIB) -o $@ -noprefix -linker '$(CC)' -ldflags '$(LDFLAGS)' \ $(OBJECTS) $(PIPE_DRIVERS) \ -Wl,--start-group $(MESA_MODULES) -Wl,--end-group \ - $(WINOBJ) $(DRI_LIB_DEPS) $(DRIVER_EXTRAS) + $(DRI_LIB_DEPS) $(DRIVER_EXTRAS) $(TOP)/$(LIB_DIR)/gallium: mkdir -p $@ diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c index 5cbf0dd2c54..e5c69199330 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c +++ 
b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c @@ -31,11 +31,13 @@ */ #include "radeon_buffer.h" +#include "radeon_drm.h" -#include "radeon_bo_gem.h" -#include "r300_context.h" #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_memory.h" + +#include "radeon_bo_gem.h" #include <X11/Xutil.h> struct radeon_vl_context diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h index f1c8fc2a3b1..f776e2d9008 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h @@ -30,21 +30,11 @@ #ifndef RADEON_BUFFER_H #define RADEON_BUFFER_H -#include <stdio.h> - -#include "util/u_simple_screen.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" - #include "pipebuffer/pb_buffer.h" -#include "util/u_memory.h" - #include "radeon_bo.h" #include "radeon_cs.h" -#include "radeon_drm.h" - #include "radeon_winsys.h" struct radeon_pipe_buffer { diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c index 0c0e118ba3a..e817a26da6d 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c @@ -30,6 +30,16 @@ */ #include "radeon_drm.h" +#include "radeon_r300.h" +#include "radeon_buffer.h" + +#include "r300_winsys.h" +#include "trace/tr_drm.h" + +#include "util/u_memory.h" + +#include "xf86drm.h" +#include <sys/ioctl.h> /* Helper function to do the ioctls needed for setup and init. 
*/ static void do_ioctls(int fd, struct radeon_winsys* winsys) diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.h b/src/gallium/winsys/drm/radeon/core/radeon_drm.h index 8d74cbafc2f..f62a9b80485 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.h @@ -30,23 +30,8 @@ #ifndef RADEON_DRM_H #define RADEON_DRM_H -#include <sys/ioctl.h> - -#include "xf86drm.h" - -#include "pipe/p_screen.h" - -#include "trace/tr_drm.h" -#include "util/u_debug.h" -#include "util/u_memory.h" - #include "state_tracker/drm_api.h" -#include "radeon_buffer.h" -#include "radeon_r300.h" - -/* XXX */ -#include "r300_screen.h" struct pipe_screen* radeon_create_screen(struct drm_api* api, int drmFB, diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c index d759beaba13..122bd213543 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c @@ -21,6 +21,9 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "radeon_r300.h" +#include "radeon_buffer.h" + +#include "radeon_cs_gem.h" static void radeon_set_flush_cb(struct radeon_winsys *winsys, void (*flush_cb)(void *), diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.h b/src/gallium/winsys/drm/radeon/core/radeon_r300.h index cfbdb302661..e655dc32c85 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.h @@ -23,16 +23,7 @@ #ifndef RADEON_R300_H #define RADEON_R300_H -/* XXX WTF is this! I shouldn't have to include those first three! FUCK! 
*/ -#include <stdint.h> -#include <stdlib.h> -#include "drm.h" -#include "radeon_drm.h" -#include "radeon_cs_gem.h" - -#include "r300_winsys.h" - -#include "radeon_buffer.h" +#include "radeon_winsys.h" void radeon_setup_winsys(int fd, struct radeon_winsys* winsys); diff --git a/src/gallium/winsys/drm/radeon/dri/Makefile b/src/gallium/winsys/drm/radeon/dri/Makefile index eaa34180321..d75f7dd6da7 100644 --- a/src/gallium/winsys/drm/radeon/dri/Makefile +++ b/src/gallium/winsys/drm/radeon/dri/Makefile @@ -4,8 +4,6 @@ include $(TOP)/configs/current LIBNAME = radeong_dri.so -MINIGLX_SOURCES = - PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/dri/libdridrm.a \ $(TOP)/src/gallium/winsys/drm/radeon/core/libradeonwinsys.a \ diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c index 1dcbc419dbb..a7c6e88b9eb 100644 --- a/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c +++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c @@ -49,7 +49,7 @@ static struct dri1_api_version ddx_required = { 0, 1, 0 }; static struct dri1_api_version ddx_compat = { 0, 0, 0 }; static struct dri1_api_version dri_required = { 4, 0, 0 }; static struct dri1_api_version dri_compat = { 4, 0, 0 }; -static struct dri1_api_version drm_required = { 0, 1, 0 }; +static struct dri1_api_version drm_required = { 1, 0, 0 }; static struct dri1_api_version drm_compat = { 1, 0, 0 }; static struct dri1_api_version drm_scanout = { 0, 9, 0 }; diff --git a/src/gallium/winsys/drm/vmware/core/vmwgfx_drm.h b/src/gallium/winsys/drm/vmware/core/vmwgfx_drm.h index 1457966db81..47914bdb711 100644 --- a/src/gallium/winsys/drm/vmware/core/vmwgfx_drm.h +++ b/src/gallium/winsys/drm/vmware/core/vmwgfx_drm.h @@ -260,15 +260,23 @@ union drm_vmw_surface_reference_arg { * to the nearest kernel tick. * @fence_rep: User-space address of a struct drm_vmw_fence_rep cast to an * uint64_t. 
+ * @version: Allows expanding the execbuf ioctl parameters without breaking + * backwards compatibility, since user-space will always tell the kernel + * which version it uses. + * @flags: Execbuf flags. None currently. * * Argument to the DRM_VMW_EXECBUF Ioctl. */ +#define DRM_VMW_EXECBUF_VERSION 0 + struct drm_vmw_execbuf_arg { uint64_t commands; uint32_t command_size; uint32_t throttle_us; uint64_t fence_rep; + uint32_t version; + uint32_t flags; }; /** diff --git a/src/gallium/winsys/drm/vmware/xorg/vmw_driver.h b/src/gallium/winsys/drm/vmware/xorg/vmw_driver.h index 3efe851a4be..ba754b51e47 100644 --- a/src/gallium/winsys/drm/vmware/xorg/vmw_driver.h +++ b/src/gallium/winsys/drm/vmware/xorg/vmw_driver.h @@ -40,8 +40,11 @@ struct vmw_dma_buffer; -struct vmw_driver +struct vmw_customizer { + CustomizerRec base; + ScrnInfoPtr pScrn; + int fd; void *cursor_priv; @@ -50,11 +53,10 @@ struct vmw_driver void *video_priv; }; -static INLINE struct vmw_driver * -vmw_driver(ScrnInfoPtr pScrn) +static INLINE struct vmw_customizer * +vmw_customizer(CustomizerPtr cust) { - modesettingPtr ms = modesettingPTR(pScrn); - return ms ? (struct vmw_driver *)ms->winsys_priv : NULL; + return cust ? 
(struct vmw_customizer *) cust : NULL; } @@ -62,40 +64,40 @@ vmw_driver(ScrnInfoPtr pScrn) * vmw_video.c */ -Bool vmw_video_init(ScrnInfoPtr pScrn, struct vmw_driver *vmw); +Bool vmw_video_init(struct vmw_customizer *vmw); -Bool vmw_video_close(ScrnInfoPtr pScrn, struct vmw_driver *vmw); +Bool vmw_video_close(struct vmw_customizer *vmw); -void vmw_video_stop_all(ScrnInfoPtr pScrn, struct vmw_driver *vmw); +void vmw_video_stop_all(struct vmw_customizer *vmw); /*********************************************************************** * vmw_ioctl.c */ -int vmw_ioctl_cursor_bypass(struct vmw_driver *vmw, int xhot, int yhot); +int vmw_ioctl_cursor_bypass(struct vmw_customizer *vmw, int xhot, int yhot); -struct vmw_dma_buffer * vmw_ioctl_buffer_create(struct vmw_driver *vmw, +struct vmw_dma_buffer * vmw_ioctl_buffer_create(struct vmw_customizer *vmw, uint32_t size, unsigned *handle); -void * vmw_ioctl_buffer_map(struct vmw_driver *vmw, +void * vmw_ioctl_buffer_map(struct vmw_customizer *vmw, struct vmw_dma_buffer *buf); -void vmw_ioctl_buffer_unmap(struct vmw_driver *vmw, +void vmw_ioctl_buffer_unmap(struct vmw_customizer *vmw, struct vmw_dma_buffer *buf); -void vmw_ioctl_buffer_destroy(struct vmw_driver *vmw, +void vmw_ioctl_buffer_destroy(struct vmw_customizer *vmw, struct vmw_dma_buffer *buf); -int vmw_ioctl_supports_streams(struct vmw_driver *vmw); +int vmw_ioctl_supports_streams(struct vmw_customizer *vmw); -int vmw_ioctl_num_streams(struct vmw_driver *vmw, +int vmw_ioctl_num_streams(struct vmw_customizer *vmw, uint32_t *ntot, uint32_t *nfree); -int vmw_ioctl_unref_stream(struct vmw_driver *vmw, uint32_t stream_id); +int vmw_ioctl_unref_stream(struct vmw_customizer *vmw, uint32_t stream_id); -int vmw_ioctl_claim_stream(struct vmw_driver *vmw, uint32_t *out); +int vmw_ioctl_claim_stream(struct vmw_customizer *vmw, uint32_t *out); #endif diff --git a/src/gallium/winsys/drm/vmware/xorg/vmw_ioctl.c b/src/gallium/winsys/drm/vmware/xorg/vmw_ioctl.c index 
ab2b5fadc49..521578ab35d 100644 --- a/src/gallium/winsys/drm/vmware/xorg/vmw_ioctl.c +++ b/src/gallium/winsys/drm/vmware/xorg/vmw_ioctl.c @@ -57,7 +57,7 @@ struct vmw_dma_buffer }; static int -vmw_ioctl_get_param(struct vmw_driver *vmw, uint32_t param, uint64_t *out) +vmw_ioctl_get_param(struct vmw_customizer *vmw, uint32_t param, uint64_t *out) { struct drm_vmw_getparam_arg gp_arg; int ret; @@ -75,7 +75,7 @@ vmw_ioctl_get_param(struct vmw_driver *vmw, uint32_t param, uint64_t *out) } int -vmw_ioctl_supports_streams(struct vmw_driver *vmw) +vmw_ioctl_supports_streams(struct vmw_customizer *vmw) { uint64_t value; int ret; @@ -88,7 +88,7 @@ vmw_ioctl_supports_streams(struct vmw_driver *vmw) } int -vmw_ioctl_num_streams(struct vmw_driver *vmw, +vmw_ioctl_num_streams(struct vmw_customizer *vmw, uint32_t *ntot, uint32_t *nfree) { uint64_t v1, v2; @@ -109,7 +109,7 @@ vmw_ioctl_num_streams(struct vmw_driver *vmw, } int -vmw_ioctl_claim_stream(struct vmw_driver *vmw, uint32_t *out) +vmw_ioctl_claim_stream(struct vmw_customizer *vmw, uint32_t *out) { struct drm_vmw_stream_arg s_arg; int ret; @@ -125,7 +125,7 @@ vmw_ioctl_claim_stream(struct vmw_driver *vmw, uint32_t *out) } int -vmw_ioctl_unref_stream(struct vmw_driver *vmw, uint32_t stream_id) +vmw_ioctl_unref_stream(struct vmw_customizer *vmw, uint32_t stream_id) { struct drm_vmw_stream_arg s_arg; int ret; @@ -140,7 +140,7 @@ vmw_ioctl_unref_stream(struct vmw_driver *vmw, uint32_t stream_id) } int -vmw_ioctl_cursor_bypass(struct vmw_driver *vmw, int xhot, int yhot) +vmw_ioctl_cursor_bypass(struct vmw_customizer *vmw, int xhot, int yhot) { struct drm_vmw_cursor_bypass_arg arg; int ret; @@ -157,7 +157,7 @@ vmw_ioctl_cursor_bypass(struct vmw_driver *vmw, int xhot, int yhot) } struct vmw_dma_buffer * -vmw_ioctl_buffer_create(struct vmw_driver *vmw, uint32_t size, unsigned *handle) +vmw_ioctl_buffer_create(struct vmw_customizer *vmw, uint32_t size, unsigned *handle) { struct vmw_dma_buffer *buf; union drm_vmw_alloc_dmabuf_arg 
arg; @@ -198,7 +198,7 @@ err: } void -vmw_ioctl_buffer_destroy(struct vmw_driver *vmw, struct vmw_dma_buffer *buf) +vmw_ioctl_buffer_destroy(struct vmw_customizer *vmw, struct vmw_dma_buffer *buf) { struct drm_vmw_unref_dmabuf_arg arg; @@ -215,7 +215,7 @@ vmw_ioctl_buffer_destroy(struct vmw_driver *vmw, struct vmw_dma_buffer *buf) } void * -vmw_ioctl_buffer_map(struct vmw_driver *vmw, struct vmw_dma_buffer *buf) +vmw_ioctl_buffer_map(struct vmw_customizer *vmw, struct vmw_dma_buffer *buf) { void *map; @@ -236,7 +236,7 @@ vmw_ioctl_buffer_map(struct vmw_driver *vmw, struct vmw_dma_buffer *buf) } void -vmw_ioctl_buffer_unmap(struct vmw_driver *vmw, struct vmw_dma_buffer *buf) +vmw_ioctl_buffer_unmap(struct vmw_customizer *vmw, struct vmw_dma_buffer *buf) { --buf->map_count; } diff --git a/src/gallium/winsys/drm/vmware/xorg/vmw_screen.c b/src/gallium/winsys/drm/vmware/xorg/vmw_screen.c index 7c9757cce95..f43f91e5c0d 100644 --- a/src/gallium/winsys/drm/vmware/xorg/vmw_screen.c +++ b/src/gallium/winsys/drm/vmware/xorg/vmw_screen.c @@ -41,7 +41,8 @@ xf86CrtcFuncsRec vmw_screen_crtc_funcs; static void vmw_screen_cursor_load_argb(xf86CrtcPtr crtc, CARD32 *image) { - struct vmw_driver *vmw = modesettingPTR(crtc->scrn)->winsys_priv; + struct vmw_customizer *vmw = + vmw_customizer(xorg_customizer(crtc->scrn)); xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(crtc->scrn); xf86CrtcFuncsPtr funcs = vmw->cursor_priv; CursorPtr c = config->cursor; @@ -53,8 +54,9 @@ vmw_screen_cursor_load_argb(xf86CrtcPtr crtc, CARD32 *image) } static void -vmw_screen_cursor_init(ScrnInfoPtr pScrn, struct vmw_driver *vmw) +vmw_screen_cursor_init(struct vmw_customizer *vmw) { + ScrnInfoPtr pScrn = vmw->pScrn; xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(pScrn); int i; @@ -70,9 +72,9 @@ vmw_screen_cursor_init(ScrnInfoPtr pScrn, struct vmw_driver *vmw) } static void -vmw_screen_cursor_close(ScrnInfoPtr pScrn, struct vmw_driver *vmw) +vmw_screen_cursor_close(struct vmw_customizer *vmw) { - 
xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(pScrn); + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(vmw->pScrn); int i; vmw_ioctl_cursor_bypass(vmw, 0, 0); @@ -82,50 +84,39 @@ vmw_screen_cursor_close(ScrnInfoPtr pScrn, struct vmw_driver *vmw) } static Bool -vmw_screen_init(ScrnInfoPtr pScrn) +vmw_screen_init(CustomizerPtr cust, int fd) { - modesettingPtr ms = modesettingPTR(pScrn); - struct vmw_driver *vmw; - - vmw = xnfcalloc(sizeof(*vmw), 1); - if (!vmw) - return FALSE; + struct vmw_customizer *vmw = vmw_customizer(cust); - vmw->fd = ms->fd; - ms->winsys_priv = vmw; - - vmw_screen_cursor_init(pScrn, vmw); + vmw->fd = fd; + vmw_screen_cursor_init(vmw); /* if gallium is used then we don't need to do anything more. */ - if (ms->screen) + if (xorg_has_gallium(vmw->pScrn)) return TRUE; - vmw_video_init(pScrn, vmw); + vmw_video_init(vmw); return TRUE; } static Bool -vmw_screen_close(ScrnInfoPtr pScrn) +vmw_screen_close(CustomizerPtr cust) { - modesettingPtr ms = modesettingPTR(pScrn); - struct vmw_driver *vmw = vmw_driver(pScrn); + struct vmw_customizer *vmw = vmw_customizer(cust); if (!vmw) return TRUE; - vmw_screen_cursor_close(pScrn, vmw); - - vmw_video_close(pScrn, vmw); + vmw_screen_cursor_close(vmw); - ms->winsys_priv = NULL; - xfree(vmw); + vmw_video_close(vmw); return TRUE; } static Bool -vmw_screen_enter_vt(ScrnInfoPtr pScrn) +vmw_screen_enter_vt(CustomizerPtr cust) { debug_printf("%s: enter\n", __func__); @@ -133,13 +124,13 @@ vmw_screen_enter_vt(ScrnInfoPtr pScrn) } static Bool -vmw_screen_leave_vt(ScrnInfoPtr pScrn) +vmw_screen_leave_vt(CustomizerPtr cust) { - struct vmw_driver *vmw = vmw_driver(pScrn); + struct vmw_customizer *vmw = vmw_customizer(cust); debug_printf("%s: enter\n", __func__); - vmw_video_stop_all(pScrn, vmw); + vmw_video_stop_all(vmw); return TRUE; } @@ -153,18 +144,27 @@ static Bool (*vmw_screen_pre_init_saved)(ScrnInfoPtr pScrn, int flags) = NULL; static Bool vmw_screen_pre_init(ScrnInfoPtr pScrn, int flags) { - modesettingPtr ms; 
+ struct vmw_customizer *vmw; + CustomizerPtr cust; + + vmw = xnfcalloc(1, sizeof(*vmw)); + if (!vmw) + return FALSE; + + cust = &vmw->base; + + cust->winsys_screen_init = vmw_screen_init; + cust->winsys_screen_close = vmw_screen_close; + cust->winsys_enter_vt = vmw_screen_enter_vt; + cust->winsys_leave_vt = vmw_screen_leave_vt; + vmw->pScrn = pScrn; + + pScrn->driverPrivate = cust; pScrn->PreInit = vmw_screen_pre_init_saved; if (!pScrn->PreInit(pScrn, flags)) return FALSE; - ms = modesettingPTR(pScrn); - ms->winsys_screen_init = vmw_screen_init; - ms->winsys_screen_close = vmw_screen_close; - ms->winsys_enter_vt = vmw_screen_enter_vt; - ms->winsys_leave_vt = vmw_screen_leave_vt; - return TRUE; } diff --git a/src/gallium/winsys/drm/vmware/xorg/vmw_video.c b/src/gallium/winsys/drm/vmware/xorg/vmw_video.c index ff3b992d078..de28f06a475 100644 --- a/src/gallium/winsys/drm/vmware/xorg/vmw_video.c +++ b/src/gallium/winsys/drm/vmware/xorg/vmw_video.c @@ -226,7 +226,7 @@ static void vmw_xv_query_best_size(ScrnInfoPtr pScrn, Bool motion, /* * Local functions. 
*/ -static XF86VideoAdaptorPtr vmw_video_init_adaptor(ScrnInfoPtr pScrn, struct vmw_driver *vmw); +static XF86VideoAdaptorPtr vmw_video_init_adaptor(ScrnInfoPtr pScrn, struct vmw_customizer *vmw); static int vmw_video_port_init(ScrnInfoPtr pScrn, struct vmw_video_port *port, @@ -243,9 +243,9 @@ static int vmw_video_port_play(ScrnInfoPtr pScrn, struct vmw_video_port *port, short height, RegionPtr clipBoxes); static void vmw_video_port_cleanup(ScrnInfoPtr pScrn, struct vmw_video_port *port); -static int vmw_video_buffer_alloc(struct vmw_driver *vmw, int size, +static int vmw_video_buffer_alloc(struct vmw_customizer *vmw, int size, struct vmw_video_buffer *out); -static int vmw_video_buffer_free(struct vmw_driver *vmw, +static int vmw_video_buffer_free(struct vmw_customizer *vmw, struct vmw_video_buffer *out); @@ -267,8 +267,9 @@ static int vmw_video_buffer_free(struct vmw_driver *vmw, */ Bool -vmw_video_init(ScrnInfoPtr pScrn, struct vmw_driver *vmw) +vmw_video_init(struct vmw_customizer *vmw) { + ScrnInfoPtr pScrn = vmw->pScrn; ScreenPtr pScreen = pScrn->pScreen; XF86VideoAdaptorPtr *overlayAdaptors, *newAdaptors = NULL; XF86VideoAdaptorPtr newAdaptor = NULL; @@ -345,8 +346,9 @@ vmw_video_init(ScrnInfoPtr pScrn, struct vmw_driver *vmw) */ Bool -vmw_video_close(ScrnInfoPtr pScrn, struct vmw_driver *vmw) +vmw_video_close(struct vmw_customizer *vmw) { + ScrnInfoPtr pScrn = vmw->pScrn; struct vmw_video_private *video; int i; @@ -387,8 +389,9 @@ vmw_video_close(ScrnInfoPtr pScrn, struct vmw_driver *vmw) *----------------------------------------------------------------------------- */ -void vmw_video_stop_all(ScrnInfoPtr pScrn, struct vmw_driver *vmw) +void vmw_video_stop_all(struct vmw_customizer *vmw) { + ScrnInfoPtr pScrn = vmw->pScrn; struct vmw_video_private *video = vmw->video_priv; int i; @@ -421,7 +424,7 @@ void vmw_video_stop_all(ScrnInfoPtr pScrn, struct vmw_driver *vmw) */ static XF86VideoAdaptorPtr -vmw_video_init_adaptor(ScrnInfoPtr pScrn, struct vmw_driver 
*vmw) +vmw_video_init_adaptor(ScrnInfoPtr pScrn, struct vmw_customizer *vmw) { XF86VideoAdaptorPtr adaptor; struct vmw_video_private *video; @@ -515,7 +518,7 @@ vmw_video_port_init(ScrnInfoPtr pScrn, struct vmw_video_port *port, unsigned char *buf, short width, short height, RegionPtr clipBoxes) { - struct vmw_driver *vmw = vmw_driver(pScrn); + struct vmw_customizer *vmw = vmw_customizer(xorg_customizer(pScrn)); unsigned short w, h; int i, ret; @@ -583,7 +586,7 @@ vmw_video_port_play(ScrnInfoPtr pScrn, struct vmw_video_port *port, unsigned char *buf, short width, short height, RegionPtr clipBoxes) { - struct vmw_driver *vmw = vmw_driver(pScrn); + struct vmw_customizer *vmw = vmw_customizer(xorg_customizer(pScrn)); struct drm_vmw_control_stream_arg arg; unsigned short w, h; int size; @@ -675,7 +678,7 @@ vmw_video_port_play(ScrnInfoPtr pScrn, struct vmw_video_port *port, static void vmw_video_port_cleanup(ScrnInfoPtr pScrn, struct vmw_video_port *port) { - struct vmw_driver *vmw = vmw_driver(pScrn); + struct vmw_customizer *vmw = vmw_customizer(xorg_customizer(pScrn)); uint32 id, colorKey, flags; Bool isAutoPaintColorkey; int i; @@ -721,7 +724,7 @@ vmw_video_port_cleanup(ScrnInfoPtr pScrn, struct vmw_video_port *port) */ static int -vmw_video_buffer_alloc(struct vmw_driver *vmw, int size, +vmw_video_buffer_alloc(struct vmw_customizer *vmw, int size, struct vmw_video_buffer *out) { out->buf = vmw_ioctl_buffer_create(vmw, size, &out->handle); @@ -764,7 +767,7 @@ vmw_video_buffer_alloc(struct vmw_driver *vmw, int size, */ static int -vmw_video_buffer_free(struct vmw_driver *vmw, +vmw_video_buffer_free(struct vmw_customizer *vmw, struct vmw_video_buffer *out) { if (out->size == 0) @@ -814,7 +817,7 @@ vmw_xv_put_image(ScrnInfoPtr pScrn, short src_x, short src_y, Bool sync, RegionPtr clipBoxes, pointer data, DrawablePtr dst) { - struct vmw_driver *vmw = vmw_driver(pScrn); + struct vmw_customizer *vmw = vmw_customizer(xorg_customizer(pScrn)); struct vmw_video_port *port = 
data; debug_printf("%s: enter (%u, %u) (%ux%u) (%u, %u) (%ux%u) (%ux%u)\n", __func__, @@ -852,7 +855,7 @@ vmw_xv_put_image(ScrnInfoPtr pScrn, short src_x, short src_y, static void vmw_xv_stop_video(ScrnInfoPtr pScrn, pointer data, Bool cleanup) { - struct vmw_driver *vmw = vmw_driver(pScrn); + struct vmw_customizer *vmw = vmw_customizer(xorg_customizer(pScrn)); struct vmw_video_port *port = data; struct drm_vmw_control_stream_arg arg; int ret; diff --git a/src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c b/src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c index cd273d091fe..87aad25b24f 100644 --- a/src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c +++ b/src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c @@ -33,12 +33,50 @@ #include "vmw_hook.h" + +/* + * Defines and modinfo + */ + +#define VMWGFX_DRIVER_NAME "vmwgfx" + +#define VMW_STRING_INNER(s) #s +#define VMW_STRING(str) VMW_STRING_INNER(str) + +#define VMWGFX_VERSION_MAJOR 11 +#define VMWGFX_VERSION_MINOR 0 +#define VMWGFX_VERSION_PATCH 0 +#define VMWGFX_VERSION_STRING_MAJOR VMW_STRING(VMWGFX_VERSION_MAJOR) +#define VMWGFX_VERSION_STRING_MINOR VMW_STRING(VMWGFX_VERSION_MINOR) +#define VMWGFX_VERSION_STRING_PATCH VMW_STRING(VMWGFX_VERSION_PATCH) + +#define VMWGFX_DRIVER_VERSION \ + (VMWGFX_VERSION_MAJOR * 65536 + VMWGFX_VERSION_MINOR * 256 + VMWGFX_VERSION_PATCH) +#define VMWGFX_DRIVER_VERSION_STRING \ + VMWGFX_VERSION_STRING_MAJOR "." VMWGFX_VERSION_STRING_MINOR \ + "." VMWGFX_VERSION_STRING_PATCH + +/* + * Standard four digit version string expected by VMware Tools installer. + * As the driver's version is only {major, minor, patchlevel}, simply append an + * extra zero for the fourth digit. 
+ */ +#ifdef __GNUC__ +_X_EXPORT const char vmwgfx_drv_modinfo[] __attribute__((section(".modinfo"),unused)) = + "version=" VMWGFX_DRIVER_VERSION_STRING ".0"; +#endif + static void vmw_xorg_identify(int flags); _X_EXPORT Bool vmw_xorg_pci_probe(DriverPtr driver, int entity_num, struct pci_device *device, intptr_t match_data); + +/* + * Tables + */ + static const struct pci_id_match vmw_xorg_device_match[] = { {0x15ad, PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0}, @@ -55,12 +93,12 @@ static PciChipsets vmw_xorg_pci_devices[] = { }; static XF86ModuleVersionInfo vmw_xorg_version = { - "vmwgfx", + VMWGFX_DRIVER_NAME, MODULEVENDORSTRING, MODINFOSTRING1, MODINFOSTRING2, XORG_VERSION_CURRENT, - 0, 1, 0, /* major, minor, patch */ + VMWGFX_VERSION_MAJOR, VMWGFX_VERSION_MINOR, VMWGFX_VERSION_PATCH, ABI_CLASS_VIDEODRV, ABI_VIDEODRV_VERSION, MOD_CLASS_VIDEODRV, @@ -73,7 +111,7 @@ static XF86ModuleVersionInfo vmw_xorg_version = { _X_EXPORT DriverRec vmwgfx = { 1, - "vmwgfx", + VMWGFX_DRIVER_NAME, vmw_xorg_identify, NULL, xorg_tracker_available_options, @@ -92,6 +130,7 @@ _X_EXPORT XF86ModuleData vmwgfxModuleData = { NULL }; + /* * Xorg driver functions */ diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile index 9482e8f9b11..824c666ae30 100644 --- a/src/gallium/winsys/xlib/Makefile +++ b/src/gallium/winsys/xlib/Makefile @@ -50,10 +50,10 @@ LIBS = \ .SUFFIXES : .cpp .c.o: - $(CC) -c $(INCLUDE_DIRS) $(DEFINES) $(CFLAGS) $< -o $@ + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ .cpp.o: - $(CXX) -c $(INCLUDE_DIRS) $(DEFINES) $(CXXFLAGS) $< -o $@ + $(CXX) -c $(INCLUDE_DIRS) $(CXXFLAGS) $< -o $@ diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript index a4dabb7804c..8c9d318af2b 100644 --- a/src/gallium/winsys/xlib/SConscript +++ b/src/gallium/winsys/xlib/SConscript @@ -14,11 +14,7 @@ if env['dri']: print 'warning: DRI enabled: skipping build of xlib libGL.so' Return() -if 'trace' not in 
env['drivers']: - print 'warning: trace pipe driver disabled: skipping build of xlib libGL.so' - Return() - -if not set(('softpipe', 'llvmpipe', 'trace')).intersection(env['drivers']): +if not set(('softpipe', 'llvmpipe', 'cell')).intersection(env['drivers']): print 'warning: no supported pipe driver: skipping build of xlib libGL.so' Return() |