Diffstat (limited to 'src/gallium')
609 files changed, 36578 insertions, 13339 deletions
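The central API change visible in the draw-module portion of this diff is that the draw module no longer owns its LLVM execution engine: draw_create() now simply forwards to a new draw_create_gallivm() entry point, and the LLVM vertex path is only enabled when a shared struct gallivm_state is supplied. The following is a minimal caller-side sketch of that new entry point; the wrapper function and its name are hypothetical, only draw_create() and draw_create_gallivm() come from the patch itself.

#include "draw/draw_context.h"

/* Illustrative wrapper (not part of the patch): create a draw context
 * that shares the caller's gallivm state, so all JIT'd vertex code
 * lives in the same LLVM module/engine as the rest of the driver. */
static struct draw_context *
create_shared_draw(struct pipe_context *pipe, struct gallivm_state *gallivm)
{
   /* Passing a non-NULL gallivm enables the LLVM path
    * (draw_llvm_create() is called with it); passing NULL, as the
    * plain draw_create() wrapper does, falls back to the non-JIT path. */
   return draw_create_gallivm(pipe, gallivm);
}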
diff --git a/src/gallium/SConscript b/src/gallium/SConscript index 0efab834f66..2265f1de46c 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -55,6 +55,7 @@ SConscript('winsys/sw/null/SConscript') SConscript('state_trackers/python/SConscript') if env['platform'] != 'embedded': SConscript('state_trackers/vega/SConscript') + SConscript('state_trackers/egl/SConscript') if env['x11']: SConscript('state_trackers/glx/xlib/SConscript') @@ -66,10 +67,7 @@ if env['platform'] != 'embedded': SConscript('state_trackers/xorg/SConscript') if env['platform'] == 'windows': - SConscript([ - 'state_trackers/egl/SConscript', - 'state_trackers/wgl/SConscript', - ]) + SConscript('state_trackers/wgl/SConscript') # # Winsys @@ -85,6 +83,11 @@ SConscript([ 'targets/graw-null/SConscript', ]) +if env['platform'] != 'embedded': + SConscript([ + 'targets/egl-static/SConscript' + ]) + if env['x11']: SConscript([ 'targets/graw-xlib/SConscript', @@ -95,7 +98,6 @@ if env['platform'] == 'windows': SConscript([ 'targets/graw-gdi/SConscript', 'targets/libgl-gdi/SConscript', - #'egl-gdi/SConscript', ]) if env['dri']: diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 53a0847f032..ff355c47832 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -128,12 +128,12 @@ C_SOURCES = \ util/u_linkage.c \ util/u_network.c \ util/u_math.c \ - util/u_mempool.c \ util/u_mm.c \ util/u_rect.c \ util/u_ringbuffer.c \ util/u_sampler.c \ util/u_simple_shaders.c \ + util/u_slab.c \ util/u_snprintf.c \ util/u_staging.c \ util/u_surface.c \ @@ -185,7 +185,7 @@ GALLIVM_SOURCES = \ draw/draw_pt_fetch_shade_pipeline_llvm.c GALLIVM_CPP_SOURCES = \ - gallivm/lp_bld_misc.cpp + gallivm/lp_bld_misc.cpp GENERATED_SOURCES = \ indices/u_indices_gen.c \ @@ -203,9 +203,6 @@ CPP_SOURCES += \ endif -LIBRARY_DEFINES += -D__STDC_CONSTANT_MACROS - - include ../Makefile.template diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 75c27dd2420..fca7e5fd117 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -175,13 +175,13 @@ source = [ 'util/u_linkage.c', 'util/u_network.c', 'util/u_math.c', - 'util/u_mempool.c', 'util/u_mm.c', 'util/u_rect.c', 'util/u_resource.c', 'util/u_ringbuffer.c', 'util/u_sampler.c', 'util/u_simple_shaders.c', + 'util/u_slab.c', 'util/u_snprintf.c', 'util/u_staging.c', 'util/u_surface.c', diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 39d82f32892..e045313b94f 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -35,6 +35,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_cpu_detect.h" +#include "util/u_inlines.h" #include "draw_context.h" #include "draw_vs.h" #include "draw_gs.h" @@ -63,19 +64,32 @@ draw_get_option_use_llvm(void) } #endif -struct draw_context *draw_create( struct pipe_context *pipe ) + + +/** + * Create new draw module context. + */ +struct draw_context * +draw_create(struct pipe_context *pipe) +{ + return draw_create_gallivm(pipe, NULL); +} + + + +/** + * Create new draw module context with gallivm state for LLVM JIT. 
+ */ +struct draw_context * +draw_create_gallivm(struct pipe_context *pipe, struct gallivm_state *gallivm) { struct draw_context *draw = CALLOC_STRUCT( draw_context ); if (draw == NULL) goto fail; #if HAVE_LLVM - if(draw_get_option_use_llvm()) - { - lp_build_init(); - assert(lp_build_engine); - draw->engine = lp_build_engine; - draw->llvm = draw_llvm_create(draw); + if (draw_get_option_use_llvm() && gallivm) { + draw->llvm = draw_llvm_create(draw, gallivm); } #endif @@ -91,6 +105,8 @@ fail: return NULL; } + + boolean draw_init(struct draw_context *draw) { /* @@ -149,6 +165,10 @@ void draw_destroy( struct draw_context *draw ) } } + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { + pipe_resource_reference(&draw->pt.vertex_buffer[i].buffer, NULL); + } + /* Not so fast -- we're just borrowing this at the moment. * if (draw->render) @@ -292,8 +312,9 @@ draw_set_vertex_buffers(struct draw_context *draw, { assert(count <= PIPE_MAX_ATTRIBS); - memcpy(draw->pt.vertex_buffer, buffers, count * sizeof(buffers[0])); - draw->pt.nr_vertex_buffers = count; + util_copy_vertex_buffers(draw->pt.vertex_buffer, + &draw->pt.nr_vertex_buffers, + buffers, count); } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index ff4f753604f..a0b217e4d33 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -48,10 +48,15 @@ struct draw_vertex_shader; struct draw_geometry_shader; struct draw_fragment_shader; struct tgsi_sampler; +struct gallivm_state; + struct draw_context *draw_create( struct pipe_context *pipe ); +struct draw_context * +draw_create_gallivm(struct pipe_context *pipe, struct gallivm_state *gallivm); + void draw_destroy( struct draw_context *draw ); void draw_flush(struct draw_context *draw); diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index eb162fb0f62..41269ee869d 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -42,6 +42,7 @@ #include "gallivm/lp_bld_printf.h" #include "gallivm/lp_bld_intr.h" #include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_type.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_dump.h" @@ -49,259 +50,352 @@ #include "util/u_math.h" #include "util/u_pointer.h" #include "util/u_string.h" +#include "util/u_simple_list.h" -#include <llvm-c/Transforms/Scalar.h> #define DEBUG_STORE 0 -/* generates the draw jit function */ + +/** + * This function is called by the gallivm "garbage collector" when + * the LLVM global data structures are freed. We must free all LLVM-related + * data. Specifically, all JIT'd shader variants. + */ +static void +draw_llvm_garbage_collect_callback(void *cb_data) +{ + struct draw_llvm *llvm = (struct draw_llvm *) cb_data; + struct draw_llvm_variant_list_item *li; + + /* free all shader variants */ + li = first_elem(&llvm->vs_variants_list); + while (!at_end(&llvm->vs_variants_list, li)) { + struct draw_llvm_variant_list_item *next = next_elem(li); + draw_llvm_destroy_variant(li->base); + li = next; + } + + /* Null-out these pointers so they get remade next time they're needed. + * See the accessor functions below. 
+ */ + llvm->context_ptr_type = NULL; + llvm->buffer_ptr_type = NULL; + llvm->vb_ptr_type = NULL; + llvm->vertex_header_ptr_type = NULL; +} + + static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); + static void draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var); -static void -init_globals(struct draw_llvm *llvm) + +/** + * Create LLVM type for struct draw_jit_texture + */ +static LLVMTypeRef +create_jit_texture_type(struct gallivm_state *gallivm) { + LLVMTargetDataRef target = gallivm->target; LLVMTypeRef texture_type; + LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS]; + LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context); + + elem_types[DRAW_JIT_TEXTURE_WIDTH] = + elem_types[DRAW_JIT_TEXTURE_HEIGHT] = + elem_types[DRAW_JIT_TEXTURE_DEPTH] = + elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = int32_type; + elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] = + elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] = + LLVMArrayType(int32_type, PIPE_MAX_TEXTURE_LEVELS); + elem_types[DRAW_JIT_TEXTURE_DATA] = + LLVMArrayType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), + PIPE_MAX_TEXTURE_LEVELS); + elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = + elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = + elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatTypeInContext(gallivm->context); + elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] = + LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4); + + texture_type = LLVMStructTypeInContext(gallivm->context, elem_types, + Elements(elem_types), 0); + + /* Make sure the target's struct layout cache doesn't return + * stale/invalid data. + */ + LLVMInvalidateStructLayout(gallivm->target, texture_type); + + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width, + target, texture_type, + DRAW_JIT_TEXTURE_WIDTH); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height, + target, texture_type, + DRAW_JIT_TEXTURE_HEIGHT); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth, + target, texture_type, + DRAW_JIT_TEXTURE_DEPTH); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level, + target, texture_type, + DRAW_JIT_TEXTURE_LAST_LEVEL); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride, + target, texture_type, + DRAW_JIT_TEXTURE_ROW_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride, + target, texture_type, + DRAW_JIT_TEXTURE_IMG_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data, + target, texture_type, + DRAW_JIT_TEXTURE_DATA); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod, + target, texture_type, + DRAW_JIT_TEXTURE_MIN_LOD); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod, + target, texture_type, + DRAW_JIT_TEXTURE_MAX_LOD); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias, + target, texture_type, + DRAW_JIT_TEXTURE_LOD_BIAS); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color, + target, texture_type, + DRAW_JIT_TEXTURE_BORDER_COLOR); + + LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, target, texture_type); + + return texture_type; +} - /* struct draw_jit_texture */ - { - LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS]; - - elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); - elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); - elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); - elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); - elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] = - LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS); - elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] = - LLVMArrayType(LLVMInt32Type(), 
PIPE_MAX_TEXTURE_LEVELS); - elem_types[DRAW_JIT_TEXTURE_DATA] = - LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), - PIPE_MAX_TEXTURE_LEVELS); - elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType(); - elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType(); - elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType(); - elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] = - LLVMArrayType(LLVMFloatType(), 4); - - texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); - - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_WIDTH); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_HEIGHT); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_DEPTH); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_LAST_LEVEL); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_ROW_STRIDE); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_IMG_STRIDE); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_DATA); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_MIN_LOD); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_MAX_LOD); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_LOD_BIAS); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_BORDER_COLOR); - LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, - llvm->target, texture_type); - - LLVMAddTypeName(llvm->module, "texture", texture_type); - } +/** + * Create LLVM type for struct draw_jit_texture + */ +static LLVMTypeRef +create_jit_context_type(struct gallivm_state *gallivm, + LLVMTypeRef texture_type) +{ + LLVMTargetDataRef target = gallivm->target; + LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); + LLVMTypeRef elem_types[5]; + LLVMTypeRef context_type; + + elem_types[0] = LLVMPointerType(float_type, 0); /* vs_constants */ + elem_types[1] = LLVMPointerType(float_type, 0); /* gs_constants */ + elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4), 12), 0); /* planes */ + elem_types[3] = LLVMPointerType(float_type, 0); /* viewport */ + elem_types[4] = LLVMArrayType(texture_type, + PIPE_MAX_VERTEX_SAMPLERS); /* textures */ + + context_type = LLVMStructTypeInContext(gallivm->context, elem_types, + Elements(elem_types), 0); + + LLVMInvalidateStructLayout(gallivm->target, context_type); + + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants, + target, context_type, 0); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants, + target, context_type, 1); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes, + target, context_type, 2); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, + target, context_type, + DRAW_JIT_CTX_TEXTURES); + LP_CHECK_STRUCT_SIZE(struct draw_jit_context, + target, context_type); + + return context_type; +} - /* struct draw_jit_context */ - { - LLVMTypeRef elem_types[5]; - LLVMTypeRef context_type; - - elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ - elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* gs_constants */ 
- elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(LLVMFloatType(), 4), 12), 0); /* planes */ - elem_types[3] = LLVMPointerType(LLVMFloatType(), 0); /* viewport */ - elem_types[4] = LLVMArrayType(texture_type, - PIPE_MAX_VERTEX_SAMPLERS); /* textures */ - - context_type = LLVMStructType(elem_types, Elements(elem_types), 0); - - LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants, - llvm->target, context_type, 0); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants, - llvm->target, context_type, 1); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes, - llvm->target, context_type, 2); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, - llvm->target, context_type, - DRAW_JIT_CTX_TEXTURES); - LP_CHECK_STRUCT_SIZE(struct draw_jit_context, - llvm->target, context_type); - - LLVMAddTypeName(llvm->module, "draw_jit_context", context_type); - - llvm->context_ptr_type = LLVMPointerType(context_type, 0); - } - { - LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMIntType(8), 0); - llvm->buffer_ptr_type = LLVMPointerType(buffer_ptr, 0); - } - /* struct pipe_vertex_buffer */ - { - LLVMTypeRef elem_types[4]; - LLVMTypeRef vb_type; - elem_types[0] = LLVMInt32Type(); - elem_types[1] = LLVMInt32Type(); - elem_types[2] = LLVMInt32Type(); - elem_types[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */ +/** + * Create LLVM type for struct pipe_vertex_buffer + */ +static LLVMTypeRef +create_jit_vertex_buffer_type(struct gallivm_state *gallivm) +{ + LLVMTargetDataRef target = gallivm->target; + LLVMTypeRef elem_types[4]; + LLVMTypeRef vb_type; - vb_type = LLVMStructType(elem_types, Elements(elem_types), 0); + elem_types[0] = + elem_types[1] = + elem_types[2] = LLVMInt32TypeInContext(gallivm->context); + elem_types[3] = LLVMPointerType(LLVMOpaqueTypeInContext(gallivm->context), 0); /* vs_constants */ - LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, - llvm->target, vb_type, 0); - LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset, - llvm->target, vb_type, 2); - LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, - llvm->target, vb_type); + vb_type = LLVMStructTypeInContext(gallivm->context, elem_types, + Elements(elem_types), 0); - LLVMAddTypeName(llvm->module, "pipe_vertex_buffer", vb_type); + LLVMInvalidateStructLayout(gallivm->target, vb_type); - llvm->vb_ptr_type = LLVMPointerType(vb_type, 0); - } + LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, + target, vb_type, 0); + LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset, + target, vb_type, 2); + + LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type); + + return vb_type; } + +/** + * Create LLVM type for struct vertex_header; + */ static LLVMTypeRef -create_vertex_header(struct draw_llvm *llvm, int data_elems) +create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems) { - /* struct vertex_header */ + LLVMTargetDataRef target = gallivm->target; LLVMTypeRef elem_types[3]; LLVMTypeRef vertex_header; char struct_name[24]; util_snprintf(struct_name, 23, "vertex_header%d", data_elems); - elem_types[0] = LLVMIntType(32); - elem_types[1] = LLVMArrayType(LLVMFloatType(), 4); + elem_types[0] = LLVMIntTypeInContext(gallivm->context, 32); + elem_types[1] = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4); elem_types[2] = LLVMArrayType(elem_types[1], data_elems); - vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0); + vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types, + Elements(elem_types), 0); + + 
LLVMInvalidateStructLayout(gallivm->target, vertex_header); /* these are bit-fields and we can't take address of them LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask, - llvm->target, vertex_header, + target, vertex_header, DRAW_JIT_VERTEX_CLIPMASK); LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag, - llvm->target, vertex_header, + target, vertex_header, DRAW_JIT_VERTEX_EDGEFLAG); LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad, - llvm->target, vertex_header, + target, vertex_header, DRAW_JIT_VERTEX_PAD); LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id, - llvm->target, vertex_header, + target, vertex_header, DRAW_JIT_VERTEX_VERTEX_ID); */ LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip, - llvm->target, vertex_header, + target, vertex_header, DRAW_JIT_VERTEX_CLIP); LP_CHECK_MEMBER_OFFSET(struct vertex_header, data, - llvm->target, vertex_header, + target, vertex_header, DRAW_JIT_VERTEX_DATA); - LLVMAddTypeName(llvm->module, struct_name, vertex_header); + LLVMAddTypeName(gallivm->module, struct_name, vertex_header); - return LLVMPointerType(vertex_header, 0); + return vertex_header; } -struct draw_llvm * -draw_llvm_create(struct draw_context *draw) + +/** + * Create LLVM types for various structures. + */ +static void +create_jit_types(struct draw_llvm *llvm) { - struct draw_llvm *llvm; + struct gallivm_state *gallivm = llvm->gallivm; + LLVMTypeRef texture_type, context_type, buffer_type, vb_type; - llvm = CALLOC_STRUCT( draw_llvm ); - if (!llvm) - return NULL; + texture_type = create_jit_texture_type(gallivm); + LLVMAddTypeName(gallivm->module, "texture", texture_type); - llvm->draw = draw; - llvm->engine = draw->engine; + context_type = create_jit_context_type(gallivm, texture_type); + LLVMAddTypeName(gallivm->module, "draw_jit_context", context_type); + llvm->context_ptr_type = LLVMPointerType(context_type, 0); - debug_assert(llvm->engine); + buffer_type = LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0); + LLVMAddTypeName(gallivm->module, "buffer", buffer_type); + llvm->buffer_ptr_type = LLVMPointerType(buffer_type, 0); - llvm->module = LLVMModuleCreateWithName("draw_llvm"); - llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module); + vb_type = create_jit_vertex_buffer_type(gallivm); + LLVMAddTypeName(gallivm->module, "pipe_vertex_buffer", vb_type); + llvm->vb_ptr_type = LLVMPointerType(vb_type, 0); +} - LLVMAddModuleProvider(llvm->engine, llvm->provider); - llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine); +static LLVMTypeRef +get_context_ptr_type(struct draw_llvm *llvm) +{ + if (!llvm->context_ptr_type) + create_jit_types(llvm); + return llvm->context_ptr_type; +} - llvm->pass = LLVMCreateFunctionPassManager(llvm->provider); - LLVMAddTargetData(llvm->target, llvm->pass); - if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { - /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, - * but there are more on SVN. */ - /* TODO: Add more passes */ +static LLVMTypeRef +get_buffer_ptr_type(struct draw_llvm *llvm) +{ + if (!llvm->buffer_ptr_type) + create_jit_types(llvm); + return llvm->buffer_ptr_type; +} - LLVMAddCFGSimplificationPass(llvm->pass); - if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) { - /* For LLVM >= 2.7 and 32-bit build, use this order of passes to - * avoid generating bad code. - * Test with piglit glsl-vs-sqrt-zero test. 
- */ - LLVMAddConstantPropagationPass(llvm->pass); - LLVMAddPromoteMemoryToRegisterPass(llvm->pass); - } - else { - LLVMAddPromoteMemoryToRegisterPass(llvm->pass); - LLVMAddConstantPropagationPass(llvm->pass); - } +static LLVMTypeRef +get_vb_ptr_type(struct draw_llvm *llvm) +{ + if (!llvm->vb_ptr_type) + create_jit_types(llvm); + return llvm->vb_ptr_type; +} - LLVMAddInstructionCombiningPass(llvm->pass); - LLVMAddGVNPass(llvm->pass); - } else { - /* We need at least this pass to prevent the backends to fail in - * unexpected ways. - */ - LLVMAddPromoteMemoryToRegisterPass(llvm->pass); - } +static LLVMTypeRef +get_vertex_header_ptr_type(struct draw_llvm *llvm) +{ + if (!llvm->vertex_header_ptr_type) + create_jit_types(llvm); + return llvm->vertex_header_ptr_type; +} - init_globals(llvm); + +/** + * Create per-context LLVM info. + */ +struct draw_llvm * +draw_llvm_create(struct draw_context *draw, struct gallivm_state *gallivm) +{ + struct draw_llvm *llvm; + + llvm = CALLOC_STRUCT( draw_llvm ); + if (!llvm) + return NULL; + + lp_build_init(); + + llvm->draw = draw; + llvm->gallivm = gallivm; if (gallivm_debug & GALLIVM_DEBUG_IR) { - LLVMDumpModule(llvm->module); + LLVMDumpModule(llvm->gallivm->module); } llvm->nr_variants = 0; make_empty_list(&llvm->vs_variants_list); + gallivm_register_garbage_collector_callback( + draw_llvm_garbage_collect_callback, llvm); + return llvm; } + +/** + * Free per-context LLVM info. + */ void draw_llvm_destroy(struct draw_llvm *llvm) { - LLVMDisposePassManager(llvm->pass); + gallivm_remove_garbage_collector_callback( + draw_llvm_garbage_collect_callback, llvm); + /* XXX free other draw_llvm data? */ FREE(llvm); } + +/** + * Create LLVM-generated code for a vertex shader. + */ struct draw_llvm_variant * draw_llvm_create_variant(struct draw_llvm *llvm, unsigned num_inputs, @@ -310,6 +404,7 @@ draw_llvm_create_variant(struct draw_llvm *llvm, struct draw_llvm_variant *variant; struct llvm_vertex_shader *shader = llvm_vertex_shader(llvm->draw->vs.vertex_shader); + LLVMTypeRef vertex_header; variant = MALLOC(sizeof *variant + shader->variant_key_size - @@ -321,7 +416,9 @@ draw_llvm_create_variant(struct draw_llvm *llvm, memcpy(&variant->key, key, shader->variant_key_size); - llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs); + vertex_header = create_jit_vertex_header(llvm->gallivm, num_inputs); + + llvm->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0); draw_llvm_generate(llvm, variant); draw_llvm_generate_elts(llvm, variant); @@ -346,7 +443,7 @@ generate_vs(struct draw_llvm *llvm, { const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; struct lp_type vs_type; - LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr); + LLVMValueRef consts_ptr = draw_jit_context_vs_constants(llvm->gallivm, context_ptr); struct lp_build_sampler_soa *sampler = 0; memset(&vs_type, 0, sizeof vs_type); @@ -367,7 +464,7 @@ generate_vs(struct draw_llvm *llvm, llvm->draw->num_samplers) sampler = draw_sampler; - lp_build_tgsi_soa(builder, + lp_build_tgsi_soa(llvm->gallivm, tokens, vs_type, NULL /*struct lp_build_mask_context *mask*/, @@ -386,20 +483,20 @@ static void print_vectorf(LLVMBuilderRef builder, { LLVMValueRef val[4]; val[0] = LLVMBuildExtractElement(builder, vec, - LLVMConstInt(LLVMInt32Type(), 0, 0), ""); + lp_build_const_int32(gallivm, 0), ""); val[1] = LLVMBuildExtractElement(builder, vec, - LLVMConstInt(LLVMInt32Type(), 1, 0), ""); + lp_build_const_int32(gallivm, 1), ""); val[2] = 
LLVMBuildExtractElement(builder, vec, - LLVMConstInt(LLVMInt32Type(), 2, 0), ""); + lp_build_const_int32(gallivm, 2), ""); val[3] = LLVMBuildExtractElement(builder, vec, - LLVMConstInt(LLVMInt32Type(), 3, 0), ""); + lp_build_const_int32(gallivm, 3), ""); lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n", val[0], val[1], val[2], val[3]); } #endif static void -generate_fetch(LLVMBuilderRef builder, +generate_fetch(struct gallivm_state *gallivm, LLVMValueRef vbuffers_ptr, LLVMValueRef *res, struct pipe_vertex_element *velem, @@ -407,19 +504,22 @@ generate_fetch(LLVMBuilderRef builder, LLVMValueRef index, LLVMValueRef instance_id) { - LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0); + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef indices = + LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), + velem->vertex_buffer_index, 0); LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &indices, 1, ""); - LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf); - LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf); - LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf); + LLVMValueRef vb_stride = draw_jit_vbuffer_stride(gallivm, vbuf); + LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(gallivm, vbuf); + LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vbuf); LLVMValueRef cond; LLVMValueRef stride; if (velem->instance_divisor) { /* array index = instance_id / instance_divisor */ index = LLVMBuildUDiv(builder, instance_id, - LLVMConstInt(LLVMInt32Type(), velem->instance_divisor, 0), + lp_build_const_int32(gallivm, velem->instance_divisor), "instance_divisor"); } @@ -435,23 +535,24 @@ generate_fetch(LLVMBuilderRef builder, vb_buffer_offset, ""); stride = LLVMBuildAdd(builder, stride, - LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0), + lp_build_const_int32(gallivm, velem->src_offset), ""); /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/ vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, ""); - *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format); + *res = draw_llvm_translate_from(gallivm, vbuffer_ptr, velem->src_format); } static LLVMValueRef -aos_to_soa(LLVMBuilderRef builder, +aos_to_soa(struct gallivm_state *gallivm, LLVMValueRef val0, LLVMValueRef val1, LLVMValueRef val2, LLVMValueRef val3, LLVMValueRef channel) { + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef ex, res; ex = LLVMBuildExtractElement(builder, val0, @@ -459,38 +560,39 @@ aos_to_soa(LLVMBuilderRef builder, res = LLVMBuildInsertElement(builder, LLVMConstNull(LLVMTypeOf(val0)), ex, - LLVMConstInt(LLVMInt32Type(), 0, 0), + lp_build_const_int32(gallivm, 0), ""); ex = LLVMBuildExtractElement(builder, val1, channel, ""); res = LLVMBuildInsertElement(builder, res, ex, - LLVMConstInt(LLVMInt32Type(), 1, 0), + lp_build_const_int32(gallivm, 1), ""); ex = LLVMBuildExtractElement(builder, val2, channel, ""); res = LLVMBuildInsertElement(builder, res, ex, - LLVMConstInt(LLVMInt32Type(), 2, 0), + lp_build_const_int32(gallivm, 2), ""); ex = LLVMBuildExtractElement(builder, val3, channel, ""); res = LLVMBuildInsertElement(builder, res, ex, - LLVMConstInt(LLVMInt32Type(), 3, 0), + lp_build_const_int32(gallivm, 3), ""); return res; } static void -soa_to_aos(LLVMBuilderRef builder, +soa_to_aos(struct gallivm_state *gallivm, LLVMValueRef soa[NUM_CHANNELS], LLVMValueRef aos[NUM_CHANNELS]) { + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef 
comp; int i = 0; @@ -500,29 +602,29 @@ soa_to_aos(LLVMBuilderRef builder, aos[1] = aos[2] = aos[3] = aos[0]; for (i = 0; i < NUM_CHANNELS; ++i) { - LLVMValueRef channel = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef channel = lp_build_const_int32(gallivm, i); comp = LLVMBuildExtractElement(builder, soa[i], - LLVMConstInt(LLVMInt32Type(), 0, 0), ""); + lp_build_const_int32(gallivm, 0), ""); aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, ""); comp = LLVMBuildExtractElement(builder, soa[i], - LLVMConstInt(LLVMInt32Type(), 1, 0), ""); + lp_build_const_int32(gallivm, 1), ""); aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, ""); comp = LLVMBuildExtractElement(builder, soa[i], - LLVMConstInt(LLVMInt32Type(), 2, 0), ""); + lp_build_const_int32(gallivm, 2), ""); aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, ""); comp = LLVMBuildExtractElement(builder, soa[i], - LLVMConstInt(LLVMInt32Type(), 3, 0), ""); + lp_build_const_int32(gallivm, 3), ""); aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, ""); } } static void -convert_to_soa(LLVMBuilderRef builder, +convert_to_soa(struct gallivm_state *gallivm, LLVMValueRef (*aos)[NUM_CHANNELS], LLVMValueRef (*soa)[NUM_CHANNELS], int num_attribs) @@ -537,36 +639,37 @@ convert_to_soa(LLVMBuilderRef builder, LLVMValueRef val2 = aos[i][2]; LLVMValueRef val3 = aos[i][3]; - soa[i][0] = aos_to_soa(builder, val0, val1, val2, val3, - LLVMConstInt(LLVMInt32Type(), 0, 0)); - soa[i][1] = aos_to_soa(builder, val0, val1, val2, val3, - LLVMConstInt(LLVMInt32Type(), 1, 0)); - soa[i][2] = aos_to_soa(builder, val0, val1, val2, val3, - LLVMConstInt(LLVMInt32Type(), 2, 0)); - soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3, - LLVMConstInt(LLVMInt32Type(), 3, 0)); + soa[i][0] = aos_to_soa(gallivm, val0, val1, val2, val3, + lp_build_const_int32(gallivm, 0)); + soa[i][1] = aos_to_soa(gallivm, val0, val1, val2, val3, + lp_build_const_int32(gallivm, 1)); + soa[i][2] = aos_to_soa(gallivm, val0, val1, val2, val3, + lp_build_const_int32(gallivm, 2)); + soa[i][3] = aos_to_soa(gallivm, val0, val1, val2, val3, + lp_build_const_int32(gallivm, 3)); } } static void -store_aos(LLVMBuilderRef builder, +store_aos(struct gallivm_state *gallivm, LLVMValueRef io_ptr, LLVMValueRef index, LLVMValueRef value, LLVMValueRef clipmask) { - LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr); - LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr); + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef id_ptr = draw_jit_header_id(gallivm, io_ptr); + LLVMValueRef data_ptr = draw_jit_header_data(gallivm, io_ptr); LLVMValueRef indices[3]; LLVMValueRef val, shift; - indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[0] = lp_build_const_int32(gallivm, 0); indices[1] = index; - indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[2] = lp_build_const_int32(gallivm, 0); /* initialize vertex id:16 = 0xffff, pad:3 = 0, edgeflag:1 = 1 */ - val = LLVMConstInt(LLVMInt32Type(), 0xffff1, 0); - shift = LLVMConstInt(LLVMInt32Type(), 12, 0); + val = lp_build_const_int32(gallivm, 0xffff1); + shift = lp_build_const_int32(gallivm, 12); val = LLVMBuildShl(builder, val, shift, ""); /* add clipmask:12 */ val = LLVMBuildOr(builder, val, clipmask, ""); @@ -582,7 +685,7 @@ store_aos(LLVMBuilderRef builder, /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr); print_vectorf(builder, value);*/ data_ptr = LLVMBuildBitCast(builder, data_ptr, - 
LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0), + LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), 0), 0), "datavec"); data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, ""); @@ -594,10 +697,10 @@ store_aos(LLVMBuilderRef builder, LLVMValueRef gep0, gep1, gep2, gep3; data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, ""); - idx0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - idx1 = LLVMConstInt(LLVMInt32Type(), 1, 0); - idx2 = LLVMConstInt(LLVMInt32Type(), 2, 0); - idx3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + idx0 = lp_build_const_int32(gallivm, 0); + idx1 = lp_build_const_int32(gallivm, 1); + idx2 = lp_build_const_int32(gallivm, 2); + idx3 = lp_build_const_int32(gallivm, 3); x = LLVMBuildExtractElement(builder, value, idx0, ""); @@ -624,18 +727,19 @@ store_aos(LLVMBuilderRef builder, } static void -store_aos_array(LLVMBuilderRef builder, +store_aos_array(struct gallivm_state *gallivm, LLVMValueRef io_ptr, LLVMValueRef aos[NUM_CHANNELS], int attrib, int num_outputs, LLVMValueRef clipmask) { - LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0); - LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); - LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); - LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib); + LLVMValueRef ind0 = lp_build_const_int32(gallivm, 0); + LLVMValueRef ind1 = lp_build_const_int32(gallivm, 1); + LLVMValueRef ind2 = lp_build_const_int32(gallivm, 2); + LLVMValueRef ind3 = lp_build_const_int32(gallivm, 3); LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3; @@ -664,20 +768,21 @@ store_aos_array(LLVMBuilderRef builder, io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3); #endif /* store for each of the 4 vertices */ - store_aos(builder, io0_ptr, attr_index, aos[0], clipmask0); - store_aos(builder, io1_ptr, attr_index, aos[1], clipmask1); - store_aos(builder, io2_ptr, attr_index, aos[2], clipmask2); - store_aos(builder, io3_ptr, attr_index, aos[3], clipmask3); + store_aos(gallivm, io0_ptr, attr_index, aos[0], clipmask0); + store_aos(gallivm, io1_ptr, attr_index, aos[1], clipmask1); + store_aos(gallivm, io2_ptr, attr_index, aos[2], clipmask2); + store_aos(gallivm, io3_ptr, attr_index, aos[3], clipmask3); } static void -convert_to_aos(LLVMBuilderRef builder, +convert_to_aos(struct gallivm_state *gallivm, LLVMValueRef io, LLVMValueRef (*outputs)[NUM_CHANNELS], LLVMValueRef clipmask, int num_outputs, int max_vertices) { + LLVMBuilderRef builder = gallivm->builder; unsigned chan, attrib; #if DEBUG_STORE @@ -698,8 +803,8 @@ convert_to_aos(LLVMBuilderRef builder, } else soa[chan] = 0; } - soa_to_aos(builder, soa, aos); - store_aos_array(builder, + soa_to_aos(gallivm, soa, aos); + store_aos_array(gallivm, io, aos, attrib, @@ -717,10 +822,11 @@ convert_to_aos(LLVMBuilderRef builder, * rather than extracting each element one by one. 
*/ static void -store_clip(LLVMBuilderRef builder, +store_clip(struct gallivm_state *gallivm, LLVMValueRef io_ptr, LLVMValueRef (*outputs)[NUM_CHANNELS]) { + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef out[4]; LLVMValueRef indices[2]; LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; @@ -729,13 +835,13 @@ store_clip(LLVMBuilderRef builder, LLVMValueRef out0elem, out1elem, out2elem, out3elem; int i; - LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); - LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); - LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + LLVMValueRef ind0 = lp_build_const_int32(gallivm, 0); + LLVMValueRef ind1 = lp_build_const_int32(gallivm, 1); + LLVMValueRef ind2 = lp_build_const_int32(gallivm, 2); + LLVMValueRef ind3 = lp_build_const_int32(gallivm, 3); - indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); - indices[1] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[0] = + indices[1] = lp_build_const_int32(gallivm, 0); out[0] = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/ out[1] = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/ @@ -747,29 +853,21 @@ store_clip(LLVMBuilderRef builder, io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, ""); io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, ""); - clip_ptr0 = draw_jit_header_clip(builder, io0_ptr); - clip_ptr1 = draw_jit_header_clip(builder, io1_ptr); - clip_ptr2 = draw_jit_header_clip(builder, io2_ptr); - clip_ptr3 = draw_jit_header_clip(builder, io3_ptr); + clip_ptr0 = draw_jit_header_clip(gallivm, io0_ptr); + clip_ptr1 = draw_jit_header_clip(gallivm, io1_ptr); + clip_ptr2 = draw_jit_header_clip(gallivm, io2_ptr); + clip_ptr3 = draw_jit_header_clip(gallivm, io3_ptr); for (i = 0; i<4; i++){ - clip0_ptr = LLVMBuildGEP(builder, clip_ptr0, - indices, 2, ""); //x0 - clip1_ptr = LLVMBuildGEP(builder, clip_ptr1, - indices, 2, ""); //x1 - clip2_ptr = LLVMBuildGEP(builder, clip_ptr2, - indices, 2, ""); //x2 - clip3_ptr = LLVMBuildGEP(builder, clip_ptr3, - indices, 2, ""); //x3 - - out0elem = LLVMBuildExtractElement(builder, out[i], - ind0, ""); //x0 - out1elem = LLVMBuildExtractElement(builder, out[i], - ind1, ""); //x1 - out2elem = LLVMBuildExtractElement(builder, out[i], - ind2, ""); //x2 - out3elem = LLVMBuildExtractElement(builder, out[i], - ind3, ""); //x3 + clip0_ptr = LLVMBuildGEP(builder, clip_ptr0, indices, 2, ""); /* x0 */ + clip1_ptr = LLVMBuildGEP(builder, clip_ptr1, indices, 2, ""); /* x1 */ + clip2_ptr = LLVMBuildGEP(builder, clip_ptr2, indices, 2, ""); /* x2 */ + clip3_ptr = LLVMBuildGEP(builder, clip_ptr3, indices, 2, ""); /* x3 */ + + out0elem = LLVMBuildExtractElement(builder, out[i], ind0, ""); /* x0 */ + out1elem = LLVMBuildExtractElement(builder, out[i], ind1, ""); /* x1 */ + out2elem = LLVMBuildExtractElement(builder, out[i], ind2, ""); /* x2 */ + out3elem = LLVMBuildExtractElement(builder, out[i], ind3, ""); /* x3 */ LLVMBuildStore(builder, out0elem, clip0_ptr); LLVMBuildStore(builder, out1elem, clip1_ptr); @@ -783,16 +881,19 @@ store_clip(LLVMBuilderRef builder, /* Equivalent of _mm_set1_ps(a) */ -static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld, - LLVMValueRef a, - const char *name) +static LLVMValueRef +vec4f_from_scalar(struct gallivm_state *gallivm, + LLVMValueRef a, + const char *name) { - LLVMValueRef res = LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)); + LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); + LLVMValueRef res = 
LLVMGetUndef(LLVMVectorType(float_type, 4)); int i; for(i = 0; i < 4; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : ""); + LLVMValueRef index = lp_build_const_int32(gallivm, i); + res = LLVMBuildInsertElement(gallivm->builder, res, a, + index, i == 3 ? name : ""); } return res; @@ -808,10 +909,11 @@ generate_viewport(struct draw_llvm *llvm, LLVMValueRef context_ptr) { int i; + struct gallivm_state *gallivm = llvm->gallivm; struct lp_type f32_type = lp_type_float_vec(32); LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ - LLVMValueRef const1 = lp_build_const_vec(f32_type, 1.0); /*1.0 1.0 1.0 1.0*/ - LLVMValueRef vp_ptr = draw_jit_context_viewport(builder, context_ptr); + LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0); /*1.0 1.0 1.0 1.0*/ + LLVMValueRef vp_ptr = draw_jit_context_viewport(gallivm, context_ptr); /* for 1/w convention*/ out3 = LLVMBuildFDiv(builder, const1, out3, ""); @@ -826,14 +928,14 @@ generate_viewport(struct draw_llvm *llvm, LLVMValueRef trans_i; LLVMValueRef index; - index = LLVMConstInt(LLVMInt32Type(), i, 0); + index = lp_build_const_int32(gallivm, i); scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, ""); - index = LLVMConstInt(LLVMInt32Type(), i+4, 0); + index = lp_build_const_int32(gallivm, i+4); trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, ""); - scale = vec4f_from_scalar(builder, LLVMBuildLoad(builder, scale_i, ""), "scale"); - trans = vec4f_from_scalar(builder, LLVMBuildLoad(builder, trans_i, ""), "trans"); + scale = vec4f_from_scalar(gallivm, LLVMBuildLoad(builder, scale_i, ""), "scale"); + trans = vec4f_from_scalar(gallivm, LLVMBuildLoad(builder, trans_i, ""), "trans"); /* divide by w */ out = LLVMBuildFMul(builder, out, out3, ""); @@ -853,7 +955,7 @@ generate_viewport(struct draw_llvm *llvm, * Returns clipmask as 4xi32 bitmask for the 4 vertices */ static LLVMValueRef -generate_clipmask(LLVMBuilderRef builder, +generate_clipmask(struct gallivm_state *gallivm, LLVMValueRef (*outputs)[NUM_CHANNELS], boolean clip_xy, boolean clip_z, @@ -862,6 +964,7 @@ generate_clipmask(LLVMBuilderRef builder, unsigned nr, LLVMValueRef context_ptr) { + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef mask; /* stores the <4xi32> clipmasks */ LLVMValueRef test, temp; LLVMValueRef zero, shift; @@ -872,10 +975,10 @@ generate_clipmask(LLVMBuilderRef builder, struct lp_type f32_type = lp_type_float_vec(32); - mask = lp_build_const_int_vec(lp_type_int_vec(32), 0); - temp = lp_build_const_int_vec(lp_type_int_vec(32), 0); - zero = lp_build_const_vec(f32_type, 0); /* 0.0f 0.0f 0.0f 0.0f */ - shift = lp_build_const_int_vec(lp_type_int_vec(32), 1); /* 1 1 1 1 */ + mask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0); + temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0); + zero = lp_build_const_vec(gallivm, f32_type, 0); /* 0.0f 0.0f 0.0f 0.0f */ + shift = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 1); /* 1 1 1 1 */ /* Assuming position stored at output[0] */ pos_x = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/ @@ -886,92 +989,92 @@ generate_clipmask(LLVMBuilderRef builder, /* Cliptest, for hardwired planes */ if (clip_xy){ /* plane 1 */ - test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w); + test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w); temp = shift; test = LLVMBuildAnd(builder, test, temp, ""); mask = test; /* plane 2 */ test = 
LLVMBuildFAdd(builder, pos_x, pos_w, ""); - test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); + test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test); temp = LLVMBuildShl(builder, temp, shift, ""); test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); /* plane 3 */ - test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w); + test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w); temp = LLVMBuildShl(builder, temp, shift, ""); test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); /* plane 4 */ test = LLVMBuildFAdd(builder, pos_y, pos_w, ""); - test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); + test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test); temp = LLVMBuildShl(builder, temp, shift, ""); test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); } if (clip_z){ - temp = lp_build_const_int_vec(lp_type_int_vec(32), 16); + temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 16); if (clip_halfz){ /* plane 5 */ - test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, pos_z); + test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z); test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); } else{ /* plane 5 */ test = LLVMBuildFAdd(builder, pos_z, pos_w, ""); - test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); + test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test); test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); } /* plane 6 */ - test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w); + test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w); temp = LLVMBuildShl(builder, temp, shift, ""); test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); } if (clip_user){ - LLVMValueRef planes_ptr = draw_jit_context_planes(builder, context_ptr); + LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr); LLVMValueRef indices[3]; - temp = lp_build_const_int_vec(lp_type_int_vec(32), 32); + temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 32); /* userclip planes */ for (i = 6; i < nr; i++) { - indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); - indices[1] = LLVMConstInt(LLVMInt32Type(), i, 0); + indices[0] = lp_build_const_int32(gallivm, 0); + indices[1] = lp_build_const_int32(gallivm, i); - indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[2] = lp_build_const_int32(gallivm, 0); plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x"); - planes = vec4f_from_scalar(builder, plane1, "plane4_x"); + planes = vec4f_from_scalar(gallivm, plane1, "plane4_x"); sum = LLVMBuildFMul(builder, planes, pos_x, ""); - indices[2] = LLVMConstInt(LLVMInt32Type(), 1, 0); + indices[2] = lp_build_const_int32(gallivm, 1); plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y"); - planes = vec4f_from_scalar(builder, plane1, "plane4_y"); + planes = vec4f_from_scalar(gallivm, plane1, "plane4_y"); test = LLVMBuildFMul(builder, planes, pos_y, ""); sum = LLVMBuildFAdd(builder, sum, test, ""); - indices[2] = LLVMConstInt(LLVMInt32Type(), 2, 0); + indices[2] = lp_build_const_int32(gallivm, 2); plane_ptr = 
LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z"); - planes = vec4f_from_scalar(builder, plane1, "plane4_z"); + planes = vec4f_from_scalar(gallivm, plane1, "plane4_z"); test = LLVMBuildFMul(builder, planes, pos_z, ""); sum = LLVMBuildFAdd(builder, sum, test, ""); - indices[2] = LLVMConstInt(LLVMInt32Type(), 3, 0); + indices[2] = lp_build_const_int32(gallivm, 3); plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w"); - planes = vec4f_from_scalar(builder, plane1, "plane4_w"); + planes = vec4f_from_scalar(gallivm, plane1, "plane4_w"); test = LLVMBuildFMul(builder, planes, pos_w, ""); sum = LLVMBuildFAdd(builder, sum, test, ""); - test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, sum); + test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum); temp = LLVMBuildShl(builder, temp, shift, ""); test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); @@ -985,17 +1088,18 @@ generate_clipmask(LLVMBuilderRef builder, * Used zero/non-zero i32 value to represent boolean */ static void -clipmask_bool(LLVMBuilderRef builder, +clipmask_bool(struct gallivm_state *gallivm, LLVMValueRef clipmask, LLVMValueRef ret_ptr) { + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, ""); LLVMValueRef temp; int i; for (i=0; i<4; i++){ temp = LLVMBuildExtractElement(builder, clipmask, - LLVMConstInt(LLVMInt32Type(), i, 0) , ""); + lp_build_const_int32(gallivm, i) , ""); ret = LLVMBuildOr(builder, ret, temp, ""); } @@ -1005,6 +1109,9 @@ clipmask_bool(LLVMBuilderRef builder, static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) { + struct gallivm_state *gallivm = llvm->gallivm; + LLVMContextRef context = gallivm->context; + LLVMTypeRef int32_type = LLVMInt32TypeInContext(context); LLVMTypeRef arg_types[8]; LLVMTypeRef func_type; LLVMValueRef context_ptr; @@ -1029,18 +1136,19 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) variant->key.clip_z || variant->key.clip_user; - arg_types[0] = llvm->context_ptr_type; /* context */ - arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ - arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ - arg_types[3] = LLVMInt32Type(); /* start */ - arg_types[4] = LLVMInt32Type(); /* count */ - arg_types[5] = LLVMInt32Type(); /* stride */ - arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ - arg_types[7] = LLVMInt32Type(); /* instance_id */ - - func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0); - - variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type); + arg_types[0] = get_context_ptr_type(llvm); /* context */ + arg_types[1] = get_vertex_header_ptr_type(llvm); /* vertex_header */ + arg_types[2] = get_buffer_ptr_type(llvm); /* vbuffers */ + arg_types[3] = int32_type; /* start */ + arg_types[4] = int32_type; /* count */ + arg_types[5] = int32_type; /* stride */ + arg_types[6] = get_vb_ptr_type(llvm); /* pipe_vertex_buffer's */ + arg_types[7] = int32_type; /* instance_id */ + + func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0); + + variant->function = LLVMAddFunction(gallivm->module, "draw_llvm_shader", + func_type); LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) @@ -1068,22 +1176,23 @@ 
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) * Function body */ - block = LLVMAppendBasicBlock(variant->function, "entry"); - builder = LLVMCreateBuilder(); + block = LLVMAppendBasicBlockInContext(gallivm->context, variant->function, "entry"); + builder = gallivm->builder; + assert(builder); LLVMPositionBuilderAtEnd(builder, block); - lp_build_context_init(&bld, builder, lp_type_int(32)); + lp_build_context_init(&bld, llvm->gallivm, lp_type_int(32)); - system_values_array = lp_build_system_values_array(builder, vs_info, + system_values_array = lp_build_system_values_array(gallivm, vs_info, instance_id, NULL); end = lp_build_add(&bld, start, count); - step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + step = lp_build_const_int32(gallivm, max_vertices); /* function will return non-zero i32 value if any clipped vertices */ - ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), ""); - LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr); + ret_ptr = lp_build_alloca(gallivm, int32_type, ""); + LLVMBuildStore(builder, lp_build_const_int32(gallivm, 0), ret_ptr); /* code generated texture sampling */ sampler = draw_llvm_sampler_soa_create( @@ -1094,7 +1203,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) lp_build_printf(builder, "start = %d, end = %d, step = %d\n", start, end, step); #endif - lp_build_loop_begin(builder, start, &lp_loop); + lp_build_loop_begin(&lp_loop, llvm->gallivm, start); { LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; @@ -1112,20 +1221,18 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) LLVMValueRef true_index = LLVMBuildAdd( builder, lp_loop.counter, - LLVMConstInt(LLVMInt32Type(), i, 0), ""); + lp_build_const_int32(gallivm, i), ""); for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; - LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), - velem->vertex_buffer_index, - 0); + LLVMValueRef vb_index = lp_build_const_int32(gallivm, velem->vertex_buffer_index); LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, ""); - generate_fetch(builder, vbuffers_ptr, + generate_fetch(llvm->gallivm, vbuffers_ptr, &aos_attribs[j][i], velem, vb, true_index, instance_id); } } - convert_to_soa(builder, aos_attribs, inputs, + convert_to_soa(gallivm, aos_attribs, inputs, draw->pt.nr_vertex_elements); ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; @@ -1138,12 +1245,12 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) sampler); /* store original positions in clip before further manipulation */ - store_clip(builder, io, outputs); + store_clip(gallivm, io, outputs); /* do cliptest */ if (enable_cliptest){ /* allocate clipmask, assign it integer type */ - clipmask = generate_clipmask(builder, outputs, + clipmask = generate_clipmask(gallivm, outputs, variant->key.clip_xy, variant->key.clip_z, variant->key.clip_user, @@ -1151,10 +1258,10 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) variant->key.nr_planes, context_ptr); /* return clipping boolean value for function */ - clipmask_bool(builder, clipmask, ret_ptr); + clipmask_bool(gallivm, clipmask, ret_ptr); } else{ - clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0); + clipmask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0); } /* do viewport mapping */ @@ -1163,19 +1270,17 @@ 
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) } /* store clipmask in vertex header and positions in data */ - convert_to_aos(builder, io, outputs, clipmask, + convert_to_aos(gallivm, io, outputs, clipmask, vs_info->num_outputs, max_vertices); } - lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); + lp_build_loop_end_cond(&lp_loop, end, step, LLVMIntUGE); sampler->destroy(sampler); ret = LLVMBuildLoad(builder, ret_ptr,""); LLVMBuildRet(builder, ret); - LLVMDisposeBuilder(builder); - /* * Translate the LLVM IR into machine code. */ @@ -1186,14 +1291,14 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) } #endif - LLVMRunFunctionPassManager(llvm->pass, variant->function); + LLVMRunFunctionPassManager(gallivm->passmgr, variant->function); if (gallivm_debug & GALLIVM_DEBUG_IR) { lp_debug_dump_value(variant->function); debug_printf("\n"); } - code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function); + code = LLVMGetPointerToGlobal(gallivm->engine, variant->function); variant->jit_func = (draw_jit_vert_func)pointer_to_func(code); if (gallivm_debug & GALLIVM_DEBUG_ASM) { @@ -1206,6 +1311,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) static void draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant) { + struct gallivm_state *gallivm = llvm->gallivm; + LLVMContextRef context = gallivm->context; + LLVMTypeRef int32_type = LLVMInt32TypeInContext(context); LLVMTypeRef arg_types[8]; LLVMTypeRef func_type; LLVMValueRef context_ptr; @@ -1231,18 +1339,18 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian variant->key.clip_z || variant->key.clip_user; - arg_types[0] = llvm->context_ptr_type; /* context */ - arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ - arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ - arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */ - arg_types[4] = LLVMInt32Type(); /* fetch_count */ - arg_types[5] = LLVMInt32Type(); /* stride */ - arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ - arg_types[7] = LLVMInt32Type(); /* instance_id */ - - func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0); - - variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type); + arg_types[0] = get_context_ptr_type(llvm); /* context */ + arg_types[1] = get_vertex_header_ptr_type(llvm); /* vertex_header */ + arg_types[2] = get_buffer_ptr_type(llvm); /* vbuffers */ + arg_types[3] = LLVMPointerType(int32_type, 0); /* fetch_elts * */ + arg_types[4] = int32_type; /* fetch_count */ + arg_types[5] = int32_type; /* stride */ + arg_types[6] = get_vb_ptr_type(llvm); /* pipe_vertex_buffer's */ + arg_types[7] = int32_type; /* instance_id */ + + func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0); + + variant->function_elts = LLVMAddFunction(gallivm->module, "draw_llvm_shader_elts", func_type); LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv); for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) @@ -1271,17 +1379,17 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian * Function body */ - block = LLVMAppendBasicBlock(variant->function_elts, "entry"); - builder = LLVMCreateBuilder(); + block = LLVMAppendBasicBlockInContext(gallivm->context, variant->function_elts, "entry"); + builder = gallivm->builder; 
LLVMPositionBuilderAtEnd(builder, block); - lp_build_context_init(&bld, builder, lp_type_int(32)); + lp_build_context_init(&bld, gallivm, lp_type_int(32)); - system_values_array = lp_build_system_values_array(builder, vs_info, + system_values_array = lp_build_system_values_array(gallivm, vs_info, instance_id, NULL); - step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + step = lp_build_const_int32(gallivm, max_vertices); /* code generated texture sampling */ sampler = draw_llvm_sampler_soa_create( @@ -1289,14 +1397,14 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian context_ptr); fetch_max = LLVMBuildSub(builder, fetch_count, - LLVMConstInt(LLVMInt32Type(), 1, 0), + lp_build_const_int32(gallivm, 1), "fetch_max"); /* function returns non-zero i32 value if any clipped vertices */ - ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), ""); - LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr); + ret_ptr = lp_build_alloca(gallivm, int32_type, ""); + LLVMBuildStore(builder, lp_build_const_int32(gallivm, 0), ret_ptr); - lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop); + lp_build_loop_begin(&lp_loop, gallivm, lp_build_const_int32(gallivm, 0)); { LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; @@ -1314,7 +1422,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian LLVMValueRef true_index = LLVMBuildAdd( builder, lp_loop.counter, - LLVMConstInt(LLVMInt32Type(), i, 0), ""); + lp_build_const_int32(gallivm, i), ""); LLVMValueRef fetch_ptr; /* make sure we're not out of bounds which can happen @@ -1327,17 +1435,15 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt"); for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; - LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), - velem->vertex_buffer_index, - 0); + LLVMValueRef vb_index = lp_build_const_int32(gallivm, velem->vertex_buffer_index); LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, ""); - generate_fetch(builder, vbuffers_ptr, + generate_fetch(gallivm, vbuffers_ptr, &aos_attribs[j][i], velem, vb, true_index, instance_id); } } - convert_to_soa(builder, aos_attribs, inputs, + convert_to_soa(gallivm, aos_attribs, inputs, draw->pt.nr_vertex_elements); ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; @@ -1350,12 +1456,12 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian sampler); /* store original positions in clip before further manipulation */ - store_clip(builder, io, outputs); + store_clip(gallivm, io, outputs); /* do cliptest */ if (enable_cliptest){ /* allocate clipmask, assign it integer type */ - clipmask = generate_clipmask(builder, outputs, + clipmask = generate_clipmask(gallivm, outputs, variant->key.clip_xy, variant->key.clip_z, variant->key.clip_user, @@ -1363,10 +1469,10 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian variant->key.nr_planes, context_ptr); /* return clipping boolean value for function */ - clipmask_bool(builder, clipmask, ret_ptr); + clipmask_bool(gallivm, clipmask, ret_ptr); } else{ - clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0); + clipmask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0); } /* do viewport mapping */ @@ -1378,19 +1484,17 @@ 
draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian * original positions in clip * and transformed positions in data */ - convert_to_aos(builder, io, outputs, clipmask, + convert_to_aos(gallivm, io, outputs, clipmask, vs_info->num_outputs, max_vertices); } - lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop); + lp_build_loop_end_cond(&lp_loop, fetch_count, step, LLVMIntUGE); sampler->destroy(sampler); ret = LLVMBuildLoad(builder, ret_ptr,""); LLVMBuildRet(builder, ret); - LLVMDisposeBuilder(builder); - /* * Translate the LLVM IR into machine code. */ @@ -1401,14 +1505,14 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian } #endif - LLVMRunFunctionPassManager(llvm->pass, variant->function_elts); + LLVMRunFunctionPassManager(gallivm->passmgr, variant->function_elts); if (gallivm_debug & GALLIVM_DEBUG_IR) { lp_debug_dump_value(variant->function_elts); debug_printf("\n"); } - code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function_elts); + code = LLVMGetPointerToGlobal(gallivm->engine, variant->function_elts); variant->jit_func_elts = (draw_jit_vert_func_elts)pointer_to_func(code); if (gallivm_debug & GALLIVM_DEBUG_ASM) { @@ -1517,19 +1621,16 @@ void draw_llvm_destroy_variant(struct draw_llvm_variant *variant) { struct draw_llvm *llvm = variant->llvm; - struct draw_context *draw = llvm->draw; if (variant->function_elts) { - if (variant->function_elts) - LLVMFreeMachineCodeForFunction(draw->engine, - variant->function_elts); + LLVMFreeMachineCodeForFunction(llvm->gallivm->engine, + variant->function_elts); LLVMDeleteFunction(variant->function_elts); } if (variant->function) { - if (variant->function) - LLVMFreeMachineCodeForFunction(draw->engine, - variant->function); + LLVMFreeMachineCodeForFunction(llvm->gallivm->engine, + variant->function); LLVMDeleteFunction(variant->function); } diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index c3c30c07c64..9f038f1f04d 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -103,41 +103,41 @@ struct draw_jit_context }; -#define draw_jit_context_vs_constants(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 0, "vs_constants") +#define draw_jit_context_vs_constants(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, 0, "vs_constants") -#define draw_jit_context_gs_constants(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 1, "gs_constants") +#define draw_jit_context_gs_constants(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, 1, "gs_constants") -#define draw_jit_context_planes(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 2, "planes") +#define draw_jit_context_planes(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, 2, "planes") -#define draw_jit_context_viewport(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 3, "viewport") +#define draw_jit_context_viewport(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, 3, "viewport") #define DRAW_JIT_CTX_TEXTURES 4 -#define draw_jit_context_textures(_builder, _ptr) \ - lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CTX_TEXTURES, "textures") +#define draw_jit_context_textures(_gallivm, _ptr) \ + lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_JIT_CTX_TEXTURES, "textures") -#define draw_jit_header_id(_builder, _ptr) \ - lp_build_struct_get_ptr(_builder, _ptr, 0, "id") +#define draw_jit_header_id(_gallivm, _ptr) \ + lp_build_struct_get_ptr(_gallivm, _ptr, 0, "id") -#define 
draw_jit_header_clip(_builder, _ptr) \ - lp_build_struct_get_ptr(_builder, _ptr, 1, "clip") +#define draw_jit_header_clip(_gallivm, _ptr) \ + lp_build_struct_get_ptr(_gallivm, _ptr, 1, "clip") -#define draw_jit_header_data(_builder, _ptr) \ - lp_build_struct_get_ptr(_builder, _ptr, 2, "data") +#define draw_jit_header_data(_gallivm, _ptr) \ + lp_build_struct_get_ptr(_gallivm, _ptr, 2, "data") -#define draw_jit_vbuffer_stride(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 0, "stride") +#define draw_jit_vbuffer_stride(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, 0, "stride") -#define draw_jit_vbuffer_max_index(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 1, "max_index") +#define draw_jit_vbuffer_max_index(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, 1, "max_index") -#define draw_jit_vbuffer_offset(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 2, "buffer_offset") +#define draw_jit_vbuffer_offset(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, 2, "buffer_offset") typedef int @@ -229,7 +229,6 @@ struct draw_llvm_variant /* key is variable-sized, must be last */ struct draw_llvm_variant_key key; - /* key is variable-sized, must be last */ }; struct llvm_vertex_shader { @@ -246,21 +245,19 @@ struct draw_llvm { struct draw_jit_context jit_context; + struct gallivm_state *gallivm; + struct draw_llvm_variant_list_item vs_variants_list; int nr_variants; - LLVMModuleRef module; - LLVMExecutionEngineRef engine; - LLVMModuleProviderRef provider; - LLVMTargetDataRef target; - LLVMPassManagerRef pass; - + /* LLVM JIT builder types */ LLVMTypeRef context_ptr_type; - LLVMTypeRef vertex_header_ptr_type; LLVMTypeRef buffer_ptr_type; LLVMTypeRef vb_ptr_type; + LLVMTypeRef vertex_header_ptr_type; }; + static INLINE struct llvm_vertex_shader * llvm_vertex_shader(struct draw_vertex_shader *vs) { @@ -269,7 +266,7 @@ llvm_vertex_shader(struct draw_vertex_shader *vs) struct draw_llvm * -draw_llvm_create(struct draw_context *draw); +draw_llvm_create(struct draw_context *draw, struct gallivm_state *gallivm); void draw_llvm_destroy(struct draw_llvm *llvm); @@ -286,7 +283,7 @@ struct draw_llvm_variant_key * draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store); LLVMValueRef -draw_llvm_translate_from(LLVMBuilderRef builder, +draw_llvm_translate_from(struct gallivm_state *gallivm, LLVMValueRef vbuffer, enum pipe_format from_format); diff --git a/src/gallium/auxiliary/draw/draw_llvm_sample.c b/src/gallium/auxiliary/draw/draw_llvm_sample.c index ac1fbb179c6..574c7cc452f 100644 --- a/src/gallium/auxiliary/draw/draw_llvm_sample.c +++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c @@ -32,6 +32,7 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" +#include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_sample.h" @@ -84,12 +85,13 @@ struct draw_llvm_sampler_soa */ static LLVMValueRef draw_llvm_texture_member(const struct lp_sampler_dynamic_state *base, - LLVMBuilderRef builder, + struct gallivm_state *gallivm, unsigned unit, unsigned member_index, const char *member_name, boolean emit_load) { + LLVMBuilderRef builder = gallivm->builder; struct draw_llvm_sampler_dynamic_state *state = (struct draw_llvm_sampler_dynamic_state *)base; LLVMValueRef indices[4]; @@ -99,13 +101,13 @@ draw_llvm_texture_member(const struct lp_sampler_dynamic_state *base, debug_assert(unit < PIPE_MAX_VERTEX_SAMPLERS); /* context[0] */ - indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[0] = 
lp_build_const_int32(gallivm, 0); /* context[0].textures */ - indices[1] = LLVMConstInt(LLVMInt32Type(), DRAW_JIT_CTX_TEXTURES, 0); + indices[1] = lp_build_const_int32(gallivm, DRAW_JIT_CTX_TEXTURES); /* context[0].textures[unit] */ - indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0); + indices[2] = lp_build_const_int32(gallivm, unit); /* context[0].textures[unit].member */ - indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0); + indices[3] = lp_build_const_int32(gallivm, member_index); ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), ""); @@ -132,10 +134,10 @@ draw_llvm_texture_member(const struct lp_sampler_dynamic_state *base, #define DRAW_LLVM_TEXTURE_MEMBER(_name, _index, _emit_load) \ static LLVMValueRef \ draw_llvm_texture_##_name( const struct lp_sampler_dynamic_state *base, \ - LLVMBuilderRef builder, \ + struct gallivm_state *gallivm, \ unsigned unit) \ { \ - return draw_llvm_texture_member(base, builder, unit, _index, #_name, _emit_load ); \ + return draw_llvm_texture_member(base, gallivm, unit, _index, #_name, _emit_load ); \ } @@ -165,7 +167,7 @@ draw_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) */ static void draw_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base, - LLVMBuilderRef builder, + struct gallivm_state *gallivm, struct lp_type type, unsigned unit, unsigned num_coords, @@ -180,7 +182,7 @@ draw_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base, assert(unit < PIPE_MAX_VERTEX_SAMPLERS); - lp_build_sample_soa(builder, + lp_build_sample_soa(gallivm, &sampler->dynamic_state.static_state[unit], &sampler->dynamic_state.base, type, diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c index 5171327ce2d..77d0af74733 100644 --- a/src/gallium/auxiliary/draw/draw_llvm_translate.c +++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c @@ -3,6 +3,7 @@ #include "draw_llvm.h" +#include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_struct.h" #include "gallivm/lp_bld_format.h" #include "gallivm/lp_bld_debug.h" @@ -16,272 +17,279 @@ #define DRAW_DBG 0 static LLVMValueRef -from_64_float(LLVMBuilderRef builder, LLVMValueRef val) +from_64_float(struct gallivm_state *gallivm, LLVMValueRef val) { - LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMPointerType(LLVMDoubleType(), 0) , ""); - LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); - return LLVMBuildFPTrunc(builder, l, LLVMFloatType(), ""); + LLVMValueRef bc = LLVMBuildBitCast(gallivm->builder, val, + LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, bc, ""); + return LLVMBuildFPTrunc(gallivm->builder, l, LLVMFloatTypeInContext(gallivm->context), ""); } static LLVMValueRef -from_32_float(LLVMBuilderRef builder, LLVMValueRef val) +from_32_float(struct gallivm_state *gallivm, LLVMValueRef val) { - LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMPointerType(LLVMFloatType(), 0) , ""); - return LLVMBuildLoad(builder, bc, ""); + LLVMValueRef bc = LLVMBuildBitCast(gallivm->builder, val, + LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0) , ""); + return LLVMBuildLoad(gallivm->builder, bc, ""); } static INLINE LLVMValueRef -from_8_uscaled(LLVMBuilderRef builder, LLVMValueRef val) +from_8_uscaled(struct gallivm_state *gallivm, LLVMValueRef val) { - LLVMValueRef l = LLVMBuildLoad(builder, val, ""); - return LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); + LLVMValueRef l = 
LLVMBuildLoad(gallivm->builder, val, ""); + return LLVMBuildUIToFP(gallivm->builder, l, LLVMFloatTypeInContext(gallivm->context), ""); } static INLINE LLVMValueRef -from_16_uscaled(LLVMBuilderRef builder, LLVMValueRef val) +from_16_uscaled(struct gallivm_state *gallivm, LLVMValueRef val) { - LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMPointerType(LLVMIntType(16), 0) , ""); - LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); - return LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); + LLVMValueRef bc = LLVMBuildBitCast(gallivm->builder, val, + LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 16), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, bc, ""); + return LLVMBuildUIToFP(gallivm->builder, l, LLVMFloatTypeInContext(gallivm->context), ""); } static INLINE LLVMValueRef -from_32_uscaled(LLVMBuilderRef builder, LLVMValueRef val) +from_32_uscaled(struct gallivm_state *gallivm, LLVMValueRef val) { - LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMPointerType(LLVMIntType(32), 0) , ""); - LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); - return LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); + LLVMValueRef bc = LLVMBuildBitCast(gallivm->builder, val, + LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, bc, ""); + return LLVMBuildUIToFP(gallivm->builder, l, LLVMFloatTypeInContext(gallivm->context), ""); } static INLINE LLVMValueRef -from_8_sscaled(LLVMBuilderRef builder, LLVMValueRef val) +from_8_sscaled(struct gallivm_state *gallivm, LLVMValueRef val) { - LLVMValueRef l = LLVMBuildLoad(builder, val, ""); - return LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, val, ""); + return LLVMBuildSIToFP(gallivm->builder, l, LLVMFloatTypeInContext(gallivm->context), ""); } static INLINE LLVMValueRef -from_16_sscaled(LLVMBuilderRef builder, LLVMValueRef val) +from_16_sscaled(struct gallivm_state *gallivm, LLVMValueRef val) { - LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMPointerType(LLVMIntType(16), 0) , ""); - LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); - return LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + LLVMValueRef bc = LLVMBuildBitCast(gallivm->builder, val, + LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 16), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, bc, ""); + return LLVMBuildSIToFP(gallivm->builder, l, LLVMFloatTypeInContext(gallivm->context), ""); } static INLINE LLVMValueRef -from_32_sscaled(LLVMBuilderRef builder, LLVMValueRef val) +from_32_sscaled(struct gallivm_state *gallivm, LLVMValueRef val) { - LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMPointerType(LLVMIntType(32), 0) , ""); - LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); - return LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + LLVMValueRef bc = LLVMBuildBitCast(gallivm->builder, val, + LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, bc, ""); + return LLVMBuildSIToFP(gallivm->builder, l, LLVMFloatTypeInContext(gallivm->context), ""); } static INLINE LLVMValueRef -from_8_unorm(LLVMBuilderRef builder, LLVMValueRef val) +from_8_unorm(struct gallivm_state *gallivm, LLVMValueRef val) { - LLVMValueRef l = LLVMBuildLoad(builder, val, ""); - LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); - return LLVMBuildFDiv(builder, uscaled, - LLVMConstReal(LLVMFloatType(), 255.), ""); + LLVMValueRef l = 
LLVMBuildLoad(gallivm->builder, val, ""); + LLVMValueRef uscaled = LLVMBuildUIToFP(gallivm->builder, l, LLVMFloatTypeInContext(gallivm->context), ""); + return LLVMBuildFDiv(gallivm->builder, uscaled, + lp_build_const_float(gallivm, 255.), ""); } static INLINE LLVMValueRef -from_16_unorm(LLVMBuilderRef builder, LLVMValueRef val) +from_16_unorm(struct gallivm_state *gallivm, LLVMValueRef val) { - LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMPointerType(LLVMIntType(16), 0) , ""); - LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); - LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); - return LLVMBuildFDiv(builder, uscaled, - LLVMConstReal(LLVMFloatType(), 65535.), ""); + LLVMValueRef bc = LLVMBuildBitCast(gallivm->builder, val, + LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 16), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildUIToFP(gallivm->builder, l, LLVMFloatTypeInContext(gallivm->context), ""); + return LLVMBuildFDiv(gallivm->builder, uscaled, + lp_build_const_float(gallivm, 65535.), ""); } static INLINE LLVMValueRef -from_32_unorm(LLVMBuilderRef builder, LLVMValueRef val) +from_32_unorm(struct gallivm_state *gallivm, LLVMValueRef val) { - LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMPointerType(LLVMIntType(32), 0) , ""); - LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); - LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); + LLVMValueRef bc = LLVMBuildBitCast(gallivm->builder, val, + LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildUIToFP(gallivm->builder, l, LLVMFloatTypeInContext(gallivm->context), ""); - return LLVMBuildFDiv(builder, uscaled, - LLVMConstReal(LLVMFloatType(), 4294967295.), ""); + return LLVMBuildFDiv(gallivm->builder, uscaled, + lp_build_const_float(gallivm, 4294967295.), ""); } static INLINE LLVMValueRef -from_8_snorm(LLVMBuilderRef builder, LLVMValueRef val) +from_8_snorm(struct gallivm_state *gallivm, LLVMValueRef val) { - LLVMValueRef l = LLVMBuildLoad(builder, val, ""); - LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); - return LLVMBuildFDiv(builder, uscaled, - LLVMConstReal(LLVMFloatType(), 127.0), ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, val, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(gallivm->builder, l, LLVMFloatTypeInContext(gallivm->context), ""); + return LLVMBuildFDiv(gallivm->builder, uscaled, + lp_build_const_float(gallivm, 127.0), ""); } static INLINE LLVMValueRef -from_16_snorm(LLVMBuilderRef builder, LLVMValueRef val) +from_16_snorm(struct gallivm_state *gallivm, LLVMValueRef val) { - LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMPointerType(LLVMIntType(16), 0) , ""); - LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); - LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); - return LLVMBuildFDiv(builder, uscaled, - LLVMConstReal(LLVMFloatType(), 32767.0f), ""); + LLVMValueRef bc = LLVMBuildBitCast(gallivm->builder, val, + LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 16), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(gallivm->builder, l, LLVMFloatTypeInContext(gallivm->context), ""); + return LLVMBuildFDiv(gallivm->builder, uscaled, + lp_build_const_float(gallivm, 32767.0f), ""); } static INLINE LLVMValueRef -from_32_snorm(LLVMBuilderRef builder, LLVMValueRef val) 
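/*
 * The converters in this file all follow the same substitution:
 * LLVMFloatType()/LLVMIntType(n) become their *InContext variants, and the
 * literal LLVMConstReal()/LLVMConstInt() calls become the
 * lp_build_const_float()/lp_build_const_int32() helpers from
 * gallivm/lp_bld_const.h.  Those helpers are presumably thin wrappers of
 * roughly this shape (a sketch, not the real definitions):
 */
#include <llvm-c/Core.h>
#include "gallivm/lp_bld_init.h"   /* assumed home of struct gallivm_state */

static LLVMValueRef
example_const_float(struct gallivm_state *gallivm, float x)
{
   return LLVMConstReal(LLVMFloatTypeInContext(gallivm->context), x);
}

static LLVMValueRef
example_const_int32(struct gallivm_state *gallivm, int i)
{
   return LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), i, 0);
}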
+from_32_snorm(struct gallivm_state *gallivm, LLVMValueRef val) { - LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMPointerType(LLVMIntType(32), 0) , ""); - LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); - LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + LLVMValueRef bc = LLVMBuildBitCast(gallivm->builder, val, + LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(gallivm->builder, l, LLVMFloatTypeInContext(gallivm->context), ""); - return LLVMBuildFDiv(builder, uscaled, - LLVMConstReal(LLVMFloatType(), 2147483647.0), ""); + return LLVMBuildFDiv(gallivm->builder, uscaled, + lp_build_const_float(gallivm, 2147483647.0), ""); } static INLINE LLVMValueRef -from_32_fixed(LLVMBuilderRef builder, LLVMValueRef val) +from_32_fixed(struct gallivm_state *gallivm, LLVMValueRef val) { - LLVMValueRef bc = LLVMBuildBitCast(builder, val, - LLVMPointerType(LLVMIntType(32), 0) , ""); - LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); - LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + LLVMValueRef bc = LLVMBuildBitCast(gallivm->builder, val, + LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(gallivm->builder, l, LLVMFloatTypeInContext(gallivm->context), ""); - return LLVMBuildFDiv(builder, uscaled, - LLVMConstReal(LLVMFloatType(), 65536.0), ""); + return LLVMBuildFDiv(gallivm->builder, uscaled, + lp_build_const_float(gallivm, 65536.0), ""); } static LLVMValueRef -to_64_float(LLVMBuilderRef builder, LLVMValueRef fp) +to_64_float(struct gallivm_state *gallivm, LLVMValueRef fp) { - LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); - return LLVMBuildFPExt(builder, l, LLVMDoubleType(), ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, fp, ""); + return LLVMBuildFPExt(gallivm->builder, l, LLVMDoubleTypeInContext(gallivm->context), ""); } static LLVMValueRef -to_32_float(LLVMBuilderRef builder, LLVMValueRef fp) +to_32_float(struct gallivm_state *gallivm, LLVMValueRef fp) { - return LLVMBuildLoad(builder, fp, ""); + return LLVMBuildLoad(gallivm->builder, fp, ""); } static INLINE LLVMValueRef -to_8_uscaled(LLVMBuilderRef builder, LLVMValueRef fp) +to_8_uscaled(struct gallivm_state *gallivm, LLVMValueRef fp) { - LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); - return LLVMBuildFPToUI(builder, l, LLVMIntType(8), ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, fp, ""); + return LLVMBuildFPToUI(gallivm->builder, l, LLVMIntTypeInContext(gallivm->context, 8), ""); } static INLINE LLVMValueRef -to_16_uscaled(LLVMBuilderRef builder, LLVMValueRef fp) +to_16_uscaled(struct gallivm_state *gallivm, LLVMValueRef fp) { - LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); - return LLVMBuildFPToUI(builder, l, LLVMIntType(16), ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, fp, ""); + return LLVMBuildFPToUI(gallivm->builder, l, LLVMIntTypeInContext(gallivm->context, 16), ""); } static INLINE LLVMValueRef -to_32_uscaled(LLVMBuilderRef builder, LLVMValueRef fp) +to_32_uscaled(struct gallivm_state *gallivm, LLVMValueRef fp) { - LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); - return LLVMBuildFPToUI(builder, l, LLVMIntType(32), ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, fp, ""); + return LLVMBuildFPToUI(gallivm->builder, l, LLVMIntTypeInContext(gallivm->context, 32), ""); } static INLINE LLVMValueRef 
-to_8_sscaled(LLVMBuilderRef builder, LLVMValueRef fp) +to_8_sscaled(struct gallivm_state *gallivm, LLVMValueRef fp) { - LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); - return LLVMBuildFPToSI(builder, l, LLVMIntType(8), ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, fp, ""); + return LLVMBuildFPToSI(gallivm->builder, l, LLVMIntTypeInContext(gallivm->context, 8), ""); } static INLINE LLVMValueRef -to_16_sscaled(LLVMBuilderRef builder, LLVMValueRef fp) +to_16_sscaled(struct gallivm_state *gallivm, LLVMValueRef fp) { - LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); - return LLVMBuildFPToSI(builder, l, LLVMIntType(16), ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, fp, ""); + return LLVMBuildFPToSI(gallivm->builder, l, LLVMIntTypeInContext(gallivm->context, 16), ""); } static INLINE LLVMValueRef -to_32_sscaled(LLVMBuilderRef builder, LLVMValueRef fp) +to_32_sscaled(struct gallivm_state *gallivm, LLVMValueRef fp) { - LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); - return LLVMBuildFPToSI(builder, l, LLVMIntType(32), ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, fp, ""); + return LLVMBuildFPToSI(gallivm->builder, l, LLVMIntTypeInContext(gallivm->context, 32), ""); } static INLINE LLVMValueRef -to_8_unorm(LLVMBuilderRef builder, LLVMValueRef fp) +to_8_unorm(struct gallivm_state *gallivm, LLVMValueRef fp) { - LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); - LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(8), ""); - return LLVMBuildFMul(builder, uscaled, - LLVMConstReal(LLVMFloatType(), 255.), ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToUI(gallivm->builder, l, + LLVMIntTypeInContext(gallivm->context, 8), ""); + return LLVMBuildFMul(gallivm->builder, uscaled, + lp_build_const_float(gallivm, 255.), ""); } static INLINE LLVMValueRef -to_16_unorm(LLVMBuilderRef builder, LLVMValueRef fp) +to_16_unorm(struct gallivm_state *gallivm, LLVMValueRef fp) { - LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); - LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(32), ""); - return LLVMBuildFMul(builder, uscaled, - LLVMConstReal(LLVMFloatType(), 65535.), ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToUI(gallivm->builder, l, + LLVMIntTypeInContext(gallivm->context, 32), ""); + return LLVMBuildFMul(gallivm->builder, uscaled, + lp_build_const_float(gallivm, 65535.), ""); } static INLINE LLVMValueRef -to_32_unorm(LLVMBuilderRef builder, LLVMValueRef fp) +to_32_unorm(struct gallivm_state *gallivm, LLVMValueRef fp) { - LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); - LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(32), ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToUI(gallivm->builder, l, + LLVMIntTypeInContext(gallivm->context, 32), ""); - return LLVMBuildFMul(builder, uscaled, - LLVMConstReal(LLVMFloatType(), 4294967295.), ""); + return LLVMBuildFMul(gallivm->builder, uscaled, + lp_build_const_float(gallivm, 4294967295.), ""); } static INLINE LLVMValueRef -to_8_snorm(LLVMBuilderRef builder, LLVMValueRef val) +to_8_snorm(struct gallivm_state *gallivm, LLVMValueRef val) { - LLVMValueRef l = LLVMBuildLoad(builder, val, ""); - LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(8), ""); - return LLVMBuildFMul(builder, uscaled, - LLVMConstReal(LLVMFloatType(), 127.0), ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, val, ""); + LLVMValueRef uscaled = 
LLVMBuildFPToSI(gallivm->builder, l, + LLVMIntTypeInContext(gallivm->context, 8), ""); + return LLVMBuildFMul(gallivm->builder, uscaled, + lp_build_const_float(gallivm, 127.0), ""); } static INLINE LLVMValueRef -to_16_snorm(LLVMBuilderRef builder, LLVMValueRef fp) +to_16_snorm(struct gallivm_state *gallivm, LLVMValueRef fp) { - LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); - LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(16), ""); - return LLVMBuildFMul(builder, uscaled, - LLVMConstReal(LLVMFloatType(), 32767.0f), ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToSI(gallivm->builder, l, + LLVMIntTypeInContext(gallivm->context, 16), ""); + return LLVMBuildFMul(gallivm->builder, uscaled, + lp_build_const_float(gallivm, 32767.0f), ""); } static INLINE LLVMValueRef -to_32_snorm(LLVMBuilderRef builder, LLVMValueRef fp) +to_32_snorm(struct gallivm_state *gallivm, LLVMValueRef fp) { - LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); - LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(32), ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToSI(gallivm->builder, l, + LLVMIntTypeInContext(gallivm->context, 32), ""); - return LLVMBuildFMul(builder, uscaled, - LLVMConstReal(LLVMFloatType(), 2147483647.0), ""); + return LLVMBuildFMul(gallivm->builder, uscaled, + lp_build_const_float(gallivm, 2147483647.0), ""); } static INLINE LLVMValueRef -to_32_fixed(LLVMBuilderRef builder, LLVMValueRef fp) +to_32_fixed(struct gallivm_state *gallivm, LLVMValueRef fp) { - LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); - LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(32), ""); + LLVMValueRef l = LLVMBuildLoad(gallivm->builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToSI(gallivm->builder, l, + LLVMIntTypeInContext(gallivm->context, 32), ""); - return LLVMBuildFMul(builder, uscaled, - LLVMConstReal(LLVMFloatType(), 65536.0), ""); + return LLVMBuildFMul(gallivm->builder, uscaled, + lp_build_const_float(gallivm, 65536.0), ""); } -typedef LLVMValueRef (*from_func)(LLVMBuilderRef, LLVMValueRef); -typedef LLVMValueRef (*to_func)(LLVMBuilderRef, LLVMValueRef); +typedef LLVMValueRef (*from_func)(struct gallivm_state *, LLVMValueRef); +typedef LLVMValueRef (*to_func)(struct gallivm_state *, LLVMValueRef); /* so that underneath can avoid function calls which are prohibited * for static initialization we need this conversion */ @@ -294,21 +302,21 @@ enum ll_type { }; static INLINE LLVMTypeRef -ll_type_to_llvm(enum ll_type type) +ll_type_to_llvm(struct gallivm_state *gallivm, enum ll_type type) { switch (type) { case LL_Double: - return LLVMDoubleType(); + return LLVMDoubleTypeInContext(gallivm->context); case LL_Float: - return LLVMFloatType(); + return LLVMFloatTypeInContext(gallivm->context); case LL_Int32: - return LLVMInt32Type(); + return LLVMInt32TypeInContext(gallivm->context); case LL_Int16: - return LLVMIntType(16); + return LLVMIntTypeInContext(gallivm->context, 16); case LL_Int8: - return LLVMIntType(8); + return LLVMIntTypeInContext(gallivm->context, 8); } - return LLVMIntType(8); + return LLVMIntTypeInContext(gallivm->context, 8); } static INLINE int @@ -414,42 +422,42 @@ struct draw_llvm_translate { static LLVMValueRef -fetch(LLVMBuilderRef builder, +fetch(struct gallivm_state *gallivm, LLVMValueRef ptr, int val_size, int nr_components, from_func func) { int i; int offset = 0; - LLVMValueRef res = LLVMConstNull( - LLVMVectorType(LLVMFloatType(), 4)); + 
LLVMValueRef res = + LLVMConstNull(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)); LLVMValueRef defaults[4]; - defaults[0] = LLVMConstReal(LLVMFloatType(), 0); - defaults[1] = LLVMConstReal(LLVMFloatType(), 0); - defaults[2] = LLVMConstReal(LLVMFloatType(), 0); - defaults[3] = LLVMConstReal(LLVMFloatType(), 1); + defaults[0] = + defaults[1] = + defaults[2] = lp_build_const_float(gallivm, 0.0); + defaults[3] = lp_build_const_float(gallivm, 1.0); for (i = 0; i < nr_components; ++i) { - LLVMValueRef src_index = LLVMConstInt(LLVMInt32Type(), offset, 0); - LLVMValueRef dst_index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef src_index = lp_build_const_int32(gallivm, offset); + LLVMValueRef dst_index = lp_build_const_int32(gallivm, i); LLVMValueRef src_tmp; LLVMValueRef component; - src_tmp = LLVMBuildGEP(builder, ptr, &src_index, 1, "src_tmp"); + src_tmp = LLVMBuildGEP(gallivm->builder, ptr, &src_index, 1, "src_tmp"); /* convert src_tmp to float */ - component = func(builder, src_tmp); + component = func(gallivm, src_tmp); /* vec.comp = component */ - res = LLVMBuildInsertElement(builder, + res = LLVMBuildInsertElement(gallivm->builder, res, component, dst_index, ""); offset += val_size; } for (; i < 4; ++i) { - LLVMValueRef dst_index = LLVMConstInt(LLVMInt32Type(), i, 0); - res = LLVMBuildInsertElement(builder, + LLVMValueRef dst_index = lp_build_const_int32(gallivm, i); + res = LLVMBuildInsertElement(gallivm->builder, res, defaults[i], dst_index, ""); @@ -459,7 +467,7 @@ fetch(LLVMBuilderRef builder, LLVMValueRef -draw_llvm_translate_from(LLVMBuilderRef builder, +draw_llvm_translate_from(struct gallivm_state *gallivm, LLVMValueRef vbuffer, enum pipe_format from_format) { @@ -476,7 +484,7 @@ draw_llvm_translate_from(LLVMBuilderRef builder, for (i = 0; i < Elements(translates); ++i) { if (translates[i].format == from_format) { /*LLVMTypeRef type = ll_type_to_llvm(translates[i].type);*/ - return fetch(builder, + return fetch(gallivm, vbuffer, ll_type_size(translates[i].type), translates[i].num_components, @@ -493,6 +501,6 @@ draw_llvm_translate_from(LLVMBuilderRef builder, */ format_desc = util_format_description(from_format); - zero = LLVMConstNull(LLVMInt32Type()); - return lp_build_fetch_rgba_aos(builder, format_desc, type, vbuffer, zero, zero, zero); + zero = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)); + return lp_build_fetch_rgba_aos(gallivm, format_desc, type, vbuffer, zero, zero, zero); } diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index d1aba763098..0851b9acc0d 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -406,6 +406,7 @@ aaline_create_texture(struct aaline_stage *aaline) texTemp.width0 = 1 << MAX_TEXTURE_LEVEL; texTemp.height0 = 1 << MAX_TEXTURE_LEVEL; texTemp.depth0 = 1; + texTemp.array_size = 1; texTemp.bind = PIPE_BIND_SAMPLER_VIEW; aaline->texture = screen->resource_create(screen, &texTemp); @@ -441,10 +442,10 @@ aaline_create_texture(struct aaline_stage *aaline) /* This texture is new, no need to flush. 
*/ transfer = pipe->get_transfer(pipe, - aaline->texture, - u_subresource(0, level), - PIPE_TRANSFER_WRITE, - &box); + aaline->texture, + level, + PIPE_TRANSFER_WRITE, + &box); data = pipe->transfer_map(pipe, transfer); if (data == NULL) diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index ed9a53e154d..f5515c1df76 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -393,8 +393,8 @@ pstip_update_texture(struct pstip_stage *pstip) */ pipe->flush( pipe, PIPE_FLUSH_TEXTURE_CACHE, NULL ); - transfer = pipe_get_transfer(pipe, pstip->texture, 0, 0, 0, - PIPE_TRANSFER_WRITE, 0, 0, 32, 32); + transfer = pipe_get_transfer(pipe, pstip->texture, 0, 0, + PIPE_TRANSFER_WRITE, 0, 0, 32, 32); data = pipe->transfer_map(pipe, transfer); /* @@ -440,6 +440,7 @@ pstip_create_texture(struct pstip_stage *pstip) texTemp.width0 = 32; texTemp.height0 = 32; texTemp.depth0 = 1; + texTemp.array_size = 1; texTemp.bind = PIPE_BIND_SAMPLER_VIEW; pstip->texture = screen->resource_create(screen, &texTemp); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 54163d7f9eb..06ed4d60ef2 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -286,7 +286,6 @@ struct draw_context #ifdef HAVE_LLVM struct draw_llvm *llvm; - LLVMExecutionEngineRef engine; #endif struct pipe_sampler_view *sampler_views[PIPE_MAX_VERTEX_SAMPLERS]; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 7c198c6026d..c98fb3d5205 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -58,8 +58,8 @@ struct fetch_shade_emit { const ubyte *src[PIPE_MAX_ATTRIBS]; unsigned prim; - struct draw_vs_varient_key key; - struct draw_vs_varient *active; + struct draw_vs_variant_key key; + struct draw_vs_variant *active; const struct vertex_info *vinfo; @@ -150,7 +150,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, } - fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader, + fse->active = draw_vs_lookup_variant( draw->vs.vertex_shader, &fse->key ); if (!fse->active) { diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index a53a768d029..2e3afb22c48 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -34,6 +34,7 @@ #include "draw/draw_pt.h" #include "draw/draw_vs.h" #include "draw/draw_llvm.h" +#include "gallivm/lp_bld_init.h" struct llvm_middle_end { @@ -72,19 +73,18 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, struct draw_llvm_variant_list_item *li; unsigned i; unsigned instance_id_index = ~0; - - - unsigned out_prim = (draw->gs.geometry_shader ? - draw->gs.geometry_shader->output_primitive : - in_prim); + const unsigned out_prim = (draw->gs.geometry_shader ? + draw->gs.geometry_shader->output_primitive : + in_prim); /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. */ - unsigned nr = MAX2( shader->base.info.num_inputs, - shader->base.info.num_outputs + 1 ); + const unsigned nr = MAX2( shader->base.info.num_inputs, + shader->base.info.num_outputs + 1 ); /* Scan for instanceID system value. 
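/*
 * The aaline and pstip hunks around here adapt to two interface changes:
 * pipe_resource grew an array_size field, and transfers are requested per
 * mip level instead of via u_subresource().  A minimal usage sketch under
 * those assumptions (target/format picked only for illustration, error
 * handling omitted):
 */
#include <string.h>
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "util/u_box.h"

static void *
example_map_level0(struct pipe_context *pipe, struct pipe_screen *screen,
                   struct pipe_resource **tex, struct pipe_transfer **xfer)
{
   struct pipe_resource templ;
   struct pipe_box box;

   memset(&templ, 0, sizeof(templ));
   templ.target = PIPE_TEXTURE_2D;
   templ.format = PIPE_FORMAT_A8_UNORM;
   templ.width0 = 32;
   templ.height0 = 32;
   templ.depth0 = 1;
   templ.array_size = 1;                 /* the newly required field */
   templ.bind = PIPE_BIND_SAMPLER_VIEW;
   *tex = screen->resource_create(screen, &templ);

   u_box_2d(0, 0, 32, 32, &box);
   /* the mip level is now passed directly, not via u_subresource() */
   *xfer = pipe->get_transfer(pipe, *tex, 0 /* level */,
                              PIPE_TRANSFER_WRITE, &box);
   return pipe->transfer_map(pipe, *xfer);
}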
+ * XXX but we never use instance_id_index?! */ for (i = 0; i < shader->base.info.num_inputs; i++) { if (shader->base.info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { @@ -133,9 +133,10 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, key = draw_llvm_make_variant_key(fpme->llvm, store); + /* Search shader's list of variants for the key */ li = first_elem(&shader->variants); - while(!at_end(&shader->variants, li)) { - if(memcmp(&li->base->key, key, shader->variant_key_size) == 0) { + while (!at_end(&shader->variants, li)) { + if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) { variant = li->base; break; } @@ -143,10 +144,16 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, } if (variant) { + /* found the variant, move to head of global list (for LRU) */ move_to_head(&fpme->llvm->vs_variants_list, &variant->list_item_global); } else { + /* Need to create new variant */ unsigned i; + + /* First check if we've created too many variants. If so, free + * 25% of the LRU to avoid using too much memory. + */ if (fpme->llvm->nr_variants >= DRAW_MAX_SHADER_VARIANTS) { /* * XXX: should we flush here ? @@ -422,7 +429,7 @@ draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw) { struct llvm_middle_end *fpme = 0; - if (!draw->engine) + if (!draw->llvm->gallivm->engine) return NULL; fpme = CALLOC_STRUCT( llvm_middle_end ); diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h index 3f66f962e11..75dba8c39a5 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h @@ -258,9 +258,10 @@ vsplit_segment_fan_linear(struct vsplit_frontend *vsplit, unsigned flags, boolean use_spoken = ((flags & DRAW_SPLIT_BEFORE) != 0); unsigned nr = 0, i; - assert(icount + !!use_spoken <= vsplit->segment_size); + assert(icount <= vsplit->segment_size); if (use_spoken) { + /* replace istart by i0 */ vsplit->fetch_elts[nr++] = i0; for (i = 1 ; i < icount; i++) vsplit->fetch_elts[nr++] = istart + i; diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index fb665b08fff..7caad6f0053 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -165,10 +165,10 @@ draw_delete_vertex_shader(struct draw_context *draw, { unsigned i; - for (i = 0; i < dvs->nr_varients; i++) - dvs->varient[i]->destroy( dvs->varient[i] ); + for (i = 0; i < dvs->nr_variants; i++) + dvs->variant[i]->destroy( dvs->variant[i] ); - dvs->nr_varients = 0; + dvs->nr_variants = 0; dvs->delete( dvs ); } @@ -225,40 +225,40 @@ draw_vs_destroy( struct draw_context *draw ) } -struct draw_vs_varient * -draw_vs_lookup_varient( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ) +struct draw_vs_variant * +draw_vs_lookup_variant( struct draw_vertex_shader *vs, + const struct draw_vs_variant_key *key ) { - struct draw_vs_varient *varient; + struct draw_vs_variant *variant; unsigned i; - /* Lookup existing varient: + /* Lookup existing variant: */ - for (i = 0; i < vs->nr_varients; i++) - if (draw_vs_varient_key_compare(key, &vs->varient[i]->key) == 0) - return vs->varient[i]; + for (i = 0; i < vs->nr_variants; i++) + if (draw_vs_variant_key_compare(key, &vs->variant[i]->key) == 0) + return vs->variant[i]; /* Else have to create a new one: */ - varient = vs->create_varient( vs, key ); - if (varient == NULL) + variant = vs->create_variant( vs, key ); + if (variant == NULL) return NULL; /* Add it to our list, could be 
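/*
 * The "free 25% of the LRU" comment in the llvm_middle_end_prepare() hunk
 * above refers to the global vs_variants_list: variants are moved to the
 * head on use, so the least recently used ones collect at the tail.  The
 * eviction presumably looks roughly like this, using the
 * util/u_simple_list.h helpers already used in this file (last_elem() and
 * the unlinking behaviour of draw_llvm_destroy_variant() are assumptions):
 */
if (fpme->llvm->nr_variants >= DRAW_MAX_SHADER_VARIANTS) {
   unsigned n;
   for (n = 0; n < DRAW_MAX_SHADER_VARIANTS / 4; n++) {
      struct draw_llvm_variant_list_item *lru =
         last_elem(&fpme->llvm->vs_variants_list);
      /* destroying a variant is expected to unlink both list items and
       * decrement nr_variants */
      draw_llvm_destroy_variant(lru->base);
   }
}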
smarter: */ - if (vs->nr_varients < Elements(vs->varient)) { - vs->varient[vs->nr_varients++] = varient; + if (vs->nr_variants < Elements(vs->variant)) { + vs->variant[vs->nr_variants++] = variant; } else { - vs->last_varient++; - vs->last_varient %= Elements(vs->varient); - vs->varient[vs->last_varient]->destroy(vs->varient[vs->last_varient]); - vs->varient[vs->last_varient] = varient; + vs->last_variant++; + vs->last_variant %= Elements(vs->variant); + vs->variant[vs->last_variant]->destroy(vs->variant[vs->last_variant]); + vs->variant[vs->last_variant] = variant; } /* Done */ - return varient; + return variant; } diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index f9a038788fb..bfb72d50efa 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -38,7 +38,7 @@ struct draw_context; struct pipe_shader_state; -struct draw_varient_input +struct draw_variant_input { enum pipe_format format; unsigned buffer; @@ -46,19 +46,19 @@ struct draw_varient_input unsigned instance_divisor; }; -struct draw_varient_output +struct draw_variant_output { enum pipe_format format; /* output format */ unsigned vs_output:8; /* which vertex shader output is this? */ unsigned offset:24; /* offset into output vertex */ }; -struct draw_varient_element { - struct draw_varient_input in; - struct draw_varient_output out; +struct draw_variant_element { + struct draw_variant_input in; + struct draw_variant_output out; }; -struct draw_vs_varient_key { +struct draw_vs_variant_key { unsigned output_stride; unsigned nr_elements:8; /* max2(nr_inputs, nr_outputs) */ unsigned nr_inputs:8; @@ -66,34 +66,34 @@ struct draw_vs_varient_key { unsigned viewport:1; unsigned clip:1; unsigned const_vbuffers:5; - struct draw_varient_element element[PIPE_MAX_ATTRIBS]; + struct draw_variant_element element[PIPE_MAX_ATTRIBS]; }; -struct draw_vs_varient; +struct draw_vs_variant; -struct draw_vs_varient { - struct draw_vs_varient_key key; +struct draw_vs_variant { + struct draw_vs_variant_key key; struct draw_vertex_shader *vs; - void (*set_buffer)( struct draw_vs_varient *, + void (*set_buffer)( struct draw_vs_variant *, unsigned i, const void *ptr, unsigned stride, unsigned max_stride ); - void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader, + void (PIPE_CDECL *run_linear)( struct draw_vs_variant *shader, unsigned start, unsigned count, void *output_buffer ); - void (PIPE_CDECL *run_elts)( struct draw_vs_varient *shader, + void (PIPE_CDECL *run_elts)( struct draw_vs_variant *shader, const unsigned *elts, unsigned count, void *output_buffer ); - void (*destroy)( struct draw_vs_varient * ); + void (*destroy)( struct draw_vs_variant * ); }; @@ -117,11 +117,11 @@ struct draw_vertex_shader { /* */ - struct draw_vs_varient *varient[16]; - unsigned nr_varients; - unsigned last_varient; - struct draw_vs_varient *(*create_varient)( struct draw_vertex_shader *shader, - const struct draw_vs_varient_key *key ); + struct draw_vs_variant *variant[16]; + unsigned nr_variants; + unsigned last_variant; + struct draw_vs_variant *(*create_variant)( struct draw_vertex_shader *shader, + const struct draw_vs_variant_key *key ); void (*prepare)( struct draw_vertex_shader *shader, @@ -144,9 +144,9 @@ struct draw_vertex_shader { }; -struct draw_vs_varient * -draw_vs_lookup_varient( struct draw_vertex_shader *base, - const struct draw_vs_varient_key *key ); +struct draw_vs_variant * +draw_vs_lookup_variant( struct draw_vertex_shader *base, + const struct draw_vs_variant_key 
*key ); /******************************************************************************** @@ -166,12 +166,12 @@ draw_create_vs_ppc(struct draw_context *draw, const struct pipe_shader_state *templ); -struct draw_vs_varient_key; +struct draw_vs_variant_key; struct draw_vertex_shader; -struct draw_vs_varient * -draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ); +struct draw_vs_variant * +draw_vs_create_variant_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_variant_key *key ); #if HAVE_LLVM struct draw_vertex_shader * @@ -181,7 +181,7 @@ draw_create_vs_llvm(struct draw_context *draw, /******************************************************************************** - * Helpers for vs implementations that don't do their own fetch/emit varients. + * Helpers for vs implementations that don't do their own fetch/emit variants. * Means these can be shared between shaders. */ struct translate; @@ -194,21 +194,21 @@ struct translate *draw_vs_get_fetch( struct draw_context *draw, struct translate *draw_vs_get_emit( struct draw_context *draw, struct translate_key *key ); -struct draw_vs_varient * -draw_vs_create_varient_generic( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ); +struct draw_vs_variant * +draw_vs_create_variant_generic( struct draw_vertex_shader *vs, + const struct draw_vs_variant_key *key ); -static INLINE int draw_vs_varient_keysize( const struct draw_vs_varient_key *key ) +static INLINE int draw_vs_variant_keysize( const struct draw_vs_variant_key *key ) { - return 2 * sizeof(int) + key->nr_elements * sizeof(struct draw_varient_element); + return 2 * sizeof(int) + key->nr_elements * sizeof(struct draw_variant_element); } -static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key *a, - const struct draw_vs_varient_key *b ) +static INLINE int draw_vs_variant_key_compare( const struct draw_vs_variant_key *a, + const struct draw_vs_variant_key *b ) { - int keysize = draw_vs_varient_keysize(a); + int keysize = draw_vs_variant_keysize(a); return memcmp(a, b, keysize); } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 19f49e34c8b..7b90dba0cd5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1918,7 +1918,7 @@ static void find_last_write_outputs( struct aos_compilation *cp ) #define ARG_OUTBUF 4 -static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, +static boolean build_vertex_program( struct draw_vs_variant_aos_sse *variant, boolean linear ) { struct tgsi_parse_context parse; @@ -1927,14 +1927,14 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, util_init_math(); - tgsi_parse_init( &parse, varient->base.vs->state.tokens ); + tgsi_parse_init( &parse, variant->base.vs->state.tokens ); memset(&cp, 0, sizeof(cp)); cp.insn_counter = 1; - cp.vaos = varient; + cp.vaos = variant; cp.have_sse2 = 1; - cp.func = &varient->func[ linear ? 0 : 1 ]; + cp.func = &variant->func[ linear ? 
0 : 1 ]; cp.tmp_EAX = x86_make_reg(file_REG32, reg_AX); cp.idx_EBX = x86_make_reg(file_REG32, reg_BX); @@ -2090,20 +2090,20 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, /** cast wrapper */ -static INLINE struct draw_vs_varient_aos_sse * -draw_vs_varient_aos_sse(struct draw_vs_varient *varient) +static INLINE struct draw_vs_variant_aos_sse * +draw_vs_variant_aos_sse(struct draw_vs_variant *variant) { - return (struct draw_vs_varient_aos_sse *) varient; + return (struct draw_vs_variant_aos_sse *) variant; } -static void vaos_set_buffer( struct draw_vs_varient *varient, +static void vaos_set_buffer( struct draw_vs_variant *variant, unsigned buf, const void *ptr, unsigned stride, unsigned max_stride) { - struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); + struct draw_vs_variant_aos_sse *vaos = draw_vs_variant_aos_sse(variant); if (buf < vaos->nr_vb) { vaos->buffer[buf].base_ptr = (char *)ptr; @@ -2115,12 +2115,12 @@ static void vaos_set_buffer( struct draw_vs_varient *varient, -static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, +static void PIPE_CDECL vaos_run_elts( struct draw_vs_variant *variant, const unsigned *elts, unsigned count, void *output_buffer ) { - struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); + struct draw_vs_variant_aos_sse *vaos = draw_vs_variant_aos_sse(variant); struct aos_machine *machine = vaos->draw->vs.aos_machine; unsigned i; @@ -2139,12 +2139,12 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, output_buffer ); } -static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, +static void PIPE_CDECL vaos_run_linear( struct draw_vs_variant *variant, unsigned start, unsigned count, void *output_buffer ) { - struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); + struct draw_vs_variant_aos_sse *vaos = draw_vs_variant_aos_sse(variant); struct aos_machine *machine = vaos->draw->vs.aos_machine; unsigned i; @@ -2171,9 +2171,9 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, -static void vaos_destroy( struct draw_vs_varient *varient ) +static void vaos_destroy( struct draw_vs_variant *variant ) { - struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); + struct draw_vs_variant_aos_sse *vaos = draw_vs_variant_aos_sse(variant); FREE( vaos->buffer ); @@ -2185,11 +2185,11 @@ static void vaos_destroy( struct draw_vs_varient *varient ) -static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ) +static struct draw_vs_variant *variant_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_variant_key *key ) { unsigned i; - struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse); + struct draw_vs_variant_aos_sse *vaos = CALLOC_STRUCT(draw_vs_variant_aos_sse); if (!vaos) goto fail; @@ -2249,17 +2249,17 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, } -struct draw_vs_varient * -draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ) +struct draw_vs_variant * +draw_vs_create_variant_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_variant_key *key ) { - struct draw_vs_varient *varient = varient_aos_sse( vs, key ); + struct draw_vs_variant *variant = variant_aos_sse( vs, key ); - if (varient == NULL) { - varient = draw_vs_create_varient_generic( vs, key ); + if (variant == NULL) { + variant = 
draw_vs_create_variant_generic( vs, key ); } - return varient; + return variant; } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 68e8295b5e1..55e63d8b9fa 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -98,9 +98,9 @@ struct aos_buffer { -/* This is the temporary storage used by all the aos_sse vs varients. +/* This is the temporary storage used by all the aos_sse vs variants. * Create one per context and reuse by passing a pointer in at - * vs_varient creation?? + * vs_variant creation?? */ struct aos_machine { float input [MAX_INPUTS ][4]; @@ -134,7 +134,7 @@ struct aos_machine { struct aos_compilation { struct x86_function *func; - struct draw_vs_varient_aos_sse *vaos; + struct draw_vs_variant_aos_sse *vaos; unsigned insn_counter; unsigned num_immediates; @@ -234,8 +234,8 @@ typedef void (PIPE_CDECL *vaos_run_linear_func)( struct aos_machine *, void *output_buffer); -struct draw_vs_varient_aos_sse { - struct draw_vs_varient base; +struct draw_vs_variant_aos_sse { + struct draw_vs_variant base; struct draw_context *draw; struct aos_buffer *buffer; diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 08608b480e0..c41d7c42a01 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -209,7 +209,7 @@ draw_create_vs_exec(struct draw_context *draw, vs->base.prepare = vs_exec_prepare; vs->base.run_linear = vs_exec_run_linear; vs->base.delete = vs_exec_delete; - vs->base.create_varient = draw_vs_create_varient_generic; + vs->base.create_variant = draw_vs_create_variant_generic; vs->machine = draw->vs.machine; return &vs->base; diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index fa9992db783..54a5574f73f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -65,19 +65,7 @@ static void vs_llvm_delete( struct draw_vertex_shader *dvs ) { struct llvm_vertex_shader *shader = llvm_vertex_shader(dvs); - struct pipe_fence_handle *fence = NULL; struct draw_llvm_variant_list_item *li; - struct pipe_context *pipe = dvs->draw->pipe; - - /* - * XXX: This might be not neccessary at all. 
- */ - pipe->flush(pipe, 0, &fence); - if (fence) { - pipe->screen->fence_finish(pipe->screen, fence, 0); - pipe->screen->fence_reference(pipe->screen, &fence, NULL); - } - li = first_elem(&shader->variants); while(!at_end(&shader->variants, li)) { @@ -119,7 +107,7 @@ draw_create_vs_llvm(struct draw_context *draw, vs->base.prepare = vs_llvm_prepare; vs->base.run_linear = vs_llvm_run_linear; vs->base.delete = vs_llvm_delete; - vs->base.create_varient = draw_vs_create_varient_generic; + vs->base.create_variant = draw_vs_create_variant_generic; make_empty_list(&vs->variants); diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index 5df84916c51..cf894bbe8af 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -187,10 +187,10 @@ draw_create_vs_ppc(struct draw_context *draw, vs->base.draw = draw; #if 0 if (1) - vs->base.create_varient = draw_vs_varient_aos_ppc; + vs->base.create_variant = draw_vs_variant_aos_ppc; else #endif - vs->base.create_varient = draw_vs_create_varient_generic; + vs->base.create_variant = draw_vs_create_variant_generic; vs->base.prepare = vs_ppc_prepare; vs->base.run_linear = vs_ppc_run_linear; vs->base.delete = vs_ppc_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 672efe25cca..d55b9b08070 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -172,9 +172,9 @@ draw_create_vs_sse(struct draw_context *draw, vs->base.draw = draw; if (1) - vs->base.create_varient = draw_vs_create_varient_aos_sse; + vs->base.create_variant = draw_vs_create_variant_aos_sse; else - vs->base.create_varient = draw_vs_create_varient_generic; + vs->base.create_variant = draw_vs_create_variant_generic; vs->base.prepare = vs_sse_prepare; vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index eacd1601877..d8f030f61eb 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -41,8 +41,8 @@ /* A first pass at incorporating vertex fetch/emit functionality into */ -struct draw_vs_varient_generic { - struct draw_vs_varient base; +struct draw_vs_variant_generic { + struct draw_vs_variant base; struct draw_vertex_shader *shader; struct draw_context *draw; @@ -63,13 +63,13 @@ struct draw_vs_varient_generic { -static void vsvg_set_buffer( struct draw_vs_varient *varient, +static void vsvg_set_buffer( struct draw_vs_variant *variant, unsigned buffer, const void *ptr, unsigned stride, unsigned max_index ) { - struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant; vsvg->fetch->set_buffer(vsvg->fetch, buffer, @@ -81,7 +81,7 @@ static void vsvg_set_buffer( struct draw_vs_varient *varient, /* Mainly for debug at this stage: */ -static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg, +static void do_rhw_viewport( struct draw_vs_variant_generic *vsvg, unsigned count, void *output_buffer ) { @@ -104,7 +104,7 @@ static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg, } } -static void do_viewport( struct draw_vs_varient_generic *vsvg, +static void do_viewport( struct draw_vs_variant_generic *vsvg, unsigned count, void *output_buffer ) { @@ -126,12 +126,12 @@ static void do_viewport( struct 
draw_vs_varient_generic *vsvg, } -static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, +static void PIPE_CDECL vsvg_run_elts( struct draw_vs_variant *variant, const unsigned *elts, unsigned count, void *output_buffer) { - struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant; unsigned temp_vertex_stride = vsvg->temp_vertex_stride; void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride ); @@ -193,12 +193,12 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, } -static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, +static void PIPE_CDECL vsvg_run_linear( struct draw_vs_variant *variant, unsigned start, unsigned count, void *output_buffer ) { - struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant; unsigned temp_vertex_stride = vsvg->temp_vertex_stride; void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride ); @@ -259,20 +259,20 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, -static void vsvg_destroy( struct draw_vs_varient *varient ) +static void vsvg_destroy( struct draw_vs_variant *variant ) { - FREE(varient); + FREE(variant); } -struct draw_vs_varient * -draw_vs_create_varient_generic( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ) +struct draw_vs_variant * +draw_vs_create_variant_generic( struct draw_vertex_shader *vs, + const struct draw_vs_variant_key *key ) { unsigned i; struct translate_key fetch, emit; - struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic ); + struct draw_vs_variant_generic *vsvg = CALLOC_STRUCT( draw_vs_variant_generic ); if (vsvg == NULL) return NULL; diff --git a/src/gallium/auxiliary/gallivm/lp_bld.h b/src/gallium/auxiliary/gallivm/lp_bld.h index 8103bc917fc..ee05c6ba01d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld.h +++ b/src/gallium/auxiliary/gallivm/lp_bld.h @@ -55,4 +55,33 @@ #endif +/** + * Redefine these LLVM entrypoints as invalid macros to make sure we + * don't accidentally use them. We need to use the functions which + * take an explicit LLVMContextRef parameter. 
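/*
 * The #define list that follows poisons every LLVM C-API entry point that
 * implicitly uses the global LLVMContext.  After including
 * gallivm/lp_bld.h, a leftover call such as
 *
 *    LLVMTypeRef t = LLVMInt32Type();   (expands to ILLEGAL_LLVM_FUNCTION)
 *
 * no longer compiles, which forces the context-aware form used throughout
 * the rest of this patch:
 *
 *    LLVMTypeRef t = LLVMInt32TypeInContext(gallivm->context);
 */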
+ */ +#define LLVMInt1Type ILLEGAL_LLVM_FUNCTION +#define LLVMInt8Type ILLEGAL_LLVM_FUNCTION +#define LLVMInt16Type ILLEGAL_LLVM_FUNCTION +#define LLVMInt32Type ILLEGAL_LLVM_FUNCTION +#define LLVMInt64Type ILLEGAL_LLVM_FUNCTION +#define LLVMIntType ILLEGAL_LLVM_FUNCTION +#define LLVMFloatType ILLEGAL_LLVM_FUNCTION +#define LLVMDoubleType ILLEGAL_LLVM_FUNCTION +#define LLVMX86FP80Type ILLEGAL_LLVM_FUNCTION +#define LLVMFP128Type ILLEGAL_LLVM_FUNCTION +#define LLVMPPCFP128Type ILLEGAL_LLVM_FUNCTION +#define LLVMStructType ILLEGAL_LLVM_FUNCTION +#define LLVMVoidType ILLEGAL_LLVM_FUNCTION +#define LLVMLabelType ILLEGAL_LLVM_FUNCTION +#define LLVMOpaqueType ILLEGAL_LLVM_FUNCTION +#define LLVMUnionType ILLEGAL_LLVM_FUNCTION +#define LLVMMDString ILLEGAL_LLVM_FUNCTION +#define LLVMMDNode ILLEGAL_LLVM_FUNCTION +#define LLVMConstString ILLEGAL_LLVM_FUNCTION +#define LLVMConstStruct ILLEGAL_LLVM_FUNCTION +#define LLVMAppendBasicBlock ILLEGAL_LLVM_FUNCTION +#define LLVMInsertBasicBlock ILLEGAL_LLVM_FUNCTION +#define LLVMCreateBuilder ILLEGAL_LLVM_FUNCTION + #endif /* LP_BLD_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index f9a12a41a1b..02b3bde7893 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -53,6 +53,7 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" +#include "lp_bld_init.h" #include "lp_bld_intr.h" #include "lp_bld_logic.h" #include "lp_bld_pack.h" @@ -74,6 +75,7 @@ lp_build_min_simple(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; const char *intrinsic = NULL; LLVMValueRef cond; @@ -107,7 +109,7 @@ lp_build_min_simple(struct lp_build_context *bld, } if(intrinsic) - return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); + return lp_build_intrinsic_binary(builder, intrinsic, lp_build_vec_type(bld->gallivm, bld->type), a, b); cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b); return lp_build_select(bld, cond, a, b); @@ -123,6 +125,7 @@ lp_build_max_simple(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; const char *intrinsic = NULL; LLVMValueRef cond; @@ -156,7 +159,7 @@ lp_build_max_simple(struct lp_build_context *bld, } if(intrinsic) - return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); + return lp_build_intrinsic_binary(builder, intrinsic, lp_build_vec_type(bld->gallivm, bld->type), a, b); cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b); return lp_build_select(bld, cond, a, b); @@ -170,6 +173,7 @@ LLVMValueRef lp_build_comp(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; assert(lp_check_value(type, a)); @@ -183,7 +187,7 @@ lp_build_comp(struct lp_build_context *bld, if(LLVMIsConstant(a)) return LLVMConstNot(a); else - return LLVMBuildNot(bld->builder, a, ""); + return LLVMBuildNot(builder, a, ""); } if(LLVMIsConstant(a)) @@ -193,9 +197,9 @@ lp_build_comp(struct lp_build_context *bld, return LLVMConstSub(bld->one, a); else if (type.floating) - return LLVMBuildFSub(bld->builder, bld->one, a, ""); + return LLVMBuildFSub(builder, bld->one, a, ""); else - return LLVMBuildSub(bld->builder, bld->one, a, ""); + return LLVMBuildSub(builder, bld->one, a, ""); } @@ -207,6 +211,7 @@ lp_build_add(struct 
lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; LLVMValueRef res; @@ -236,7 +241,7 @@ lp_build_add(struct lp_build_context *bld, } if(intrinsic) - return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); + return lp_build_intrinsic_binary(builder, intrinsic, lp_build_vec_type(bld->gallivm, bld->type), a, b); } if(LLVMIsConstant(a) && LLVMIsConstant(b)) @@ -246,9 +251,9 @@ lp_build_add(struct lp_build_context *bld, res = LLVMConstAdd(a, b); else if (type.floating) - res = LLVMBuildFAdd(bld->builder, a, b, ""); + res = LLVMBuildFAdd(builder, a, b, ""); else - res = LLVMBuildAdd(bld->builder, a, b, ""); + res = LLVMBuildAdd(builder, a, b, ""); /* clamp to ceiling of 1.0 */ if(bld->type.norm && (bld->type.floating || bld->type.fixed)) @@ -265,6 +270,7 @@ LLVMValueRef lp_build_sum_vector(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; LLVMValueRef index, res; unsigned i; @@ -277,19 +283,19 @@ lp_build_sum_vector(struct lp_build_context *bld, assert(!bld->type.norm); - index = LLVMConstInt(LLVMInt32Type(), 0, 0); - res = LLVMBuildExtractElement(bld->builder, a, index, ""); + index = lp_build_const_int32(bld->gallivm, 0); + res = LLVMBuildExtractElement(builder, a, index, ""); for (i = 1; i < type.length; i++) { - index = LLVMConstInt(LLVMInt32Type(), i, 0); + index = lp_build_const_int32(bld->gallivm, i); if (type.floating) - res = LLVMBuildFAdd(bld->builder, res, - LLVMBuildExtractElement(bld->builder, + res = LLVMBuildFAdd(builder, res, + LLVMBuildExtractElement(builder, a, index, ""), ""); else - res = LLVMBuildAdd(bld->builder, res, - LLVMBuildExtractElement(bld->builder, + res = LLVMBuildAdd(builder, res, + LLVMBuildExtractElement(builder, a, index, ""), ""); } @@ -306,6 +312,7 @@ lp_build_sub(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; LLVMValueRef res; @@ -335,7 +342,7 @@ lp_build_sub(struct lp_build_context *bld, } if(intrinsic) - return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); + return lp_build_intrinsic_binary(builder, intrinsic, lp_build_vec_type(bld->gallivm, bld->type), a, b); } if(LLVMIsConstant(a) && LLVMIsConstant(b)) @@ -345,9 +352,9 @@ lp_build_sub(struct lp_build_context *bld, res = LLVMConstSub(a, b); else if (type.floating) - res = LLVMBuildFSub(bld->builder, a, b, ""); + res = LLVMBuildFSub(builder, a, b, ""); else - res = LLVMBuildSub(bld->builder, a, b, ""); + res = LLVMBuildSub(builder, a, b, ""); if(bld->type.norm && (bld->type.floating || bld->type.fixed)) res = lp_build_max_simple(bld, res, bld->zero); @@ -398,10 +405,11 @@ lp_build_sub(struct lp_build_context *bld, * http://www.stereopsis.com/doubleblend.html */ static LLVMValueRef -lp_build_mul_u8n(LLVMBuilderRef builder, +lp_build_mul_u8n(struct gallivm_state *gallivm, struct lp_type i16_type, LLVMValueRef a, LLVMValueRef b) { + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef c8; LLVMValueRef ab; @@ -409,12 +417,12 @@ lp_build_mul_u8n(LLVMBuilderRef builder, assert(lp_check_value(i16_type, a)); assert(lp_check_value(i16_type, b)); - c8 = lp_build_const_int_vec(i16_type, 8); + c8 = lp_build_const_int_vec(gallivm, i16_type, 8); #if 0 /* a*b/255 ~= (a*(b + 1)) >> 256 */ - b = LLVMBuildAdd(builder, b, lp_build_const_int_vec(i16_type, 1), 
""); + b = LLVMBuildAdd(builder, b, lp_build_const_int_vec(gallium, i16_type, 1), ""); ab = LLVMBuildMul(builder, a, b, ""); #else @@ -422,7 +430,7 @@ lp_build_mul_u8n(LLVMBuilderRef builder, /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */ ab = LLVMBuildMul(builder, a, b, ""); ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), ""); - ab = LLVMBuildAdd(builder, ab, lp_build_const_int_vec(i16_type, 0x80), ""); + ab = LLVMBuildAdd(builder, ab, lp_build_const_int_vec(gallivm, i16_type, 0x80), ""); #endif @@ -440,6 +448,7 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; LLVMValueRef shift; LLVMValueRef res; @@ -463,14 +472,14 @@ lp_build_mul(struct lp_build_context *bld, struct lp_type i16_type = lp_wider_type(type); LLVMValueRef al, ah, bl, bh, abl, abh, ab; - lp_build_unpack2(bld->builder, type, i16_type, a, &al, &ah); - lp_build_unpack2(bld->builder, type, i16_type, b, &bl, &bh); + lp_build_unpack2(bld->gallivm, type, i16_type, a, &al, &ah); + lp_build_unpack2(bld->gallivm, type, i16_type, b, &bl, &bh); /* PMULLW, PSRLW, PADDW */ - abl = lp_build_mul_u8n(bld->builder, i16_type, al, bl); - abh = lp_build_mul_u8n(bld->builder, i16_type, ah, bh); + abl = lp_build_mul_u8n(bld->gallivm, i16_type, al, bl); + abh = lp_build_mul_u8n(bld->gallivm, i16_type, ah, bh); - ab = lp_build_pack2(bld->builder, i16_type, type, abl, abh); + ab = lp_build_pack2(bld->gallivm, i16_type, type, abl, abh); return ab; } @@ -480,7 +489,7 @@ lp_build_mul(struct lp_build_context *bld, } if(type.fixed) - shift = lp_build_const_int_vec(type, type.width/2); + shift = lp_build_const_int_vec(bld->gallivm, type, type.width/2); else shift = NULL; @@ -498,14 +507,14 @@ lp_build_mul(struct lp_build_context *bld, } else { if (type.floating) - res = LLVMBuildFMul(bld->builder, a, b, ""); + res = LLVMBuildFMul(builder, a, b, ""); else - res = LLVMBuildMul(bld->builder, a, b, ""); + res = LLVMBuildMul(builder, a, b, ""); if(shift) { if(type.sign) - res = LLVMBuildAShr(bld->builder, res, shift, ""); + res = LLVMBuildAShr(builder, res, shift, ""); else - res = LLVMBuildLShr(bld->builder, res, shift, ""); + res = LLVMBuildLShr(builder, res, shift, ""); } } @@ -521,6 +530,7 @@ lp_build_mul_imm(struct lp_build_context *bld, LLVMValueRef a, int b) { + LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef factor; assert(lp_check_value(bld->type, a)); @@ -550,20 +560,20 @@ lp_build_mul_imm(struct lp_build_context *bld, * for Inf and NaN. 
*/ unsigned mantissa = lp_mantissa(bld->type); - factor = lp_build_const_int_vec(bld->type, (unsigned long long)shift << mantissa); - a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), ""); - a = LLVMBuildAdd(bld->builder, a, factor, ""); - a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), ""); + factor = lp_build_const_int_vec(bld->gallivm, bld->type, (unsigned long long)shift << mantissa); + a = LLVMBuildBitCast(builder, a, lp_build_int_vec_type(bld->type), ""); + a = LLVMBuildAdd(builder, a, factor, ""); + a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, bld->type), ""); return a; #endif } else { - factor = lp_build_const_vec(bld->type, shift); - return LLVMBuildShl(bld->builder, a, factor, ""); + factor = lp_build_const_vec(bld->gallivm, bld->type, shift); + return LLVMBuildShl(builder, a, factor, ""); } } - factor = lp_build_const_vec(bld->type, (double)b); + factor = lp_build_const_vec(bld->gallivm, bld->type, (double)b); return lp_build_mul(bld, a, factor); } @@ -576,6 +586,7 @@ lp_build_div(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; assert(lp_check_value(type, a)); @@ -605,11 +616,11 @@ lp_build_div(struct lp_build_context *bld, return lp_build_mul(bld, a, lp_build_rcp(bld, b)); if (type.floating) - return LLVMBuildFDiv(bld->builder, a, b, ""); + return LLVMBuildFDiv(builder, a, b, ""); else if (type.sign) - return LLVMBuildSDiv(bld->builder, a, b, ""); + return LLVMBuildSDiv(builder, a, b, ""); else - return LLVMBuildUDiv(bld->builder, a, b, ""); + return LLVMBuildUDiv(builder, a, b, ""); } @@ -624,6 +635,7 @@ lp_build_lerp_simple(struct lp_build_context *bld, LLVMValueRef v0, LLVMValueRef v1) { + LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef delta; LLVMValueRef res; @@ -642,7 +654,7 @@ lp_build_lerp_simple(struct lp_build_context *bld, * but it will be wrong for other uses. Basically we need a more * powerful lp_type, capable of further distinguishing the values * interpretation from the value storage. 
*/ - res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(bld->type, (1 << bld->type.width/2) - 1), ""); + res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(bld->gallivm, bld->type, (1 << bld->type.width/2) - 1), ""); } return res; @@ -658,6 +670,7 @@ lp_build_lerp(struct lp_build_context *bld, LLVMValueRef v0, LLVMValueRef v1) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; LLVMValueRef res; @@ -683,22 +696,22 @@ lp_build_lerp(struct lp_build_context *bld, wide_type.width = type.width*2; wide_type.length = type.length/2; - lp_build_context_init(&wide_bld, bld->builder, wide_type); + lp_build_context_init(&wide_bld, bld->gallivm, wide_type); - lp_build_unpack2(bld->builder, type, wide_type, x, &xl, &xh); - lp_build_unpack2(bld->builder, type, wide_type, v0, &v0l, &v0h); - lp_build_unpack2(bld->builder, type, wide_type, v1, &v1l, &v1h); + lp_build_unpack2(bld->gallivm, type, wide_type, x, &xl, &xh); + lp_build_unpack2(bld->gallivm, type, wide_type, v0, &v0l, &v0h); + lp_build_unpack2(bld->gallivm, type, wide_type, v1, &v1l, &v1h); /* * Scale x from [0, 255] to [0, 256] */ - shift = lp_build_const_int_vec(wide_type, type.width - 1); + shift = lp_build_const_int_vec(bld->gallivm, wide_type, type.width - 1); xl = lp_build_add(&wide_bld, xl, - LLVMBuildAShr(bld->builder, xl, shift, "")); + LLVMBuildAShr(builder, xl, shift, "")); xh = lp_build_add(&wide_bld, xh, - LLVMBuildAShr(bld->builder, xh, shift, "")); + LLVMBuildAShr(builder, xh, shift, "")); /* * Lerp both halves. @@ -707,7 +720,7 @@ lp_build_lerp(struct lp_build_context *bld, resl = lp_build_lerp_simple(&wide_bld, xl, v0l, v1l); resh = lp_build_lerp_simple(&wide_bld, xh, v0h, v1h); - res = lp_build_pack2(bld->builder, wide_type, type, resl, resh); + res = lp_build_pack2(bld->gallivm, wide_type, type, resl, resh); } else { res = lp_build_lerp_simple(bld, x, v0, v1); } @@ -820,8 +833,9 @@ LLVMValueRef lp_build_abs(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; - LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); assert(lp_check_value(type, a)); @@ -830,27 +844,27 @@ lp_build_abs(struct lp_build_context *bld, if(type.floating) { /* Mask out the sign bit */ - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); unsigned long long absMask = ~(1ULL << (type.width - 1)); - LLVMValueRef mask = lp_build_const_int_vec(type, ((unsigned long long) absMask)); - a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - a = LLVMBuildAnd(bld->builder, a, mask, ""); - a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); + LLVMValueRef mask = lp_build_const_int_vec(bld->gallivm, type, ((unsigned long long) absMask)); + a = LLVMBuildBitCast(builder, a, int_vec_type, ""); + a = LLVMBuildAnd(builder, a, mask, ""); + a = LLVMBuildBitCast(builder, a, vec_type, ""); return a; } if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) { switch(type.width) { case 8: - return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a); + return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a); case 16: - return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a); + return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a); case 32: - return lp_build_intrinsic_unary(bld->builder, 
"llvm.x86.ssse3.pabs.d.128", vec_type, a); + return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a); } } - return lp_build_max(bld, a, LLVMBuildNeg(bld->builder, a, "")); + return lp_build_max(bld, a, LLVMBuildNeg(builder, a, "")); } @@ -858,14 +872,16 @@ LLVMValueRef lp_build_negate(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; + assert(lp_check_value(bld->type, a)); #if HAVE_LLVM >= 0x0207 if (bld->type.floating) - a = LLVMBuildFNeg(bld->builder, a, ""); + a = LLVMBuildFNeg(builder, a, ""); else #endif - a = LLVMBuildNeg(bld->builder, a, ""); + a = LLVMBuildNeg(builder, a, ""); return a; } @@ -876,6 +892,7 @@ LLVMValueRef lp_build_sgn(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; LLVMValueRef cond; LLVMValueRef res; @@ -895,20 +912,20 @@ lp_build_sgn(struct lp_build_context *bld, LLVMValueRef one; unsigned long long maskBit = (unsigned long long)1 << (type.width - 1); - int_type = lp_build_int_vec_type(type); - vec_type = lp_build_vec_type(type); - mask = lp_build_const_int_vec(type, maskBit); + int_type = lp_build_int_vec_type(bld->gallivm, type); + vec_type = lp_build_vec_type(bld->gallivm, type); + mask = lp_build_const_int_vec(bld->gallivm, type, maskBit); /* Take the sign bit and add it to 1 constant */ - sign = LLVMBuildBitCast(bld->builder, a, int_type, ""); - sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + sign = LLVMBuildBitCast(builder, a, int_type, ""); + sign = LLVMBuildAnd(builder, sign, mask, ""); one = LLVMConstBitCast(bld->one, int_type); - res = LLVMBuildOr(bld->builder, sign, one, ""); - res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + res = LLVMBuildOr(builder, sign, one, ""); + res = LLVMBuildBitCast(builder, res, vec_type, ""); } else { - LLVMValueRef minus_one = lp_build_const_vec(type, -1.0); + LLVMValueRef minus_one = lp_build_const_vec(bld->gallivm, type, -1.0); cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero); res = lp_build_select(bld, cond, bld->one, minus_one); } @@ -931,11 +948,12 @@ LLVMValueRef lp_build_set_sign(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef sign) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMValueRef shift = lp_build_const_int_vec(type, type.width - 1); - LLVMValueRef mask = lp_build_const_int_vec(type, + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); + LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); + LLVMValueRef shift = lp_build_const_int_vec(bld->gallivm, type, type.width - 1); + LLVMValueRef mask = lp_build_const_int_vec(bld->gallivm, type, ~((unsigned long long) 1 << (type.width - 1))); LLVMValueRef val, res; @@ -943,15 +961,15 @@ lp_build_set_sign(struct lp_build_context *bld, assert(lp_check_value(type, a)); /* val = reinterpret_cast<int>(a) */ - val = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + val = LLVMBuildBitCast(builder, a, int_vec_type, ""); /* val = val & mask */ - val = LLVMBuildAnd(bld->builder, val, mask, ""); + val = LLVMBuildAnd(builder, val, mask, ""); /* sign = sign << shift */ - sign = LLVMBuildShl(bld->builder, sign, shift, ""); + sign = LLVMBuildShl(builder, sign, shift, ""); /* res = val | sign */ - res = LLVMBuildOr(bld->builder, val, sign, ""); + res = LLVMBuildOr(builder, val, sign, ""); /* res = 
reinterpret_cast<float>(res) */ - res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + res = LLVMBuildBitCast(builder, res, vec_type, ""); return res; } @@ -964,12 +982,13 @@ LLVMValueRef lp_build_int_to_float(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; - LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); assert(type.floating); - return LLVMBuildSIToFP(bld->builder, a, vec_type, ""); + return LLVMBuildSIToFP(builder, a, vec_type, ""); } @@ -994,8 +1013,9 @@ lp_build_round_sse41(struct lp_build_context *bld, LLVMValueRef a, enum lp_build_round_sse41_mode mode) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; - LLVMTypeRef i32t = LLVMInt32Type(); + LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); const char *intrinsic; LLVMValueRef res; @@ -1027,13 +1047,13 @@ lp_build_round_sse41(struct lp_build_context *bld, undef = LLVMGetUndef(vec_type); args[0] = undef; - args[1] = LLVMBuildInsertElement(bld->builder, undef, a, index0, ""); + args[1] = LLVMBuildInsertElement(builder, undef, a, index0, ""); args[2] = LLVMConstInt(i32t, mode, 0); - res = lp_build_intrinsic(bld->builder, intrinsic, + res = lp_build_intrinsic(builder, intrinsic, vec_type, args, Elements(args)); - res = LLVMBuildExtractElement(bld->builder, res, index0, ""); + res = LLVMBuildExtractElement(builder, res, index0, ""); } else { assert(type.width*type.length == 128); @@ -1050,7 +1070,7 @@ lp_build_round_sse41(struct lp_build_context *bld, return bld->undef; } - res = lp_build_intrinsic_binary(bld->builder, intrinsic, + res = lp_build_intrinsic_binary(builder, intrinsic, bld->vec_type, a, LLVMConstInt(i32t, mode, 0)); } @@ -1063,9 +1083,10 @@ static INLINE LLVMValueRef lp_build_iround_nearest_sse2(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; - LLVMTypeRef i32t = LLVMInt32Type(); - LLVMTypeRef ret_type = lp_build_int_vec_type(type); + LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); + LLVMTypeRef ret_type = lp_build_int_vec_type(bld->gallivm, type); const char *intrinsic; LLVMValueRef res; @@ -1089,9 +1110,9 @@ lp_build_iround_nearest_sse2(struct lp_build_context *bld, undef = LLVMGetUndef(vec_type); - arg = LLVMBuildInsertElement(bld->builder, undef, a, index0, ""); + arg = LLVMBuildInsertElement(builder, undef, a, index0, ""); - res = lp_build_intrinsic_unary(bld->builder, intrinsic, + res = lp_build_intrinsic_unary(builder, intrinsic, ret_type, arg); } else { @@ -1099,7 +1120,7 @@ lp_build_iround_nearest_sse2(struct lp_build_context *bld, intrinsic = "llvm.x86.sse2.cvtps2dq"; - res = lp_build_intrinsic_unary(bld->builder, intrinsic, + res = lp_build_intrinsic_unary(builder, intrinsic, ret_type, a); } @@ -1116,6 +1137,7 @@ LLVMValueRef lp_build_trunc(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; assert(type.floating); @@ -1126,11 +1148,11 @@ lp_build_trunc(struct lp_build_context *bld, return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); } else { - LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); LLVMValueRef res; 
- res = LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); - res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + res = LLVMBuildFPToSI(builder, a, int_vec_type, ""); + res = LLVMBuildSIToFP(builder, res, vec_type, ""); return res; } } @@ -1146,6 +1168,7 @@ LLVMValueRef lp_build_round(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; assert(type.floating); @@ -1156,10 +1179,10 @@ lp_build_round(struct lp_build_context *bld, return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); } else { - LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); LLVMValueRef res; res = lp_build_iround(bld, a); - res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + res = LLVMBuildSIToFP(builder, res, vec_type, ""); return res; } } @@ -1174,6 +1197,7 @@ LLVMValueRef lp_build_floor(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; assert(type.floating); @@ -1184,10 +1208,10 @@ lp_build_floor(struct lp_build_context *bld, return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); } else { - LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); LLVMValueRef res; res = lp_build_ifloor(bld, a); - res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + res = LLVMBuildSIToFP(builder, res, vec_type, ""); return res; } } @@ -1202,6 +1226,7 @@ LLVMValueRef lp_build_ceil(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; assert(type.floating); @@ -1212,10 +1237,10 @@ lp_build_ceil(struct lp_build_context *bld, return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); } else { - LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); LLVMValueRef res; res = lp_build_iceil(bld, a); - res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + res = LLVMBuildSIToFP(builder, res, vec_type, ""); return res; } } @@ -1243,13 +1268,14 @@ LLVMValueRef lp_build_itrunc(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); assert(type.floating); assert(lp_check_value(type, a)); - return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); + return LLVMBuildFPToSI(builder, a, int_vec_type, ""); } @@ -1263,6 +1289,7 @@ LLVMValueRef lp_build_iround(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; LLVMTypeRef int_vec_type = bld->int_vec_type; LLVMValueRef res; @@ -1282,27 +1309,28 @@ lp_build_iround(struct lp_build_context *bld, else { LLVMValueRef half; - half = lp_build_const_vec(type, 0.5); + half = lp_build_const_vec(bld->gallivm, type, 0.5); if (type.sign) { LLVMTypeRef vec_type = bld->vec_type; - LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef mask = lp_build_const_int_vec(bld->gallivm, type, + (unsigned long long)1 << (type.width - 1)); LLVMValueRef sign; /* get sign bit */ - sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + sign = LLVMBuildBitCast(builder, 
a, int_vec_type, ""); + sign = LLVMBuildAnd(builder, sign, mask, ""); /* sign * 0.5 */ - half = LLVMBuildBitCast(bld->builder, half, int_vec_type, ""); - half = LLVMBuildOr(bld->builder, sign, half, ""); - half = LLVMBuildBitCast(bld->builder, half, vec_type, ""); + half = LLVMBuildBitCast(builder, half, int_vec_type, ""); + half = LLVMBuildOr(builder, sign, half, ""); + half = LLVMBuildBitCast(builder, half, vec_type, ""); } - res = LLVMBuildFAdd(bld->builder, a, half, ""); + res = LLVMBuildFAdd(builder, a, half, ""); } - res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); + res = LLVMBuildFPToSI(builder, res, int_vec_type, ""); return res; } @@ -1317,6 +1345,7 @@ LLVMValueRef lp_build_ifloor(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; LLVMTypeRef int_vec_type = bld->int_vec_type; LLVMValueRef res; @@ -1335,29 +1364,34 @@ lp_build_ifloor(struct lp_build_context *bld, /* Take the sign bit and add it to 1 constant */ LLVMTypeRef vec_type = bld->vec_type; unsigned mantissa = lp_mantissa(type); - LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef mask = lp_build_const_int_vec(bld->gallivm, type, + (unsigned long long)1 << (type.width - 1)); LLVMValueRef sign; LLVMValueRef offset; /* sign = a < 0 ? ~0 : 0 */ - sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign"); + sign = LLVMBuildBitCast(builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(builder, sign, mask, ""); + sign = LLVMBuildAShr(builder, sign, + lp_build_const_int_vec(bld->gallivm, type, + type.width - 1), + "ifloor.sign"); /* offset = -0.99999(9)f */ - offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); + offset = lp_build_const_vec(bld->gallivm, type, + -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); offset = LLVMConstBitCast(offset, int_vec_type); /* offset = a < 0 ? 
offset : 0.0f */ - offset = LLVMBuildAnd(bld->builder, offset, sign, ""); - offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset"); + offset = LLVMBuildAnd(builder, offset, sign, ""); + offset = LLVMBuildBitCast(builder, offset, vec_type, "ifloor.offset"); - res = LLVMBuildFAdd(bld->builder, res, offset, "ifloor.res"); + res = LLVMBuildFAdd(builder, res, offset, "ifloor.res"); } } /* round to nearest (toward zero) */ - res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "ifloor.res"); + res = LLVMBuildFPToSI(builder, res, int_vec_type, "ifloor.res"); return res; } @@ -1372,6 +1406,7 @@ LLVMValueRef lp_build_iceil(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; LLVMTypeRef int_vec_type = bld->int_vec_type; LLVMValueRef res; @@ -1389,29 +1424,34 @@ lp_build_iceil(struct lp_build_context *bld, LLVMValueRef offset; /* offset = 0.99999(9)f */ - offset = lp_build_const_vec(type, (double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); + offset = lp_build_const_vec(bld->gallivm, type, + (double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); if (type.sign) { - LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef mask = lp_build_const_int_vec(bld->gallivm, type, + (unsigned long long)1 << (type.width - 1)); LLVMValueRef sign; /* sign = a < 0 ? 0 : ~0 */ - sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign"); - sign = LLVMBuildNot(bld->builder, sign, "iceil.not"); + sign = LLVMBuildBitCast(builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(builder, sign, mask, ""); + sign = LLVMBuildAShr(builder, sign, + lp_build_const_int_vec(bld->gallivm, type, + type.width - 1), + "iceil.sign"); + sign = LLVMBuildNot(builder, sign, "iceil.not"); /* offset = a < 0 ? 
0.0 : offset */ offset = LLVMConstBitCast(offset, int_vec_type); - offset = LLVMBuildAnd(bld->builder, offset, sign, ""); - offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset"); + offset = LLVMBuildAnd(builder, offset, sign, ""); + offset = LLVMBuildBitCast(builder, offset, vec_type, "iceil.offset"); } - res = LLVMBuildFAdd(bld->builder, a, offset, "iceil.res"); + res = LLVMBuildFAdd(builder, a, offset, "iceil.res"); } /* round to nearest (toward zero) */ - res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "iceil.res"); + res = LLVMBuildFPToSI(builder, res, int_vec_type, "iceil.res"); return res; } @@ -1429,6 +1469,7 @@ lp_build_ifloor_fract(struct lp_build_context *bld, LLVMValueRef *out_ipart, LLVMValueRef *out_fpart) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; LLVMValueRef ipart; @@ -1442,8 +1483,8 @@ lp_build_ifloor_fract(struct lp_build_context *bld, */ ipart = lp_build_floor(bld, a); - *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart"); - *out_ipart = LLVMBuildFPToSI(bld->builder, ipart, bld->int_vec_type, "ipart"); + *out_fpart = LLVMBuildFSub(builder, a, ipart, "fpart"); + *out_ipart = LLVMBuildFPToSI(builder, ipart, bld->int_vec_type, "ipart"); } else { /* @@ -1451,8 +1492,8 @@ lp_build_ifloor_fract(struct lp_build_context *bld, */ *out_ipart = lp_build_ifloor(bld, a); - ipart = LLVMBuildSIToFP(bld->builder, *out_ipart, bld->vec_type, "ipart"); - *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart"); + ipart = LLVMBuildSIToFP(builder, *out_ipart, bld->vec_type, "ipart"); + *out_fpart = LLVMBuildFSub(builder, a, ipart, "fpart"); } } @@ -1461,8 +1502,9 @@ LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; - LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); char intrinsic[32]; assert(lp_check_value(type, a)); @@ -1473,7 +1515,7 @@ lp_build_sqrt(struct lp_build_context *bld, assert(type.floating); util_snprintf(intrinsic, sizeof intrinsic, "llvm.sqrt.v%uf%u", type.length, type.width); - return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a); + return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a); } @@ -1496,12 +1538,13 @@ lp_build_rcp_refine(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef rcp_a) { - LLVMValueRef two = lp_build_const_vec(bld->type, 2.0); + LLVMBuilderRef builder = bld->gallivm->builder; + LLVMValueRef two = lp_build_const_vec(bld->gallivm, bld->type, 2.0); LLVMValueRef res; - res = LLVMBuildFMul(bld->builder, a, rcp_a, ""); - res = LLVMBuildFSub(bld->builder, two, res, ""); - res = LLVMBuildFMul(bld->builder, rcp_a, res, ""); + res = LLVMBuildFMul(builder, a, rcp_a, ""); + res = LLVMBuildFSub(builder, two, res, ""); + res = LLVMBuildFMul(builder, rcp_a, res, ""); return res; } @@ -1511,6 +1554,7 @@ LLVMValueRef lp_build_rcp(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; assert(lp_check_value(type, a)); @@ -1545,7 +1589,7 @@ lp_build_rcp(struct lp_build_context *bld, LLVMValueRef res; unsigned i; - res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", bld->vec_type, a); + res = lp_build_intrinsic_unary(builder, "llvm.x86.sse.rcp.ps", bld->vec_type, a); for (i = 0; i < num_iterations; ++i) { res = lp_build_rcp_refine(bld, a, res); @@ -1554,7 +1598,7 @@ lp_build_rcp(struct 
lp_build_context *bld, return res; } - return LLVMBuildFDiv(bld->builder, bld->one, a, ""); + return LLVMBuildFDiv(builder, bld->one, a, ""); } @@ -1571,15 +1615,16 @@ lp_build_rsqrt_refine(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef rsqrt_a) { - LLVMValueRef half = lp_build_const_vec(bld->type, 0.5); - LLVMValueRef three = lp_build_const_vec(bld->type, 3.0); + LLVMBuilderRef builder = bld->gallivm->builder; + LLVMValueRef half = lp_build_const_vec(bld->gallivm, bld->type, 0.5); + LLVMValueRef three = lp_build_const_vec(bld->gallivm, bld->type, 3.0); LLVMValueRef res; - res = LLVMBuildFMul(bld->builder, rsqrt_a, rsqrt_a, ""); - res = LLVMBuildFMul(bld->builder, a, res, ""); - res = LLVMBuildFSub(bld->builder, three, res, ""); - res = LLVMBuildFMul(bld->builder, rsqrt_a, res, ""); - res = LLVMBuildFMul(bld->builder, half, res, ""); + res = LLVMBuildFMul(builder, rsqrt_a, rsqrt_a, ""); + res = LLVMBuildFMul(builder, a, res, ""); + res = LLVMBuildFSub(builder, three, res, ""); + res = LLVMBuildFMul(builder, rsqrt_a, res, ""); + res = LLVMBuildFMul(builder, half, res, ""); return res; } @@ -1592,6 +1637,7 @@ LLVMValueRef lp_build_rsqrt(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; assert(lp_check_value(type, a)); @@ -1603,7 +1649,7 @@ lp_build_rsqrt(struct lp_build_context *bld, LLVMValueRef res; unsigned i; - res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", bld->vec_type, a); + res = lp_build_intrinsic_unary(builder, "llvm.x86.sse.rsqrt.ps", bld->vec_type, a); for (i = 0; i < num_iterations; ++i) { res = lp_build_rsqrt_refine(bld, a, res); @@ -1617,17 +1663,17 @@ lp_build_rsqrt(struct lp_build_context *bld, static inline LLVMValueRef -lp_build_const_v4si(unsigned long value) +lp_build_const_v4si(struct gallivm_state *gallivm, unsigned long value) { - LLVMValueRef element = LLVMConstInt(LLVMInt32Type(), value, 0); + LLVMValueRef element = lp_build_const_int32(gallivm, value); LLVMValueRef elements[4] = { element, element, element, element }; return LLVMConstVector(elements, 4); } static inline LLVMValueRef -lp_build_const_v4sf(float value) +lp_build_const_v4sf(struct gallivm_state *gallivm, float value) { - LLVMValueRef element = LLVMConstReal(LLVMFloatType(), value); + LLVMValueRef element = lp_build_const_float(gallivm, value); LLVMValueRef elements[4] = { element, element, element, element }; return LLVMConstVector(elements, 4); } @@ -1640,17 +1686,19 @@ LLVMValueRef lp_build_sin(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; + struct gallivm_state *gallivm = bld->gallivm; struct lp_type int_type = lp_int_type(bld->type); - LLVMBuilderRef b = bld->builder; - LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatType(), 4); - LLVMTypeRef v4si = LLVMVectorType(LLVMInt32Type(), 4); + LLVMBuilderRef b = builder; + LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatTypeInContext(bld->gallivm->context), 4); + LLVMTypeRef v4si = LLVMVectorType(LLVMInt32TypeInContext(bld->gallivm->context), 4); /* * take the absolute value, * x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask); */ - LLVMValueRef inv_sig_mask = lp_build_const_v4si(~0x80000000); + LLVMValueRef inv_sig_mask = lp_build_const_v4si(bld->gallivm, ~0x80000000); LLVMValueRef a_v4si = LLVMBuildBitCast(b, a, v4si, "a_v4si"); LLVMValueRef absi = LLVMBuildAnd(b, a_v4si, inv_sig_mask, "absi"); @@ -1660,7 +1708,7 @@ lp_build_sin(struct lp_build_context *bld, * extract the sign bit (upper one) * 
sign_bit = _mm_and_ps(sign_bit, *(v4sf*)_ps_sign_mask); */ - LLVMValueRef sig_mask = lp_build_const_v4si(0x80000000); + LLVMValueRef sig_mask = lp_build_const_v4si(bld->gallivm, 0x80000000); LLVMValueRef sign_bit_i = LLVMBuildAnd(b, a_v4si, sig_mask, "sign_bit_i"); /* @@ -1668,7 +1716,7 @@ lp_build_sin(struct lp_build_context *bld, * y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI); */ - LLVMValueRef FOPi = lp_build_const_v4sf(1.27323954473516); + LLVMValueRef FOPi = lp_build_const_v4sf(gallivm, 1.27323954473516); LLVMValueRef scale_y = LLVMBuildFMul(b, x_abs, FOPi, "scale_y"); /* @@ -1683,12 +1731,12 @@ lp_build_sin(struct lp_build_context *bld, * emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1); */ - LLVMValueRef all_one = lp_build_const_v4si(1); + LLVMValueRef all_one = lp_build_const_v4si(bld->gallivm, 1); LLVMValueRef emm2_add = LLVMBuildAdd(b, emm2_i, all_one, "emm2_add"); /* * emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1); */ - LLVMValueRef inv_one = lp_build_const_v4si(~1); + LLVMValueRef inv_one = lp_build_const_v4si(bld->gallivm, ~1); LLVMValueRef emm2_and = LLVMBuildAnd(b, emm2_add, inv_one, "emm2_and"); /* @@ -1699,13 +1747,13 @@ lp_build_sin(struct lp_build_context *bld, /* get the swap sign flag * emm0 = _mm_and_si128(emm2, *(v4si*)_pi32_4); */ - LLVMValueRef pi32_4 = lp_build_const_v4si(4); + LLVMValueRef pi32_4 = lp_build_const_v4si(bld->gallivm, 4); LLVMValueRef emm0_and = LLVMBuildAnd(b, emm2_add, pi32_4, "emm0_and"); /* * emm2 = _mm_slli_epi32(emm0, 29); */ - LLVMValueRef const_29 = lp_build_const_v4si(29); + LLVMValueRef const_29 = lp_build_const_v4si(bld->gallivm, 29); LLVMValueRef swap_sign_bit = LLVMBuildShl(b, emm0_and, const_29, "swap_sign_bit"); /* @@ -1718,10 +1766,11 @@ lp_build_sin(struct lp_build_context *bld, * emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128()); */ - LLVMValueRef pi32_2 = lp_build_const_v4si(2); + LLVMValueRef pi32_2 = lp_build_const_v4si(bld->gallivm, 2); LLVMValueRef emm2_3 = LLVMBuildAnd(b, emm2_and, pi32_2, "emm2_3"); - LLVMValueRef poly_mask = lp_build_compare(b, int_type, PIPE_FUNC_EQUAL, - emm2_3, lp_build_const_v4si(0)); + LLVMValueRef poly_mask = lp_build_compare(bld->gallivm, + int_type, PIPE_FUNC_EQUAL, + emm2_3, lp_build_const_v4si(bld->gallivm, 0)); /* * sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit); */ @@ -1732,9 +1781,9 @@ lp_build_sin(struct lp_build_context *bld, * _PS_CONST(minus_cephes_DP2, -2.4187564849853515625e-4); * _PS_CONST(minus_cephes_DP3, -3.77489497744594108e-8); */ - LLVMValueRef DP1 = lp_build_const_v4sf(-0.78515625); - LLVMValueRef DP2 = lp_build_const_v4sf(-2.4187564849853515625e-4); - LLVMValueRef DP3 = lp_build_const_v4sf(-3.77489497744594108e-8); + LLVMValueRef DP1 = lp_build_const_v4sf(gallivm, -0.78515625); + LLVMValueRef DP2 = lp_build_const_v4sf(gallivm, -2.4187564849853515625e-4); + LLVMValueRef DP3 = lp_build_const_v4sf(gallivm, -3.77489497744594108e-8); /* * The magic pass: "Extended precision modular arithmetic" @@ -1769,9 +1818,9 @@ lp_build_sin(struct lp_build_context *bld, * _PS_CONST(coscof_p1, -1.388731625493765E-003); * _PS_CONST(coscof_p2, 4.166664568298827E-002); */ - LLVMValueRef coscof_p0 = lp_build_const_v4sf(2.443315711809948E-005); - LLVMValueRef coscof_p1 = lp_build_const_v4sf(-1.388731625493765E-003); - LLVMValueRef coscof_p2 = lp_build_const_v4sf(4.166664568298827E-002); + LLVMValueRef coscof_p0 = lp_build_const_v4sf(gallivm, 2.443315711809948E-005); + LLVMValueRef coscof_p1 = lp_build_const_v4sf(gallivm, -1.388731625493765E-003); + LLVMValueRef coscof_p2 = lp_build_const_v4sf(gallivm, 
4.166664568298827E-002); /* * y = *(v4sf*)_ps_coscof_p0; @@ -1790,10 +1839,10 @@ lp_build_sin(struct lp_build_context *bld, * y = _mm_sub_ps(y, tmp); * y = _mm_add_ps(y, *(v4sf*)_ps_1); */ - LLVMValueRef half = lp_build_const_v4sf(0.5); + LLVMValueRef half = lp_build_const_v4sf(gallivm, 0.5); LLVMValueRef tmp = LLVMBuildFMul(b, z, half, "tmp"); LLVMValueRef y_9 = LLVMBuildFSub(b, y_8, tmp, "y_8"); - LLVMValueRef one = lp_build_const_v4sf(1.0); + LLVMValueRef one = lp_build_const_v4sf(gallivm, 1.0); LLVMValueRef y_10 = LLVMBuildFAdd(b, y_9, one, "y_9"); /* @@ -1801,9 +1850,9 @@ lp_build_sin(struct lp_build_context *bld, * _PS_CONST(sincof_p1, 8.3321608736E-3); * _PS_CONST(sincof_p2, -1.6666654611E-1); */ - LLVMValueRef sincof_p0 = lp_build_const_v4sf(-1.9515295891E-4); - LLVMValueRef sincof_p1 = lp_build_const_v4sf(8.3321608736E-3); - LLVMValueRef sincof_p2 = lp_build_const_v4sf(-1.6666654611E-1); + LLVMValueRef sincof_p0 = lp_build_const_v4sf(gallivm, -1.9515295891E-4); + LLVMValueRef sincof_p1 = lp_build_const_v4sf(gallivm, 8.3321608736E-3); + LLVMValueRef sincof_p2 = lp_build_const_v4sf(gallivm, -1.6666654611E-1); /* * Evaluate the second polynom (Pi/4 <= x <= 0) @@ -1836,7 +1885,7 @@ lp_build_sin(struct lp_build_context *bld, LLVMValueRef y2_i = LLVMBuildBitCast(b, y2_9, v4si, "y2_i"); LLVMValueRef y_i = LLVMBuildBitCast(b, y_10, v4si, "y_i"); LLVMValueRef y2_and = LLVMBuildAnd(b, y2_i, poly_mask, "y2_and"); - LLVMValueRef inv = lp_build_const_v4si(~0); + LLVMValueRef inv = lp_build_const_v4si(bld->gallivm, ~0); LLVMValueRef poly_mask_inv = LLVMBuildXor(b, poly_mask, inv, "poly_mask_inv"); LLVMValueRef y_and = LLVMBuildAnd(b, y_i, poly_mask_inv, "y_and"); LLVMValueRef y_combine = LLVMBuildAdd(b, y_and, y2_and, "y_combine"); @@ -1858,17 +1907,19 @@ LLVMValueRef lp_build_cos(struct lp_build_context *bld, LLVMValueRef a) { + LLVMBuilderRef builder = bld->gallivm->builder; + struct gallivm_state *gallivm = bld->gallivm; struct lp_type int_type = lp_int_type(bld->type); - LLVMBuilderRef b = bld->builder; - LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatType(), 4); - LLVMTypeRef v4si = LLVMVectorType(LLVMInt32Type(), 4); + LLVMBuilderRef b = builder; + LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatTypeInContext(bld->gallivm->context), 4); + LLVMTypeRef v4si = LLVMVectorType(LLVMInt32TypeInContext(bld->gallivm->context), 4); /* * take the absolute value, * x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask); */ - LLVMValueRef inv_sig_mask = lp_build_const_v4si(~0x80000000); + LLVMValueRef inv_sig_mask = lp_build_const_v4si(bld->gallivm, ~0x80000000); LLVMValueRef a_v4si = LLVMBuildBitCast(b, a, v4si, "a_v4si"); LLVMValueRef absi = LLVMBuildAnd(b, a_v4si, inv_sig_mask, "absi"); @@ -1879,7 +1930,7 @@ lp_build_cos(struct lp_build_context *bld, * y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI); */ - LLVMValueRef FOPi = lp_build_const_v4sf(1.27323954473516); + LLVMValueRef FOPi = lp_build_const_v4sf(gallivm, 1.27323954473516); LLVMValueRef scale_y = LLVMBuildFMul(b, x_abs, FOPi, "scale_y"); /* @@ -1894,12 +1945,12 @@ lp_build_cos(struct lp_build_context *bld, * emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1); */ - LLVMValueRef all_one = lp_build_const_v4si(1); + LLVMValueRef all_one = lp_build_const_v4si(bld->gallivm, 1); LLVMValueRef emm2_add = LLVMBuildAdd(b, emm2_i, all_one, "emm2_add"); /* * emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1); */ - LLVMValueRef inv_one = lp_build_const_v4si(~1); + LLVMValueRef inv_one = lp_build_const_v4si(bld->gallivm, ~1); LLVMValueRef emm2_and = LLVMBuildAnd(b, emm2_add, inv_one, 
"emm2_and"); /* @@ -1911,22 +1962,22 @@ lp_build_cos(struct lp_build_context *bld, /* * emm2 = _mm_sub_epi32(emm2, *(v4si*)_pi32_2); */ - LLVMValueRef const_2 = lp_build_const_v4si(2); + LLVMValueRef const_2 = lp_build_const_v4si(bld->gallivm, 2); LLVMValueRef emm2_2 = LLVMBuildSub(b, emm2_and, const_2, "emm2_2"); /* get the swap sign flag * emm0 = _mm_andnot_si128(emm2, *(v4si*)_pi32_4); */ - LLVMValueRef inv = lp_build_const_v4si(~0); + LLVMValueRef inv = lp_build_const_v4si(bld->gallivm, ~0); LLVMValueRef emm0_not = LLVMBuildXor(b, emm2_2, inv, "emm0_not"); - LLVMValueRef pi32_4 = lp_build_const_v4si(4); + LLVMValueRef pi32_4 = lp_build_const_v4si(bld->gallivm, 4); LLVMValueRef emm0_and = LLVMBuildAnd(b, emm0_not, pi32_4, "emm0_and"); /* * emm2 = _mm_slli_epi32(emm0, 29); */ - LLVMValueRef const_29 = lp_build_const_v4si(29); + LLVMValueRef const_29 = lp_build_const_v4si(bld->gallivm, 29); LLVMValueRef sign_bit = LLVMBuildShl(b, emm0_and, const_29, "sign_bit"); /* @@ -1939,19 +1990,20 @@ lp_build_cos(struct lp_build_context *bld, * emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128()); */ - LLVMValueRef pi32_2 = lp_build_const_v4si(2); + LLVMValueRef pi32_2 = lp_build_const_v4si(bld->gallivm, 2); LLVMValueRef emm2_3 = LLVMBuildAnd(b, emm2_2, pi32_2, "emm2_3"); - LLVMValueRef poly_mask = lp_build_compare(b, int_type, PIPE_FUNC_EQUAL, - emm2_3, lp_build_const_v4si(0)); + LLVMValueRef poly_mask = lp_build_compare(bld->gallivm, + int_type, PIPE_FUNC_EQUAL, + emm2_3, lp_build_const_v4si(bld->gallivm, 0)); /* * _PS_CONST(minus_cephes_DP1, -0.78515625); * _PS_CONST(minus_cephes_DP2, -2.4187564849853515625e-4); * _PS_CONST(minus_cephes_DP3, -3.77489497744594108e-8); */ - LLVMValueRef DP1 = lp_build_const_v4sf(-0.78515625); - LLVMValueRef DP2 = lp_build_const_v4sf(-2.4187564849853515625e-4); - LLVMValueRef DP3 = lp_build_const_v4sf(-3.77489497744594108e-8); + LLVMValueRef DP1 = lp_build_const_v4sf(gallivm, -0.78515625); + LLVMValueRef DP2 = lp_build_const_v4sf(gallivm, -2.4187564849853515625e-4); + LLVMValueRef DP3 = lp_build_const_v4sf(gallivm, -3.77489497744594108e-8); /* * The magic pass: "Extended precision modular arithmetic" @@ -1986,9 +2038,9 @@ lp_build_cos(struct lp_build_context *bld, * _PS_CONST(coscof_p1, -1.388731625493765E-003); * _PS_CONST(coscof_p2, 4.166664568298827E-002); */ - LLVMValueRef coscof_p0 = lp_build_const_v4sf(2.443315711809948E-005); - LLVMValueRef coscof_p1 = lp_build_const_v4sf(-1.388731625493765E-003); - LLVMValueRef coscof_p2 = lp_build_const_v4sf(4.166664568298827E-002); + LLVMValueRef coscof_p0 = lp_build_const_v4sf(gallivm, 2.443315711809948E-005); + LLVMValueRef coscof_p1 = lp_build_const_v4sf(gallivm, -1.388731625493765E-003); + LLVMValueRef coscof_p2 = lp_build_const_v4sf(gallivm, 4.166664568298827E-002); /* * y = *(v4sf*)_ps_coscof_p0; @@ -2007,10 +2059,10 @@ lp_build_cos(struct lp_build_context *bld, * y = _mm_sub_ps(y, tmp); * y = _mm_add_ps(y, *(v4sf*)_ps_1); */ - LLVMValueRef half = lp_build_const_v4sf(0.5); + LLVMValueRef half = lp_build_const_v4sf(gallivm, 0.5); LLVMValueRef tmp = LLVMBuildFMul(b, z, half, "tmp"); LLVMValueRef y_9 = LLVMBuildFSub(b, y_8, tmp, "y_8"); - LLVMValueRef one = lp_build_const_v4sf(1.0); + LLVMValueRef one = lp_build_const_v4sf(gallivm, 1.0); LLVMValueRef y_10 = LLVMBuildFAdd(b, y_9, one, "y_9"); /* @@ -2018,9 +2070,9 @@ lp_build_cos(struct lp_build_context *bld, * _PS_CONST(sincof_p1, 8.3321608736E-3); * _PS_CONST(sincof_p2, -1.6666654611E-1); */ - LLVMValueRef sincof_p0 = lp_build_const_v4sf(-1.9515295891E-4); - LLVMValueRef 
sincof_p1 = lp_build_const_v4sf(8.3321608736E-3); - LLVMValueRef sincof_p2 = lp_build_const_v4sf(-1.6666654611E-1); + LLVMValueRef sincof_p0 = lp_build_const_v4sf(gallivm, -1.9515295891E-4); + LLVMValueRef sincof_p1 = lp_build_const_v4sf(gallivm, 8.3321608736E-3); + LLVMValueRef sincof_p2 = lp_build_const_v4sf(gallivm, -1.6666654611E-1); /* * Evaluate the second polynom (Pi/4 <= x <= 0) @@ -2094,7 +2146,8 @@ lp_build_exp(struct lp_build_context *bld, LLVMValueRef x) { /* log2(e) = 1/log(2) */ - LLVMValueRef log2e = lp_build_const_vec(bld->type, 1.4426950408889634); + LLVMValueRef log2e = lp_build_const_vec(bld->gallivm, bld->type, + 1.4426950408889634); assert(lp_check_value(bld->type, x)); @@ -2110,7 +2163,8 @@ lp_build_log(struct lp_build_context *bld, LLVMValueRef x) { /* log(2) */ - LLVMValueRef log2 = lp_build_const_vec(bld->type, 0.69314718055994529); + LLVMValueRef log2 = lp_build_const_vec(bld->gallivm, bld->type, + 0.69314718055994529); assert(lp_check_value(bld->type, x)); @@ -2144,7 +2198,7 @@ lp_build_polynomial(struct lp_build_context *bld, for (i = num_coeffs; i--; ) { LLVMValueRef coeff; - coeff = lp_build_const_vec(type, coeffs[i]); + coeff = lp_build_const_vec(bld->gallivm, type, coeffs[i]); if(res) res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res)); @@ -2198,9 +2252,10 @@ lp_build_exp2_approx(struct lp_build_context *bld, LLVMValueRef *p_frac_part, LLVMValueRef *p_exp2) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; - LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); LLVMValueRef ipart = NULL; LLVMValueRef fpart = NULL; LLVMValueRef expipart = NULL; @@ -2219,29 +2274,31 @@ lp_build_exp2_approx(struct lp_build_context *bld, assert(type.floating && type.width == 32); - x = lp_build_min(bld, x, lp_build_const_vec(type, 129.0)); - x = lp_build_max(bld, x, lp_build_const_vec(type, -126.99999)); + x = lp_build_min(bld, x, lp_build_const_vec(bld->gallivm, type, 129.0)); + x = lp_build_max(bld, x, lp_build_const_vec(bld->gallivm, type, -126.99999)); /* ipart = floor(x) */ ipart = lp_build_floor(bld, x); /* fpart = x - ipart */ - fpart = LLVMBuildFSub(bld->builder, x, ipart, ""); + fpart = LLVMBuildFSub(builder, x, ipart, ""); } if(p_exp2_int_part || p_exp2) { /* expipart = (float) (1 << ipart) */ - ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, ""); - expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_const_int_vec(type, 127), ""); - expipart = LLVMBuildShl(bld->builder, expipart, lp_build_const_int_vec(type, 23), ""); - expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, ""); + ipart = LLVMBuildFPToSI(builder, ipart, int_vec_type, ""); + expipart = LLVMBuildAdd(builder, ipart, + lp_build_const_int_vec(bld->gallivm, type, 127), ""); + expipart = LLVMBuildShl(builder, expipart, + lp_build_const_int_vec(bld->gallivm, type, 23), ""); + expipart = LLVMBuildBitCast(builder, expipart, vec_type, ""); } if(p_exp2) { expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial, Elements(lp_build_exp2_polynomial)); - res = LLVMBuildFMul(bld->builder, expipart, expfpart, ""); + res = LLVMBuildFMul(builder, expipart, expfpart, ""); } if(p_exp2_int_part) @@ -2279,6 +2336,7 @@ lp_build_extract_exponent(struct lp_build_context *bld, LLVMValueRef x, int bias) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct 
lp_type type = bld->type; unsigned mantissa = lp_mantissa(type); LLVMValueRef res; @@ -2287,11 +2345,14 @@ lp_build_extract_exponent(struct lp_build_context *bld, assert(lp_check_value(bld->type, x)); - x = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, ""); + x = LLVMBuildBitCast(builder, x, bld->int_vec_type, ""); - res = LLVMBuildLShr(bld->builder, x, lp_build_const_int_vec(type, mantissa), ""); - res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(type, 255), ""); - res = LLVMBuildSub(bld->builder, res, lp_build_const_int_vec(type, 127 - bias), ""); + res = LLVMBuildLShr(builder, x, + lp_build_const_int_vec(bld->gallivm, type, mantissa), ""); + res = LLVMBuildAnd(builder, res, + lp_build_const_int_vec(bld->gallivm, type, 255), ""); + res = LLVMBuildSub(builder, res, + lp_build_const_int_vec(bld->gallivm, type, 127 - bias), ""); return res; } @@ -2308,9 +2369,11 @@ LLVMValueRef lp_build_extract_mantissa(struct lp_build_context *bld, LLVMValueRef x) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; unsigned mantissa = lp_mantissa(type); - LLVMValueRef mantmask = lp_build_const_int_vec(type, (1ULL << mantissa) - 1); + LLVMValueRef mantmask = lp_build_const_int_vec(bld->gallivm, type, + (1ULL << mantissa) - 1); LLVMValueRef one = LLVMConstBitCast(bld->one, bld->int_vec_type); LLVMValueRef res; @@ -2318,12 +2381,12 @@ lp_build_extract_mantissa(struct lp_build_context *bld, assert(type.floating); - x = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, ""); + x = LLVMBuildBitCast(builder, x, bld->int_vec_type, ""); /* res = x / 2**ipart */ - res = LLVMBuildAnd(bld->builder, x, mantmask, ""); - res = LLVMBuildOr(bld->builder, res, one, ""); - res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, ""); + res = LLVMBuildAnd(builder, x, mantmask, ""); + res = LLVMBuildOr(builder, res, one, ""); + res = LLVMBuildBitCast(builder, res, bld->vec_type, ""); return res; } @@ -2374,12 +2437,13 @@ lp_build_log2_approx(struct lp_build_context *bld, LLVMValueRef *p_floor_log2, LLVMValueRef *p_log2) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; - LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); - LLVMValueRef expmask = lp_build_const_int_vec(type, 0x7f800000); - LLVMValueRef mantmask = lp_build_const_int_vec(type, 0x007fffff); + LLVMValueRef expmask = lp_build_const_int_vec(bld->gallivm, type, 0x7f800000); + LLVMValueRef mantmask = lp_build_const_int_vec(bld->gallivm, type, 0x007fffff); LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type); LLVMValueRef i = NULL; @@ -2401,35 +2465,35 @@ lp_build_log2_approx(struct lp_build_context *bld, assert(type.floating && type.width == 32); - i = LLVMBuildBitCast(bld->builder, x, int_vec_type, ""); + i = LLVMBuildBitCast(builder, x, int_vec_type, ""); /* exp = (float) exponent(x) */ - exp = LLVMBuildAnd(bld->builder, i, expmask, ""); + exp = LLVMBuildAnd(builder, i, expmask, ""); } if(p_floor_log2 || p_log2) { - logexp = LLVMBuildLShr(bld->builder, exp, lp_build_const_int_vec(type, 23), ""); - logexp = LLVMBuildSub(bld->builder, logexp, lp_build_const_int_vec(type, 127), ""); - logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, ""); + logexp = LLVMBuildLShr(builder, exp, lp_build_const_int_vec(bld->gallivm, type, 23), ""); + logexp = LLVMBuildSub(builder, logexp, 
lp_build_const_int_vec(bld->gallivm, type, 127), ""); + logexp = LLVMBuildSIToFP(builder, logexp, vec_type, ""); } if(p_log2) { /* mant = (float) mantissa(x) */ - mant = LLVMBuildAnd(bld->builder, i, mantmask, ""); - mant = LLVMBuildOr(bld->builder, mant, one, ""); - mant = LLVMBuildBitCast(bld->builder, mant, vec_type, ""); + mant = LLVMBuildAnd(builder, i, mantmask, ""); + mant = LLVMBuildOr(builder, mant, one, ""); + mant = LLVMBuildBitCast(builder, mant, vec_type, ""); logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial, Elements(lp_build_log2_polynomial)); /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ - logmant = LLVMBuildFMul(bld->builder, logmant, LLVMBuildFSub(bld->builder, mant, bld->one, ""), ""); + logmant = LLVMBuildFMul(builder, logmant, LLVMBuildFSub(builder, mant, bld->one, ""), ""); - res = LLVMBuildFAdd(bld->builder, logmant, logexp, ""); + res = LLVMBuildFAdd(builder, logmant, logexp, ""); } if(p_exp) { - exp = LLVMBuildBitCast(bld->builder, exp, vec_type, ""); + exp = LLVMBuildBitCast(builder, exp, vec_type, ""); *p_exp = exp; } @@ -2465,6 +2529,7 @@ LLVMValueRef lp_build_fast_log2(struct lp_build_context *bld, LLVMValueRef x) { + LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef ipart; LLVMValueRef fpart; @@ -2474,13 +2539,13 @@ lp_build_fast_log2(struct lp_build_context *bld, /* ipart = floor(log2(x)) - 1 */ ipart = lp_build_extract_exponent(bld, x, -1); - ipart = LLVMBuildSIToFP(bld->builder, ipart, bld->vec_type, ""); + ipart = LLVMBuildSIToFP(builder, ipart, bld->vec_type, ""); /* fpart = x / 2**ipart */ fpart = lp_build_extract_mantissa(bld, x); /* ipart + fpart */ - return LLVMBuildFAdd(bld->builder, ipart, fpart, ""); + return LLVMBuildFAdd(builder, ipart, fpart, ""); } @@ -2493,7 +2558,8 @@ LLVMValueRef lp_build_ilog2(struct lp_build_context *bld, LLVMValueRef x) { - LLVMValueRef sqrt2 = lp_build_const_vec(bld->type, M_SQRT2); + LLVMBuilderRef builder = bld->gallivm->builder; + LLVMValueRef sqrt2 = lp_build_const_vec(bld->gallivm, bld->type, M_SQRT2); LLVMValueRef ipart; assert(bld->type.floating); @@ -2501,7 +2567,7 @@ lp_build_ilog2(struct lp_build_context *bld, assert(lp_check_value(bld->type, x)); /* x * 2^(0.5) i.e., add 0.5 to the log2(x) */ - x = LLVMBuildFMul(bld->builder, x, sqrt2, ""); + x = LLVMBuildFMul(builder, x, sqrt2, ""); /* ipart = floor(log2(x) + 0.5) */ ipart = lp_build_extract_exponent(bld, x, 0); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.c b/src/gallium/auxiliary/gallivm/lp_bld_assert.c index f2ebd868a8d..9de5e8e7b51 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_assert.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.c @@ -56,20 +56,21 @@ lp_assert(int condition, const char *msg) * \param msg a string to print if the assertion fails. 
*/ LLVMValueRef -lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition, +lp_build_assert(struct gallivm_state *gallivm, + LLVMValueRef condition, const char *msg) { - LLVMModuleRef module; + LLVMBuilderRef builder = gallivm->builder; + LLVMContextRef context = gallivm->context; + LLVMModuleRef module = gallivm->module; LLVMTypeRef arg_types[2]; LLVMValueRef msg_string, assert_func, params[2], r; - module = LLVMGetGlobalParent(LLVMGetBasicBlockParent( - LLVMGetInsertBlock(builder))); + msg_string = lp_build_const_string_variable(module, context, + msg, strlen(msg) + 1); - msg_string = lp_build_const_string_variable(module, msg, strlen(msg) + 1); - - arg_types[0] = LLVMInt32Type(); - arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0); + arg_types[0] = LLVMInt32TypeInContext(context); + arg_types[1] = LLVMPointerType(LLVMInt8TypeInContext(context), 0); /* lookup the lp_assert function */ assert_func = LLVMGetNamedFunction(module, "lp_assert"); @@ -77,12 +78,12 @@ lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition, /* Create the assertion function if not found */ if (!assert_func) { LLVMTypeRef func_type = - LLVMFunctionType(LLVMVoidType(), arg_types, 2, 0); + LLVMFunctionType(LLVMVoidTypeInContext(context), arg_types, 2, 0); assert_func = LLVMAddFunction(module, "lp_assert", func_type); LLVMSetFunctionCallConv(assert_func, LLVMCCallConv); LLVMSetLinkage(assert_func, LLVMExternalLinkage); - LLVMAddGlobalMapping(lp_build_engine, assert_func, + LLVMAddGlobalMapping(gallivm->engine, assert_func, func_to_pointer((func_pointer)lp_assert)); } assert(assert_func); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.h b/src/gallium/auxiliary/gallivm/lp_bld_assert.h index ddd879dc2c6..1d2baab30a2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_assert.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.h @@ -30,10 +30,12 @@ #include "lp_bld.h" +#include "lp_bld_init.h" LLVMValueRef -lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition, +lp_build_assert(struct gallivm_state *gallivm, + LLVMValueRef condition, const char *msg); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_bitarit.c b/src/gallium/auxiliary/gallivm/lp_bld_bitarit.c index 706479b4d56..a9c57d682f2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_bitarit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_bitarit.c @@ -40,6 +40,7 @@ LLVMValueRef lp_build_or(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; LLVMValueRef res; @@ -48,14 +49,14 @@ lp_build_or(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) /* can't do bitwise ops on floating-point values */ if (type.floating) { - a = LLVMBuildBitCast(bld->builder, a, bld->int_vec_type, ""); - b = LLVMBuildBitCast(bld->builder, b, bld->int_vec_type, ""); + a = LLVMBuildBitCast(builder, a, bld->int_vec_type, ""); + b = LLVMBuildBitCast(builder, b, bld->int_vec_type, ""); } - res = LLVMBuildOr(bld->builder, a, b, ""); + res = LLVMBuildOr(builder, a, b, ""); if (type.floating) { - res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, ""); + res = LLVMBuildBitCast(builder, res, bld->vec_type, ""); } return res; @@ -68,6 +69,7 @@ lp_build_or(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) LLVMValueRef lp_build_and(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; LLVMValueRef res; @@ -76,14 +78,14 @@ lp_build_and(struct 
lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) /* can't do bitwise ops on floating-point values */ if (type.floating) { - a = LLVMBuildBitCast(bld->builder, a, bld->int_vec_type, ""); - b = LLVMBuildBitCast(bld->builder, b, bld->int_vec_type, ""); + a = LLVMBuildBitCast(builder, a, bld->int_vec_type, ""); + b = LLVMBuildBitCast(builder, b, bld->int_vec_type, ""); } - res = LLVMBuildAnd(bld->builder, a, b, ""); + res = LLVMBuildAnd(builder, a, b, ""); if (type.floating) { - res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, ""); + res = LLVMBuildBitCast(builder, res, bld->vec_type, ""); } return res; @@ -96,6 +98,7 @@ lp_build_and(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) LLVMValueRef lp_build_andnot(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; LLVMValueRef res; @@ -104,15 +107,15 @@ lp_build_andnot(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) /* can't do bitwise ops on floating-point values */ if (type.floating) { - a = LLVMBuildBitCast(bld->builder, a, bld->int_vec_type, ""); - b = LLVMBuildBitCast(bld->builder, b, bld->int_vec_type, ""); + a = LLVMBuildBitCast(builder, a, bld->int_vec_type, ""); + b = LLVMBuildBitCast(builder, b, bld->int_vec_type, ""); } - res = LLVMBuildNot(bld->builder, b, ""); - res = LLVMBuildAnd(bld->builder, a, res, ""); + res = LLVMBuildNot(builder, b, ""); + res = LLVMBuildAnd(builder, a, res, ""); if (type.floating) { - res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, ""); + res = LLVMBuildBitCast(builder, res, bld->vec_type, ""); } return res; @@ -125,6 +128,7 @@ lp_build_andnot(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) LLVMValueRef lp_build_shl(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; LLVMValueRef res; @@ -133,7 +137,7 @@ lp_build_shl(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) assert(lp_check_value(type, a)); assert(lp_check_value(type, b)); - res = LLVMBuildShl(bld->builder, a, b, ""); + res = LLVMBuildShl(builder, a, b, ""); return res; } @@ -145,6 +149,7 @@ lp_build_shl(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) LLVMValueRef lp_build_shr(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; LLVMValueRef res; @@ -154,9 +159,9 @@ lp_build_shr(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) assert(lp_check_value(type, b)); if (type.sign) { - res = LLVMBuildAShr(bld->builder, a, b, ""); + res = LLVMBuildAShr(builder, a, b, ""); } else { - res = LLVMBuildLShr(bld->builder, a, b, ""); + res = LLVMBuildLShr(builder, a, b, ""); } return res; @@ -169,7 +174,7 @@ lp_build_shr(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) LLVMValueRef lp_build_shl_imm(struct lp_build_context *bld, LLVMValueRef a, unsigned imm) { - LLVMValueRef b = lp_build_const_int_vec(bld->type, imm); + LLVMValueRef b = lp_build_const_int_vec(bld->gallivm, bld->type, imm); assert(imm <= bld->type.width); return lp_build_shl(bld, a, b); } @@ -181,7 +186,7 @@ lp_build_shl_imm(struct lp_build_context *bld, LLVMValueRef a, unsigned imm) LLVMValueRef lp_build_shr_imm(struct lp_build_context *bld, LLVMValueRef a, unsigned imm) { - LLVMValueRef b = lp_build_const_int_vec(bld->type, imm); + LLVMValueRef b = lp_build_const_int_vec(bld->gallivm, 
bld->type, imm); assert(imm <= bld->type.width); return lp_build_shr(bld, a, b); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c index dd839c0bea5..6d8b7c26fc8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c @@ -39,6 +39,7 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" +#include "lp_bld_init.h" unsigned @@ -211,31 +212,31 @@ lp_const_eps(struct lp_type type) LLVMValueRef -lp_build_undef(struct lp_type type) +lp_build_undef(struct gallivm_state *gallivm, struct lp_type type) { - LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type); return LLVMGetUndef(vec_type); } LLVMValueRef -lp_build_zero(struct lp_type type) +lp_build_zero(struct gallivm_state *gallivm, struct lp_type type) { if (type.length == 1) { if (type.floating) - return LLVMConstReal(LLVMFloatType(), 0.0); + return lp_build_const_float(gallivm, 0.0); else - return LLVMConstInt(LLVMIntType(type.width), 0, 0); + return LLVMConstInt(LLVMIntTypeInContext(gallivm->context, type.width), 0, 0); } else { - LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type); return LLVMConstNull(vec_type); } } LLVMValueRef -lp_build_one(struct lp_type type) +lp_build_one(struct gallivm_state *gallivm, struct lp_type type) { LLVMTypeRef elem_type; LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; @@ -243,7 +244,7 @@ lp_build_one(struct lp_type type) assert(type.length <= LP_MAX_VECTOR_LENGTH); - elem_type = lp_build_elem_type(type); + elem_type = lp_build_elem_type(gallivm, type); if(type.floating) elems[0] = LLVMConstReal(elem_type, 1.0); @@ -283,10 +284,11 @@ lp_build_one(struct lp_type type) * Build constant-valued element from a scalar value. */ LLVMValueRef -lp_build_const_elem(struct lp_type type, +lp_build_const_elem(struct gallivm_state *gallivm, + struct lp_type type, double val) { - LLVMTypeRef elem_type = lp_build_elem_type(type); + LLVMTypeRef elem_type = lp_build_elem_type(gallivm, type); LLVMValueRef elem; if(type.floating) { @@ -306,15 +308,15 @@ lp_build_const_elem(struct lp_type type, * Build constant-valued vector from a scalar value. 
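 *
 * For illustration only (assumes a valid struct gallivm_state "gallivm"
 * and an lp_type "type" describing a floating-point vector):
 *
 *    LLVMValueRef half = lp_build_const_vec(gallivm, type, 0.5);
 *    LLVMValueRef mask = lp_build_const_int_vec(gallivm, type, 0xff);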
*/ LLVMValueRef -lp_build_const_vec(struct lp_type type, +lp_build_const_vec(struct gallivm_state *gallivm, struct lp_type type, double val) { if (type.length == 1) { - return lp_build_const_elem(type, val); + return lp_build_const_elem(gallivm, type, val); } else { LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; unsigned i; - elems[0] = lp_build_const_elem(type, val); + elems[0] = lp_build_const_elem(gallivm, type, val); for(i = 1; i < type.length; ++i) elems[i] = elems[0]; return LLVMConstVector(elems, type.length); @@ -323,10 +325,10 @@ lp_build_const_vec(struct lp_type type, LLVMValueRef -lp_build_const_int_vec(struct lp_type type, - long long val) +lp_build_const_int_vec(struct gallivm_state *gallivm, struct lp_type type, + long long val) { - LLVMTypeRef elem_type = lp_build_int_elem_type(type); + LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type); LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; unsigned i; @@ -343,7 +345,8 @@ lp_build_const_int_vec(struct lp_type type, LLVMValueRef -lp_build_const_aos(struct lp_type type, +lp_build_const_aos(struct gallivm_state *gallivm, + struct lp_type type, double r, double g, double b, double a, const unsigned char *swizzle) { @@ -355,7 +358,7 @@ lp_build_const_aos(struct lp_type type, assert(type.length % 4 == 0); assert(type.length <= LP_MAX_VECTOR_LENGTH); - elem_type = lp_build_elem_type(type); + elem_type = lp_build_elem_type(gallivm, type); if(swizzle == NULL) swizzle = default_swizzle; @@ -386,10 +389,11 @@ lp_build_const_aos(struct lp_type type, * @param mask TGSI_WRITEMASK_xxx */ LLVMValueRef -lp_build_const_mask_aos(struct lp_type type, +lp_build_const_mask_aos(struct gallivm_state *gallivm, + struct lp_type type, unsigned mask) { - LLVMTypeRef elem_type = LLVMIntType(type.width); + LLVMTypeRef elem_type = LLVMIntTypeInContext(gallivm->context, type.width); LLVMValueRef masks[LP_MAX_VECTOR_LENGTH]; unsigned i, j; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h index 6b1fc590c17..69718eb4b3d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h @@ -39,6 +39,7 @@ #include "pipe/p_compiler.h" #include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_init.h" @@ -73,44 +74,71 @@ lp_const_eps(struct lp_type type); LLVMValueRef -lp_build_undef(struct lp_type type); +lp_build_undef(struct gallivm_state *gallivm, struct lp_type type); LLVMValueRef -lp_build_zero(struct lp_type type); +lp_build_zero(struct gallivm_state *gallivm, struct lp_type type); LLVMValueRef -lp_build_one(struct lp_type type); +lp_build_one(struct gallivm_state *gallivm, struct lp_type type); LLVMValueRef -lp_build_const_elem(struct lp_type type, +lp_build_const_elem(struct gallivm_state *gallivm, struct lp_type type, double val); LLVMValueRef -lp_build_const_vec(struct lp_type type, double val); +lp_build_const_vec(struct gallivm_state *gallivm, struct lp_type type, + double val); LLVMValueRef -lp_build_const_int_vec(struct lp_type type, long long val); +lp_build_const_int_vec(struct gallivm_state *gallivm, + struct lp_type type, long long val); LLVMValueRef -lp_build_const_aos(struct lp_type type, +lp_build_const_aos(struct gallivm_state *gallivm, struct lp_type type, double r, double g, double b, double a, const unsigned char *swizzle); LLVMValueRef -lp_build_const_mask_aos(struct lp_type type, +lp_build_const_mask_aos(struct gallivm_state *gallivm, + struct lp_type type, unsigned mask); static INLINE LLVMValueRef -lp_build_const_int32(int i) 
+lp_build_const_int32(struct gallivm_state *gallivm, int i) { - return LLVMConstInt(LLVMInt32Type(), i, 0); + return LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), i, 0); +} + + +static INLINE LLVMValueRef +lp_build_const_float(struct gallivm_state *gallivm, float x) +{ + return LLVMConstReal(LLVMFloatTypeInContext(gallivm->context), x); +} + + +/** Return constant-valued pointer to int */ +static INLINE LLVMValueRef +lp_build_const_int_pointer(struct gallivm_state *gallivm, const void *ptr) +{ + LLVMTypeRef int_type; + LLVMValueRef v; + + /* int type large enough to hold a pointer */ + int_type = LLVMIntTypeInContext(gallivm->context, 8 * sizeof(void *)); + v = LLVMConstInt(int_type, (uintptr_t) ptr, 0); + v = LLVMBuildIntToPtr(gallivm->builder, v, + LLVMPointerType(int_type, 0), + "cast int to ptr"); + return v; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index 6967dd26225..c43ee8ac638 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -89,12 +89,13 @@ * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1]. */ LLVMValueRef -lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, +lp_build_clamped_float_to_unsigned_norm(struct gallivm_state *gallivm, struct lp_type src_type, unsigned dst_width, LLVMValueRef src) { - LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type); + LLVMBuilderRef builder = gallivm->builder; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, src_type); LLVMValueRef res; unsigned mantissa; @@ -122,10 +123,11 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, scale = (double)mask/ubound; bias = (double)(1ULL << (mantissa - dst_width)); - res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); - res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), ""); + res = LLVMBuildFMul(builder, src, lp_build_const_vec(gallivm, src_type, scale), ""); + res = LLVMBuildFAdd(builder, res, lp_build_const_vec(gallivm, src_type, bias), ""); res = LLVMBuildBitCast(builder, res, int_vec_type, ""); - res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), ""); + res = LLVMBuildAnd(builder, res, + lp_build_const_int_vec(gallivm, src_type, mask), ""); } else if (dst_width == (mantissa + 1)) { /* @@ -138,7 +140,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, scale = (double)((1ULL << dst_width) - 1); - res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); + res = LLVMBuildFMul(builder, src, + lp_build_const_vec(gallivm, src_type, scale), ""); res = LLVMBuildFPToSI(builder, res, int_vec_type, ""); } else { @@ -166,7 +169,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, LLVMValueRef lshifted; LLVMValueRef rshifted; - res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); + res = LLVMBuildFMul(builder, src, + lp_build_const_vec(gallivm, src_type, scale), ""); res = LLVMBuildFPToSI(builder, res, int_vec_type, ""); /* @@ -177,7 +181,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, */ if (lshift) { lshifted = LLVMBuildShl(builder, res, - lp_build_const_int_vec(src_type, lshift), ""); + lp_build_const_int_vec(gallivm, src_type, + lshift), ""); } else { lshifted = res; } @@ -186,7 +191,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, * Align the most significant bit to the right. 
*/ rshifted = LLVMBuildAShr(builder, res, - lp_build_const_int_vec(src_type, rshift), ""); + lp_build_const_int_vec(gallivm, src_type, rshift), + ""); /* * Subtract the MSB to the LSB, therefore re-scaling from @@ -206,13 +212,14 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, * return {float, float, float, float} with values in range [0, 1]. */ LLVMValueRef -lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, +lp_build_unsigned_norm_to_float(struct gallivm_state *gallivm, unsigned src_width, struct lp_type dst_type, LLVMValueRef src) { - LLVMTypeRef vec_type = lp_build_vec_type(dst_type); - LLVMTypeRef int_vec_type = lp_build_int_vec_type(dst_type); + LLVMBuilderRef builder = gallivm->builder; + LLVMTypeRef vec_type = lp_build_vec_type(gallivm, dst_type); + LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, dst_type); LLVMValueRef bias_; LLVMValueRef res; unsigned mantissa; @@ -230,7 +237,8 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, if (src_width == 8) { scale = 1.0/255.0; res = LLVMBuildSIToFP(builder, src, vec_type, ""); - res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), ""); + res = LLVMBuildFMul(builder, res, + lp_build_const_vec(gallivm, dst_type, scale), ""); return res; } @@ -247,10 +255,11 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, if(src_width > mantissa) { int shift = src_width - mantissa; - res = LLVMBuildLShr(builder, res, lp_build_const_int_vec(dst_type, shift), ""); + res = LLVMBuildLShr(builder, res, + lp_build_const_int_vec(gallivm, dst_type, shift), ""); } - bias_ = lp_build_const_vec(dst_type, bias); + bias_ = lp_build_const_vec(gallivm, dst_type, bias); res = LLVMBuildOr(builder, res, @@ -259,7 +268,7 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, res = LLVMBuildBitCast(builder, res, vec_type, ""); res = LLVMBuildFSub(builder, res, bias_, ""); - res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), ""); + res = LLVMBuildFMul(builder, res, lp_build_const_vec(gallivm, dst_type, scale), ""); return res; } @@ -272,12 +281,13 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, * to the lp_type union. 
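 *
 * Sketch of a typical call with the new signature (illustrative names
 * only; "gallivm", a float32x4 source type "f32_type", an 8-bit unorm
 * destination type "u8n_type" and four source vectors in src[] are
 * assumed to exist in the caller):
 *
 *    LLVMValueRef dst[1];
 *    lp_build_conv(gallivm, f32_type, u8n_type, src, 4, dst, 1);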
*/ void -lp_build_conv(LLVMBuilderRef builder, +lp_build_conv(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts) { + LLVMBuilderRef builder = gallivm->builder; struct lp_type tmp_type; LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; unsigned num_tmps; @@ -342,12 +352,12 @@ lp_build_conv(LLVMBuilderRef builder, int32_type.length /= 4; int32_type.sign = 1; - src_vec_type = lp_build_vec_type(src_type); - dst_vec_type = lp_build_vec_type(dst_type); - int16_vec_type = lp_build_vec_type(int16_type); - int32_vec_type = lp_build_vec_type(int32_type); + src_vec_type = lp_build_vec_type(gallivm, src_type); + dst_vec_type = lp_build_vec_type(gallivm, dst_type); + int16_vec_type = lp_build_vec_type(gallivm, int16_type); + int32_vec_type = lp_build_vec_type(gallivm, int32_type); - const_255f = lp_build_const_vec(src_type, 255.0f); + const_255f = lp_build_const_vec(gallivm, src_type, 255.0f); a = LLVMBuildFMul(builder, src[0], const_255f, ""); b = LLVMBuildFMul(builder, src[1], const_255f, ""); @@ -357,14 +367,14 @@ lp_build_conv(LLVMBuilderRef builder, { struct lp_build_context bld; - bld.builder = builder; + bld.gallivm = gallivm; bld.type = src_type; bld.vec_type = src_vec_type; - bld.int_elem_type = lp_build_elem_type(int32_type); + bld.int_elem_type = lp_build_elem_type(gallivm, int32_type); bld.int_vec_type = int32_vec_type; - bld.undef = lp_build_undef(src_type); - bld.zero = lp_build_zero(src_type); - bld.one = lp_build_one(src_type); + bld.undef = lp_build_undef(gallivm, src_type); + bld.zero = lp_build_zero(gallivm, src_type); + bld.one = lp_build_one(gallivm, src_type); src_int0 = lp_build_iround(&bld, a); src_int1 = lp_build_iround(&bld, b); @@ -372,9 +382,9 @@ lp_build_conv(LLVMBuilderRef builder, src_int3 = lp_build_iround(&bld, d); } /* relying on clamping behavior of sse2 intrinsics here */ - lo = lp_build_pack2(builder, int32_type, int16_type, src_int0, src_int1); - hi = lp_build_pack2(builder, int32_type, int16_type, src_int2, src_int3); - dst[i] = lp_build_pack2(builder, int16_type, dst_type, lo, hi); + lo = lp_build_pack2(gallivm, int32_type, int16_type, src_int0, src_int1); + hi = lp_build_pack2(gallivm, int32_type, int16_type, src_int2, src_int3); + dst[i] = lp_build_pack2(gallivm, int16_type, dst_type, lo, hi); } return; } @@ -391,13 +401,13 @@ lp_build_conv(LLVMBuilderRef builder, double dst_max = lp_const_max(dst_type); LLVMValueRef thres; - lp_build_context_init(&bld, builder, tmp_type); + lp_build_context_init(&bld, gallivm, tmp_type); if(src_min < dst_min) { if(dst_min == 0.0) thres = bld.zero; else - thres = lp_build_const_vec(src_type, dst_min); + thres = lp_build_const_vec(gallivm, src_type, dst_min); for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_max(&bld, tmp[i], thres); } @@ -406,7 +416,7 @@ lp_build_conv(LLVMBuilderRef builder, if(dst_max == 1.0) thres = bld.one; else - thres = lp_build_const_vec(src_type, dst_max); + thres = lp_build_const_vec(gallivm, src_type, dst_max); for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_min(&bld, tmp[i], thres); } @@ -422,7 +432,7 @@ lp_build_conv(LLVMBuilderRef builder, else if(tmp_type.floating) { if(!dst_type.fixed && !dst_type.sign && dst_type.norm) { for(i = 0; i < num_tmps; ++i) { - tmp[i] = lp_build_clamped_float_to_unsigned_norm(builder, + tmp[i] = lp_build_clamped_float_to_unsigned_norm(gallivm, tmp_type, dst_type.width, tmp[i]); @@ -434,14 +444,14 @@ lp_build_conv(LLVMBuilderRef builder, LLVMTypeRef tmp_vec_type; if 
(dst_scale != 1.0) { - LLVMValueRef scale = lp_build_const_vec(tmp_type, dst_scale); + LLVMValueRef scale = lp_build_const_vec(gallivm, tmp_type, dst_scale); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, ""); } /* Use an equally sized integer for intermediate computations */ tmp_type.floating = FALSE; - tmp_vec_type = lp_build_vec_type(tmp_type); + tmp_vec_type = lp_build_vec_type(gallivm, tmp_type); for(i = 0; i < num_tmps; ++i) { #if 0 if(dst_type.sign) @@ -461,7 +471,8 @@ lp_build_conv(LLVMBuilderRef builder, /* FIXME: compensate different offsets too */ if(src_shift > dst_shift) { - LLVMValueRef shift = lp_build_const_int_vec(tmp_type, src_shift - dst_shift); + LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, + src_shift - dst_shift); for(i = 0; i < num_tmps; ++i) if(src_type.sign) tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, ""); @@ -485,7 +496,7 @@ lp_build_conv(LLVMBuilderRef builder, new_type.width = dst_type.width; new_type.length = dst_type.length; - lp_build_resize(builder, tmp_type, new_type, tmp, num_srcs, tmp, num_dsts); + lp_build_resize(gallivm, tmp_type, new_type, tmp, num_srcs, tmp, num_dsts); tmp_type = new_type; num_tmps = num_dsts; @@ -501,7 +512,7 @@ lp_build_conv(LLVMBuilderRef builder, else if(!src_type.floating && dst_type.floating) { if(!src_type.fixed && !src_type.sign && src_type.norm) { for(i = 0; i < num_tmps; ++i) { - tmp[i] = lp_build_unsigned_norm_to_float(builder, + tmp[i] = lp_build_unsigned_norm_to_float(gallivm, src_type.width, dst_type, tmp[i]); @@ -515,7 +526,7 @@ lp_build_conv(LLVMBuilderRef builder, /* Use an equally sized integer for intermediate computations */ tmp_type.floating = TRUE; tmp_type.sign = TRUE; - tmp_vec_type = lp_build_vec_type(tmp_type); + tmp_vec_type = lp_build_vec_type(gallivm, tmp_type); for(i = 0; i < num_tmps; ++i) { #if 0 if(dst_type.sign) @@ -529,7 +540,7 @@ lp_build_conv(LLVMBuilderRef builder, } if (src_scale != 1.0) { - LLVMValueRef scale = lp_build_const_vec(tmp_type, 1.0/src_scale); + LLVMValueRef scale = lp_build_const_vec(gallivm, tmp_type, 1.0/src_scale); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, ""); } @@ -541,7 +552,7 @@ lp_build_conv(LLVMBuilderRef builder, /* FIXME: compensate different offsets too */ if(src_shift < dst_shift) { - LLVMValueRef shift = lp_build_const_int_vec(tmp_type, dst_shift - src_shift); + LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, dst_shift - src_shift); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildShl(builder, tmp[i], shift, ""); } @@ -565,7 +576,7 @@ lp_build_conv(LLVMBuilderRef builder, * This is basically a very trimmed down version of lp_build_conv. 
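 *
 * Hypothetical call, mirroring lp_build_conv above ("gallivm", a 32-bit
 * mask type "mask32_type", an 8-bit mask type "mask8_type" and src[0..3]
 * are assumed):
 *
 *    LLVMValueRef dst[1];
 *    lp_build_conv_mask(gallivm, mask32_type, mask8_type, src, 4, dst, 1);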
*/ void -lp_build_conv_mask(LLVMBuilderRef builder, +lp_build_conv_mask(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, @@ -599,11 +610,11 @@ lp_build_conv_mask(LLVMBuilderRef builder, if(src_type.width > dst_type.width) { assert(num_dsts == 1); - dst[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs); + dst[0] = lp_build_pack(gallivm, src_type, dst_type, TRUE, src, num_srcs); } else if(src_type.width < dst_type.width) { assert(num_srcs == 1); - lp_build_unpack(builder, src_type, dst_type, src[0], dst, num_dsts); + lp_build_unpack(gallivm, src_type, dst_type, src[0], dst, num_dsts); } else { assert(num_srcs == num_dsts); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.h b/src/gallium/auxiliary/gallivm/lp_bld_conv.h index 628831c3ada..cec655980fa 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.h @@ -44,27 +44,27 @@ struct lp_type; LLVMValueRef -lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, +lp_build_clamped_float_to_unsigned_norm(struct gallivm_state *gallivm, struct lp_type src_type, unsigned dst_width, LLVMValueRef src); LLVMValueRef -lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, +lp_build_unsigned_norm_to_float(struct gallivm_state *gallivm, unsigned src_width, struct lp_type dst_type, LLVMValueRef src); void -lp_build_conv(LLVMBuilderRef builder, +lp_build_conv(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, const LLVMValueRef *srcs, unsigned num_srcs, LLVMValueRef *dsts, unsigned num_dsts); void -lp_build_conv_mask(LLVMBuilderRef builder, +lp_build_conv_mask(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h index eb11dcd4ef4..8a58f95b78f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h @@ -42,6 +42,7 @@ #define GALLIVM_DEBUG_NO_OPT (1 << 3) #define GALLIVM_DEBUG_PERF (1 << 4) #define GALLIVM_DEBUG_NO_BRILINEAR (1 << 5) +#define GALLIVM_DEBUG_GC (1 << 6) #ifdef DEBUG diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index a2cee199a01..a9c9c7af10c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -34,6 +34,7 @@ #include "util/u_debug.h" #include "util/u_memory.h" +#include "lp_bld_init.h" #include "lp_bld_type.h" #include "lp_bld_flow.h" @@ -51,25 +52,25 @@ * be used elsewhere. 
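 *
 * Example (hypothetical caller; "gallivm" is an initialized gallivm_state):
 *
 *    LLVMBasicBlockRef blk = lp_build_insert_new_block(gallivm, "my_block");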
*/ LLVMBasicBlockRef -lp_build_insert_new_block(LLVMBuilderRef builder, const char *name) +lp_build_insert_new_block(struct gallivm_state *gallivm, const char *name) { LLVMBasicBlockRef current_block; LLVMBasicBlockRef next_block; LLVMBasicBlockRef new_block; /* get current basic block */ - current_block = LLVMGetInsertBlock(builder); + current_block = LLVMGetInsertBlock(gallivm->builder); /* check if there's another block after this one */ next_block = LLVMGetNextBasicBlock(current_block); if (next_block) { /* insert the new block before the next block */ - new_block = LLVMInsertBasicBlock(next_block, name); + new_block = LLVMInsertBasicBlockInContext(gallivm->context, next_block, name); } else { /* append new block after current block */ LLVMValueRef function = LLVMGetBasicBlockParent(current_block); - new_block = LLVMAppendBasicBlock(function, name); + new_block = LLVMAppendBasicBlockInContext(gallivm->context, function, name); } return new_block; @@ -82,12 +83,11 @@ lp_build_insert_new_block(LLVMBuilderRef builder, const char *name) */ void lp_build_flow_skip_begin(struct lp_build_skip_context *skip, - LLVMBuilderRef builder) + struct gallivm_state *gallivm) { - skip->builder = builder; - + skip->gallivm = gallivm; /* create new basic block */ - skip->block = lp_build_insert_new_block(skip->builder, "skip"); + skip->block = lp_build_insert_new_block(gallivm, "skip"); } @@ -101,12 +101,12 @@ lp_build_flow_skip_cond_break(struct lp_build_skip_context *skip, { LLVMBasicBlockRef new_block; - new_block = lp_build_insert_new_block(skip->builder, ""); + new_block = lp_build_insert_new_block(skip->gallivm, ""); /* if cond is true, goto skip->block, else goto new_block */ - LLVMBuildCondBr(skip->builder, cond, skip->block, new_block); + LLVMBuildCondBr(skip->gallivm->builder, cond, skip->block, new_block); - LLVMPositionBuilderAtEnd(skip->builder, new_block); + LLVMPositionBuilderAtEnd(skip->gallivm->builder, new_block); } @@ -114,8 +114,8 @@ void lp_build_flow_skip_end(struct lp_build_skip_context *skip) { /* goto block */ - LLVMBuildBr(skip->builder, skip->block); - LLVMPositionBuilderAtEnd(skip->builder, skip->block); + LLVMBuildBr(skip->gallivm->builder, skip->block); + LLVMPositionBuilderAtEnd(skip->gallivm->builder, skip->block); } @@ -125,7 +125,7 @@ lp_build_flow_skip_end(struct lp_build_skip_context *skip) void lp_build_mask_check(struct lp_build_mask_context *mask) { - LLVMBuilderRef builder = mask->skip.builder; + LLVMBuilderRef builder = mask->skip.gallivm->builder; LLVMValueRef value; LLVMValueRef cond; @@ -152,27 +152,27 @@ lp_build_mask_check(struct lp_build_mask_context *mask) */ void lp_build_mask_begin(struct lp_build_mask_context *mask, - LLVMBuilderRef builder, + struct gallivm_state *gallivm, struct lp_type type, LLVMValueRef value) { memset(mask, 0, sizeof *mask); - mask->reg_type = LLVMIntType(type.width * type.length); - mask->var = lp_build_alloca(builder, - lp_build_int_vec_type(type), + mask->reg_type = LLVMIntTypeInContext(gallivm->context, type.width * type.length); + mask->var = lp_build_alloca(gallivm, + lp_build_int_vec_type(gallivm, type), "execution_mask"); - LLVMBuildStore(builder, value, mask->var); + LLVMBuildStore(gallivm->builder, value, mask->var); - lp_build_flow_skip_begin(&mask->skip, builder); + lp_build_flow_skip_begin(&mask->skip, gallivm); } LLVMValueRef lp_build_mask_value(struct lp_build_mask_context *mask) { - return LLVMBuildLoad(mask->skip.builder, mask->var, ""); + return LLVMBuildLoad(mask->skip.gallivm->builder, mask->var, ""); } @@ -185,10 
+185,10 @@ void lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef value) { - value = LLVMBuildAnd(mask->skip.builder, + value = LLVMBuildAnd(mask->skip.gallivm->builder, lp_build_mask_value(mask), value, ""); - LLVMBuildStore(mask->skip.builder, value, mask->var); + LLVMBuildStore(mask->skip.gallivm->builder, value, mask->var); } @@ -205,13 +205,17 @@ lp_build_mask_end(struct lp_build_mask_context *mask) void -lp_build_loop_begin(LLVMBuilderRef builder, - LLVMValueRef start, - struct lp_build_loop_state *state) +lp_build_loop_begin(struct lp_build_loop_state *state, + struct gallivm_state *gallivm, + LLVMValueRef start) + { - state->block = lp_build_insert_new_block(builder, "loop_begin"); + LLVMBuilderRef builder = gallivm->builder; + + state->block = lp_build_insert_new_block(gallivm, "loop_begin"); - state->counter_var = lp_build_alloca(builder, LLVMTypeOf(start), "loop_counter"); + state->counter_var = lp_build_alloca(gallivm, LLVMTypeOf(start), "loop_counter"); + state->gallivm = gallivm; LLVMBuildStore(builder, start, state->counter_var); @@ -224,12 +228,12 @@ lp_build_loop_begin(LLVMBuilderRef builder, void -lp_build_loop_end_cond(LLVMBuilderRef builder, +lp_build_loop_end_cond(struct lp_build_loop_state *state, LLVMValueRef end, LLVMValueRef step, - LLVMIntPredicate llvm_cond, - struct lp_build_loop_state *state) + LLVMIntPredicate llvm_cond) { + LLVMBuilderRef builder = state->gallivm->builder; LLVMValueRef next; LLVMValueRef cond; LLVMBasicBlockRef after_block; @@ -243,7 +247,7 @@ lp_build_loop_end_cond(LLVMBuilderRef builder, cond = LLVMBuildICmp(builder, llvm_cond, next, end, ""); - after_block = lp_build_insert_new_block(builder, "loop_end"); + after_block = lp_build_insert_new_block(state->gallivm, "loop_end"); LLVMBuildCondBr(builder, cond, after_block, state->block); @@ -254,12 +258,11 @@ lp_build_loop_end_cond(LLVMBuilderRef builder, void -lp_build_loop_end(LLVMBuilderRef builder, +lp_build_loop_end(struct lp_build_loop_state *state, LLVMValueRef end, - LLVMValueRef step, - struct lp_build_loop_state *state) + LLVMValueRef step) { - lp_build_loop_end_cond(builder, end, step, LLVMIntNE, state); + lp_build_loop_end_cond(state, end, step, LLVMIntNE); } @@ -296,24 +299,27 @@ lp_build_loop_end(LLVMBuilderRef builder, */ void lp_build_if(struct lp_build_if_state *ifthen, - LLVMBuilderRef builder, + struct gallivm_state *gallivm, LLVMValueRef condition) { - LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); + LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder); memset(ifthen, 0, sizeof *ifthen); - ifthen->builder = builder; + ifthen->gallivm = gallivm; ifthen->condition = condition; ifthen->entry_block = block; /* create endif/merge basic block for the phi functions */ - ifthen->merge_block = lp_build_insert_new_block(builder, "endif-block"); + ifthen->merge_block = lp_build_insert_new_block(gallivm, "endif-block"); /* create/insert true_block before merge_block */ - ifthen->true_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-true-block"); + ifthen->true_block = + LLVMInsertBasicBlockInContext(gallivm->context, + ifthen->merge_block, + "if-true-block"); /* successive code goes into the true block */ - LLVMPositionBuilderAtEnd(builder, ifthen->true_block); + LLVMPositionBuilderAtEnd(gallivm->builder, ifthen->true_block); } @@ -323,14 +329,19 @@ lp_build_if(struct lp_build_if_state *ifthen, void lp_build_else(struct lp_build_if_state *ifthen) { + LLVMBuilderRef builder = ifthen->gallivm->builder; + /* Append an unconditional Br(anch) 
instruction on the true_block */ - LLVMBuildBr(ifthen->builder, ifthen->merge_block); + LLVMBuildBr(builder, ifthen->merge_block); /* create/insert false_block before the merge block */ - ifthen->false_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-false-block"); + ifthen->false_block = + LLVMInsertBasicBlockInContext(ifthen->gallivm->context, + ifthen->merge_block, + "if-false-block"); /* successive code goes into the else block */ - LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->false_block); + LLVMPositionBuilderAtEnd(builder, ifthen->false_block); } @@ -340,28 +351,30 @@ lp_build_else(struct lp_build_if_state *ifthen) void lp_build_endif(struct lp_build_if_state *ifthen) { + LLVMBuilderRef builder = ifthen->gallivm->builder; + /* Insert branch to the merge block from current block */ - LLVMBuildBr(ifthen->builder, ifthen->merge_block); + LLVMBuildBr(builder, ifthen->merge_block); /* * Now patch in the various branch instructions. */ /* Insert the conditional branch instruction at the end of entry_block */ - LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->entry_block); + LLVMPositionBuilderAtEnd(builder, ifthen->entry_block); if (ifthen->false_block) { /* we have an else clause */ - LLVMBuildCondBr(ifthen->builder, ifthen->condition, + LLVMBuildCondBr(builder, ifthen->condition, ifthen->true_block, ifthen->false_block); } else { /* no else clause */ - LLVMBuildCondBr(ifthen->builder, ifthen->condition, + LLVMBuildCondBr(builder, ifthen->condition, ifthen->true_block, ifthen->merge_block); } /* Resume building code at end of the ifthen->merge_block */ - LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->merge_block); + LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); } @@ -381,15 +394,16 @@ lp_build_endif(struct lp_build_if_state *ifthen) * - http://www.llvm.org/docs/tutorial/OCamlLangImpl7.html#memory */ LLVMValueRef -lp_build_alloca(LLVMBuilderRef builder, +lp_build_alloca(struct gallivm_state *gallivm, LLVMTypeRef type, const char *name) { + LLVMBuilderRef builder = gallivm->builder; LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); LLVMValueRef function = LLVMGetBasicBlockParent(current_block); LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); - LLVMBuilderRef first_builder = LLVMCreateBuilder(); + LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(gallivm->context); LLVMValueRef res; if (first_instr) { @@ -422,16 +436,17 @@ lp_build_alloca(LLVMBuilderRef builder, * - http://www.llvm.org/docs/tutorial/OCamlLangImpl7.html#memory */ LLVMValueRef -lp_build_array_alloca(LLVMBuilderRef builder, +lp_build_array_alloca(struct gallivm_state *gallivm, LLVMTypeRef type, LLVMValueRef count, const char *name) { + LLVMBuilderRef builder = gallivm->builder; LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); LLVMValueRef function = LLVMGetBasicBlockParent(current_block); LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); - LLVMBuilderRef first_builder = LLVMCreateBuilder(); + LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(gallivm->context); LLVMValueRef res; if (first_instr) { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index e729ee6eaac..3cd5a9f42a5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -47,7 +47,7 @@ struct lp_type; */ struct lp_build_skip_context { 
- LLVMBuilderRef builder; + struct gallivm_state *gallivm; /** Block to skip to */ LLVMBasicBlockRef block; @@ -55,7 +55,7 @@ struct lp_build_skip_context void lp_build_flow_skip_begin(struct lp_build_skip_context *ctx, - LLVMBuilderRef builder); + struct gallivm_state *gallivm); void lp_build_flow_skip_cond_break(struct lp_build_skip_context *ctx, @@ -77,7 +77,7 @@ struct lp_build_mask_context void lp_build_mask_begin(struct lp_build_mask_context *mask, - LLVMBuilderRef builder, + struct gallivm_state *gallivm, struct lp_type type, LLVMValueRef value); @@ -107,31 +107,28 @@ lp_build_mask_end(struct lp_build_mask_context *mask); */ struct lp_build_loop_state { - LLVMBasicBlockRef block; - LLVMValueRef counter_var; - LLVMValueRef counter; + LLVMBasicBlockRef block; + LLVMValueRef counter_var; + LLVMValueRef counter; + struct gallivm_state *gallivm; }; void -lp_build_loop_begin(LLVMBuilderRef builder, - LLVMValueRef start, - struct lp_build_loop_state *state); - +lp_build_loop_begin(struct lp_build_loop_state *state, + struct gallivm_state *gallivm, + LLVMValueRef start); void -lp_build_loop_end(LLVMBuilderRef builder, +lp_build_loop_end(struct lp_build_loop_state *state, LLVMValueRef end, - LLVMValueRef step, - struct lp_build_loop_state *state); + LLVMValueRef step); void -lp_build_loop_end_cond(LLVMBuilderRef builder, +lp_build_loop_end_cond(struct lp_build_loop_state *state, LLVMValueRef end, LLVMValueRef step, - LLVMIntPredicate cond, - struct lp_build_loop_state *state); - + LLVMIntPredicate cond); @@ -140,7 +137,7 @@ lp_build_loop_end_cond(LLVMBuilderRef builder, */ struct lp_build_if_state { - LLVMBuilderRef builder; + struct gallivm_state *gallivm; LLVMValueRef condition; LLVMBasicBlockRef entry_block; LLVMBasicBlockRef true_block; @@ -151,7 +148,7 @@ struct lp_build_if_state void lp_build_if(struct lp_build_if_state *ctx, - LLVMBuilderRef builder, + struct gallivm_state *gallivm, LLVMValueRef condition); void @@ -161,15 +158,15 @@ void lp_build_endif(struct lp_build_if_state *ctx); LLVMBasicBlockRef -lp_build_insert_new_block(LLVMBuilderRef builder, const char *name); +lp_build_insert_new_block(struct gallivm_state *gallivm, const char *name); LLVMValueRef -lp_build_alloca(LLVMBuilderRef builder, +lp_build_alloca(struct gallivm_state *gallivm, LLVMTypeRef type, const char *name); LLVMValueRef -lp_build_array_alloca(LLVMBuilderRef builder, +lp_build_array_alloca(struct gallivm_state *gallivm, LLVMTypeRef type, LLVMValueRef count, const char *name); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index 60e22d727ad..04142d905b1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -35,6 +35,7 @@ */ #include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_init.h" #include "pipe/p_format.h" @@ -53,12 +54,12 @@ lp_build_format_swizzle_aos(const struct util_format_description *desc, LLVMValueRef unswizzled); LLVMValueRef -lp_build_pack_rgba_aos(LLVMBuilderRef builder, +lp_build_pack_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *desc, LLVMValueRef rgba); LLVMValueRef -lp_build_fetch_rgba_aos(LLVMBuilderRef builder, +lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type type, LLVMValueRef base_ptr, @@ -78,20 +79,20 @@ lp_build_format_swizzle_soa(const struct util_format_description *format_desc, LLVMValueRef swizzled_out[4]); void -lp_build_unpack_rgba_soa(LLVMBuilderRef builder, 
+lp_build_unpack_rgba_soa(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type type, LLVMValueRef packed, LLVMValueRef rgba_out[4]); void -lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, +lp_build_rgba8_to_f32_soa(struct gallivm_state *gallivm, struct lp_type dst_type, LLVMValueRef packed, LLVMValueRef *rgba); void -lp_build_fetch_rgba_soa(LLVMBuilderRef builder, +lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type type, LLVMValueRef base_ptr, @@ -106,7 +107,7 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, LLVMValueRef -lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder, +lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *format_desc, unsigned n, LLVMValueRef base_ptr, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index 6b9189e1da5..82ab19eda14 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -36,6 +36,7 @@ #include "util/u_format.h" #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_pointer.h" #include "util/u_string.h" #include "lp_bld_arit.h" @@ -145,10 +146,11 @@ format_matches_type(const struct util_format_description *desc, * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector. */ static INLINE LLVMValueRef -lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder, +lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *desc, LLVMValueRef packed) { + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef shifted, casted, scaled, masked; LLVMValueRef shifts[4]; LLVMValueRef masks[4]; @@ -167,21 +169,21 @@ lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder, /* Do the intermediate integer computations with 32bit integers since it * matches floating point size */ - assert (LLVMTypeOf(packed) == LLVMInt32Type()); + assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context)); /* Broadcast the packed value to all four channels * before: packed = BGRA * after: packed = {BGRA, BGRA, BGRA, BGRA} */ packed = LLVMBuildInsertElement(builder, - LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)), + LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), packed, - LLVMConstNull(LLVMInt32Type()), + LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)), ""); packed = LLVMBuildShuffleVector(builder, packed, - LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)), - LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)), + LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), + LLVMConstNull(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), ""); /* Initialize vector constants */ @@ -194,9 +196,9 @@ lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder, unsigned bits = desc->channel[i].size; if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { - shifts[i] = LLVMGetUndef(LLVMInt32Type()); - masks[i] = LLVMConstNull(LLVMInt32Type()); - scales[i] = LLVMConstNull(LLVMFloatType()); + shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); + masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)); + scales[i] = LLVMConstNull(LLVMFloatTypeInContext(gallivm->context)); } else { unsigned long long mask = (1ULL << bits) - 1; @@ -207,15 +209,15 @@ lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder, needs_uitofp = TRUE; } - shifts[i] = 
LLVMConstInt(LLVMInt32Type(), shift, 0); - masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0); + shifts[i] = lp_build_const_int32(gallivm, shift); + masks[i] = lp_build_const_int32(gallivm, mask); if (desc->channel[i].normalized) { - scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask); + scales[i] = lp_build_const_float(gallivm, 1.0 / mask); normalized = TRUE; } else - scales[i] = LLVMConstReal(LLVMFloatType(), 1.0); + scales[i] = lp_build_const_float(gallivm, 1.0); } shift += bits; @@ -230,9 +232,9 @@ lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder, if (!needs_uitofp) { /* UIToFP can't be expressed in SSE2 */ - casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), ""); + casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), ""); } else { - casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), ""); + casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), ""); } /* At this point 'casted' may be a vector of floats such as @@ -258,10 +260,11 @@ lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder, * a time is rarely if ever needed. */ LLVMValueRef -lp_build_pack_rgba_aos(LLVMBuilderRef builder, +lp_build_pack_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *desc, LLVMValueRef rgba) { + LLVMBuilderRef builder = gallivm->builder; LLVMTypeRef type; LLVMValueRef packed = NULL; LLVMValueRef swizzles[4]; @@ -276,7 +279,7 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, assert(desc->block.width == 1); assert(desc->block.height == 1); - type = LLVMIntType(desc->block.bits); + type = LLVMIntTypeInContext(gallivm->context, desc->block.bits); /* Unswizzle the color components into the source vector. 
*/ for (i = 0; i < 4; ++i) { @@ -285,13 +288,13 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, break; } if (j < 4) - swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0); + swizzles[i] = lp_build_const_int32(gallivm, j); else - swizzles[i] = LLVMGetUndef(LLVMInt32Type()); + swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); } unswizzled = LLVMBuildShuffleVector(builder, rgba, - LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)), + LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)), LLVMConstVector(swizzles, 4), ""); normalized = FALSE; @@ -300,8 +303,8 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, unsigned bits = desc->channel[i].size; if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { - shifts[i] = LLVMGetUndef(LLVMInt32Type()); - scales[i] = LLVMGetUndef(LLVMFloatType()); + shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); + scales[i] = LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context)); } else { unsigned mask = (1 << bits) - 1; @@ -309,14 +312,14 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); assert(bits < 32); - shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0); + shifts[i] = lp_build_const_int32(gallivm, shift); if (desc->channel[i].normalized) { - scales[i] = LLVMConstReal(LLVMFloatType(), mask); + scales[i] = lp_build_const_float(gallivm, mask); normalized = TRUE; } else - scales[i] = LLVMConstReal(LLVMFloatType(), 1.0); + scales[i] = lp_build_const_float(gallivm, 1.0); } shift += bits; @@ -327,14 +330,15 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, else scaled = unswizzled; - casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), ""); + casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), ""); shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), ""); /* Bitwise or all components */ for (i = 0; i < 4; ++i) { if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { - LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), ""); + LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, + lp_build_const_int32(gallivm, i), ""); if (packed) packed = LLVMBuildOr(builder, packed, component, ""); else @@ -343,7 +347,7 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, } if (!packed) - packed = LLVMGetUndef(LLVMInt32Type()); + packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); if (desc->block.bits < 32) packed = LLVMBuildTrunc(builder, packed, type, ""); @@ -364,7 +368,7 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, * \return a 4 element vector with the pixel's RGBA values. */ LLVMValueRef -lp_build_fetch_rgba_aos(LLVMBuilderRef builder, +lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type type, LLVMValueRef base_ptr, @@ -372,13 +376,14 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, LLVMValueRef i, LLVMValueRef j) { + LLVMBuilderRef builder = gallivm->builder; unsigned num_pixels = type.length / 4; struct lp_build_context bld; assert(type.length <= LP_MAX_VECTOR_LENGTH); assert(type.length % 4 == 0); - lp_build_context_init(&bld, builder, type); + lp_build_context_init(&bld, gallivm, type); /* * Trivial case @@ -397,13 +402,14 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, * scaling or converting. 
*/ - packed = lp_build_gather(builder, type.length/4, + packed = lp_build_gather(gallivm, type.length/4, format_desc->block.bits, type.width*4, base_ptr, offset); assert(format_desc->block.bits <= type.width * type.length); - packed = LLVMBuildBitCast(builder, packed, lp_build_vec_type(type), ""); + packed = LLVMBuildBitCast(gallivm->builder, packed, + lp_build_vec_type(gallivm, type), ""); return lp_build_format_swizzle_aos(format_desc, &bld, packed); } @@ -435,11 +441,12 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, for (k = 0; k < num_pixels; ++k) { LLVMValueRef packed; - packed = lp_build_gather_elem(builder, num_pixels, + packed = lp_build_gather_elem(gallivm, num_pixels, format_desc->block.bits, 32, base_ptr, offset, k); - tmps[k] = lp_build_unpack_arith_rgba_aos(builder, format_desc, + tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm, + format_desc, packed); } @@ -455,7 +462,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, __FUNCTION__, format_desc->short_name); } - lp_build_conv(builder, + lp_build_conv(gallivm, lp_float32_vec4_type(), type, tmps, num_pixels, &res, 1); @@ -476,14 +483,14 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, tmp_type.length = num_pixels * 4; tmp_type.norm = TRUE; - tmp = lp_build_fetch_subsampled_rgba_aos(builder, + tmp = lp_build_fetch_subsampled_rgba_aos(gallivm, format_desc, num_pixels, base_ptr, offset, i, j); - lp_build_conv(builder, + lp_build_conv(gallivm, tmp_type, type, &tmp, 1, &tmp, 1); @@ -505,51 +512,52 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, * or incentive to optimize. */ - LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); - char name[256]; - LLVMTypeRef i8t = LLVMInt8Type(); + LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); - LLVMTypeRef i32t = LLVMInt32Type(); + LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef function; LLVMValueRef tmp_ptr; LLVMValueRef tmp; LLVMValueRef res; unsigned k; - util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm", - format_desc->short_name); - if (gallivm_debug & GALLIVM_DEBUG_PERF) { - debug_printf("%s: falling back to %s\n", __FUNCTION__, name); + debug_printf("%s: falling back to util_format_%s_fetch_rgba_8unorm\n", + __FUNCTION__, format_desc->short_name); } /* * Declare and bind format_desc->fetch_rgba_8unorm(). 
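 *
 * Note: the fallback no longer looks this helper up by name with
 * LLVMGetNamedFunction/LLVMAddGlobalMapping; the address of the C fetch
 * routine is baked into the IR as a constant pointer and bitcast to the
 * matching function pointer type right before the call (see below).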
*/ - function = LLVMGetNamedFunction(module, name); - if (!function) { + { + /* + * Function to call looks like: + * fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) + */ LLVMTypeRef ret_type; LLVMTypeRef arg_types[4]; LLVMTypeRef function_type; - ret_type = LLVMVoidType(); + ret_type = LLVMVoidTypeInContext(gallivm->context); arg_types[0] = pi8t; arg_types[1] = pi8t; - arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8); - function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); - function = LLVMAddFunction(module, name, function_type); - - LLVMSetFunctionCallConv(function, LLVMCCallConv); - LLVMSetLinkage(function, LLVMExternalLinkage); - - assert(LLVMIsDeclaration(function)); - - LLVMAddGlobalMapping(lp_build_engine, function, - func_to_pointer((func_pointer)format_desc->fetch_rgba_8unorm)); + arg_types[2] = i32t; + arg_types[3] = i32t; + function_type = LLVMFunctionType(ret_type, arg_types, + Elements(arg_types), 0); + + /* make const pointer for the C fetch_rgba_8unorm function */ + function = lp_build_const_int_pointer(gallivm, + func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm)); + + /* cast the callee pointer to the function's type */ + function = LLVMBuildBitCast(builder, function, + LLVMPointerType(function_type, 0), + "cast callee"); } - tmp_ptr = lp_build_alloca(builder, i32t, ""); + tmp_ptr = lp_build_alloca(gallivm, i32t, ""); res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels)); @@ -559,11 +567,11 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, */ for (k = 0; k < num_pixels; ++k) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0); + LLVMValueRef index = lp_build_const_int32(gallivm, k); LLVMValueRef args[4]; args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, ""); - args[1] = lp_build_gather_elem_ptr(builder, num_pixels, + args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels, base_ptr, offset, k); if (num_pixels == 1) { @@ -608,51 +616,58 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, * or incentive to optimize. */ - LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); - char name[256]; - LLVMTypeRef f32t = LLVMFloatType(); + LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context); LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4); LLVMTypeRef pf32t = LLVMPointerType(f32t, 0); + LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); + LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef function; LLVMValueRef tmp_ptr; LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; LLVMValueRef res; unsigned k; - util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", - format_desc->short_name); - if (gallivm_debug & GALLIVM_DEBUG_PERF) { - debug_printf("%s: falling back to %s\n", __FUNCTION__, name); + debug_printf("%s: falling back to util_format_%s_fetch_rgba_float\n", + __FUNCTION__, format_desc->short_name); } /* * Declare and bind format_desc->fetch_rgba_float(). 
*/ - function = LLVMGetNamedFunction(module, name); - if (!function) { + { + /* + * Function to call looks like: + * fetch(float *dst, const uint8_t *src, unsigned i, unsigned j) + */ LLVMTypeRef ret_type; LLVMTypeRef arg_types[4]; LLVMTypeRef function_type; - ret_type = LLVMVoidType(); + ret_type = LLVMVoidTypeInContext(gallivm->context); arg_types[0] = pf32t; - arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0); - arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8); - function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); - function = LLVMAddFunction(module, name, function_type); + arg_types[1] = pi8t; + arg_types[2] = i32t; + arg_types[3] = i32t; + function_type = LLVMFunctionType(ret_type, arg_types, + Elements(arg_types), 0); - LLVMSetFunctionCallConv(function, LLVMCCallConv); - LLVMSetLinkage(function, LLVMExternalLinkage); + /* Note: we're using this casting here instead of LLVMAddGlobalMapping() + * to work around a bug in LLVM 2.6, and for efficiency/simplicity. + */ - assert(LLVMIsDeclaration(function)); + /* make const pointer for the C fetch_rgba_float function */ + function = lp_build_const_int_pointer(gallivm, + func_to_pointer((func_pointer) format_desc->fetch_rgba_float)); - LLVMAddGlobalMapping(lp_build_engine, function, - func_to_pointer((func_pointer)format_desc->fetch_rgba_float)); + /* cast the callee pointer to the function's type */ + function = LLVMBuildBitCast(builder, function, + LLVMPointerType(function_type, 0), + "cast callee"); } - tmp_ptr = lp_build_alloca(builder, f32x4t, ""); + tmp_ptr = lp_build_alloca(gallivm, f32x4t, ""); /* * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result @@ -663,7 +678,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, LLVMValueRef args[4]; args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, ""); - args[1] = lp_build_gather_elem_ptr(builder, num_pixels, + args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels, base_ptr, offset, k); if (num_pixels == 1) { @@ -671,7 +686,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, args[3] = j; } else { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0); + LLVMValueRef index = lp_build_const_int32(gallivm, k); args[2] = LLVMBuildExtractElement(builder, i, index, ""); args[3] = LLVMBuildExtractElement(builder, j, index, ""); } @@ -681,7 +696,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, tmps[k] = LLVMBuildLoad(builder, tmp_ptr, ""); } - lp_build_conv(builder, + lp_build_conv(gallivm, lp_float32_vec4_type(), type, tmps, num_pixels, &res, 1); @@ -690,5 +705,5 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, } assert(0); - return lp_build_undef(type); + return lp_build_undef(gallivm, type); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index ce7e54afc76..0a57b3ce794 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -97,12 +97,13 @@ lp_build_format_swizzle_soa(const struct util_format_description *format_desc, * \param rgba_out returns the SoA R,G,B,A vectors */ void -lp_build_unpack_rgba_soa(LLVMBuilderRef builder, +lp_build_unpack_rgba_soa(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type type, LLVMValueRef packed, LLVMValueRef rgba_out[4]) { + LLVMBuilderRef builder = gallivm->builder; struct lp_build_context bld; LLVMValueRef inputs[4]; unsigned start; @@ -116,7 +117,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, 
assert(type.floating); assert(type.width == 32); - lp_build_context_init(&bld, builder, type); + lp_build_context_init(&bld, gallivm, type); /* Decode the input vector components */ start = 0; @@ -129,7 +130,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, switch(format_desc->channel[chan].type) { case UTIL_FORMAT_TYPE_VOID: - input = lp_build_undef(type); + input = lp_build_undef(gallivm, type); break; case UTIL_FORMAT_TYPE_UNSIGNED: @@ -138,7 +139,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, */ if (start) { - input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(type, start), ""); + input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), ""); } /* @@ -147,7 +148,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, if (stop < format_desc->block.bits) { unsigned mask = ((unsigned long long)1 << width) - 1; - input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(type, mask), ""); + input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), ""); } /* @@ -156,14 +157,15 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, if (type.floating) { if(format_desc->channel[chan].normalized) - input = lp_build_unsigned_norm_to_float(builder, width, type, input); + input = lp_build_unsigned_norm_to_float(gallivm, width, type, input); else - input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), ""); + input = LLVMBuildSIToFP(builder, input, + lp_build_vec_type(gallivm, type), ""); } else { /* FIXME */ assert(0); - input = lp_build_undef(type); + input = lp_build_undef(gallivm, type); } break; @@ -175,7 +177,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, if (stop < type.width) { unsigned bits = type.width - stop; - LLVMValueRef bits_val = lp_build_const_int_vec(type, bits); + LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits); input = LLVMBuildShl(builder, input, bits_val, ""); } @@ -185,7 +187,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, if (format_desc->channel[chan].size < type.width) { unsigned bits = type.width - format_desc->channel[chan].size; - LLVMValueRef bits_val = lp_build_const_int_vec(type, bits); + LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits); input = LLVMBuildAShr(builder, input, bits_val, ""); } @@ -194,17 +196,17 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, */ if (type.floating) { - input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), ""); + input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), ""); if (format_desc->channel[chan].normalized) { double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1); - LLVMValueRef scale_val = lp_build_const_vec(type, scale); + LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale); input = LLVMBuildFMul(builder, input, scale_val, ""); } } else { /* FIXME */ assert(0); - input = lp_build_undef(type); + input = lp_build_undef(gallivm, type); } break; @@ -214,32 +216,32 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, assert(start == 0); assert(stop == 32); assert(type.width == 32); - input = LLVMBuildBitCast(builder, input, lp_build_vec_type(type), ""); + input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), ""); } else { /* FIXME */ assert(0); - input = lp_build_undef(type); + input = lp_build_undef(gallivm, type); } break; case UTIL_FORMAT_TYPE_FIXED: if (type.floating) { double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1); - LLVMValueRef scale_val = lp_build_const_vec(type, scale); - input = 
LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), ""); + LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale); + input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), ""); input = LLVMBuildFMul(builder, input, scale_val, ""); } else { /* FIXME */ assert(0); - input = lp_build_undef(type); + input = lp_build_undef(gallivm, type); } break; default: assert(0); - input = lp_build_undef(type); + input = lp_build_undef(gallivm, type); break; } @@ -253,16 +255,17 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, void -lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, +lp_build_rgba8_to_f32_soa(struct gallivm_state *gallivm, struct lp_type dst_type, LLVMValueRef packed, LLVMValueRef *rgba) { - LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff); + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff); unsigned chan; packed = LLVMBuildBitCast(builder, packed, - lp_build_int_vec_type(dst_type), ""); + lp_build_int_vec_type(gallivm, dst_type), ""); /* Decode the input vector components */ for (chan = 0; chan < 4; ++chan) { @@ -274,12 +277,12 @@ lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, if (start) input = LLVMBuildLShr(builder, input, - lp_build_const_int_vec(dst_type, start), ""); + lp_build_const_int_vec(gallivm, dst_type, start), ""); if (stop < 32) input = LLVMBuildAnd(builder, input, mask, ""); - input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input); + input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input); rgba[chan] = input; } @@ -303,7 +306,7 @@ lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, * be in [0, block_width-1] and j will be in [0, block_height-1]. */ void -lp_build_fetch_rgba_soa(LLVMBuilderRef builder, +lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type type, LLVMValueRef base_ptr, @@ -312,6 +315,7 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, LLVMValueRef j, LLVMValueRef rgba_out[4]) { + LLVMBuilderRef builder = gallivm->builder; if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || @@ -334,7 +338,7 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, * gather the texels from the texture * Ex: packed = {BGRA, BGRA, BGRA, BGRA}. 
*/ - packed = lp_build_gather(builder, + packed = lp_build_gather(gallivm, type.length, format_desc->block.bits, type.width, @@ -343,7 +347,7 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, /* * convert texels to float rgba */ - lp_build_unpack_rgba_soa(builder, + lp_build_unpack_rgba_soa(gallivm, format_desc, type, packed, rgba_out); @@ -364,10 +368,10 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, tmp_type.length = type.length * 4; tmp_type.norm = TRUE; - tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type, + tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, base_ptr, offset, i, j); - lp_build_rgba8_to_f32_soa(builder, + lp_build_rgba8_to_f32_soa(gallivm, type, tmp, rgba_out); @@ -397,23 +401,24 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, tmp_type.length = 4; for (chan = 0; chan < 4; ++chan) { - rgba_out[chan] = lp_build_undef(type); + rgba_out[chan] = lp_build_undef(gallivm, type); } /* loop over number of pixels */ for(k = 0; k < type.length; ++k) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0); + LLVMValueRef index = lp_build_const_int32(gallivm, k); LLVMValueRef offset_elem; LLVMValueRef i_elem, j_elem; LLVMValueRef tmp; - offset_elem = LLVMBuildExtractElement(builder, offset, index, ""); + offset_elem = LLVMBuildExtractElement(builder, offset, + index, ""); i_elem = LLVMBuildExtractElement(builder, i, index, ""); j_elem = LLVMBuildExtractElement(builder, j, index, ""); /* Get a single float[4]={R,G,B,A} pixel */ - tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type, + tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, base_ptr, offset_elem, i_elem, j_elem); @@ -422,7 +427,7 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, * position = 'index'. */ for (chan = 0; chan < 4; ++chan) { - LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0), + LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan), tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, ""); rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan], tmp_chan, index, ""); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c index 2bce2895551..cdf1956c093 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c @@ -43,6 +43,7 @@ #include "lp_bld_conv.h" #include "lp_bld_gather.h" #include "lp_bld_format.h" +#include "lp_bld_init.h" #include "lp_bld_logic.h" /** @@ -51,7 +52,7 @@ * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) */ static void -uyvy_to_yuv_soa(LLVMBuilderRef builder, +uyvy_to_yuv_soa(struct gallivm_state *gallivm, unsigned n, LLVMValueRef packed, LLVMValueRef i, @@ -59,6 +60,7 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder, LLVMValueRef *u, LLVMValueRef *v) { + LLVMBuilderRef builder = gallivm->builder; struct lp_type type; LLVMValueRef mask; @@ -86,25 +88,25 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder, LLVMValueRef sel, tmp, tmp2; struct lp_build_context bld32; - lp_build_context_init(&bld32, builder, type); + lp_build_context_init(&bld32, gallivm, type); - tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), ""); - tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(type, 16), ""); - sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0)); + tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), ""); + tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), ""); + sel = 
lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0)); *y = lp_build_select(&bld32, sel, tmp, tmp2); } else #endif { LLVMValueRef shift; - shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), ""); - shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), ""); + shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), ""); + shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), ""); *y = LLVMBuildLShr(builder, packed, shift, ""); } *u = packed; - *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), ""); + *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), ""); - mask = lp_build_const_int_vec(type, 0xff); + mask = lp_build_const_int_vec(gallivm, type, 0xff); *y = LLVMBuildAnd(builder, *y, mask, "y"); *u = LLVMBuildAnd(builder, *u, mask, "u"); @@ -118,7 +120,7 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder, * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) */ static void -yuyv_to_yuv_soa(LLVMBuilderRef builder, +yuyv_to_yuv_soa(struct gallivm_state *gallivm, unsigned n, LLVMValueRef packed, LLVMValueRef i, @@ -126,6 +128,7 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder, LLVMValueRef *u, LLVMValueRef *v) { + LLVMBuilderRef builder = gallivm->builder; struct lp_type type; LLVMValueRef mask; @@ -153,23 +156,23 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder, LLVMValueRef sel, tmp; struct lp_build_context bld32; - lp_build_context_init(&bld32, builder, type); + lp_build_context_init(&bld32, gallivm, type); - tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), ""); - sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0)); + tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), ""); + sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0)); *y = lp_build_select(&bld32, sel, packed, tmp); } else #endif { LLVMValueRef shift; - shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), ""); + shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), ""); *y = LLVMBuildLShr(builder, packed, shift, ""); } - *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), ""); - *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 24), ""); + *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), ""); + *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), ""); - mask = lp_build_const_int_vec(type, 0xff); + mask = lp_build_const_int_vec(gallivm, type, 0xff); *y = LLVMBuildAnd(builder, *y, mask, "y"); *u = LLVMBuildAnd(builder, *u, mask, "u"); @@ -178,11 +181,12 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder, static INLINE void -yuv_to_rgb_soa(LLVMBuilderRef builder, +yuv_to_rgb_soa(struct gallivm_state *gallivm, unsigned n, LLVMValueRef y, LLVMValueRef u, LLVMValueRef v, LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b) { + LLVMBuilderRef builder = gallivm->builder; struct lp_type type; struct lp_build_context bld; @@ -203,7 +207,7 @@ yuv_to_rgb_soa(LLVMBuilderRef builder, type.width = 32; type.length = n; - lp_build_context_init(&bld, builder, type); + lp_build_context_init(&bld, gallivm, type); assert(lp_check_value(type, y)); assert(lp_check_value(type, u)); @@ -213,17 +217,17 @@ yuv_to_rgb_soa(LLVMBuilderRef builder, * Constants */ - c0 = lp_build_const_int_vec(type, 0); - c8 = lp_build_const_int_vec(type, 8); - c16 = 
lp_build_const_int_vec(type, 16); - c128 = lp_build_const_int_vec(type, 128); - c255 = lp_build_const_int_vec(type, 255); + c0 = lp_build_const_int_vec(gallivm, type, 0); + c8 = lp_build_const_int_vec(gallivm, type, 8); + c16 = lp_build_const_int_vec(gallivm, type, 16); + c128 = lp_build_const_int_vec(gallivm, type, 128); + c255 = lp_build_const_int_vec(gallivm, type, 255); - cy = lp_build_const_int_vec(type, 298); - cug = lp_build_const_int_vec(type, -100); - cub = lp_build_const_int_vec(type, 516); - cvr = lp_build_const_int_vec(type, 409); - cvg = lp_build_const_int_vec(type, -208); + cy = lp_build_const_int_vec(gallivm, type, 298); + cug = lp_build_const_int_vec(gallivm, type, -100); + cub = lp_build_const_int_vec(gallivm, type, 516); + cvr = lp_build_const_int_vec(gallivm, type, 409); + cvg = lp_build_const_int_vec(gallivm, type, -208); /* * y -= 16; @@ -276,10 +280,11 @@ yuv_to_rgb_soa(LLVMBuilderRef builder, static LLVMValueRef -rgb_to_rgba_aos(LLVMBuilderRef builder, +rgb_to_rgba_aos(struct gallivm_state *gallivm, unsigned n, LLVMValueRef r, LLVMValueRef g, LLVMValueRef b) { + LLVMBuilderRef builder = gallivm->builder; struct lp_type type; LLVMValueRef a; LLVMValueRef rgba; @@ -298,9 +303,9 @@ rgb_to_rgba_aos(LLVMBuilderRef builder, */ r = r; - g = LLVMBuildShl(builder, g, lp_build_const_int_vec(type, 8), ""); - b = LLVMBuildShl(builder, b, lp_build_const_int_vec(type, 16), ""); - a = lp_build_const_int_vec(type, 0xff000000); + g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), ""); + b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), ""); + a = lp_build_const_int_vec(gallivm, type, 0xff000000); rgba = r; rgba = LLVMBuildOr(builder, rgba, g, ""); @@ -308,7 +313,7 @@ rgb_to_rgba_aos(LLVMBuilderRef builder, rgba = LLVMBuildOr(builder, rgba, a, ""); rgba = LLVMBuildBitCast(builder, rgba, - LLVMVectorType(LLVMInt8Type(), 4*n), ""); + LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), ""); return rgba; } @@ -318,7 +323,7 @@ rgb_to_rgba_aos(LLVMBuilderRef builder, * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS */ static LLVMValueRef -uyvy_to_rgba_aos(LLVMBuilderRef builder, +uyvy_to_rgba_aos(struct gallivm_state *gallivm, unsigned n, LLVMValueRef packed, LLVMValueRef i) @@ -327,9 +332,9 @@ uyvy_to_rgba_aos(LLVMBuilderRef builder, LLVMValueRef r, g, b; LLVMValueRef rgba; - uyvy_to_yuv_soa(builder, n, packed, i, &y, &u, &v); - yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b); - rgba = rgb_to_rgba_aos(builder, n, r, g, b); + uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v); + yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b); + rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); return rgba; } @@ -339,7 +344,7 @@ uyvy_to_rgba_aos(LLVMBuilderRef builder, * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS */ static LLVMValueRef -yuyv_to_rgba_aos(LLVMBuilderRef builder, +yuyv_to_rgba_aos(struct gallivm_state *gallivm, unsigned n, LLVMValueRef packed, LLVMValueRef i) @@ -348,9 +353,9 @@ yuyv_to_rgba_aos(LLVMBuilderRef builder, LLVMValueRef r, g, b; LLVMValueRef rgba; - yuyv_to_yuv_soa(builder, n, packed, i, &y, &u, &v); - yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b); - rgba = rgb_to_rgba_aos(builder, n, r, g, b); + yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v); + yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b); + rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); return rgba; } @@ -360,7 +365,7 @@ yuyv_to_rgba_aos(LLVMBuilderRef builder, * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS */ static LLVMValueRef 
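
For reference, the integer YUV-to-RGB conversion that yuv_to_rgb_soa() vectorizes uses the fixed-point coefficients set up above (298, -100, 516, 409, -208, with the 16/128 biases and a 0..255 clamp). A scalar C sketch, assuming the conventional BT.601 rounding (+128) and >>8 scaling for the steps the hunk does not show:

#include <stdint.h>

static uint8_t
clamp_u8(int x)
{
   return x < 0 ? 0 : (x > 255 ? 255 : (uint8_t) x);
}

/* y, u, v are raw 8-bit samples in 0..255. */
static void
yuv_to_rgb(int y, int u, int v, uint8_t *r, uint8_t *g, uint8_t *b)
{
   int c = y - 16;    /* c16 bias */
   int d = u - 128;   /* c128 bias */
   int e = v - 128;

   *r = clamp_u8((298 * c           + 409 * e + 128) >> 8);
   *g = clamp_u8((298 * c - 100 * d - 208 * e + 128) >> 8);
   *b = clamp_u8((298 * c + 516 * d           + 128) >> 8);
}

Packing into RGBA AoS then follows rgb_to_rgba_aos() below: r | g << 8 | b << 16 | 0xff000000 per pixel.
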
-rgbg_to_rgba_aos(LLVMBuilderRef builder, +rgbg_to_rgba_aos(struct gallivm_state *gallivm, unsigned n, LLVMValueRef packed, LLVMValueRef i) @@ -368,8 +373,8 @@ rgbg_to_rgba_aos(LLVMBuilderRef builder, LLVMValueRef r, g, b; LLVMValueRef rgba; - uyvy_to_yuv_soa(builder, n, packed, i, &g, &r, &b); - rgba = rgb_to_rgba_aos(builder, n, r, g, b); + uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b); + rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); return rgba; } @@ -379,7 +384,7 @@ rgbg_to_rgba_aos(LLVMBuilderRef builder, * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS */ static LLVMValueRef -grgb_to_rgba_aos(LLVMBuilderRef builder, +grgb_to_rgba_aos(struct gallivm_state *gallivm, unsigned n, LLVMValueRef packed, LLVMValueRef i) @@ -387,8 +392,8 @@ grgb_to_rgba_aos(LLVMBuilderRef builder, LLVMValueRef r, g, b; LLVMValueRef rgba; - yuyv_to_yuv_soa(builder, n, packed, i, &g, &r, &b); - rgba = rgb_to_rgba_aos(builder, n, r, g, b); + yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b); + rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); return rgba; } @@ -401,7 +406,7 @@ grgb_to_rgba_aos(LLVMBuilderRef builder, * @return a <4*n x i8> vector with the pixel RGBA values in AoS */ LLVMValueRef -lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder, +lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *format_desc, unsigned n, LLVMValueRef base_ptr, @@ -417,26 +422,26 @@ lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder, assert(format_desc->block.width == 2); assert(format_desc->block.height == 1); - packed = lp_build_gather(builder, n, 32, 32, base_ptr, offset); + packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset); (void)j; switch (format_desc->format) { case PIPE_FORMAT_UYVY: - rgba = uyvy_to_rgba_aos(builder, n, packed, i); + rgba = uyvy_to_rgba_aos(gallivm, n, packed, i); break; case PIPE_FORMAT_YUYV: - rgba = yuyv_to_rgba_aos(builder, n, packed, i); + rgba = yuyv_to_rgba_aos(gallivm, n, packed, i); break; case PIPE_FORMAT_R8G8_B8G8_UNORM: - rgba = rgbg_to_rgba_aos(builder, n, packed, i); + rgba = rgbg_to_rgba_aos(gallivm, n, packed, i); break; case PIPE_FORMAT_G8R8_G8B8_UNORM: - rgba = grgb_to_rgba_aos(builder, n, packed, i); + rgba = grgb_to_rgba_aos(gallivm, n, packed, i); break; default: assert(0); - rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8Type(), 4*n)); + rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n)); break; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.c b/src/gallium/auxiliary/gallivm/lp_bld_gather.c index d60472e0656..0dc81b1abbe 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_gather.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c @@ -31,6 +31,7 @@ #include "lp_bld_const.h" #include "lp_bld_format.h" #include "lp_bld_gather.h" +#include "lp_bld_init.h" /** @@ -39,7 +40,7 @@ * @sa lp_build_gather() */ LLVMValueRef -lp_build_gather_elem_ptr(LLVMBuilderRef builder, +lp_build_gather_elem_ptr(struct gallivm_state *gallivm, unsigned length, LLVMValueRef base_ptr, LLVMValueRef offsets, @@ -48,17 +49,17 @@ lp_build_gather_elem_ptr(LLVMBuilderRef builder, LLVMValueRef offset; LLVMValueRef ptr; - assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8Type(), 0)); + assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)); if (length == 1) { assert(i == 0); offset = offsets; } else { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - offset = LLVMBuildExtractElement(builder, offsets, index, ""); + LLVMValueRef index = 
lp_build_const_int32(gallivm, i); + offset = LLVMBuildExtractElement(gallivm->builder, offsets, index, ""); } - ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, ""); + ptr = LLVMBuildGEP(gallivm->builder, base_ptr, &offset, 1, ""); return ptr; } @@ -70,7 +71,7 @@ lp_build_gather_elem_ptr(LLVMBuilderRef builder, * @sa lp_build_gather() */ LLVMValueRef -lp_build_gather_elem(LLVMBuilderRef builder, +lp_build_gather_elem(struct gallivm_state *gallivm, unsigned length, unsigned src_width, unsigned dst_width, @@ -78,23 +79,23 @@ lp_build_gather_elem(LLVMBuilderRef builder, LLVMValueRef offsets, unsigned i) { - LLVMTypeRef src_type = LLVMIntType(src_width); + LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, src_width); LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); - LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); + LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width); LLVMValueRef ptr; LLVMValueRef res; - assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8Type(), 0)); + assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)); - ptr = lp_build_gather_elem_ptr(builder, length, base_ptr, offsets, i); - ptr = LLVMBuildBitCast(builder, ptr, src_ptr_type, ""); - res = LLVMBuildLoad(builder, ptr, ""); + ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i); + ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, ""); + res = LLVMBuildLoad(gallivm->builder, ptr, ""); assert(src_width <= dst_width); if (src_width > dst_width) - res = LLVMBuildTrunc(builder, res, dst_elem_type, ""); + res = LLVMBuildTrunc(gallivm->builder, res, dst_elem_type, ""); if (src_width < dst_width) - res = LLVMBuildZExt(builder, res, dst_elem_type, ""); + res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, ""); return res; } @@ -112,7 +113,7 @@ lp_build_gather_elem(LLVMBuilderRef builder, * @param offsets vector with offsets */ LLVMValueRef -lp_build_gather(LLVMBuilderRef builder, +lp_build_gather(struct gallivm_state *gallivm, unsigned length, unsigned src_width, unsigned dst_width, @@ -123,24 +124,24 @@ lp_build_gather(LLVMBuilderRef builder, if (length == 1) { /* Scalar */ - return lp_build_gather_elem(builder, length, + return lp_build_gather_elem(gallivm, length, src_width, dst_width, base_ptr, offsets, 0); } else { /* Vector */ - LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); + LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width); LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); unsigned i; res = LLVMGetUndef(dst_vec_type); for (i = 0; i < length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef index = lp_build_const_int32(gallivm, i); LLVMValueRef elem; - elem = lp_build_gather_elem(builder, length, + elem = lp_build_gather_elem(gallivm, length, src_width, dst_width, base_ptr, offsets, i); - res = LLVMBuildInsertElement(builder, res, elem, index, ""); + res = LLVMBuildInsertElement(gallivm->builder, res, elem, index, ""); } } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.h b/src/gallium/auxiliary/gallivm/lp_bld_gather.h index 131af8ea07e..5b041317302 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_gather.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.h @@ -34,14 +34,14 @@ LLVMValueRef -lp_build_gather_elem_ptr(LLVMBuilderRef builder, +lp_build_gather_elem_ptr(struct gallivm_state *gallivm, unsigned length, LLVMValueRef base_ptr, LLVMValueRef offsets, unsigned i); LLVMValueRef 
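
lp_build_gather() above loops over the vector lanes: pull each byte offset out of the offsets vector, GEP from the i8* base pointer, cast to the source element type, load, and insert the element into the result vector. A sketch of that per-lane gather for 32-bit elements, assuming the same LLVM 2.x-era C API (LLVMBuildGEP/LLVMBuildLoad) and a hypothetical helper name:

#include <llvm-c/Core.h>

static LLVMValueRef
gather_u32(LLVMContextRef ctx, LLVMBuilderRef builder,
           LLVMValueRef base_ptr,   /* i8* */
           LLVMValueRef offsets,    /* <n x i32>, offsets in bytes */
           unsigned n)
{
   LLVMTypeRef i32 = LLVMInt32TypeInContext(ctx);
   LLVMTypeRef i32p = LLVMPointerType(i32, 0);
   LLVMValueRef res = LLVMGetUndef(LLVMVectorType(i32, n));
   unsigned k;

   for (k = 0; k < n; ++k) {
      LLVMValueRef index = LLVMConstInt(i32, k, 0);
      LLVMValueRef offset = LLVMBuildExtractElement(builder, offsets, index, "");
      LLVMValueRef ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
      LLVMValueRef elem;

      ptr = LLVMBuildBitCast(builder, ptr, i32p, "");
      elem = LLVMBuildLoad(builder, ptr, "");
      res = LLVMBuildInsertElement(builder, res, elem, index, "");
   }
   return res;
}
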
-lp_build_gather_elem(LLVMBuilderRef builder, +lp_build_gather_elem(struct gallivm_state *gallivm, unsigned length, unsigned src_width, unsigned dst_width, @@ -50,7 +50,7 @@ lp_build_gather_elem(LLVMBuilderRef builder, unsigned i); LLVMValueRef -lp_build_gather(LLVMBuilderRef builder, +lp_build_gather(struct gallivm_state *gallivm, unsigned length, unsigned src_width, unsigned dst_width, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 0b9a6f745fb..7504cb5cf2f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -29,6 +29,8 @@ #include "pipe/p_compiler.h" #include "util/u_cpu_detect.h" #include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_simple_list.h" #include "lp_bld_debug.h" #include "lp_bld_init.h" @@ -45,6 +47,7 @@ static const struct debug_named_value lp_bld_debug_flags[] = { { "nopt", GALLIVM_DEBUG_NO_OPT, NULL }, { "perf", GALLIVM_DEBUG_PERF, NULL }, { "no_brilinear", GALLIVM_DEBUG_NO_BRILINEAR, NULL }, + { "gc", GALLIVM_DEBUG_GC, NULL }, DEBUG_NAMED_VALUE_END }; @@ -52,11 +55,7 @@ DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, #endif -LLVMModuleRef lp_build_module = NULL; -LLVMExecutionEngineRef lp_build_engine = NULL; -LLVMModuleProviderRef lp_build_provider = NULL; -LLVMTargetDataRef lp_build_target = NULL; -LLVMPassManagerRef lp_build_pass = NULL; +static boolean gallivm_initialized = FALSE; /* @@ -82,6 +81,19 @@ enum LLVM_CodeGenOpt_Level { }; +/** + * LLVM 2.6 permits only one ExecutionEngine to be created. This is it. + */ +static LLVMExecutionEngineRef GlobalEngine = NULL; + +/** + * Same gallivm state shared by all contexts. + */ +static struct gallivm_state *GlobalGallivm = NULL; + + + + extern void lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE); @@ -89,26 +101,148 @@ extern void lp_set_target_options(void); -void -lp_build_init(void) + +/** + * Create the LLVM (optimization) pass manager and install + * relevant optimization passes. + * \return TRUE for success, FALSE for failure + */ +static boolean +create_pass_manager(struct gallivm_state *gallivm) { -#ifdef DEBUG - gallivm_debug = debug_get_option_gallivm_debug(); + assert(!gallivm->passmgr); + + gallivm->passmgr = LLVMCreateFunctionPassManager(gallivm->provider); + if (!gallivm->passmgr) + return FALSE; + + LLVMAddTargetData(gallivm->target, gallivm->passmgr); + + if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { + /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, + * but there are more on SVN. + * TODO: Add more passes. + */ + LLVMAddCFGSimplificationPass(gallivm->passmgr); + + if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) { + /* For LLVM >= 2.7 and 32-bit build, use this order of passes to + * avoid generating bad code. + * Test with piglit glsl-vs-sqrt-zero test. + */ + LLVMAddConstantPropagationPass(gallivm->passmgr); + LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr); + } + else { + LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr); + LLVMAddConstantPropagationPass(gallivm->passmgr); + } + + if (util_cpu_caps.has_sse4_1) { + /* FIXME: There is a bug in this pass, whereby the combination + * of fptosi and sitofp (necessary for trunc/floor/ceil/round + * implementation) somehow becomes invalid code. + */ + LLVMAddInstructionCombiningPass(gallivm->passmgr); + } + LLVMAddGVNPass(gallivm->passmgr); + } + else { + /* We need at least this pass to prevent the backends to fail in + * unexpected ways. 
+ */ + LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr); + } + + return TRUE; +} + + +/** + * Free gallivm object's LLVM allocations, but not the gallivm object itself. + */ +static void +free_gallivm_state(struct gallivm_state *gallivm) +{ +#if HAVE_LLVM >= 0x207 /* XXX or 0x208? */ + /* This leads to crashes w/ some versions of LLVM */ + LLVMModuleRef mod; + char *error; + + if (gallivm->engine && gallivm->provider) + LLVMRemoveModuleProvider(gallivm->engine, gallivm->provider, + &mod, &error); #endif - lp_set_target_options(); +#if 0 + /* XXX this seems to crash with all versions of LLVM */ + if (gallivm->provider) + LLVMDisposeModuleProvider(gallivm->provider); +#endif - LLVMInitializeNativeTarget(); + if (gallivm->passmgr) + LLVMDisposePassManager(gallivm->passmgr); - LLVMLinkInJIT(); +#if HAVE_LLVM >= 0x207 + if (gallivm->module) + LLVMDisposeModule(gallivm->module); +#endif + +#if 0 + /* Don't free the exec engine, it's a global/singleton */ + if (gallivm->engine) + LLVMDisposeExecutionEngine(gallivm->engine); +#endif + +#if 0 + /* Don't free the TargetData, it's owned by the exec engine */ + LLVMDisposeTargetData(gallivm->target); +#endif + + if (gallivm->context) + LLVMContextDispose(gallivm->context); - if (!lp_build_module) - lp_build_module = LLVMModuleCreateWithName("gallivm"); + if (gallivm->builder) + LLVMDisposeBuilder(gallivm->builder); + + gallivm->engine = NULL; + gallivm->target = NULL; + gallivm->module = NULL; + gallivm->provider = NULL; + gallivm->passmgr = NULL; + gallivm->context = NULL; + gallivm->builder = NULL; +} - if (!lp_build_provider) - lp_build_provider = LLVMCreateModuleProviderForExistingModule(lp_build_module); - if (!lp_build_engine) { +/** + * Allocate gallivm LLVM objects. + * \return TRUE for success, FALSE for failure + */ +static boolean +init_gallivm_state(struct gallivm_state *gallivm) +{ + assert(gallivm_initialized); + assert(!gallivm->context); + assert(!gallivm->module); + assert(!gallivm->provider); + + gallivm->context = LLVMContextCreate(); + if (!gallivm->context) + goto fail; + + gallivm->module = LLVMModuleCreateWithNameInContext("gallivm", + gallivm->context); + if (!gallivm->module) + goto fail; + + gallivm->provider = + LLVMCreateModuleProviderForExistingModule(gallivm->module); + if (!gallivm->provider) + goto fail; + + if (!GlobalEngine) { + /* We can only create one LLVMExecutionEngine (w/ LLVM 2.6 anyway) */ enum LLVM_CodeGenOpt_Level optlevel; char *error = NULL; @@ -119,43 +253,153 @@ lp_build_init(void) optlevel = Default; } - if (LLVMCreateJITCompiler(&lp_build_engine, lp_build_provider, - (unsigned)optlevel, &error)) { + if (LLVMCreateJITCompiler(&GlobalEngine, gallivm->provider, + (unsigned) optlevel, &error)) { _debug_printf("%s\n", error); LLVMDisposeMessage(error); - assert(0); + goto fail; } #if defined(DEBUG) || defined(PROFILE) - lp_register_oprofile_jit_event_listener(lp_build_engine); + lp_register_oprofile_jit_event_listener(GlobalEngine); #endif } - if (!lp_build_target) - lp_build_target = LLVMGetExecutionEngineTargetData(lp_build_engine); - - if (!lp_build_pass) { - lp_build_pass = LLVMCreateFunctionPassManager(lp_build_provider); - LLVMAddTargetData(lp_build_target, lp_build_pass); - - if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { - /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, - * but there are more on SVN. 
*/ - /* TODO: Add more passes */ - LLVMAddCFGSimplificationPass(lp_build_pass); - LLVMAddPromoteMemoryToRegisterPass(lp_build_pass); - LLVMAddConstantPropagationPass(lp_build_pass); - LLVMAddInstructionCombiningPass(lp_build_pass); - LLVMAddGVNPass(lp_build_pass); - } else { - /* We need at least this pass to prevent the backends to fail in - * unexpected ways. - */ - LLVMAddPromoteMemoryToRegisterPass(lp_build_pass); + gallivm->engine = GlobalEngine; + + LLVMAddModuleProvider(gallivm->engine, gallivm->provider);//new + + gallivm->target = LLVMGetExecutionEngineTargetData(gallivm->engine); + if (!gallivm->target) + goto fail; + + if (!create_pass_manager(gallivm)) + goto fail; + + gallivm->builder = LLVMCreateBuilderInContext(gallivm->context); + if (!gallivm->builder) + goto fail; + + return TRUE; + +fail: + free_gallivm_state(gallivm); + return FALSE; +} + + +struct callback +{ + garbage_collect_callback_func func; + void *cb_data; + struct callback *prev, *next; +}; + + +/** list of all garbage collector callbacks */ +static struct callback callback_list = {NULL, NULL, NULL, NULL}; + + +/** + * Register a function with gallivm which will be called when we + * do garbage collection. + */ +void +gallivm_register_garbage_collector_callback(garbage_collect_callback_func func, + void *cb_data) +{ + struct callback *cb; + + if (!callback_list.prev) { + make_empty_list(&callback_list); + } + + /* see if already in list */ + foreach(cb, &callback_list) { + if (cb->func == func && cb->cb_data == cb_data) + return; + } + + /* add to list */ + cb = CALLOC_STRUCT(callback); + if (cb) { + cb->func = func; + cb->cb_data = cb_data; + insert_at_head(&callback_list, cb); + } +} + + +/** + * Remove a callback. + */ +void +gallivm_remove_garbage_collector_callback(garbage_collect_callback_func func, + void *cb_data) +{ + struct callback *cb; + + /* search list */ + foreach(cb, &callback_list) { + if (cb->func == func && cb->cb_data == cb_data) { + /* found, remove it */ + remove_from_list(cb); + return; } } +} + + +/** + * Call the callback functions (which are typically in the + * draw module and llvmpipe driver. + */ +static void +call_garbage_collector_callbacks(void) +{ + struct callback *cb; + foreach(cb, &callback_list) { + cb->func(cb->cb_data); + } +} + + + +/** + * Other gallium components using gallivm should call this periodically + * to let us do garbage collection (or at least try to free memory + * accumulated by the LLVM libraries). + */ +void +gallivm_garbage_collect(struct gallivm_state *gallivm) +{ + if (gallivm->context) { + if (gallivm_debug & GALLIVM_DEBUG_GC) + debug_printf("***** Doing LLVM garbage collection\n"); + + call_garbage_collector_callbacks(); + free_gallivm_state(gallivm); + init_gallivm_state(gallivm); + } +} + + +void +lp_build_init(void) +{ +#ifdef DEBUG + gallivm_debug = debug_get_option_gallivm_debug(); +#endif + + lp_set_target_options(); + + LLVMInitializeNativeTarget(); + + LLVMLinkInJIT(); util_cpu_detect(); + + gallivm_initialized = TRUE; #if 0 /* For simulating less capable machines */ @@ -166,6 +410,39 @@ lp_build_init(void) } + +/** + * Create a new gallivm_state object. + * Note that we return a singleton. + */ +struct gallivm_state * +gallivm_create(void) +{ + if (!GlobalGallivm) { + GlobalGallivm = CALLOC_STRUCT(gallivm_state); + if (GlobalGallivm) { + if (!init_gallivm_state(GlobalGallivm)) { + FREE(GlobalGallivm); + GlobalGallivm = NULL; + } + } + } + return GlobalGallivm; +} + + +/** + * Destroy a gallivm_state object. 
+ */ +void +gallivm_destroy(struct gallivm_state *gallivm) +{ + /* No-op: don't destroy the singleton */ + (void) gallivm; +} + + + /* * Hack to allow the linking of release LLVM static libraries on a debug build. * diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h index 0b4b1ca7d11..f68bf75a851 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h @@ -30,24 +30,53 @@ #define LP_BLD_INIT_H +#include "pipe/p_compiler.h" #include "lp_bld.h" #include <llvm-c/ExecutionEngine.h> -extern LLVMModuleRef lp_build_module; -extern LLVMExecutionEngineRef lp_build_engine; -extern LLVMModuleProviderRef lp_build_provider; -extern LLVMTargetDataRef lp_build_target; -extern LLVMPassManagerRef lp_build_pass; +struct gallivm_state +{ + LLVMModuleRef module; + LLVMExecutionEngineRef engine; + LLVMModuleProviderRef provider; + LLVMTargetDataRef target; + LLVMPassManagerRef passmgr; + LLVMContextRef context; + LLVMBuilderRef builder; +}; void lp_build_init(void); + extern void lp_func_delete_body(LLVMValueRef func); +void +gallivm_garbage_collect(struct gallivm_state *gallivm); + + +typedef void (*garbage_collect_callback_func)(void *cb_data); + +void +gallivm_register_garbage_collector_callback(garbage_collect_callback_func func, + void *cb_data); + +void +gallivm_remove_garbage_collector_callback(garbage_collect_callback_func func, + void *cb_data); + + +struct gallivm_state * +gallivm_create(void); + +void +gallivm_destroy(struct gallivm_state *gallivm); + + extern LLVMValueRef lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal, const char *Name); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c index 9895749d568..518a01fdb9f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c @@ -46,6 +46,7 @@ #include "util/u_debug.h" +#include "lp_bld_const.h" #include "lp_bld_intr.h" @@ -136,12 +137,13 @@ lp_build_intrinsic_binary(LLVMBuilderRef builder, LLVMValueRef -lp_build_intrinsic_map(LLVMBuilderRef builder, +lp_build_intrinsic_map(struct gallivm_state *gallivm, const char *name, LLVMTypeRef ret_type, LLVMValueRef *args, unsigned num_args) { + LLVMBuilderRef builder = gallivm->builder; LLVMTypeRef ret_elem_type = LLVMGetElementType(ret_type); unsigned n = LLVMGetVectorSize(ret_type); unsigned i, j; @@ -151,7 +153,7 @@ lp_build_intrinsic_map(LLVMBuilderRef builder, res = LLVMGetUndef(ret_type); for(i = 0; i < n; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef index = lp_build_const_int32(gallivm, i); LLVMValueRef arg_elems[LP_MAX_FUNC_ARGS]; LLVMValueRef res_elem; for(j = 0; j < num_args; ++j) @@ -165,17 +167,17 @@ lp_build_intrinsic_map(LLVMBuilderRef builder, LLVMValueRef -lp_build_intrinsic_map_unary(LLVMBuilderRef builder, +lp_build_intrinsic_map_unary(struct gallivm_state *gallivm, const char *name, LLVMTypeRef ret_type, LLVMValueRef a) { - return lp_build_intrinsic_map(builder, name, ret_type, &a, 1); + return lp_build_intrinsic_map(gallivm, name, ret_type, &a, 1); } LLVMValueRef -lp_build_intrinsic_map_binary(LLVMBuilderRef builder, +lp_build_intrinsic_map_binary(struct gallivm_state *gallivm, const char *name, LLVMTypeRef ret_type, LLVMValueRef a, @@ -186,7 +188,7 @@ lp_build_intrinsic_map_binary(LLVMBuilderRef builder, args[0] = a; args[1] = b; - return lp_build_intrinsic_map(builder, name, ret_type, args, 2); + return 
lp_build_intrinsic_map(gallivm, name, ret_type, args, 2); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h index 977f7673228..b73dd700362 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h @@ -38,6 +38,7 @@ #include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_init.h" /** @@ -77,7 +78,7 @@ lp_build_intrinsic_binary(LLVMBuilderRef builder, LLVMValueRef -lp_build_intrinsic_map(LLVMBuilderRef builder, +lp_build_intrinsic_map(struct gallivm_state *gallivm, const char *name, LLVMTypeRef ret_type, LLVMValueRef *args, @@ -85,14 +86,14 @@ lp_build_intrinsic_map(LLVMBuilderRef builder, LLVMValueRef -lp_build_intrinsic_map_unary(LLVMBuilderRef builder, +lp_build_intrinsic_map_unary(struct gallivm_state *gallivm, const char *name, LLVMTypeRef ret_type, LLVMValueRef a); LLVMValueRef -lp_build_intrinsic_map_binary(LLVMBuilderRef builder, +lp_build_intrinsic_map_binary(struct gallivm_state *gallivm, const char *name, LLVMTypeRef ret_type, LLVMValueRef a, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index 026b60ac36e..f7e6fbaff1a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -39,6 +39,7 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" +#include "lp_bld_init.h" #include "lp_bld_intr.h" #include "lp_bld_debug.h" #include "lp_bld_logic.h" @@ -70,13 +71,14 @@ * The result values will be 0 for false or ~0 for true. */ LLVMValueRef -lp_build_compare(LLVMBuilderRef builder, +lp_build_compare(struct gallivm_state *gallivm, const struct lp_type type, unsigned func, LLVMValueRef a, LLVMValueRef b) { - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMBuilderRef builder = gallivm->builder; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type); LLVMValueRef zeros = LLVMConstNull(int_vec_type); LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); LLVMValueRef cond; @@ -115,7 +117,7 @@ lp_build_compare(LLVMBuilderRef builder, if(type.width * type.length == 128) { if(type.floating && util_cpu_caps.has_sse) { /* float[4] comparison */ - LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type); LLVMValueRef args[3]; unsigned cc; boolean swap; @@ -144,7 +146,7 @@ lp_build_compare(LLVMBuilderRef builder, break; default: assert(0); - return lp_build_undef(type); + return lp_build_undef(gallivm, type); } if(swap) { @@ -156,7 +158,7 @@ lp_build_compare(LLVMBuilderRef builder, args[1] = b; } - args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0); + args[2] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), cc, 0); res = lp_build_intrinsic(builder, "llvm.x86.sse.cmp.ps", vec_type, @@ -185,7 +187,7 @@ lp_build_compare(LLVMBuilderRef builder, const char *pcmpgt; LLVMValueRef args[2]; LLVMValueRef res; - LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type); switch (type.width) { case 8: @@ -202,14 +204,14 @@ lp_build_compare(LLVMBuilderRef builder, break; default: assert(0); - return lp_build_undef(type); + return lp_build_undef(gallivm, type); } /* There are no unsigned comparison instructions. So flip the sign bit * so that the results match. 
*/ if (table[func].gt && !type.sign) { - LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef msb = lp_build_const_int_vec(gallivm, type, (unsigned long long)1 << (type.width - 1)); a = LLVMBuildXor(builder, a, msb, ""); b = LLVMBuildXor(builder, b, msb, ""); } @@ -270,7 +272,7 @@ lp_build_compare(LLVMBuilderRef builder, break; default: assert(0); - return lp_build_undef(type); + return lp_build_undef(gallivm, type); } #if HAVE_LLVM >= 0x0207 @@ -289,7 +291,7 @@ lp_build_compare(LLVMBuilderRef builder, debug_printf("%s: warning: using slow element-wise float" " vector comparison\n", __FUNCTION__); for (i = 0; i < type.length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef index = lp_build_const_int32(gallivm, i); cond = LLVMBuildFCmp(builder, op, LLVMBuildExtractElement(builder, a, index, ""), LLVMBuildExtractElement(builder, b, index, ""), @@ -326,7 +328,7 @@ lp_build_compare(LLVMBuilderRef builder, break; default: assert(0); - return lp_build_undef(type); + return lp_build_undef(gallivm, type); } #if HAVE_LLVM >= 0x0207 @@ -348,7 +350,7 @@ lp_build_compare(LLVMBuilderRef builder, } for(i = 0; i < type.length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef index = lp_build_const_int32(gallivm, i); cond = LLVMBuildICmp(builder, op, LLVMBuildExtractElement(builder, a, index, ""), LLVMBuildExtractElement(builder, b, index, ""), @@ -379,7 +381,7 @@ lp_build_cmp(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - return lp_build_compare(bld->builder, bld->type, func, a, b); + return lp_build_compare(bld->gallivm, bld->type, func, a, b); } @@ -392,6 +394,7 @@ lp_build_select_bitwise(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + LLVMBuilderRef builder = bld->gallivm->builder; struct lp_type type = bld->type; LLVMValueRef res; @@ -403,25 +406,25 @@ lp_build_select_bitwise(struct lp_build_context *bld, } if(type.floating) { - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); + a = LLVMBuildBitCast(builder, a, int_vec_type, ""); + b = LLVMBuildBitCast(builder, b, int_vec_type, ""); } - a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildAnd(builder, a, mask, ""); /* This often gets translated to PANDN, but sometimes the NOT is * pre-computed and stored in another constant. The best strategy depends * on available registers, so it is not a big deal -- hopefully LLVM does * the right decision attending the rest of the program. 
*/ - b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); + b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), ""); - res = LLVMBuildOr(bld->builder, a, b, ""); + res = LLVMBuildOr(builder, a, b, ""); if(type.floating) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); + res = LLVMBuildBitCast(builder, res, vec_type, ""); } return res; @@ -440,6 +443,8 @@ lp_build_select(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + LLVMBuilderRef builder = bld->gallivm->builder; + LLVMContextRef lc = bld->gallivm->context; struct lp_type type = bld->type; LLVMValueRef res; @@ -450,8 +455,8 @@ lp_build_select(struct lp_build_context *bld, return a; if (type.length == 1) { - mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), ""); - res = LLVMBuildSelect(bld->builder, mask, a, b, ""); + mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), ""); + res = LLVMBuildSelect(builder, mask, a, b, ""); } else if (util_cpu_caps.has_sse4_1 && type.width * type.length == 128 && @@ -465,34 +470,34 @@ lp_build_select(struct lp_build_context *bld, if (type.floating && type.width == 64) { intrinsic = "llvm.x86.sse41.blendvpd"; - arg_type = LLVMVectorType(LLVMDoubleType(), 2); + arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2); } else if (type.floating && type.width == 32) { intrinsic = "llvm.x86.sse41.blendvps"; - arg_type = LLVMVectorType(LLVMFloatType(), 4); + arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4); } else { intrinsic = "llvm.x86.sse41.pblendvb"; - arg_type = LLVMVectorType(LLVMInt8Type(), 16); + arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16); } if (arg_type != bld->int_vec_type) { - mask = LLVMBuildBitCast(bld->builder, mask, arg_type, ""); + mask = LLVMBuildBitCast(builder, mask, arg_type, ""); } if (arg_type != bld->vec_type) { - a = LLVMBuildBitCast(bld->builder, a, arg_type, ""); - b = LLVMBuildBitCast(bld->builder, b, arg_type, ""); + a = LLVMBuildBitCast(builder, a, arg_type, ""); + b = LLVMBuildBitCast(builder, b, arg_type, ""); } args[0] = b; args[1] = a; args[2] = mask; - res = lp_build_intrinsic(bld->builder, intrinsic, + res = lp_build_intrinsic(builder, intrinsic, arg_type, args, Elements(args)); if (arg_type != bld->vec_type) { - res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, ""); + res = LLVMBuildBitCast(builder, res, bld->vec_type, ""); } } else { @@ -514,6 +519,7 @@ lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; const unsigned n = type.length; unsigned i, j; @@ -544,7 +550,7 @@ lp_build_select_aos(struct lp_build_context *bld, /* * Shuffle. */ - LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; for(j = 0; j < n; j += 4) @@ -553,7 +559,7 @@ lp_build_select_aos(struct lp_build_context *bld, (mask & (1 << i) ? 0 : n) + j + i, 0); - return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); + return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), ""); } else { #if 0 @@ -567,9 +573,9 @@ lp_build_select_aos(struct lp_build_context *bld, cond_vec[j + i] = LLVMConstInt(elem_type, mask & (1 << i) ? 
1 : 0, 0); - return LLVMBuildSelect(bld->builder, LLVMConstVector(cond_vec, n), a, b, ""); + return LLVMBuildSelect(builder, LLVMConstVector(cond_vec, n), a, b, ""); #else - LLVMValueRef mask_vec = lp_build_const_mask_aos(type, mask); + LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask); return lp_build_select(bld, mask_vec, a, b); #endif } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h index 141fb92058a..ef33a653682 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h @@ -47,7 +47,7 @@ struct lp_build_context; LLVMValueRef -lp_build_compare(LLVMBuilderRef builder, +lp_build_compare(struct gallivm_state *gallivm, const struct lp_type type, unsigned func, LLVMValueRef a, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index f56ddee7fd7..46dd00d8224 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -144,6 +144,7 @@ lp_set_target_options(void) llvm::UnsafeFPMath = true; #endif +#if HAVE_LLVM < 0x0209 /* * LLVM will generate MMX instructions for vectors <= 64 bits, leading to * innefficient code, and in 32bit systems, to the corruption of the FPU @@ -162,6 +163,7 @@ lp_set_target_options(void) llvm::cl::ParseCommandLineOptions(2, const_cast<char**>(options)); first = FALSE; } +#endif /* * By default LLVM adds a signal handler to output a pretty stack trace. diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index f7eb7148ab8..fde6bb594f1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -72,6 +72,7 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" +#include "lp_bld_init.h" #include "lp_bld_intr.h" #include "lp_bld_arit.h" #include "lp_bld_pack.h" @@ -81,7 +82,8 @@ * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions. */ static LLVMValueRef -lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi) +lp_build_const_unpack_shuffle(struct gallivm_state *gallivm, + unsigned n, unsigned lo_hi) { LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; unsigned i, j; @@ -92,8 +94,8 @@ lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi) /* TODO: cache results in a static table */ for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) { - elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0); - elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0); + elems[i + 0] = lp_build_const_int32(gallivm, 0 + j); + elems[i + 1] = lp_build_const_int32(gallivm, n + j); } return LLVMConstVector(elems, n); @@ -104,7 +106,7 @@ lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi) * Build shuffle vectors that match PACKxx instructions. */ static LLVMValueRef -lp_build_const_pack_shuffle(unsigned n) +lp_build_const_pack_shuffle(struct gallivm_state *gallivm, unsigned n) { LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; unsigned i; @@ -112,7 +114,7 @@ lp_build_const_pack_shuffle(unsigned n) assert(n <= LP_MAX_VECTOR_LENGTH); for(i = 0; i < n; ++i) - elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0); + elems[i] = lp_build_const_int32(gallivm, 2*i); return LLVMConstVector(elems, n); } @@ -124,7 +126,7 @@ lp_build_const_pack_shuffle(unsigned n) * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions. 
*/ LLVMValueRef -lp_build_interleave2(LLVMBuilderRef builder, +lp_build_interleave2(struct gallivm_state *gallivm, struct lp_type type, LLVMValueRef a, LLVMValueRef b, @@ -132,9 +134,9 @@ lp_build_interleave2(LLVMBuilderRef builder, { LLVMValueRef shuffle; - shuffle = lp_build_const_unpack_shuffle(type.length, lo_hi); + shuffle = lp_build_const_unpack_shuffle(gallivm, type.length, lo_hi); - return LLVMBuildShuffleVector(builder, a, b, shuffle, ""); + return LLVMBuildShuffleVector(gallivm->builder, a, b, shuffle, ""); } @@ -145,13 +147,14 @@ lp_build_interleave2(LLVMBuilderRef builder, * values themselves. */ void -lp_build_unpack2(LLVMBuilderRef builder, +lp_build_unpack2(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, LLVMValueRef src, LLVMValueRef *dst_lo, LLVMValueRef *dst_hi) { + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef msb; LLVMTypeRef dst_vec_type; @@ -162,24 +165,24 @@ lp_build_unpack2(LLVMBuilderRef builder, if(dst_type.sign && src_type.sign) { /* Replicate the sign bit in the most significant bits */ - msb = LLVMBuildAShr(builder, src, lp_build_const_int_vec(src_type, src_type.width - 1), ""); + msb = LLVMBuildAShr(builder, src, lp_build_const_int_vec(gallivm, src_type, src_type.width - 1), ""); } else /* Most significant bits always zero */ - msb = lp_build_zero(src_type); + msb = lp_build_zero(gallivm, src_type); /* Interleave bits */ #ifdef PIPE_ARCH_LITTLE_ENDIAN - *dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0); - *dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1); + *dst_lo = lp_build_interleave2(gallivm, src_type, src, msb, 0); + *dst_hi = lp_build_interleave2(gallivm, src_type, src, msb, 1); #else - *dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0); - *dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1); + *dst_lo = lp_build_interleave2(gallivm, src_type, msb, src, 0); + *dst_hi = lp_build_interleave2(gallivm, src_type, msb, src, 1); #endif /* Cast the result into the new type (twice as wide) */ - dst_vec_type = lp_build_vec_type(dst_type); + dst_vec_type = lp_build_vec_type(gallivm, dst_type); *dst_lo = LLVMBuildBitCast(builder, *dst_lo, dst_vec_type, ""); *dst_hi = LLVMBuildBitCast(builder, *dst_hi, dst_vec_type, ""); @@ -193,7 +196,7 @@ lp_build_unpack2(LLVMBuilderRef builder, * values themselves. */ void -lp_build_unpack(LLVMBuilderRef builder, +lp_build_unpack(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, LLVMValueRef src, @@ -218,7 +221,7 @@ lp_build_unpack(LLVMBuilderRef builder, tmp_type.length /= 2; for(i = num_tmps; i--; ) { - lp_build_unpack2(builder, src_type, tmp_type, dst[i], &dst[2*i + 0], &dst[2*i + 1]); + lp_build_unpack2(gallivm, src_type, tmp_type, dst[i], &dst[2*i + 0], &dst[2*i + 1]); } src_type = tmp_type; @@ -247,16 +250,17 @@ lp_build_unpack(LLVMBuilderRef builder, * lp_build_packs2 instead. 
*/ LLVMValueRef -lp_build_pack2(LLVMBuilderRef builder, +lp_build_pack2(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, LLVMValueRef lo, LLVMValueRef hi) { + LLVMBuilderRef builder = gallivm->builder; #if HAVE_LLVM < 0x0207 - LLVMTypeRef src_vec_type = lp_build_vec_type(src_type); + LLVMTypeRef src_vec_type = lp_build_vec_type(gallivm, src_type); #endif - LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type); + LLVMTypeRef dst_vec_type = lp_build_vec_type(gallivm, dst_type); LLVMValueRef shuffle; LLVMValueRef res = NULL; @@ -318,7 +322,7 @@ lp_build_pack2(LLVMBuilderRef builder, lo = LLVMBuildBitCast(builder, lo, dst_vec_type, ""); hi = LLVMBuildBitCast(builder, hi, dst_vec_type, ""); - shuffle = lp_build_const_pack_shuffle(dst_type.length); + shuffle = lp_build_const_pack_shuffle(gallivm, dst_type.length); res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, ""); @@ -334,7 +338,7 @@ lp_build_pack2(LLVMBuilderRef builder, * destination type. */ LLVMValueRef -lp_build_packs2(LLVMBuilderRef builder, +lp_build_packs2(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, LLVMValueRef lo, @@ -360,14 +364,14 @@ lp_build_packs2(LLVMBuilderRef builder, if(clamp) { struct lp_build_context bld; unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width; - LLVMValueRef dst_max = lp_build_const_int_vec(src_type, ((unsigned long long)1 << dst_bits) - 1); - lp_build_context_init(&bld, builder, src_type); + LLVMValueRef dst_max = lp_build_const_int_vec(gallivm, src_type, ((unsigned long long)1 << dst_bits) - 1); + lp_build_context_init(&bld, gallivm, src_type); lo = lp_build_min(&bld, lo, dst_max); hi = lp_build_min(&bld, hi, dst_max); /* FIXME: What about lower bound? */ } - return lp_build_pack2(builder, src_type, dst_type, lo, hi); + return lp_build_pack2(gallivm, src_type, dst_type, lo, hi); } @@ -377,13 +381,13 @@ lp_build_packs2(LLVMBuilderRef builder, * TODO: Handle saturation consistently. */ LLVMValueRef -lp_build_pack(LLVMBuilderRef builder, +lp_build_pack(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, boolean clamped, const LLVMValueRef *src, unsigned num_srcs) { - LLVMValueRef (*pack2)(LLVMBuilderRef builder, + LLVMValueRef (*pack2)(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, LLVMValueRef lo, @@ -419,7 +423,8 @@ lp_build_pack(LLVMBuilderRef builder, num_srcs /= 2; for(i = 0; i < num_srcs; ++i) - tmp[i] = pack2(builder, src_type, tmp_type, tmp[2*i + 0], tmp[2*i + 1]); + tmp[i] = pack2(gallivm, src_type, tmp_type, + tmp[2*i + 0], tmp[2*i + 1]); src_type = tmp_type; } @@ -437,12 +442,13 @@ lp_build_pack(LLVMBuilderRef builder, * intrinsics that do saturation. 
*/ void -lp_build_resize(LLVMBuilderRef builder, +lp_build_resize(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts) { + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; unsigned i; @@ -482,7 +488,7 @@ lp_build_resize(LLVMBuilderRef builder, * Register width remains constant -- use vector packing intrinsics */ - tmp[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs); + tmp[0] = lp_build_pack(gallivm, src_type, dst_type, TRUE, src, num_srcs); } else { /* @@ -490,11 +496,11 @@ lp_build_resize(LLVMBuilderRef builder, */ assert(src_type.length == dst_type.length); - tmp[0] = lp_build_undef(dst_type); + tmp[0] = lp_build_undef(gallivm, dst_type); for (i = 0; i < dst_type.length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef index = lp_build_const_int32(gallivm, i); LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, ""); - val = LLVMBuildTrunc(builder, val, lp_build_elem_type(dst_type), ""); + val = LLVMBuildTrunc(builder, val, lp_build_elem_type(gallivm, dst_type), ""); tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, ""); } } @@ -510,7 +516,7 @@ lp_build_resize(LLVMBuilderRef builder, /* * Register width remains constant -- use vector unpack intrinsics */ - lp_build_unpack(builder, src_type, dst_type, src[0], tmp, num_dsts); + lp_build_unpack(gallivm, src_type, dst_type, src[0], tmp, num_dsts); } else { /* @@ -518,15 +524,15 @@ lp_build_resize(LLVMBuilderRef builder, */ assert(src_type.length == dst_type.length); - tmp[0] = lp_build_undef(dst_type); + tmp[0] = lp_build_undef(gallivm, dst_type); for (i = 0; i < dst_type.length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef index = lp_build_const_int32(gallivm, i); LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, ""); if (src_type.sign && dst_type.sign) { - val = LLVMBuildSExt(builder, val, lp_build_elem_type(dst_type), ""); + val = LLVMBuildSExt(builder, val, lp_build_elem_type(gallivm, dst_type), ""); } else { - val = LLVMBuildZExt(builder, val, lp_build_elem_type(dst_type), ""); + val = LLVMBuildZExt(builder, val, lp_build_elem_type(gallivm, dst_type), ""); } tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, ""); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h index e947b90d164..d58da4f01b3 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h @@ -46,7 +46,7 @@ struct lp_type; LLVMValueRef -lp_build_interleave2(LLVMBuilderRef builder, +lp_build_interleave2(struct gallivm_state *gallivm, struct lp_type type, LLVMValueRef a, LLVMValueRef b, @@ -54,7 +54,7 @@ lp_build_interleave2(LLVMBuilderRef builder, void -lp_build_unpack2(LLVMBuilderRef builder, +lp_build_unpack2(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, LLVMValueRef src, @@ -63,7 +63,7 @@ lp_build_unpack2(LLVMBuilderRef builder, void -lp_build_unpack(LLVMBuilderRef builder, +lp_build_unpack(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, LLVMValueRef src, @@ -71,7 +71,7 @@ lp_build_unpack(LLVMBuilderRef builder, LLVMValueRef -lp_build_packs2(LLVMBuilderRef builder, +lp_build_packs2(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, LLVMValueRef lo, @@ -79,7 +79,7 @@ 
lp_build_packs2(LLVMBuilderRef builder, LLVMValueRef -lp_build_pack2(LLVMBuilderRef builder, +lp_build_pack2(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, LLVMValueRef lo, @@ -87,7 +87,7 @@ lp_build_pack2(LLVMBuilderRef builder, LLVMValueRef -lp_build_pack(LLVMBuilderRef builder, +lp_build_pack(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, boolean clamped, @@ -95,7 +95,7 @@ lp_build_pack(LLVMBuilderRef builder, void -lp_build_resize(LLVMBuilderRef builder, +lp_build_resize(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.c b/src/gallium/auxiliary/gallivm/lp_bld_printf.c index f418e96aff4..60cc6094f5a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_printf.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.c @@ -31,6 +31,8 @@ #include "util/u_memory.h" #include "util/u_string.h" #include "lp_bld_const.h" +#include "lp_bld_init.h" +#include "lp_bld_const.h" #include "lp_bld_printf.h" @@ -65,12 +67,14 @@ lp_get_printf_arg_count(const char *fmt) } LLVMValueRef -lp_build_const_string_variable(LLVMModuleRef module, const char *str, int len) +lp_build_const_string_variable(LLVMModuleRef module, + LLVMContextRef context, + const char *str, int len) { - LLVMValueRef string = LLVMAddGlobal(module, LLVMArrayType(LLVMInt8Type(), len + 1), ""); + LLVMValueRef string = LLVMAddGlobal(module, LLVMArrayType(LLVMInt8TypeInContext(context), len + 1), ""); LLVMSetGlobalConstant(string, TRUE); LLVMSetLinkage(string, LLVMInternalLinkage); - LLVMSetInitializer(string, LLVMConstString(str, len + 1, TRUE)); + LLVMSetInitializer(string, LLVMConstStringInContext(context, str, len + 1, TRUE)); return string; } @@ -83,15 +87,18 @@ lp_build_const_string_variable(LLVMModuleRef module, const char *str, int len) * LLVMValueRef. */ LLVMValueRef -lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...) +lp_build_printf(struct gallivm_state *gallivm, const char *fmt, ...) { va_list arglist; int i = 0; int argcount = lp_get_printf_arg_count(fmt); - LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); + LLVMBuilderRef builder = gallivm->builder; + LLVMContextRef context = gallivm->context; + LLVMModuleRef module = gallivm->module; LLVMValueRef params[50]; - LLVMValueRef fmtarg = lp_build_const_string_variable(module, fmt, strlen(fmt) + 1); - LLVMValueRef int0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef fmtarg = lp_build_const_string_variable(module, context, + fmt, strlen(fmt) + 1); + LLVMValueRef int0 = lp_build_const_int32(gallivm, 0); LLVMValueRef index[2]; LLVMValueRef func_printf = LLVMGetNamedFunction(module, "printf"); @@ -100,7 +107,7 @@ lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...) index[0] = index[1] = int0; if (!func_printf) { - LLVMTypeRef printf_type = LLVMFunctionType(LLVMIntType(32), NULL, 0, 1); + LLVMTypeRef printf_type = LLVMFunctionType(LLVMIntTypeInContext(context, 32), NULL, 0, 1); func_printf = LLVMAddFunction(module, "printf", printf_type); } @@ -113,7 +120,7 @@ lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...) 
/* printf wants doubles, so lets convert so that * we can actually print them */ if (LLVMGetTypeKind(type) == LLVMFloatTypeKind) - val = LLVMBuildFPExt(builder, val, LLVMDoubleType(), ""); + val = LLVMBuildFPExt(builder, val, LLVMDoubleTypeInContext(context), ""); params[i] = val; } va_end(arglist); @@ -127,16 +134,18 @@ lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...) * Print a float[4] vector. */ LLVMValueRef -lp_build_print_vec4(LLVMBuilderRef builder, const char *msg, LLVMValueRef vec) +lp_build_print_vec4(struct gallivm_state *gallivm, + const char *msg, LLVMValueRef vec) { + LLVMBuilderRef builder = gallivm->builder; char format[1000]; LLVMValueRef x, y, z, w; - x = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(0), ""); - y = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(1), ""); - z = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(2), ""); - w = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(3), ""); + x = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(gallivm, 0), ""); + y = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(gallivm, 1), ""); + z = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(gallivm, 2), ""); + w = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(gallivm, 3), ""); util_snprintf(format, sizeof(format), "%s %%f %%f %%f %%f\n", msg); - return lp_build_printf(builder, format, x, y, z, w); + return lp_build_printf(gallivm, format, x, y, z, w); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.h b/src/gallium/auxiliary/gallivm/lp_bld_printf.h index b6222c62ebe..f6bb8348699 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_printf.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.h @@ -31,12 +31,19 @@ #include "pipe/p_compiler.h" #include "lp_bld.h" +#include "lp_bld_init.h" -LLVMValueRef lp_build_const_string_variable(LLVMModuleRef module, const char *str, int len); -LLVMValueRef lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...); + +LLVMValueRef lp_build_const_string_variable(LLVMModuleRef module, + LLVMContextRef context, + const char *str, int len); + +LLVMValueRef lp_build_printf(struct gallivm_state *gallivm, + const char *fmt, ...); LLVMValueRef -lp_build_print_vec4(LLVMBuilderRef builder, const char *msg, LLVMValueRef vec); +lp_build_print_vec4(struct gallivm_state *gallivm, + const char *msg, LLVMValueRef vec); #endif diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c index c18c8b47100..b0a5bc0267f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c @@ -28,6 +28,7 @@ #include "lp_bld_type.h" #include "lp_bld_arit.h" +#include "lp_bld_const.h" #include "lp_bld_swizzle.h" #include "lp_bld_quad.h" @@ -81,15 +82,15 @@ LLVMValueRef lp_build_scalar_ddx(struct lp_build_context *bld, LLVMValueRef a) { - LLVMTypeRef i32t = LLVMInt32Type(); - LLVMValueRef idx_left = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_LEFT, 0); - LLVMValueRef idx_right = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_RIGHT, 0); - LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, "left"); - LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "right"); + LLVMBuilderRef builder = bld->gallivm->builder; + LLVMValueRef idx_left = lp_build_const_int32(bld->gallivm, LP_BLD_QUAD_TOP_LEFT); + LLVMValueRef idx_right = lp_build_const_int32(bld->gallivm, LP_BLD_QUAD_TOP_RIGHT); + LLVMValueRef a_left = LLVMBuildExtractElement(builder, a, idx_left, 
"left"); + LLVMValueRef a_right = LLVMBuildExtractElement(builder, a, idx_right, "right"); if (bld->type.floating) - return LLVMBuildFSub(bld->builder, a_right, a_left, "ddx"); + return LLVMBuildFSub(builder, a_right, a_left, "ddx"); else - return LLVMBuildSub(bld->builder, a_right, a_left, "ddx"); + return LLVMBuildSub(builder, a_right, a_left, "ddx"); } @@ -97,13 +98,13 @@ LLVMValueRef lp_build_scalar_ddy(struct lp_build_context *bld, LLVMValueRef a) { - LLVMTypeRef i32t = LLVMInt32Type(); - LLVMValueRef idx_top = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_LEFT, 0); - LLVMValueRef idx_bottom = LLVMConstInt(i32t, LP_BLD_QUAD_BOTTOM_LEFT, 0); - LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, "top"); - LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "bottom"); + LLVMBuilderRef builder = bld->gallivm->builder; + LLVMValueRef idx_top = lp_build_const_int32(bld->gallivm, LP_BLD_QUAD_TOP_LEFT); + LLVMValueRef idx_bottom = lp_build_const_int32(bld->gallivm, LP_BLD_QUAD_BOTTOM_LEFT); + LLVMValueRef a_top = LLVMBuildExtractElement(builder, a, idx_top, "top"); + LLVMValueRef a_bottom = LLVMBuildExtractElement(builder, a, idx_bottom, "bottom"); if (bld->type.floating) - return LLVMBuildFSub(bld->builder, a_bottom, a_top, "ddy"); + return LLVMBuildFSub(builder, a_bottom, a_top, "ddy"); else - return LLVMBuildSub(bld->builder, a_bottom, a_top, "ddy"); + return LLVMBuildSub(builder, a_bottom, a_top, "ddy"); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 844d1d935b5..8ad34598a92 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -134,7 +134,7 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, state->min_img_filter = sampler->min_img_filter; state->mag_img_filter = sampler->mag_img_filter; - if (view->last_level && sampler->max_lod > 0.0f) { + if (view->u.tex.last_level && sampler->max_lod > 0.0f) { state->min_mip_filter = sampler->min_mip_filter; } else { state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE; @@ -155,7 +155,7 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, state->apply_min_lod = 1; } - if (sampler->max_lod < (float)view->last_level) { + if (sampler->max_lod < (float)view->u.tex.last_level) { state->apply_max_lod = 1; } } @@ -190,7 +190,8 @@ lp_build_rho(struct lp_build_sample_context *bld, struct lp_build_context *float_size_bld = &bld->float_size_bld; struct lp_build_context *float_bld = &bld->float_bld; const unsigned dims = bld->dims; - LLVMTypeRef i32t = LLVMInt32Type(); + LLVMBuilderRef builder = bld->gallivm->builder; + LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0); LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0); @@ -211,21 +212,21 @@ lp_build_rho(struct lp_build_sample_context *bld, rho_x = float_size_bld->undef; rho_y = float_size_bld->undef; - rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dsdx, index0, ""); - rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dsdy, index0, ""); + rho_x = LLVMBuildInsertElement(builder, rho_x, dsdx, index0, ""); + rho_y = LLVMBuildInsertElement(builder, rho_y, dsdy, index0, ""); dtdx = ddx[1]; dtdy = ddy[1]; - rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dtdx, index1, ""); - rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dtdy, index1, ""); + rho_x = LLVMBuildInsertElement(builder, rho_x, dtdx, index1, ""); + rho_y = 
LLVMBuildInsertElement(builder, rho_y, dtdy, index1, ""); if (dims >= 3) { drdx = ddx[2]; drdy = ddy[2]; - rho_x = LLVMBuildInsertElement(bld->builder, rho_x, drdx, index2, ""); - rho_y = LLVMBuildInsertElement(bld->builder, rho_y, drdy, index2, ""); + rho_x = LLVMBuildInsertElement(builder, rho_x, drdx, index2, ""); + rho_y = LLVMBuildInsertElement(builder, rho_y, drdy, index2, ""); } } @@ -245,13 +246,13 @@ lp_build_rho(struct lp_build_sample_context *bld, if (dims >= 2) { LLVMValueRef rho_s, rho_t, rho_r; - rho_s = LLVMBuildExtractElement(bld->builder, rho_vec, index0, ""); - rho_t = LLVMBuildExtractElement(bld->builder, rho_vec, index1, ""); + rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, ""); + rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, ""); rho = lp_build_max(float_bld, rho_s, rho_t); if (dims >= 3) { - rho_r = LLVMBuildExtractElement(bld->builder, rho_vec, index0, ""); + rho_r = LLVMBuildExtractElement(builder, rho_vec, index0, ""); rho = lp_build_max(float_bld, rho, rho_r); } } @@ -304,19 +305,19 @@ lp_build_brilinear_lod(struct lp_build_context *bld, double post_offset = 1 - factor; if (0) { - lp_build_printf(bld->builder, "lod = %f\n", lod); + lp_build_printf(bld->gallivm, "lod = %f\n", lod); } lod = lp_build_add(bld, lod, - lp_build_const_vec(bld->type, pre_offset)); + lp_build_const_vec(bld->gallivm, bld->type, pre_offset)); lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart); lod_fpart = lp_build_mul(bld, lod_fpart, - lp_build_const_vec(bld->type, factor)); + lp_build_const_vec(bld->gallivm, bld->type, factor)); lod_fpart = lp_build_add(bld, lod_fpart, - lp_build_const_vec(bld->type, post_offset)); + lp_build_const_vec(bld->gallivm, bld->type, post_offset)); /* * It's not necessary to clamp lod_fpart since: @@ -327,8 +328,8 @@ lp_build_brilinear_lod(struct lp_build_context *bld, *out_lod_fpart = lod_fpart; if (0) { - lp_build_printf(bld->builder, "lod_ipart = %i\n", *out_lod_ipart); - lp_build_printf(bld->builder, "lod_fpart = %f\n\n", *out_lod_fpart); + lp_build_printf(bld->gallivm, "lod_ipart = %i\n", *out_lod_ipart); + lp_build_printf(bld->gallivm, "lod_fpart = %f\n\n", *out_lod_fpart); } } @@ -363,7 +364,7 @@ lp_build_brilinear_rho(struct lp_build_context *bld, * part will not need any post adjustments. */ rho = lp_build_mul(bld, rho, - lp_build_const_vec(bld->type, pre_factor)); + lp_build_const_vec(bld->gallivm, bld->type, pre_factor)); /* ipart = ifloor(log2(rho)) */ lod_ipart = lp_build_extract_exponent(bld, rho, 0); @@ -372,10 +373,10 @@ lp_build_brilinear_rho(struct lp_build_context *bld, lod_fpart = lp_build_extract_mantissa(bld, rho); lod_fpart = lp_build_mul(bld, lod_fpart, - lp_build_const_vec(bld->type, factor)); + lp_build_const_vec(bld->gallivm, bld->type, factor)); lod_fpart = lp_build_add(bld, lod_fpart, - lp_build_const_vec(bld->type, post_offset)); + lp_build_const_vec(bld->gallivm, bld->type, post_offset)); /* * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since: @@ -413,6 +414,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, LLVMValueRef *out_lod_fpart) { + LLVMBuilderRef builder = bld->gallivm->builder; struct lp_build_context *float_bld = &bld->float_bld; LLVMValueRef lod; @@ -424,17 +426,17 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, * This is hit during mipmap generation. 
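/*
 * Illustrative sketch, not part of the patch: a scalar model of the
 * brilinear fractional-lod remapping done in lp_build_brilinear_lod() above.
 * `factor` and `pre_offset` stand in for the values the real code computes
 * (only post_offset = 1 - factor is visible in this hunk); the helper name
 * and the use of floorf() are illustrative.
 */
#include <math.h>

static void
brilinear_lod_model(float lod, float factor, float pre_offset,
                    int *lod_ipart, float *lod_fpart)
{
   const float post_offset = 1.0f - factor;
   float fpart;

   lod += pre_offset;                /* lp_build_add(bld, lod, pre_offset) */
   *lod_ipart = (int) floorf(lod);   /* lp_build_ifloor_fract: integer part */
   fpart = lod - floorf(lod);        /* lp_build_ifloor_fract: fraction */

   /* squeeze the fraction into [1 - factor, 1), so no clamp is needed */
   *lod_fpart = fpart * factor + post_offset;
}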
*/ LLVMValueRef min_lod = - bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit); + bld->dynamic_state->min_lod(bld->dynamic_state, bld->gallivm, unit); lod = min_lod; } else { LLVMValueRef sampler_lod_bias = - bld->dynamic_state->lod_bias(bld->dynamic_state, bld->builder, unit); - LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + bld->dynamic_state->lod_bias(bld->dynamic_state, bld->gallivm, unit); + LLVMValueRef index0 = lp_build_const_int32(bld->gallivm, 0); if (explicit_lod) { - lod = LLVMBuildExtractElement(bld->builder, explicit_lod, + lod = LLVMBuildExtractElement(builder, explicit_lod, index0, ""); } else { @@ -479,27 +481,27 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, /* add shader lod bias */ if (lod_bias) { - lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias, + lod_bias = LLVMBuildExtractElement(builder, lod_bias, index0, ""); - lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias"); + lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias"); } } /* add sampler lod bias */ if (bld->static_state->lod_bias_non_zero) - lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); + lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias"); /* clamp lod */ if (bld->static_state->apply_max_lod) { LLVMValueRef max_lod = - bld->dynamic_state->max_lod(bld->dynamic_state, bld->builder, unit); + bld->dynamic_state->max_lod(bld->dynamic_state, bld->gallivm, unit); lod = lp_build_min(float_bld, lod, max_lod); } if (bld->static_state->apply_min_lod) { LLVMValueRef min_lod = - bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit); + bld->dynamic_state->min_lod(bld->dynamic_state, bld->gallivm, unit); lod = lp_build_max(float_bld, lod, min_lod); } @@ -542,10 +544,10 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, struct lp_build_context *int_bld = &bld->int_bld; LLVMValueRef last_level, level; - LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef zero = lp_build_const_int32(bld->gallivm, 0); last_level = bld->dynamic_state->last_level(bld->dynamic_state, - bld->builder, unit); + bld->gallivm, unit); /* convert float lod to integer */ level = lod_ipart; @@ -568,7 +570,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld, LLVMValueRef *level0_out, LLVMValueRef *level1_out) { - LLVMBuilderRef builder = bld->builder; + LLVMBuilderRef builder = bld->gallivm->builder; struct lp_build_context *int_bld = &bld->int_bld; struct lp_build_context *float_bld = &bld->float_bld; LLVMValueRef last_level; @@ -579,7 +581,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld, *level1_out = lp_build_add(int_bld, lod_ipart, int_bld->one); last_level = bld->dynamic_state->last_level(bld->dynamic_state, - bld->builder, unit); + bld->gallivm, unit); /* * Clamp both lod_ipart and lod_ipart + 1 to [0, last_level], with the @@ -630,11 +632,13 @@ LLVMValueRef lp_build_get_mipmap_level(struct lp_build_sample_context *bld, LLVMValueRef level) { + LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef indexes[2], data_ptr; - indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + + indexes[0] = lp_build_const_int32(bld->gallivm, 0); indexes[1] = level; - data_ptr = LLVMBuildGEP(bld->builder, bld->data_array, indexes, 2, ""); - data_ptr = LLVMBuildLoad(bld->builder, data_ptr, ""); + data_ptr = LLVMBuildGEP(builder, bld->data_array, indexes, 2, ""); + data_ptr = LLVMBuildLoad(builder, data_ptr, ""); return data_ptr; } @@ -643,7 +647,7 @@ 
LLVMValueRef lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, int level) { - LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); + LLVMValueRef lvl = lp_build_const_int32(bld->gallivm, level); return lp_build_get_mipmap_level(bld, lvl); } @@ -657,6 +661,7 @@ lp_build_minify(struct lp_build_context *bld, LLVMValueRef base_size, LLVMValueRef level) { + LLVMBuilderRef builder = bld->gallivm->builder; assert(lp_check_value(bld->type, base_size)); assert(lp_check_value(bld->type, level)); @@ -666,7 +671,7 @@ lp_build_minify(struct lp_build_context *bld, } else { LLVMValueRef size = - LLVMBuildLShr(bld->builder, base_size, level, "minify"); + LLVMBuildLShr(builder, base_size, level, "minify"); assert(bld->type.sign); size = lp_build_max(bld, size, bld->one); return size; @@ -682,11 +687,12 @@ static LLVMValueRef lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, LLVMValueRef stride_array, LLVMValueRef level) { + LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef indexes[2], stride; - indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indexes[0] = lp_build_const_int32(bld->gallivm, 0); indexes[1] = level; - stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, ""); - stride = LLVMBuildLoad(bld->builder, stride, ""); + stride = LLVMBuildGEP(builder, stride_array, indexes, 2, ""); + stride = LLVMBuildLoad(builder, stride, ""); stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride); return stride; } @@ -747,21 +753,21 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld, LLVMValueRef *out_depth) { const unsigned dims = bld->dims; - LLVMTypeRef i32t = LLVMInt32Type(); + LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); - *out_width = lp_build_extract_broadcast(bld->builder, + *out_width = lp_build_extract_broadcast(bld->gallivm, size_type, coord_type, size, LLVMConstInt(i32t, 0, 0)); if (dims >= 2) { - *out_height = lp_build_extract_broadcast(bld->builder, + *out_height = lp_build_extract_broadcast(bld->gallivm, size_type, coord_type, size, LLVMConstInt(i32t, 1, 0)); if (dims == 3) { - *out_depth = lp_build_extract_broadcast(bld->builder, + *out_depth = lp_build_extract_broadcast(bld->gallivm, size_type, coord_type, size, @@ -812,7 +818,7 @@ static LLVMValueRef lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord) { /* ima = -0.5 / abs(coord); */ - LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5); + LLVMValueRef negHalf = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, -0.5); LLVMValueRef absCoord = lp_build_abs(coord_bld, coord); LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord); return ima; @@ -831,7 +837,7 @@ lp_build_cube_coord(struct lp_build_context *coord_bld, LLVMValueRef coord, LLVMValueRef ima) { /* return negate(coord) * ima * sign + 0.5; */ - LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5); + LLVMValueRef half = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, 0.5); LLVMValueRef res; assert(negate_coord == +1 || negate_coord == -1); @@ -859,12 +865,14 @@ lp_build_cube_face(struct lp_build_sample_context *bld, LLVMValueRef major_coord, unsigned pos_face, unsigned neg_face) { - LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE, + struct gallivm_state *gallivm = bld->gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef cmp = LLVMBuildFCmp(builder, LLVMRealUGE, major_coord, bld->float_bld.zero, ""); - LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0); - LLVMValueRef neg = 
LLVMConstInt(LLVMInt32Type(), neg_face, 0); - LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, ""); + LLVMValueRef pos = lp_build_const_int32(gallivm, pos_face); + LLVMValueRef neg = lp_build_const_int32(gallivm, neg_face); + LLVMValueRef res = LLVMBuildSelect(builder, cmp, pos, neg, ""); return res; } @@ -884,9 +892,10 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, { struct lp_build_context *float_bld = &bld->float_bld; struct lp_build_context *coord_bld = &bld->coord_bld; + LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef rx, ry, rz; LLVMValueRef arx, ary, arz; - LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25); + LLVMValueRef c25 = lp_build_const_float(bld->gallivm, 0.25); LLVMValueRef arx_ge_ary, arx_ge_arz; LLVMValueRef ary_ge_arx, ary_ge_arz; LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz; @@ -911,17 +920,17 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, /* * Compare sign/magnitude of rx,ry,rz to determine face */ - arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, ""); - arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, ""); - ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, ""); - ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, ""); + arx_ge_ary = LLVMBuildFCmp(builder, LLVMRealUGE, arx, ary, ""); + arx_ge_arz = LLVMBuildFCmp(builder, LLVMRealUGE, arx, arz, ""); + ary_ge_arx = LLVMBuildFCmp(builder, LLVMRealUGE, ary, arx, ""); + ary_ge_arz = LLVMBuildFCmp(builder, LLVMRealUGE, ary, arz, ""); - arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, ""); - ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); + arx_ge_ary_arz = LLVMBuildAnd(builder, arx_ge_ary, arx_ge_arz, ""); + ary_ge_arx_arz = LLVMBuildAnd(builder, ary_ge_arx, ary_ge_arz, ""); - rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, ""); - ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, ""); - rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, ""); + rx_pos = LLVMBuildFCmp(builder, LLVMRealUGE, rx, float_bld->zero, ""); + ry_pos = LLVMBuildFCmp(builder, LLVMRealUGE, ry, float_bld->zero, ""); + rz_pos = LLVMBuildFCmp(builder, LLVMRealUGE, rz, float_bld->zero, ""); { struct lp_build_if_state if_ctx; @@ -929,11 +938,11 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, LLVMValueRef face_t_var; LLVMValueRef face_var; - face_s_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_s_var"); - face_t_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_t_var"); - face_var = lp_build_alloca(bld->builder, bld->int_bld.vec_type, "face_var"); + face_s_var = lp_build_alloca(bld->gallivm, bld->coord_bld.vec_type, "face_s_var"); + face_t_var = lp_build_alloca(bld->gallivm, bld->coord_bld.vec_type, "face_t_var"); + face_var = lp_build_alloca(bld->gallivm, bld->int_bld.vec_type, "face_var"); - lp_build_if(&if_ctx, bld->builder, arx_ge_ary_arz); + lp_build_if(&if_ctx, bld->gallivm, arx_ge_ary_arz); { /* +/- X face */ LLVMValueRef sign = lp_build_sgn(float_bld, rx); @@ -943,17 +952,17 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, *face = lp_build_cube_face(bld, rx, PIPE_TEX_FACE_POS_X, PIPE_TEX_FACE_NEG_X); - LLVMBuildStore(bld->builder, *face_s, face_s_var); - LLVMBuildStore(bld->builder, *face_t, face_t_var); - LLVMBuildStore(bld->builder, *face, face_var); + LLVMBuildStore(builder, *face_s, face_s_var); + LLVMBuildStore(builder, *face_t, face_t_var); + 
LLVMBuildStore(builder, *face, face_var); } lp_build_else(&if_ctx); { struct lp_build_if_state if_ctx2; - ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); + ary_ge_arx_arz = LLVMBuildAnd(builder, ary_ge_arx, ary_ge_arz, ""); - lp_build_if(&if_ctx2, bld->builder, ary_ge_arx_arz); + lp_build_if(&if_ctx2, bld->gallivm, ary_ge_arx_arz); { /* +/- Y face */ LLVMValueRef sign = lp_build_sgn(float_bld, ry); @@ -963,9 +972,9 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, *face = lp_build_cube_face(bld, ry, PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y); - LLVMBuildStore(bld->builder, *face_s, face_s_var); - LLVMBuildStore(bld->builder, *face_t, face_t_var); - LLVMBuildStore(bld->builder, *face, face_var); + LLVMBuildStore(builder, *face_s, face_s_var); + LLVMBuildStore(builder, *face_t, face_t_var); + LLVMBuildStore(builder, *face, face_var); } lp_build_else(&if_ctx2); { @@ -977,18 +986,18 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, *face = lp_build_cube_face(bld, rz, PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z); - LLVMBuildStore(bld->builder, *face_s, face_s_var); - LLVMBuildStore(bld->builder, *face_t, face_t_var); - LLVMBuildStore(bld->builder, *face, face_var); + LLVMBuildStore(builder, *face_s, face_s_var); + LLVMBuildStore(builder, *face_t, face_t_var); + LLVMBuildStore(builder, *face, face_var); } lp_build_endif(&if_ctx2); } lp_build_endif(&if_ctx); - *face_s = LLVMBuildLoad(bld->builder, face_s_var, "face_s"); - *face_t = LLVMBuildLoad(bld->builder, face_t_var, "face_t"); - *face = LLVMBuildLoad(bld->builder, face_var, "face"); + *face_s = LLVMBuildLoad(builder, face_s_var, "face_s"); + *face_t = LLVMBuildLoad(builder, face_t_var, "face_t"); + *face = LLVMBuildLoad(builder, face_var, "face"); } } @@ -1011,6 +1020,7 @@ lp_build_sample_partial_offset(struct lp_build_context *bld, LLVMValueRef *out_offset, LLVMValueRef *out_subcoord) { + LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef offset; LLVMValueRef subcoord; @@ -1028,14 +1038,14 @@ lp_build_sample_partial_offset(struct lp_build_context *bld, */ #if 0 LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length); - subcoord = LLVMBuildURem(bld->builder, coord, block_width, ""); - coord = LLVMBuildUDiv(bld->builder, coord, block_width, ""); + subcoord = LLVMBuildURem(builder, coord, block_width, ""); + coord = LLVMBuildUDiv(builder, coord, block_width, ""); #else unsigned logbase2 = util_unsigned_logbase2(block_length); - LLVMValueRef block_shift = lp_build_const_int_vec(bld->type, logbase2); - LLVMValueRef block_mask = lp_build_const_int_vec(bld->type, block_length - 1); - subcoord = LLVMBuildAnd(bld->builder, coord, block_mask, ""); - coord = LLVMBuildLShr(bld->builder, coord, block_shift, ""); + LLVMValueRef block_shift = lp_build_const_int_vec(bld->gallivm, bld->type, logbase2); + LLVMValueRef block_mask = lp_build_const_int_vec(bld->gallivm, bld->type, block_length - 1); + subcoord = LLVMBuildAnd(builder, coord, block_mask, ""); + coord = LLVMBuildLShr(builder, coord, block_shift, ""); #endif } @@ -1071,7 +1081,8 @@ lp_build_sample_offset(struct lp_build_context *bld, LLVMValueRef x_stride; LLVMValueRef offset; - x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8); + x_stride = lp_build_const_vec(bld->gallivm, bld->type, + format_desc->block.bits/8); lp_build_sample_partial_offset(bld, format_desc->block.width, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index ffed27cee83..8c9d5df4e43 
100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -105,64 +105,64 @@ struct lp_sampler_dynamic_state /** Obtain the base texture width (returns int32) */ LLVMValueRef (*width)( const struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, + struct gallivm_state *gallivm, unsigned unit); /** Obtain the base texture height (returns int32) */ LLVMValueRef (*height)( const struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, + struct gallivm_state *gallivm, unsigned unit); /** Obtain the base texture depth (returns int32) */ LLVMValueRef (*depth)( const struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, + struct gallivm_state *gallivm, unsigned unit); /** Obtain the number of mipmap levels minus one (returns int32) */ LLVMValueRef (*last_level)( const struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, + struct gallivm_state *gallivm, unsigned unit); /** Obtain stride in bytes between image rows/blocks (returns int32) */ LLVMValueRef (*row_stride)( const struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, + struct gallivm_state *gallivm, unsigned unit); /** Obtain stride in bytes between image slices (returns int32) */ LLVMValueRef (*img_stride)( const struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, + struct gallivm_state *gallivm, unsigned unit); /** Obtain pointer to array of pointers to mimpap levels */ LLVMValueRef (*data_ptr)( const struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, + struct gallivm_state *gallivm, unsigned unit); /** Obtain texture min lod (returns float) */ LLVMValueRef (*min_lod)(const struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, unsigned unit); + struct gallivm_state *gallivm, unsigned unit); /** Obtain texture max lod (returns float) */ LLVMValueRef (*max_lod)(const struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, unsigned unit); + struct gallivm_state *gallivm, unsigned unit); /** Obtain texture lod bias (returns float) */ LLVMValueRef (*lod_bias)(const struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, unsigned unit); + struct gallivm_state *gallivm, unsigned unit); /** Obtain texture border color (returns ptr to float[4]) */ LLVMValueRef (*border_color)(const struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, unsigned unit); + struct gallivm_state *gallivm, unsigned unit); }; @@ -171,7 +171,7 @@ struct lp_sampler_dynamic_state */ struct lp_build_sample_context { - LLVMBuilderRef builder; + struct gallivm_state *gallivm; const struct lp_sampler_static_state *static_state; @@ -385,7 +385,7 @@ lp_build_sample_offset(struct lp_build_context *bld, void -lp_build_sample_soa(LLVMBuilderRef builder, +lp_build_sample_soa(struct gallivm_state *gallivm, const struct lp_sampler_static_state *static_state, struct lp_sampler_dynamic_state *dynamic_state, struct lp_type fp_type, @@ -399,7 +399,7 @@ lp_build_sample_soa(LLVMBuilderRef builder, LLVMValueRef texel_out[4]); void -lp_build_sample_nop(struct lp_type type, +lp_build_sample_nop(struct gallivm_state *gallivm, struct lp_type type, LLVMValueRef texel_out[4]); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index d6831a580b3..e61cf9541ea 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -52,6 +52,7 @@ #include "lp_bld_flow.h" #include "lp_bld_gather.h" #include 
"lp_bld_format.h" +#include "lp_bld_init.h" #include "lp_bld_sample.h" #include "lp_bld_sample_aos.h" #include "lp_bld_quad.h" @@ -82,6 +83,7 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, LLVMValueRef *out_i) { struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef length_minus_one; length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); @@ -89,12 +91,12 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, switch(wrap_mode) { case PIPE_TEX_WRAP_REPEAT: if(is_pot) - coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); + coord = LLVMBuildAnd(builder, coord, length_minus_one, ""); else { /* Add a bias to the texcoord to handle negative coords */ LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); - coord = LLVMBuildAdd(bld->builder, coord, bias, ""); - coord = LLVMBuildURem(bld->builder, coord, length, ""); + coord = LLVMBuildAdd(builder, coord, bias, ""); + coord = LLVMBuildURem(builder, coord, length, ""); } break; @@ -147,6 +149,7 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, LLVMValueRef *i1) { struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef length_minus_one; LLVMValueRef lmask, umask, mask; @@ -195,39 +198,39 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, switch(wrap_mode) { case PIPE_TEX_WRAP_REPEAT: if (is_pot) { - coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); + coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, ""); } else { /* Add a bias to the texcoord to handle negative coords */ LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); - coord0 = LLVMBuildAdd(bld->builder, coord0, bias, ""); - coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); + coord0 = LLVMBuildAdd(builder, coord0, bias, ""); + coord0 = LLVMBuildURem(builder, coord0, length, ""); } - mask = lp_build_compare(bld->builder, int_coord_bld->type, + mask = lp_build_compare(bld->gallivm, int_coord_bld->type, PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); *offset0 = lp_build_mul(int_coord_bld, coord0, stride); - *offset1 = LLVMBuildAnd(bld->builder, + *offset1 = LLVMBuildAnd(builder, lp_build_add(int_coord_bld, *offset0, stride), mask, ""); break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type, + lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type, PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero); - umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type, + umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type, PIPE_FUNC_LESS, coord0, length_minus_one); coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero); coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one); - mask = LLVMBuildAnd(bld->builder, lmask, umask, ""); + mask = LLVMBuildAnd(builder, lmask, umask, ""); *offset0 = lp_build_mul(int_coord_bld, coord0, stride); *offset1 = lp_build_add(int_coord_bld, *offset0, - LLVMBuildAnd(bld->builder, stride, mask, "")); + LLVMBuildAnd(builder, stride, mask, "")); break; case PIPE_TEX_WRAP_CLAMP: @@ -263,7 +266,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, LLVMValueRef *colors_hi) { const unsigned dims = bld->dims; - LLVMBuilderRef builder = bld->builder; + LLVMBuilderRef builder = bld->gallivm->builder; struct lp_build_context i32, 
h16, u8n; LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; LLVMValueRef i32_c8; @@ -273,13 +276,13 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, LLVMValueRef x_offset, offset; LLVMValueRef x_subcoord, y_subcoord, z_subcoord; - lp_build_context_init(&i32, builder, lp_type_int_vec(32)); - lp_build_context_init(&h16, builder, lp_type_ufixed(16)); - lp_build_context_init(&u8n, builder, lp_type_unorm(8)); + lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32)); + lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16)); + lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8)); - i32_vec_type = lp_build_vec_type(i32.type); - h16_vec_type = lp_build_vec_type(h16.type); - u8n_vec_type = lp_build_vec_type(u8n.type); + i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type); + h16_vec_type = lp_build_vec_type(bld->gallivm, h16.type); + u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type); lp_build_extract_image_sizes(bld, bld->int_size_type, @@ -317,7 +320,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, r = LLVMBuildFPToSI(builder, r, i32_vec_type, ""); /* compute floor (shift right 8) */ - i32_c8 = lp_build_const_int_vec(i32.type, 8); + i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8); s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); if (dims >= 2) t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); @@ -325,7 +328,8 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, r_ipart = LLVMBuildAShr(builder, r, i32_c8, ""); /* get pixel, row, image strides */ - x_stride = lp_build_const_vec(bld->int_coord_bld.type, + x_stride = lp_build_const_vec(bld->gallivm, + bld->int_coord_bld.type, bld->format_desc->block.bits/8); /* Do texcoord wrapping, compute texel offset */ @@ -387,7 +391,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, * Given the format is a rgba8, just read the pixels as is, * without any swizzling. Swizzling will be done later. 
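/*
 * Illustrative sketch, not part of the patch: a scalar model of the 8-bit
 * fixed-point texcoord handling used by the AoS paths above and below
 * (FPToSI, "shift right 8", "AND with 0xff"). It assumes the incoming
 * coordinate has already been scaled by 256; the helper name is made up.
 */
static void
split_fixed_coord_linear(int coord,          /* texcoord scaled by 256 */
                         int *ipart, int *fpart)
{
   coord += -128;          /* subtract 0.5 texel (half of 256), linear only */
   *ipart = coord >> 8;    /* floor: drop the 8 fractional bits (AShr) */
   *fpart = coord & 0xff;  /* lerp weight in [0, 255], linear only */
}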
*/ - rgba8 = lp_build_gather(bld->builder, + rgba8 = lp_build_gather(bld->gallivm, bld->texel_type.length, bld->format_desc->block.bits, bld->texel_type.width, @@ -396,7 +400,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); } else { - rgba8 = lp_build_fetch_rgba_aos(bld->builder, + rgba8 = lp_build_fetch_rgba_aos(bld->gallivm, bld->format_desc, u8n.type, data_ptr, offset, @@ -405,7 +409,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, } /* Expand one 4*rgba8 to two 2*rgba16 */ - lp_build_unpack2(builder, u8n.type, h16.type, + lp_build_unpack2(bld->gallivm, u8n.type, h16.type, rgba8, colors_lo, colors_hi); } @@ -429,7 +433,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef *colors_hi) { const unsigned dims = bld->dims; - LLVMBuilderRef builder = bld->builder; + LLVMBuilderRef builder = bld->gallivm->builder; struct lp_build_context i32, h16, u8n; LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; LLVMValueRef i32_c8, i32_c128, i32_c255; @@ -450,13 +454,13 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, unsigned i, j, k; unsigned numj, numk; - lp_build_context_init(&i32, builder, lp_type_int_vec(32)); - lp_build_context_init(&h16, builder, lp_type_ufixed(16)); - lp_build_context_init(&u8n, builder, lp_type_unorm(8)); + lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32)); + lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16)); + lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8)); - i32_vec_type = lp_build_vec_type(i32.type); - h16_vec_type = lp_build_vec_type(h16.type); - u8n_vec_type = lp_build_vec_type(u8n.type); + i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type); + h16_vec_type = lp_build_vec_type(bld->gallivm, h16.type); + u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type); lp_build_extract_image_sizes(bld, bld->int_size_type, @@ -494,7 +498,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, r = LLVMBuildFPToSI(builder, r, i32_vec_type, ""); /* subtract 0.5 (add -128) */ - i32_c128 = lp_build_const_int_vec(i32.type, -128); + i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128); s = LLVMBuildAdd(builder, s, i32_c128, ""); if (dims >= 2) { t = LLVMBuildAdd(builder, t, i32_c128, ""); @@ -504,7 +508,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, } /* compute floor (shift right 8) */ - i32_c8 = lp_build_const_int_vec(i32.type, 8); + i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8); s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); if (dims >= 2) t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); @@ -512,7 +516,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, r_ipart = LLVMBuildAShr(builder, r, i32_c8, ""); /* compute fractional part (AND with 0xff) */ - i32_c255 = lp_build_const_int_vec(i32.type, 255); + i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255); s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); if (dims >= 2) t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); @@ -520,7 +524,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, r_fpart = LLVMBuildAnd(builder, r, i32_c255, ""); /* get pixel, row and image strides */ - x_stride = lp_build_const_vec(bld->int_coord_bld.type, + x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type, bld->format_desc->block.bits/8); y_stride = row_stride_vec; z_stride = img_stride_vec; @@ -612,7 +616,7 @@ 
lp_build_sample_image_linear(struct lp_build_sample_context *bld, r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, ""); { - LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH]; LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH]; LLVMValueRef shuffle_lo; @@ -685,7 +689,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, * Given the format is a rgba8, just read the pixels as is, * without any swizzling. Swizzling will be done later. */ - rgba8 = lp_build_gather(bld->builder, + rgba8 = lp_build_gather(bld->gallivm, bld->texel_type.length, bld->format_desc->block.bits, bld->texel_type.width, @@ -694,7 +698,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); } else { - rgba8 = lp_build_fetch_rgba_aos(bld->builder, + rgba8 = lp_build_fetch_rgba_aos(bld->gallivm, bld->format_desc, u8n.type, data_ptr, offset[k][j][i], @@ -703,7 +707,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, } /* Expand one 4*rgba8 to two 2*rgba16 */ - lp_build_unpack2(builder, u8n.type, h16.type, + lp_build_unpack2(bld->gallivm, u8n.type, h16.type, rgba8, &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]); } @@ -790,7 +794,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef colors_lo_var, LLVMValueRef colors_hi_var) { - LLVMBuilderRef builder = bld->builder; + LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef size0; LLVMValueRef size1; LLVMValueRef row_stride0_vec; @@ -802,7 +806,6 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef colors0_lo, colors0_hi; LLVMValueRef colors1_lo, colors1_hi; - /* sample the first mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel0, &size0, @@ -829,8 +832,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMBuildStore(builder, colors0_hi, colors_hi_var); if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - LLVMValueRef h16_scale = LLVMConstReal(LLVMFloatType(), 256.0); - LLVMTypeRef i32_type = LLVMIntType(32); + LLVMValueRef h16_scale = lp_build_const_float(bld->gallivm, 256.0); + LLVMTypeRef i32_type = LLVMIntTypeInContext(bld->gallivm->context, 32); struct lp_build_if_state if_ctx; LLVMValueRef need_lerp; @@ -842,11 +845,11 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lod_fpart, LLVMConstNull(i32_type), "need_lerp"); - lp_build_if(&if_ctx, builder, need_lerp); + lp_build_if(&if_ctx, bld->gallivm, need_lerp); { struct lp_build_context h16_bld; - lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16)); + lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16)); /* sample the second mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel1, @@ -885,7 +888,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, int i; assert(h16_bld.type.length <= Elements(shuffles)); for (i = 0; i < h16_bld.type.length; i++) - shuffles[i] = lp_build_const_int32(2 * (i & 1)); + shuffles[i] = lp_build_const_int32(bld->gallivm, 2 * (i & 1)); shuffle = LLVMConstVector(shuffles, h16_bld.type.length); lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, lod_fpart, @@ -925,7 +928,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, LLVMValueRef texel_out[4]) { struct lp_build_context *int_bld = &bld->int_bld; - LLVMBuilderRef builder = bld->builder; + LLVMBuilderRef builder = bld->gallivm->builder; const unsigned mip_filter = bld->static_state->min_mip_filter; 
const unsigned min_filter = bld->static_state->min_img_filter; const unsigned mag_filter = bld->static_state->mag_img_filter; @@ -936,8 +939,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, LLVMValueRef unswizzled[4]; LLVMValueRef face_ddx[4], face_ddy[4]; struct lp_build_context h16_bld; - LLVMTypeRef i32t = LLVMInt32Type(); - LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0); + LLVMValueRef i32t_zero = lp_build_const_int32(bld->gallivm, 0); /* we only support the common/simple wrap modes at this time */ assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s)); @@ -948,7 +950,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, /* make 16-bit fixed-pt builder context */ - lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16)); + lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16)); /* cube face selection, compute pre-face coords, etc. */ if (bld->static_state->target == PIPE_TEXTURE_CUBE) { @@ -1026,8 +1028,8 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, * Get/interpolate texture colors. */ - packed_lo = lp_build_alloca(builder, h16_bld.vec_type, "packed_lo"); - packed_hi = lp_build_alloca(builder, h16_bld.vec_type, "packed_hi"); + packed_lo = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_lo"); + packed_hi = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_hi"); if (min_filter == mag_filter) { /* no need to distinquish between minification and magnification */ @@ -1048,7 +1050,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, minify = LLVMBuildICmp(builder, LLVMIntSGE, lod_ipart, int_bld->zero, ""); - lp_build_if(&if_ctx, builder, minify); + lp_build_if(&if_ctx, bld->gallivm, minify); { /* Use the minification filter */ lp_build_sample_mipmap(bld, @@ -1073,7 +1075,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, * combine the values stored in 'packed_lo' and 'packed_hi' variables * into 'packed' */ - packed = lp_build_pack2(builder, + packed = lp_build_pack2(bld->gallivm, h16_bld.type, lp_type_unorm(8), LLVMBuildLoad(builder, packed_lo, ""), LLVMBuildLoad(builder, packed_hi, "")); @@ -1081,7 +1083,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, /* * Convert to SoA and swizzle. 
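/*
 * Illustrative sketch, not part of the patch: the flow helpers used above
 * (lp_build_alloca, lp_build_if/else/endif) now take the gallivm state as
 * well. The pattern of storing into an alloca inside the branches and
 * loading the result afterwards looks like this; the function and variable
 * names are assumptions for illustration.
 */
static LLVMValueRef
example_select_with_if(struct gallivm_state *gallivm,
                       struct lp_build_context *bld,
                       LLVMValueRef cond, LLVMValueRef a, LLVMValueRef b)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_if_state if_ctx;
   LLVMValueRef result = lp_build_alloca(gallivm, bld->vec_type, "result");

   lp_build_if(&if_ctx, gallivm, cond);
   {
      LLVMBuildStore(builder, a, result);
   }
   lp_build_else(&if_ctx);
   {
      LLVMBuildStore(builder, b, result);
   }
   lp_build_endif(&if_ctx);

   return LLVMBuildLoad(builder, result, "result");
}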
*/ - lp_build_rgba8_to_f32_soa(builder, + lp_build_rgba8_to_f32_soa(bld->gallivm, bld->texel_type, packed, unswizzled); @@ -1096,6 +1098,4 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, texel_out[2] = unswizzled[2]; texel_out[3] = unswizzled[3]; } - - apply_sampler_swizzle(bld, texel_out); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 53cc0c5f345..e685f4b73f0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -84,6 +84,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, const struct lp_sampler_static_state *static_state = bld->static_state; const unsigned dims = bld->dims; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef offset; LLVMValueRef i, j; LLVMValueRef use_border = NULL; @@ -95,7 +96,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, LLVMValueRef b1, b2; b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero); b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width); - use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2"); + use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); } if (dims >= 2 && @@ -106,11 +107,11 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero); b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height); if (use_border) { - use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1"); - use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2"); + use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1"); + use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2"); } else { - use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2"); + use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); } } @@ -122,11 +123,11 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero); b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); if (use_border) { - use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1"); - use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2"); + use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1"); + use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2"); } else { - use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2"); + use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); } } @@ -148,7 +149,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border); } - lp_build_fetch_rgba_soa(bld->builder, + lp_build_fetch_rgba_soa(bld->gallivm, bld->format_desc, bld->texel_type, data_ptr, offset, @@ -174,20 +175,18 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, /* select texel color or border color depending on use_border */ LLVMValueRef border_color_ptr = bld->dynamic_state->border_color(bld->dynamic_state, - bld->builder, unit); + bld->gallivm, unit); int chan; for (chan = 0; chan < 4; chan++) { LLVMValueRef border_chan = - lp_build_array_get(bld->builder, border_color_ptr, - lp_build_const_int32(chan)); + lp_build_array_get(bld->gallivm, border_color_ptr, + lp_build_const_int32(bld->gallivm, chan)); LLVMValueRef border_chan_vec = lp_build_broadcast_scalar(&bld->float_vec_bld, border_chan); texel_out[chan] = 
lp_build_select(&bld->texel_bld, use_border, border_chan_vec, texel_out[chan]); } } - - apply_sampler_swizzle(bld, texel_out); } @@ -205,7 +204,7 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld, lp_build_ifloor_fract(coord_bld, coord, &flr, &fract); /* isOdd = flr & 1 */ - isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, ""); + isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, ""); /* make coord positive or negative depending on isOdd */ coord = lp_build_set_sign(coord_bld, fract, isOdd); @@ -239,7 +238,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, { struct lp_build_context *coord_bld = &bld->coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5); + LLVMBuilderRef builder = bld->gallivm->builder; + LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5); LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); LLVMValueRef coord0, coord1, weight; @@ -253,18 +253,18 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, /* repeat wrap */ if (is_pot) { coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); - coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); - coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, ""); + coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, ""); + coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, ""); } else { /* Add a bias to the texcoord to handle negative coords */ LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); LLVMValueRef mask; - coord0 = LLVMBuildAdd(bld->builder, coord0, bias, ""); - coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); - mask = lp_build_compare(bld->builder, int_coord_bld->type, + coord0 = LLVMBuildAdd(builder, coord0, bias, ""); + coord0 = LLVMBuildURem(builder, coord0, length, ""); + mask = lp_build_compare(bld->gallivm, int_coord_bld->type, PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); - coord1 = LLVMBuildAnd(bld->builder, + coord1 = LLVMBuildAnd(builder, lp_build_add(int_coord_bld, coord0, int_coord_bld->one), mask, ""); } @@ -318,7 +318,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, } /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */ coord = lp_build_sub(coord_bld, coord, half); - min = lp_build_const_vec(coord_bld->type, -1.0F); + min = lp_build_const_vec(bld->gallivm, coord_bld->type, -1.0F); coord = lp_build_clamp(coord_bld, coord, min, length_f); /* convert to int, compute lerp weight */ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); @@ -437,6 +437,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, { struct lp_build_context *coord_bld = &bld->coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); LLVMValueRef icoord; @@ -445,12 +446,12 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, coord = lp_build_mul(coord_bld, coord, length_f); icoord = lp_build_ifloor(coord_bld, coord); if (is_pot) - icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, ""); + icoord = LLVMBuildAnd(builder, icoord, length_minus_one, ""); else { /* Add a bias to the texcoord to handle negative coords */ LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); - icoord = LLVMBuildAdd(bld->builder, icoord, bias, ""); - icoord = 
LLVMBuildURem(bld->builder, icoord, length, ""); + icoord = LLVMBuildAdd(builder, icoord, bias, ""); + icoord = LLVMBuildURem(builder, icoord, length, ""); } break; @@ -830,7 +831,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef lod_fpart, LLVMValueRef *colors_out) { - LLVMBuilderRef builder = bld->builder; + LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef size0; LLVMValueRef size1; LLVMValueRef row_stride0_vec; @@ -878,7 +879,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, bld->float_bld.zero, "need_lerp"); - lp_build_if(&if_ctx, builder, need_lerp); + lp_build_if(&if_ctx, bld->gallivm, need_lerp); { /* sample the second mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel1, @@ -934,7 +935,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, LLVMValueRef *colors_out) { struct lp_build_context *int_bld = &bld->int_bld; - LLVMBuilderRef builder = bld->builder; + LLVMBuilderRef builder = bld->gallivm->builder; const unsigned mip_filter = bld->static_state->min_mip_filter; const unsigned min_filter = bld->static_state->min_img_filter; const unsigned mag_filter = bld->static_state->mag_img_filter; @@ -942,8 +943,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, LLVMValueRef ilevel0, ilevel1 = NULL; LLVMValueRef face_ddx[4], face_ddy[4]; LLVMValueRef texels[4]; - LLVMTypeRef i32t = LLVMInt32Type(); - LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0); + LLVMValueRef i32t_zero = lp_build_const_int32(bld->gallivm, 0); unsigned chan; /* @@ -1030,7 +1030,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, */ for (chan = 0; chan < 4; ++chan) { - texels[chan] = lp_build_alloca(builder, bld->texel_bld.vec_type, ""); + texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, ""); lp_build_name(texels[chan], "sampler%u_texel_%c_var", unit, "xyzw"[chan]); } @@ -1053,7 +1053,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, minify = LLVMBuildICmp(builder, LLVMIntSGE, lod_ipart, int_bld->zero, ""); - lp_build_if(&if_ctx, builder, minify); + lp_build_if(&if_ctx, bld->gallivm, minify); { /* Use the minification filter */ lp_build_sample_mipmap(bld, unit, @@ -1092,6 +1092,7 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, LLVMValueRef texel[4]) { struct lp_build_context *texel_bld = &bld->texel_bld; + LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef res; const unsigned chan = 0; @@ -1100,11 +1101,10 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, /* debug code */ if (0) { - LLVMValueRef indx = lp_build_const_int32(0); - LLVMValueRef coord = LLVMBuildExtractElement(bld->builder, p, indx, ""); - LLVMValueRef tex = LLVMBuildExtractElement(bld->builder, - texel[chan], indx, ""); - lp_build_printf(bld->builder, "shadow compare coord %f to texture %f\n", + LLVMValueRef indx = lp_build_const_int32(bld->gallivm, 0); + LLVMValueRef coord = LLVMBuildExtractElement(builder, p, indx, ""); + LLVMValueRef tex = LLVMBuildExtractElement(builder, texel[chan], indx, ""); + lp_build_printf(bld->gallivm, "shadow compare coord %f to texture %f\n", coord, tex); } @@ -1126,10 +1126,10 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, * For debugging. 
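/*
 * Illustrative sketch, not part of the patch: shape of a
 * lp_sampler_dynamic_state callback under the new convention (it receives
 * the gallivm state rather than an LLVMBuilderRef), as consumed by the
 * dynamic-state fetches in lp_build_sample_soa() below. The hard-wired
 * value and the name are assumptions for illustration; a real driver would
 * emit a load from its per-unit sampler state instead.
 */
static LLVMValueRef
example_min_lod(const struct lp_sampler_dynamic_state *state,
                struct gallivm_state *gallivm,
                unsigned unit)
{
   (void) state;
   (void) unit;
   return lp_build_const_float(gallivm, 0.0f);
}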
*/ void -lp_build_sample_nop(struct lp_type type, +lp_build_sample_nop(struct gallivm_state *gallivm, struct lp_type type, LLVMValueRef texel_out[4]) { - LLVMValueRef one = lp_build_one(type); + LLVMValueRef one = lp_build_one(gallivm, type); unsigned chan; for (chan = 0; chan < 4; chan++) { @@ -1147,7 +1147,7 @@ lp_build_sample_nop(struct lp_type type, * \param ddy partial derivatives of (s,t,r,q) with respect to y */ void -lp_build_sample_soa(LLVMBuilderRef builder, +lp_build_sample_soa(struct gallivm_state *gallivm, const struct lp_sampler_static_state *static_state, struct lp_sampler_dynamic_state *dynamic_state, struct lp_type type, @@ -1162,8 +1162,8 @@ lp_build_sample_soa(LLVMBuilderRef builder, { unsigned dims = texture_dims(static_state->target); struct lp_build_sample_context bld; - LLVMTypeRef i32t = LLVMInt32Type(); - + LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef s; LLVMValueRef t; LLVMValueRef r; @@ -1178,7 +1178,7 @@ lp_build_sample_soa(LLVMBuilderRef builder, /* Setup our build context */ memset(&bld, 0, sizeof bld); - bld.builder = builder; + bld.gallivm = gallivm; bld.static_state = static_state; bld.dynamic_state = dynamic_state; bld.format_desc = util_format_description(static_state->format); @@ -1195,22 +1195,22 @@ lp_build_sample_soa(LLVMBuilderRef builder, float_vec_type = lp_type_float_vec(32); - lp_build_context_init(&bld.float_bld, builder, bld.float_type); - lp_build_context_init(&bld.float_vec_bld, builder, float_vec_type); - lp_build_context_init(&bld.int_bld, builder, bld.int_type); - lp_build_context_init(&bld.coord_bld, builder, bld.coord_type); - lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type); - lp_build_context_init(&bld.int_size_bld, builder, bld.int_size_type); - lp_build_context_init(&bld.float_size_bld, builder, bld.float_size_type); - lp_build_context_init(&bld.texel_bld, builder, bld.texel_type); + lp_build_context_init(&bld.float_bld, gallivm, bld.float_type); + lp_build_context_init(&bld.float_vec_bld, gallivm, float_vec_type); + lp_build_context_init(&bld.int_bld, gallivm, bld.int_type); + lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type); + lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type); + lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type); + lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type); + lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type); /* Get the dynamic state */ - bld.width = dynamic_state->width(dynamic_state, builder, unit); - bld.height = dynamic_state->height(dynamic_state, builder, unit); - bld.depth = dynamic_state->depth(dynamic_state, builder, unit); - bld.row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit); - bld.img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit); - bld.data_array = dynamic_state->data_ptr(dynamic_state, builder, unit); + bld.width = dynamic_state->width(dynamic_state, gallivm, unit); + bld.height = dynamic_state->height(dynamic_state, gallivm, unit); + bld.depth = dynamic_state->depth(dynamic_state, gallivm, unit); + bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm, unit); + bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm, unit); + bld.data_array = dynamic_state->data_ptr(dynamic_state, gallivm, unit); /* Note that data_array is an array[level] of pointers to texture images */ s = coords[0]; @@ -1236,7 +1236,7 @@ 
lp_build_sample_soa(LLVMBuilderRef builder, if (0) { /* For debug: no-op texture sampling */ - lp_build_sample_nop(bld.texel_type, texel_out); + lp_build_sample_nop(gallivm, bld.texel_type, texel_out); } else if (util_format_fits_8unorm(bld.format_desc) && lp_is_simple_wrap_mode(static_state->wrap_s) && @@ -1266,4 +1266,6 @@ lp_build_sample_soa(LLVMBuilderRef builder, } lp_build_sample_compare(&bld, r, texel_out); + + apply_sampler_swizzle(&bld, texel_out); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.c b/src/gallium/auxiliary/gallivm/lp_bld_struct.c index 4693c2de6f9..0dc2f24d10a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_struct.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.c @@ -37,12 +37,13 @@ #include "util/u_debug.h" #include "util/u_memory.h" +#include "lp_bld_const.h" #include "lp_bld_debug.h" #include "lp_bld_struct.h" LLVMValueRef -lp_build_struct_get_ptr(LLVMBuilderRef builder, +lp_build_struct_get_ptr(struct gallivm_state *gallivm, LLVMValueRef ptr, unsigned member, const char *name) @@ -51,16 +52,16 @@ lp_build_struct_get_ptr(LLVMBuilderRef builder, LLVMValueRef member_ptr; assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind); assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMStructTypeKind); - indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); - indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0); - member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + indices[0] = lp_build_const_int32(gallivm, 0); + indices[1] = lp_build_const_int32(gallivm, member); + member_ptr = LLVMBuildGEP(gallivm->builder, ptr, indices, Elements(indices), ""); lp_build_name(member_ptr, "%s.%s_ptr", LLVMGetValueName(ptr), name); return member_ptr; } LLVMValueRef -lp_build_struct_get(LLVMBuilderRef builder, +lp_build_struct_get(struct gallivm_state *gallivm, LLVMValueRef ptr, unsigned member, const char *name) @@ -69,15 +70,15 @@ lp_build_struct_get(LLVMBuilderRef builder, LLVMValueRef res; assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind); assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMStructTypeKind); - member_ptr = lp_build_struct_get_ptr(builder, ptr, member, name); - res = LLVMBuildLoad(builder, member_ptr, ""); + member_ptr = lp_build_struct_get_ptr(gallivm, ptr, member, name); + res = LLVMBuildLoad(gallivm->builder, member_ptr, ""); lp_build_name(res, "%s.%s", LLVMGetValueName(ptr), name); return res; } LLVMValueRef -lp_build_array_get_ptr(LLVMBuilderRef builder, +lp_build_array_get_ptr(struct gallivm_state *gallivm, LLVMValueRef ptr, LLVMValueRef index) { @@ -85,9 +86,9 @@ lp_build_array_get_ptr(LLVMBuilderRef builder, LLVMValueRef element_ptr; assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind); assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMArrayTypeKind); - indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[0] = lp_build_const_int32(gallivm, 0); indices[1] = index; - element_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + element_ptr = LLVMBuildGEP(gallivm->builder, ptr, indices, Elements(indices), ""); #ifdef DEBUG lp_build_name(element_ptr, "&%s[%s]", LLVMGetValueName(ptr), LLVMGetValueName(index)); @@ -97,7 +98,7 @@ lp_build_array_get_ptr(LLVMBuilderRef builder, LLVMValueRef -lp_build_array_get(LLVMBuilderRef builder, +lp_build_array_get(struct gallivm_state *gallivm, LLVMValueRef ptr, LLVMValueRef index) { @@ -105,8 +106,8 @@ lp_build_array_get(LLVMBuilderRef builder, LLVMValueRef res; 
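/* Illustrative sketch, not part of this patch: with the lp_bld_struct
 * accessors above now taking a gallivm_state instead of a bare
 * LLVMBuilderRef, a typical call site changes like this (the pointer name
 * "ctx_ptr" and the member index are hypothetical; only the new signatures
 * come from this diff):
 *
 *    // before
 *    stride = lp_build_struct_get(builder, ctx_ptr, 1, "stride");
 *    // after
 *    stride = lp_build_struct_get(gallivm, ctx_ptr, 1, "stride");
 *
 * The helpers then pull gallivm->builder internally, so callers only carry
 * the one state pointer.
 */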
assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind); assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMArrayTypeKind); - element_ptr = lp_build_array_get_ptr(builder, ptr, index); - res = LLVMBuildLoad(builder, element_ptr, ""); + element_ptr = lp_build_array_get_ptr(gallivm, ptr, index); + res = LLVMBuildLoad(gallivm->builder, element_ptr, ""); #ifdef DEBUG lp_build_name(res, "%s[%s]", LLVMGetValueName(ptr), LLVMGetValueName(index)); #endif @@ -115,7 +116,7 @@ lp_build_array_get(LLVMBuilderRef builder, void -lp_build_array_set(LLVMBuilderRef builder, +lp_build_array_set(struct gallivm_state *gallivm, LLVMValueRef ptr, LLVMValueRef index, LLVMValueRef value) @@ -123,8 +124,8 @@ lp_build_array_set(LLVMBuilderRef builder, LLVMValueRef element_ptr; assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind); assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMArrayTypeKind); - element_ptr = lp_build_array_get_ptr(builder, ptr, index); - LLVMBuildStore(builder, value, element_ptr); + element_ptr = lp_build_array_get_ptr(gallivm, ptr, index); + LLVMBuildStore(gallivm->builder, value, element_ptr); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.h b/src/gallium/auxiliary/gallivm/lp_bld_struct.h index eb87a8eee9e..11605c685f0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_struct.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.h @@ -38,7 +38,7 @@ #include "gallivm/lp_bld.h" -#include <llvm-c/Target.h> +#include "gallivm/lp_bld_init.h" #include "util/u_debug.h" #include "util/u_memory.h" @@ -57,7 +57,7 @@ * Get value pointer to a structure member. */ LLVMValueRef -lp_build_struct_get_ptr(LLVMBuilderRef builder, +lp_build_struct_get_ptr(struct gallivm_state *gallivm, LLVMValueRef ptr, unsigned member, const char *name); @@ -66,7 +66,7 @@ lp_build_struct_get_ptr(LLVMBuilderRef builder, * Get the value of a structure member. */ LLVMValueRef -lp_build_struct_get(LLVMBuilderRef builder, +lp_build_struct_get(struct gallivm_state *gallivm, LLVMValueRef ptr, unsigned member, const char *name); @@ -75,7 +75,7 @@ lp_build_struct_get(LLVMBuilderRef builder, * Get value pointer to an array element. */ LLVMValueRef -lp_build_array_get_ptr(LLVMBuilderRef builder, +lp_build_array_get_ptr(struct gallivm_state *gallivm, LLVMValueRef ptr, LLVMValueRef index); @@ -83,7 +83,7 @@ lp_build_array_get_ptr(LLVMBuilderRef builder, * Get the value of an array element. */ LLVMValueRef -lp_build_array_get(LLVMBuilderRef builder, +lp_build_array_get(struct gallivm_state *gallivm, LLVMValueRef ptr, LLVMValueRef index); @@ -91,7 +91,7 @@ lp_build_array_get(LLVMBuilderRef builder, * Set the value of an array element. 
*/ void -lp_build_array_set(LLVMBuilderRef builder, +lp_build_array_set(struct gallivm_state *gallivm, LLVMValueRef ptr, LLVMValueRef index, LLVMValueRef value); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c index 4685a90e418..71693603c12 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c @@ -37,12 +37,13 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" +#include "lp_bld_init.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" LLVMValueRef -lp_build_broadcast(LLVMBuilderRef builder, +lp_build_broadcast(struct gallivm_state *gallivm, LLVMTypeRef vec_type, LLVMValueRef scalar) { @@ -52,8 +53,8 @@ lp_build_broadcast(LLVMBuilderRef builder, res = LLVMGetUndef(vec_type); for(i = 0; i < n; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - res = LLVMBuildInsertElement(builder, res, scalar, index, ""); + LLVMValueRef index = lp_build_const_int32(gallivm, i); + res = LLVMBuildInsertElement(gallivm->builder, res, scalar, index, ""); } return res; @@ -67,6 +68,7 @@ LLVMValueRef lp_build_broadcast_scalar(struct lp_build_context *bld, LLVMValueRef scalar) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; assert(lp_check_elem_type(type, LLVMTypeOf(scalar))); @@ -82,17 +84,17 @@ lp_build_broadcast_scalar(struct lp_build_context *bld, struct lp_type i32_vec_type = lp_type_int_vec(32); i32_vec_type.length = type.length; - res = LLVMBuildInsertElement(bld->builder, bld->undef, scalar, - LLVMConstInt(LLVMInt32Type(), 0, 0), ""); - res = LLVMBuildShuffleVector(bld->builder, res, bld->undef, - lp_build_const_int_vec(i32_vec_type, 0), ""); + res = LLVMBuildInsertElement(builder, bld->undef, scalar, + lp_build_const_int32(bld->gallivm, 0), ""); + res = LLVMBuildShuffleVector(builder, res, bld->undef, + lp_build_const_int_vec(bld->gallivm, i32_vec_type, 0), ""); #else /* XXX: The above path provokes a bug in LLVM 2.6 */ unsigned i; res = bld->undef; for(i = 0; i < type.length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - res = LLVMBuildInsertElement(bld->builder, res, scalar, index, ""); + LLVMValueRef index = lp_build_const_int32(bld->gallivm, i); + res = LLVMBuildInsertElement(builder, res, scalar, index, ""); } #endif return res; @@ -104,13 +106,13 @@ lp_build_broadcast_scalar(struct lp_build_context *bld, * Combined extract and broadcast (or a mere shuffle when the two types match) */ LLVMValueRef -lp_build_extract_broadcast(LLVMBuilderRef builder, +lp_build_extract_broadcast(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, LLVMValueRef vector, LLVMValueRef index) { - LLVMTypeRef i32t = LLVMInt32Type(); + LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef res; assert(src_type.floating == dst_type.floating); @@ -132,8 +134,8 @@ lp_build_extract_broadcast(LLVMBuilderRef builder, * Broadcast scalar -> vector. 
*/ - res = lp_build_broadcast(builder, - lp_build_vec_type(dst_type), + res = lp_build_broadcast(gallivm, + lp_build_vec_type(gallivm, dst_type), vector); } } @@ -144,16 +146,16 @@ lp_build_extract_broadcast(LLVMBuilderRef builder, */ LLVMValueRef shuffle; - shuffle = lp_build_broadcast(builder, + shuffle = lp_build_broadcast(gallivm, LLVMVectorType(i32t, dst_type.length), index); - res = LLVMBuildShuffleVector(builder, vector, - LLVMGetUndef(lp_build_vec_type(dst_type)), + res = LLVMBuildShuffleVector(gallivm->builder, vector, + LLVMGetUndef(lp_build_vec_type(gallivm, dst_type)), shuffle, ""); } else { LLVMValueRef scalar; - scalar = LLVMBuildExtractElement(builder, vector, index, ""); + scalar = LLVMBuildExtractElement(gallivm->builder, vector, index, ""); if (dst_type.length == 1) { /* * Trivial extract scalar from vector. @@ -166,8 +168,8 @@ lp_build_extract_broadcast(LLVMBuilderRef builder, * General case of different sized vectors. */ - res = lp_build_broadcast(builder, - lp_build_vec_type(dst_type), + res = lp_build_broadcast(gallivm, + lp_build_vec_type(gallivm, dst_type), vector); } } @@ -185,6 +187,7 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld, LLVMValueRef a, unsigned channel) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; const unsigned n = type.length; unsigned i, j; @@ -199,14 +202,14 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld, /* * Shuffle. */ - LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; for(j = 0; j < n; j += 4) for(i = 0; i < 4; ++i) shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0); - return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), ""); + return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), ""); } else { /* @@ -226,8 +229,9 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld, }; unsigned i; - a = LLVMBuildAnd(bld->builder, a, - lp_build_const_mask_aos(type, 1 << channel), ""); + a = LLVMBuildAnd(builder, a, + lp_build_const_mask_aos(bld->gallivm, + type, 1 << channel), ""); /* * Build a type where each element is an integer that cover the four @@ -239,7 +243,7 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld, type4.width *= 4; type4.length /= 4; - a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), ""); + a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), ""); for(i = 0; i < 2; ++i) { LLVMValueRef tmp = NULL; @@ -250,16 +254,16 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld, #endif if(shift > 0) - tmp = LLVMBuildLShr(bld->builder, a, lp_build_const_int_vec(type4, shift*type.width), ""); + tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), ""); if(shift < 0) - tmp = LLVMBuildShl(bld->builder, a, lp_build_const_int_vec(type4, -shift*type.width), ""); + tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), ""); assert(tmp); if(tmp) - a = LLVMBuildOr(bld->builder, a, tmp, ""); + a = LLVMBuildOr(builder, a, tmp, ""); } - return LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type), ""); + return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), ""); } } @@ -269,6 +273,7 @@ lp_build_swizzle_aos(struct lp_build_context *bld, LLVMValueRef a, const unsigned char swizzles[4]) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct 
lp_type type = bld->type; const unsigned n = type.length; unsigned i, j; @@ -303,8 +308,8 @@ lp_build_swizzle_aos(struct lp_build_context *bld, /* * Shuffle. */ - LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(type)); - LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type)); + LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; LLVMValueRef aux[LP_MAX_VECTOR_LENGTH]; @@ -326,13 +331,13 @@ lp_build_swizzle_aos(struct lp_build_context *bld, case PIPE_SWIZZLE_ZERO: shuffle = type.length + 0; if (!aux[0]) { - aux[0] = lp_build_const_elem(type, 0.0); + aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0); } break; case PIPE_SWIZZLE_ONE: shuffle = type.length + 1; if (!aux[1]) { - aux[1] = lp_build_const_elem(type, 1.0); + aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0); } break; } @@ -346,7 +351,7 @@ lp_build_swizzle_aos(struct lp_build_context *bld, } } - return LLVMBuildShuffleVector(bld->builder, a, + return LLVMBuildShuffleVector(builder, a, LLVMConstVector(aux, n), LLVMConstVector(shuffles, n), ""); } else { @@ -387,8 +392,8 @@ lp_build_swizzle_aos(struct lp_build_context *bld, type4.width *= 4; type4.length /= 4; - a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), ""); - res = LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type4), ""); + a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), ""); + res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), ""); /* * Mask and shift the channels, trying to group as many channels in the @@ -414,23 +419,24 @@ lp_build_swizzle_aos(struct lp_build_context *bld, if (0) debug_printf("shift = %i, mask = 0x%08llx\n", shift, mask); - masked = LLVMBuildAnd(bld->builder, a, - lp_build_const_int_vec(type4, mask), ""); + masked = LLVMBuildAnd(builder, a, + lp_build_const_int_vec(bld->gallivm, type4, mask), ""); if (shift > 0) { - shifted = LLVMBuildShl(bld->builder, masked, - lp_build_const_int_vec(type4, shift*type.width), ""); + shifted = LLVMBuildShl(builder, masked, + lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), ""); } else if (shift < 0) { - shifted = LLVMBuildLShr(bld->builder, masked, - lp_build_const_int_vec(type4, -shift*type.width), ""); + shifted = LLVMBuildLShr(builder, masked, + lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), ""); } else { shifted = masked; } - res = LLVMBuildOr(bld->builder, res, shifted, ""); + res = LLVMBuildOr(builder, res, shifted, ""); } } - return LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type), ""); + return LLVMBuildBitCast(builder, res, + lp_build_vec_type(bld->gallivm, type), ""); } } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h index fdea8442aef..c366a65103e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h @@ -45,7 +45,7 @@ struct lp_build_context; LLVMValueRef -lp_build_broadcast(LLVMBuilderRef builder, +lp_build_broadcast(struct gallivm_state *gallivm, LLVMTypeRef vec_type, LLVMValueRef scalar); @@ -56,7 +56,7 @@ lp_build_broadcast_scalar(struct lp_build_context *bld, LLVMValueRef -lp_build_extract_broadcast(LLVMBuilderRef builder, +lp_build_extract_broadcast(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, LLVMValueRef vector, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 694818ccfb8..9713d100484 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -46,6 +46,7 @@ struct tgsi_shader_info; struct lp_type; struct lp_build_context; struct lp_build_mask_context; +struct gallivm_state; enum lp_build_tex_modifier { @@ -141,7 +142,7 @@ struct lp_build_sampler_soa void (*emit_fetch_texel)( const struct lp_build_sampler_soa *sampler, - LLVMBuilderRef builder, + struct gallivm_state *gallivm, struct lp_type type, unsigned unit, unsigned num_coords, @@ -174,7 +175,7 @@ lp_build_tgsi_info(const struct tgsi_token *tokens, void -lp_build_tgsi_soa(LLVMBuilderRef builder, +lp_build_tgsi_soa(struct gallivm_state *gallivm, const struct tgsi_token *tokens, struct lp_type type, struct lp_build_mask_context *mask, @@ -188,7 +189,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, void -lp_build_tgsi_aos(LLVMBuilderRef builder, +lp_build_tgsi_aos(struct gallivm_state *gallivm, const struct tgsi_token *tokens, struct lp_type type, const unsigned char swizzles[4], @@ -200,7 +201,7 @@ lp_build_tgsi_aos(LLVMBuilderRef builder, LLVMValueRef -lp_build_system_values_array(LLVMBuilderRef builder, +lp_build_system_values_array(struct gallivm_state *gallivm, const struct tgsi_shader_info *info, LLVMValueRef instance_id, LLVMValueRef facing); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index c3c082b2b95..a021efd69ff 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -151,6 +151,7 @@ emit_fetch( const struct tgsi_full_instruction *inst, unsigned src_op) { + LLVMBuilderRef builder = bld->base.gallivm->builder; struct lp_type type = bld->base.type; const struct tgsi_full_src_register *reg = &inst->Src[src_op]; LLVMValueRef res; @@ -175,14 +176,12 @@ emit_fetch( LLVMValueRef scalar; LLVMValueRef swizzle; - index = LLVMConstInt(LLVMInt32Type(), - reg->Register.Index*4 + chan, - 0); + index = lp_build_const_int32(bld->base.gallivm, reg->Register.Index * 4 + chan); - scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, + scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, ""); - scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); + scalar = LLVMBuildLoad(builder, scalar_ptr, ""); lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]); @@ -190,9 +189,9 @@ emit_fetch( * NOTE: constants array is always assumed to be RGBA */ - swizzle = LLVMConstInt(LLVMInt32Type(), chan, 0); + swizzle = lp_build_const_int32(bld->base.gallivm, chan); - res = LLVMBuildInsertElement(bld->base.builder, res, scalar, swizzle, ""); + res = LLVMBuildInsertElement(builder, res, scalar, swizzle, ""); } /* @@ -206,14 +205,14 @@ emit_fetch( unsigned i; for (chan = 0; chan < 4; ++chan) { - shuffles[chan] = LLVMConstInt(LLVMInt32Type(), chan, 0); + shuffles[chan] = lp_build_const_int32(bld->base.gallivm, chan); } for (i = 4; i < type.length; ++i) { shuffles[i] = shuffles[i % 4]; } - res = LLVMBuildShuffleVector(bld->base.builder, + res = LLVMBuildShuffleVector(builder, res, bld->base.undef, LLVMConstVector(shuffles, type.length), ""); @@ -234,7 +233,7 @@ emit_fetch( { LLVMValueRef temp_ptr; temp_ptr = bld->temps[reg->Register.Index]; - res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); + res = LLVMBuildLoad(builder, temp_ptr, ""); if (!res) return bld->base.undef; } @@ -281,6 +280,7 @@ emit_store( unsigned index, LLVMValueRef value) { + 
LLVMBuilderRef builder = bld->base.gallivm->builder; const struct tgsi_full_dst_register *reg = &inst->Dst[index]; LLVMValueRef mask = NULL; LLVMValueRef ptr; @@ -299,7 +299,7 @@ emit_store( break; case TGSI_SAT_MINUS_PLUS_ONE: - value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0)); + value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0)); value = lp_build_min(&bld->base, value, bld->base.one); break; @@ -344,20 +344,20 @@ emit_store( assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS); - pred = LLVMBuildLoad(bld->base.builder, + pred = LLVMBuildLoad(builder, bld->preds[inst->Predicate.Index], ""); /* * Convert the value to an integer mask. */ - pred = lp_build_compare(bld->base.builder, + pred = lp_build_compare(bld->base.gallivm, bld->base.type, PIPE_FUNC_NOTEQUAL, pred, bld->base.zero); if (inst->Predicate.Negate) { - pred = LLVMBuildNot(bld->base.builder, pred, ""); + pred = LLVMBuildNot(builder, pred, ""); } pred = swizzle_aos(bld, pred, @@ -367,7 +367,7 @@ emit_store( inst->Predicate.SwizzleW); if (mask) { - mask = LLVMBuildAnd(bld->base.builder, mask, pred, ""); + mask = LLVMBuildAnd(builder, mask, pred, ""); } else { mask = pred; } @@ -380,10 +380,11 @@ emit_store( if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) { LLVMValueRef writemask; - writemask = lp_build_const_mask_aos(bld->base.type, reg->Register.WriteMask); + writemask = lp_build_const_mask_aos(bld->base.gallivm, bld->base.type, + reg->Register.WriteMask); if (mask) { - mask = LLVMBuildAnd(bld->base.builder, mask, writemask, ""); + mask = LLVMBuildAnd(builder, mask, writemask, ""); } else { mask = writemask; } @@ -392,12 +393,12 @@ emit_store( if (mask) { LLVMValueRef orig_value; - orig_value = LLVMBuildLoad(bld->base.builder, ptr, ""); + orig_value = LLVMBuildLoad(builder, ptr, ""); value = lp_build_select(&bld->base, mask, value, orig_value); } - LLVMBuildStore(bld->base.builder, value, ptr); + LLVMBuildStore(builder, value, ptr); } @@ -454,7 +455,8 @@ emit_declaration( struct lp_build_tgsi_aos_context *bld, const struct tgsi_full_declaration *decl) { - LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type); + struct gallivm_state *gallivm = bld->base.gallivm; + LLVMTypeRef vec_type = lp_build_vec_type(bld->base.gallivm, bld->base.type); unsigned first = decl->Range.First; unsigned last = decl->Range.Last; @@ -465,31 +467,26 @@ emit_declaration( case TGSI_FILE_TEMPORARY: assert(idx < LP_MAX_TGSI_TEMPS); if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { - LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), - last + 1, 0); - bld->temps_array = lp_build_array_alloca(bld->base.builder, + LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1); + bld->temps_array = lp_build_array_alloca(bld->base.gallivm, vec_type, array_size, ""); } else { - bld->temps[idx] = lp_build_alloca(bld->base.builder, - vec_type, ""); + bld->temps[idx] = lp_build_alloca(gallivm, vec_type, ""); } break; case TGSI_FILE_OUTPUT: - bld->outputs[idx] = lp_build_alloca(bld->base.builder, - vec_type, ""); + bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, ""); break; case TGSI_FILE_ADDRESS: assert(idx < LP_MAX_TGSI_ADDRS); - bld->addr[idx] = lp_build_alloca(bld->base.builder, - vec_type, ""); + bld->addr[idx] = lp_build_alloca(gallivm, vec_type, ""); break; case TGSI_FILE_PREDICATE: assert(idx < LP_MAX_TGSI_PREDS); - bld->preds[idx] = lp_build_alloca(bld->base.builder, - vec_type, ""); + bld->preds[idx] = lp_build_alloca(gallivm, vec_type, ""); break; 
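/* Illustrative sketch, not part of this patch: the allocation pattern used
 * in the declaration cases above, with lp_build_alloca() and
 * lp_build_array_alloca() now keyed off the gallivm state rather than the
 * builder.  "n_regs" is a made-up count; only the helper signatures come
 * from this diff:
 *
 *    LLVMValueRef reg  = lp_build_alloca(gallivm, vec_type, "tmp");
 *    LLVMValueRef regs = lp_build_array_alloca(gallivm, vec_type,
 *                            lp_build_const_int32(gallivm, n_regs), "tmps");
 */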
default: @@ -644,7 +641,7 @@ emit_instruction( src0 = emit_fetch(bld, inst, 0); src1 = emit_fetch(bld, inst, 1); src2 = emit_fetch(bld, inst, 2); - tmp1 = lp_build_const_vec(bld->base.type, 0.5); + tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5); tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1); dst0 = lp_build_select(&bld->base, tmp0, src0, src1); break; @@ -1039,7 +1036,7 @@ emit_instruction( void -lp_build_tgsi_aos(LLVMBuilderRef builder, +lp_build_tgsi_aos(struct gallivm_state *gallivm, const struct tgsi_token *tokens, struct lp_type type, const unsigned char swizzles[4], @@ -1058,8 +1055,8 @@ lp_build_tgsi_aos(LLVMBuilderRef builder, /* Setup build context */ memset(&bld, 0, sizeof bld); - lp_build_context_init(&bld.base, builder, type); - lp_build_context_init(&bld.int_bld, builder, lp_int_type(type)); + lp_build_context_init(&bld.base, gallivm, type); + lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type)); for (chan = 0; chan < 4; ++chan) { bld.swizzles[chan] = swizzles[chan]; @@ -1131,7 +1128,7 @@ lp_build_tgsi_aos(LLVMBuilderRef builder, imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float; } bld.immediates[num_immediates] = - lp_build_const_aos(type, + lp_build_const_aos(gallivm, type, imm[0], imm[1], imm[2], imm[3], NULL); num_immediates++; @@ -1156,7 +1153,7 @@ lp_build_tgsi_aos(LLVMBuilderRef builder, } if (0) { - LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); + LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder); LLVMValueRef function = LLVMGetBasicBlockParent(block); debug_printf("11111111111111111111111111111 \n"); tgsi_dump(tokens, 0); @@ -1167,7 +1164,7 @@ lp_build_tgsi_aos(LLVMBuilderRef builder, if (0) { LLVMModuleRef module = LLVMGetGlobalParent( - LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder))); + LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); LLVMDumpModule(module); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 3fdfac95a0a..d1585c8e2b7 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -51,6 +51,7 @@ #include "lp_bld_arit.h" #include "lp_bld_bitarit.h" #include "lp_bld_gather.h" +#include "lp_bld_init.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_flow.h" @@ -177,22 +178,24 @@ static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context mask->loop_stack_size = 0; mask->call_stack_size = 0; - mask->int_vec_type = lp_build_int_vec_type(mask->bld->type); + mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type); mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type); } static void lp_exec_mask_update(struct lp_exec_mask *mask) { + LLVMBuilderRef builder = mask->bld->gallivm->builder; + if (mask->loop_stack_size) { /*for loops we need to update the entire mask at runtime */ LLVMValueRef tmp; assert(mask->break_mask); - tmp = LLVMBuildAnd(mask->bld->builder, + tmp = LLVMBuildAnd(builder, mask->cont_mask, mask->break_mask, "maskcb"); - mask->exec_mask = LLVMBuildAnd(mask->bld->builder, + mask->exec_mask = LLVMBuildAnd(builder, mask->cond_mask, tmp, "maskfull"); @@ -200,7 +203,7 @@ static void lp_exec_mask_update(struct lp_exec_mask *mask) mask->exec_mask = mask->cond_mask; if (mask->call_stack_size) { - mask->exec_mask = LLVMBuildAnd(mask->bld->builder, + mask->exec_mask = LLVMBuildAnd(builder, 
mask->exec_mask, mask->ret_mask, "callmask"); @@ -214,13 +217,15 @@ static void lp_exec_mask_update(struct lp_exec_mask *mask) static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, LLVMValueRef val) { + LLVMBuilderRef builder = mask->bld->gallivm->builder; + assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING); if (mask->cond_stack_size == 0) { assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type)); } mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; assert(LLVMTypeOf(val) == mask->int_vec_type); - mask->cond_mask = LLVMBuildAnd(mask->bld->builder, + mask->cond_mask = LLVMBuildAnd(builder, mask->cond_mask, val, ""); @@ -229,6 +234,7 @@ static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) { + LLVMBuilderRef builder = mask->bld->gallivm->builder; LLVMValueRef prev_mask; LLVMValueRef inv_mask; @@ -238,9 +244,9 @@ static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type)); } - inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, ""); + inv_mask = LLVMBuildNot(builder, mask->cond_mask, ""); - mask->cond_mask = LLVMBuildAnd(mask->bld->builder, + mask->cond_mask = LLVMBuildAnd(builder, inv_mask, prev_mask, ""); lp_exec_mask_update(mask); @@ -255,6 +261,8 @@ static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) static void lp_exec_bgnloop(struct lp_exec_mask *mask) { + LLVMBuilderRef builder = mask->bld->gallivm->builder; + if (mask->loop_stack_size == 0) { assert(mask->loop_block == NULL); assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type)); @@ -270,25 +278,26 @@ static void lp_exec_bgnloop(struct lp_exec_mask *mask) mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var; ++mask->loop_stack_size; - mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, ""); - LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var); + mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, ""); + LLVMBuildStore(builder, mask->break_mask, mask->break_var); - mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop"); - LLVMBuildBr(mask->bld->builder, mask->loop_block); - LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block); + mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop"); + LLVMBuildBr(builder, mask->loop_block); + LLVMPositionBuilderAtEnd(builder, mask->loop_block); - mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, ""); + mask->break_mask = LLVMBuildLoad(builder, mask->break_var, ""); lp_exec_mask_update(mask); } static void lp_exec_break(struct lp_exec_mask *mask) { - LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, + LLVMBuilderRef builder = mask->bld->gallivm->builder; + LLVMValueRef exec_mask = LLVMBuildNot(builder, mask->exec_mask, "break"); - mask->break_mask = LLVMBuildAnd(mask->bld->builder, + mask->break_mask = LLVMBuildAnd(builder, mask->break_mask, exec_mask, "break_full"); @@ -297,11 +306,12 @@ static void lp_exec_break(struct lp_exec_mask *mask) static void lp_exec_continue(struct lp_exec_mask *mask) { - LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, + LLVMBuilderRef builder = mask->bld->gallivm->builder; + LLVMValueRef exec_mask = LLVMBuildNot(builder, mask->exec_mask, ""); - mask->cont_mask = LLVMBuildAnd(mask->bld->builder, + mask->cont_mask = LLVMBuildAnd(builder, mask->cont_mask, exec_mask, ""); @@ -309,11 +319,14 @@ static void 
lp_exec_continue(struct lp_exec_mask *mask) } -static void lp_exec_endloop(struct lp_exec_mask *mask) +static void lp_exec_endloop(struct gallivm_state *gallivm, + struct lp_exec_mask *mask) { + LLVMBuilderRef builder = mask->bld->gallivm->builder; LLVMBasicBlockRef endloop; - LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width* - mask->bld->type.length); + LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context, + mask->bld->type.width * + mask->bld->type.length); LLVMValueRef i1cond; assert(mask->break_mask); @@ -329,21 +342,21 @@ static void lp_exec_endloop(struct lp_exec_mask *mask) * Unlike the continue mask, the break_mask must be preserved across loop * iterations */ - LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var); + LLVMBuildStore(builder, mask->break_mask, mask->break_var); /* i1cond = (mask == 0) */ i1cond = LLVMBuildICmp( - mask->bld->builder, + builder, LLVMIntNE, - LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""), + LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""), LLVMConstNull(reg_type), ""); - endloop = lp_build_insert_new_block(mask->bld->builder, "endloop"); + endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop"); - LLVMBuildCondBr(mask->bld->builder, + LLVMBuildCondBr(builder, i1cond, mask->loop_block, endloop); - LLVMPositionBuilderAtEnd(mask->bld->builder, endloop); + LLVMPositionBuilderAtEnd(builder, endloop); assert(mask->loop_stack_size); --mask->loop_stack_size; @@ -365,10 +378,12 @@ static void lp_exec_mask_store(struct lp_exec_mask *mask, LLVMValueRef val, LLVMValueRef dst) { + LLVMBuilderRef builder = mask->bld->gallivm->builder; + /* Mix the predicate and execution mask */ if (mask->has_mask) { if (pred) { - pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, ""); + pred = LLVMBuildAnd(builder, pred, mask->exec_mask, ""); } else { pred = mask->exec_mask; } @@ -377,14 +392,14 @@ static void lp_exec_mask_store(struct lp_exec_mask *mask, if (pred) { LLVMValueRef real_val, dst_val; - dst_val = LLVMBuildLoad(mask->bld->builder, dst, ""); + dst_val = LLVMBuildLoad(builder, dst, ""); real_val = lp_build_select(mask->bld, pred, val, dst_val); - LLVMBuildStore(mask->bld->builder, real_val, dst); + LLVMBuildStore(builder, real_val, dst); } else - LLVMBuildStore(mask->bld->builder, val, dst); + LLVMBuildStore(builder, val, dst); } static void lp_exec_mask_call(struct lp_exec_mask *mask, @@ -400,6 +415,7 @@ static void lp_exec_mask_call(struct lp_exec_mask *mask, static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc) { + LLVMBuilderRef builder = mask->bld->gallivm->builder; LLVMValueRef exec_mask; if (mask->call_stack_size == 0) { @@ -407,11 +423,11 @@ static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc) *pc = -1; return; } - exec_mask = LLVMBuildNot(mask->bld->builder, + exec_mask = LLVMBuildNot(builder, mask->exec_mask, "ret"); - mask->ret_mask = LLVMBuildAnd(mask->bld->builder, + mask->ret_mask = LLVMBuildAnd(builder, mask->ret_mask, exec_mask, "ret_full"); @@ -443,10 +459,11 @@ get_temp_ptr(struct lp_build_tgsi_soa_context *bld, unsigned index, unsigned chan) { + LLVMBuilderRef builder = bld->base.gallivm->builder; assert(chan < 4); if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { - LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan); - return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, ""); + LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, index * 4 + chan); + return LLVMBuildGEP(builder, bld->temps_array, &lindex, 
1, ""); } else { return bld->temps[index][chan]; @@ -464,10 +481,12 @@ get_output_ptr(struct lp_build_tgsi_soa_context *bld, unsigned index, unsigned chan) { + LLVMBuilderRef builder = bld->base.gallivm->builder; assert(chan < 4); if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { - LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan); - return LLVMBuildGEP(bld->base.builder, bld->outputs_array, &lindex, 1, ""); + LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, + index * 4 + chan); + return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, ""); } else { return bld->outputs[index][chan]; @@ -484,6 +503,7 @@ build_gather(struct lp_build_tgsi_soa_context *bld, LLVMValueRef base_ptr, LLVMValueRef indexes) { + LLVMBuilderRef builder = bld->base.gallivm->builder; LLVMValueRef res = bld->base.undef; unsigned i; @@ -491,14 +511,14 @@ build_gather(struct lp_build_tgsi_soa_context *bld, * Loop over elements of index_vec, load scalar value, insert it into 'res'. */ for (i = 0; i < bld->base.type.length; i++) { - LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder, + LLVMValueRef ii = lp_build_const_int32(bld->base.gallivm, i); + LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, ""); - LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr, + LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "gather_ptr"); - LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); + LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, ""); - res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, ""); + res = LLVMBuildInsertElement(builder, res, scalar, ii, ""); } return res; @@ -516,13 +536,14 @@ emit_mask_scatter(struct lp_build_tgsi_soa_context *bld, struct lp_exec_mask *mask, LLVMValueRef pred) { - LLVMBuilderRef builder = bld->base.builder; + struct gallivm_state *gallivm = bld->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; unsigned i; /* Mix the predicate and execution mask */ if (mask->has_mask) { if (pred) { - pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, ""); + pred = LLVMBuildAnd(builder, pred, mask->exec_mask, ""); } else { pred = mask->exec_mask; @@ -533,7 +554,7 @@ emit_mask_scatter(struct lp_build_tgsi_soa_context *bld, * Loop over elements of index_vec, store scalar value. 
*/ for (i = 0; i < bld->base.type.length; i++) { - LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef ii = lp_build_const_int32(gallivm, i); LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, ""); LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr"); LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val"); @@ -541,7 +562,7 @@ emit_mask_scatter(struct lp_build_tgsi_soa_context *bld, LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL; if (0) - lp_build_printf(builder, "scatter %d: val %f at %d %p\n", + lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n", ii, val, index, scalar_ptr); if (scalar_pred) { @@ -568,6 +589,7 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld, unsigned reg_file, unsigned reg_index, const struct tgsi_src_register *indirect_reg) { + LLVMBuilderRef builder = bld->base.gallivm->builder; struct lp_build_context *uint_bld = &bld->uint_bld; /* always use X component of address register */ unsigned swizzle = indirect_reg->SwizzleX; @@ -578,21 +600,22 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld, assert(bld->indirect_files & (1 << reg_file)); - base = lp_build_const_int_vec(uint_bld->type, reg_index); + base = lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, reg_index); assert(swizzle < 4); - rel = LLVMBuildLoad(bld->base.builder, + rel = LLVMBuildLoad(builder, bld->addr[indirect_reg->Index][swizzle], "load addr reg"); /* for indexing we want integers */ - rel = LLVMBuildFPToSI(bld->base.builder, + rel = LLVMBuildFPToSI(builder, rel, uint_bld->vec_type, ""); index = lp_build_add(uint_bld, base, rel); - max_index = lp_build_const_int_vec(uint_bld->type, + max_index = lp_build_const_int_vec(bld->base.gallivm, + uint_bld->type, bld->info->file_max[reg_file]); assert(!uint_bld->type.sign); @@ -612,6 +635,8 @@ emit_fetch( unsigned src_op, const unsigned chan_index ) { + struct gallivm_state *gallivm = bld->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; struct lp_build_context *uint_bld = &bld->uint_bld; const struct tgsi_full_src_register *reg = &inst->Src[src_op]; const unsigned swizzle = @@ -637,7 +662,7 @@ emit_fetch( case TGSI_FILE_CONSTANT: if (reg->Register.Indirect) { LLVMValueRef swizzle_vec = - lp_build_const_int_vec(uint_bld->type, swizzle); + lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle); LLVMValueRef index_vec; /* index into the const buffer */ /* index_vec = indirect_index * 4 + swizzle */ @@ -651,11 +676,11 @@ emit_fetch( LLVMValueRef index; /* index into the const buffer */ LLVMValueRef scalar, scalar_ptr; - index = lp_build_const_int32(reg->Register.Index*4 + swizzle); + index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle); - scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, + scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, ""); - scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); + scalar = LLVMBuildLoad(builder, scalar_ptr, ""); res = lp_build_broadcast_scalar(&bld->base, scalar); } @@ -669,9 +694,9 @@ emit_fetch( case TGSI_FILE_INPUT: if (reg->Register.Indirect) { LLVMValueRef swizzle_vec = - lp_build_const_int_vec(uint_bld->type, swizzle); + lp_build_const_int_vec(gallivm, uint_bld->type, swizzle); LLVMValueRef length_vec = - lp_build_const_int_vec(uint_bld->type, bld->base.type.length); + lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length); LLVMValueRef index_vec; /* index into the const buffer */ LLVMValueRef 
inputs_array; LLVMTypeRef float4_ptr_type; @@ -682,18 +707,19 @@ emit_fetch( index_vec = lp_build_mul(uint_bld, index_vec, length_vec); /* cast inputs_array pointer to float* */ - float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0); - inputs_array = LLVMBuildBitCast(uint_bld->builder, bld->inputs_array, - float4_ptr_type, ""); + float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); + inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, + float4_ptr_type, ""); /* Gather values from the temporary register array */ res = build_gather(bld, inputs_array, index_vec); } else { if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { - LLVMValueRef lindex = lp_build_const_int32(reg->Register.Index * 4 + swizzle); - LLVMValueRef input_ptr = LLVMBuildGEP(bld->base.builder, + LLVMValueRef lindex = lp_build_const_int32(gallivm, + reg->Register.Index * 4 + swizzle); + LLVMValueRef input_ptr = LLVMBuildGEP(builder, bld->inputs_array, &lindex, 1, ""); - res = LLVMBuildLoad(bld->base.builder, input_ptr, ""); + res = LLVMBuildLoad(builder, input_ptr, ""); } else { res = bld->inputs[reg->Register.Index][swizzle]; @@ -705,9 +731,10 @@ emit_fetch( case TGSI_FILE_TEMPORARY: if (reg->Register.Indirect) { LLVMValueRef swizzle_vec = - lp_build_const_int_vec(uint_bld->type, swizzle); + lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle); LLVMValueRef length_vec = - lp_build_const_int_vec(uint_bld->type, bld->base.type.length); + lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, + bld->base.type.length); LLVMValueRef index_vec; /* index into the const buffer */ LLVMValueRef temps_array; LLVMTypeRef float4_ptr_type; @@ -718,8 +745,8 @@ emit_fetch( index_vec = lp_build_mul(uint_bld, index_vec, length_vec); /* cast temps_array pointer to float* */ - float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0); - temps_array = LLVMBuildBitCast(uint_bld->builder, bld->temps_array, + float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->base.gallivm->context), 0); + temps_array = LLVMBuildBitCast(builder, bld->temps_array, float4_ptr_type, ""); /* Gather values from the temporary register array */ @@ -728,7 +755,7 @@ emit_fetch( else { LLVMValueRef temp_ptr; temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle); - res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); + res = LLVMBuildLoad(builder, temp_ptr, ""); if (!res) return bld->base.undef; } @@ -740,11 +767,12 @@ emit_fetch( LLVMValueRef index; /* index into the system value array */ LLVMValueRef scalar, scalar_ptr; - index = lp_build_const_int32(reg->Register.Index * 4 + swizzle); + index = lp_build_const_int32(gallivm, + reg->Register.Index * 4 + swizzle); - scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->system_values_array, + scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array, &index, 1, ""); - scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); + scalar = LLVMBuildLoad(builder, scalar_ptr, ""); res = lp_build_broadcast_scalar(&bld->base, scalar); } @@ -814,6 +842,7 @@ emit_fetch_predicate( const struct tgsi_full_instruction *inst, LLVMValueRef *pred) { + LLVMBuilderRef builder = bld->base.gallivm->builder; unsigned index; unsigned char swizzles[4]; LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL}; @@ -843,7 +872,7 @@ emit_fetch_predicate( * in the swizzles */ if (!unswizzled[swizzle]) { - value = LLVMBuildLoad(bld->base.builder, + value = LLVMBuildLoad(builder, bld->preds[index][swizzle], ""); /* @@ -853,13 +882,13 @@ emit_fetch_predicate( * is needlessly causing two comparisons 
due to storing the intermediate * result as float vector instead of an integer mask vector. */ - value = lp_build_compare(bld->base.builder, + value = lp_build_compare(bld->base.gallivm, bld->base.type, PIPE_FUNC_NOTEQUAL, value, bld->base.zero); if (inst->Predicate.Negate) { - value = LLVMBuildNot(bld->base.builder, value, ""); + value = LLVMBuildNot(builder, value, ""); } unswizzled[swizzle] = value; @@ -884,6 +913,8 @@ emit_store( LLVMValueRef pred, LLVMValueRef value) { + struct gallivm_state *gallivm = bld->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; const struct tgsi_full_dst_register *reg = &inst->Dst[index]; struct lp_build_context *uint_bld = &bld->uint_bld; LLVMValueRef indirect_index = NULL; @@ -898,7 +929,7 @@ emit_store( break; case TGSI_SAT_MINUS_PLUS_ONE: - value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0)); + value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0)); value = lp_build_min(&bld->base, value, bld->base.one); break; @@ -918,11 +949,10 @@ emit_store( switch( reg->Register.File ) { case TGSI_FILE_OUTPUT: if (reg->Register.Indirect) { - LLVMBuilderRef builder = bld->base.builder; LLVMValueRef chan_vec = - lp_build_const_int_vec(uint_bld->type, chan_index); + lp_build_const_int_vec(gallivm, uint_bld->type, chan_index); LLVMValueRef length_vec = - lp_build_const_int_vec(uint_bld->type, bld->base.type.length); + lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length); LLVMValueRef index_vec; /* indexes into the temp registers */ LLVMValueRef outputs_array; LLVMValueRef pixel_offsets; @@ -932,7 +962,7 @@ emit_store( /* build pixel offset vector: {0, 1, 2, 3, ...} */ pixel_offsets = uint_bld->undef; for (i = 0; i < bld->base.type.length; i++) { - LLVMValueRef ii = lp_build_const_int32(i); + LLVMValueRef ii = lp_build_const_int32(gallivm, i); pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, ii, ii, ""); } @@ -943,7 +973,8 @@ emit_store( index_vec = lp_build_mul(uint_bld, index_vec, length_vec); index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); - float_ptr_type = LLVMPointerType(LLVMFloatType(), 0); + float_ptr_type = + LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, float_ptr_type, ""); @@ -960,11 +991,11 @@ emit_store( case TGSI_FILE_TEMPORARY: if (reg->Register.Indirect) { - LLVMBuilderRef builder = bld->base.builder; LLVMValueRef chan_vec = - lp_build_const_int_vec(uint_bld->type, chan_index); + lp_build_const_int_vec(gallivm, uint_bld->type, chan_index); LLVMValueRef length_vec = - lp_build_const_int_vec(uint_bld->type, bld->base.type.length); + lp_build_const_int_vec(gallivm, uint_bld->type, + bld->base.type.length); LLVMValueRef index_vec; /* indexes into the temp registers */ LLVMValueRef temps_array; LLVMValueRef pixel_offsets; @@ -974,7 +1005,7 @@ emit_store( /* build pixel offset vector: {0, 1, 2, 3, ...} */ pixel_offsets = uint_bld->undef; for (i = 0; i < bld->base.type.length; i++) { - LLVMValueRef ii = lp_build_const_int32(i); + LLVMValueRef ii = lp_build_const_int32(gallivm, i); pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, ii, ii, ""); } @@ -985,7 +1016,8 @@ emit_store( index_vec = lp_build_mul(uint_bld, index_vec, length_vec); index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); - float_ptr_type = LLVMPointerType(LLVMFloatType(), 0); + float_ptr_type = + LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); temps_array 
= LLVMBuildBitCast(builder, bld->temps_array, float_ptr_type, ""); @@ -1002,7 +1034,7 @@ emit_store( case TGSI_FILE_ADDRESS: lp_exec_mask_store(&bld->exec_mask, pred, value, - bld->addr[reg->Indirect.Index][chan_index]); + bld->addr[reg->Register.Index][chan_index]); break; case TGSI_FILE_PREDICATE: @@ -1026,6 +1058,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, enum lp_build_tex_modifier modifier, LLVMValueRef *texel) { + LLVMBuilderRef builder = bld->base.gallivm->builder; unsigned unit; LLVMValueRef lod_bias, explicit_lod; LLVMValueRef oow = NULL; @@ -1091,13 +1124,12 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, } if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { - LLVMTypeRef i32t = LLVMInt32Type(); - LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); + LLVMValueRef index0 = lp_build_const_int32(bld->base.gallivm, 0); for (i = 0; i < num_coords; i++) { LLVMValueRef src1 = emit_fetch( bld, inst, 1, i ); LLVMValueRef src2 = emit_fetch( bld, inst, 2, i ); - ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, ""); - ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, ""); + ddx[i] = LLVMBuildExtractElement(builder, src1, index0, ""); + ddy[i] = LLVMBuildExtractElement(builder, src2, index0, ""); } unit = inst->Src[3].Register.Index; } else { @@ -1113,7 +1145,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, } bld->sampler->emit_fetch_texel(bld->sampler, - bld->base.builder, + bld->base.gallivm, bld->base.type, unit, num_coords, coords, ddx, ddy, @@ -1168,6 +1200,7 @@ emit_kil( const struct tgsi_full_instruction *inst, int pc) { + LLVMBuilderRef builder = bld->base.gallivm->builder; const struct tgsi_full_src_register *reg = &inst->Src[0]; LLVMValueRef terms[NUM_CHANNELS]; LLVMValueRef mask; @@ -1199,7 +1232,7 @@ emit_kil( chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero); if(mask) - mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, ""); + mask = LLVMBuildAnd(builder, mask, chan_mask, ""); else mask = chan_mask; } @@ -1225,13 +1258,14 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, int pc) { + LLVMBuilderRef builder = bld->base.gallivm->builder; LLVMValueRef mask; /* For those channels which are "alive", disable fragment shader * execution. 
*/ if (bld->exec_mask.has_mask) { - mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp"); + mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp"); } else { LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type); @@ -1252,38 +1286,39 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld, static void emit_dump_temps(struct lp_build_tgsi_soa_context *bld) { - LLVMBuilderRef builder = bld->base.builder; + struct gallivm_state *gallivm = bld->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef temp_ptr; - LLVMValueRef i0 = lp_build_const_int32(0); - LLVMValueRef i1 = lp_build_const_int32(1); - LLVMValueRef i2 = lp_build_const_int32(2); - LLVMValueRef i3 = lp_build_const_int32(3); + LLVMValueRef i0 = lp_build_const_int32(gallivm, 0); + LLVMValueRef i1 = lp_build_const_int32(gallivm, 1); + LLVMValueRef i2 = lp_build_const_int32(gallivm, 2); + LLVMValueRef i3 = lp_build_const_int32(gallivm, 3); int index; int n = bld->info->file_max[TGSI_FILE_TEMPORARY]; for (index = 0; index < n; index++) { - LLVMValueRef idx = lp_build_const_int32(index); + LLVMValueRef idx = lp_build_const_int32(gallivm, index); LLVMValueRef v[4][4], res; int chan; - lp_build_printf(builder, "TEMP[%d]:\n", idx); + lp_build_printf(gallivm, "TEMP[%d]:\n", idx); for (chan = 0; chan < 4; chan++) { temp_ptr = get_temp_ptr(bld, index, chan); - res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); + res = LLVMBuildLoad(builder, temp_ptr, ""); v[chan][0] = LLVMBuildExtractElement(builder, res, i0, ""); v[chan][1] = LLVMBuildExtractElement(builder, res, i1, ""); v[chan][2] = LLVMBuildExtractElement(builder, res, i2, ""); v[chan][3] = LLVMBuildExtractElement(builder, res, i3, ""); } - lp_build_printf(builder, " X: %f %f %f %f\n", + lp_build_printf(gallivm, " X: %f %f %f %f\n", v[0][0], v[0][1], v[0][2], v[0][3]); - lp_build_printf(builder, " Y: %f %f %f %f\n", + lp_build_printf(gallivm, " Y: %f %f %f %f\n", v[1][0], v[1][1], v[1][2], v[1][3]); - lp_build_printf(builder, " Z: %f %f %f %f\n", + lp_build_printf(gallivm, " Z: %f %f %f %f\n", v[2][0], v[2][1], v[2][2], v[2][3]); - lp_build_printf(builder, " W: %f %f %f %f\n", + lp_build_printf(gallivm, " W: %f %f %f %f\n", v[3][0], v[3][1], v[3][2], v[3][3]); } } @@ -1295,6 +1330,7 @@ emit_declaration( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_declaration *decl) { + struct gallivm_state *gallivm = bld->base.gallivm; LLVMTypeRef vec_type = bld->base.vec_type; const unsigned first = decl->Range.First; const unsigned last = decl->Range.Last; @@ -1307,15 +1343,14 @@ emit_declaration( assert(idx < LP_MAX_TGSI_TEMPS); if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) { for (i = 0; i < NUM_CHANNELS; i++) - bld->temps[idx][i] = lp_build_alloca(bld->base.builder, - vec_type, "temp"); + bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp"); } break; case TGSI_FILE_OUTPUT: if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { for (i = 0; i < NUM_CHANNELS; i++) - bld->outputs[idx][i] = lp_build_alloca(bld->base.builder, + bld->outputs[idx][i] = lp_build_alloca(gallivm, vec_type, "output"); } break; @@ -1323,15 +1358,14 @@ emit_declaration( case TGSI_FILE_ADDRESS: assert(idx < LP_MAX_TGSI_ADDRS); for (i = 0; i < NUM_CHANNELS; i++) - bld->addr[idx][i] = lp_build_alloca(bld->base.builder, - vec_type, "addr"); + bld->addr[idx][i] = lp_build_alloca(gallivm, vec_type, "addr"); break; case TGSI_FILE_PREDICATE: assert(idx < LP_MAX_TGSI_PREDS); for (i = 0; i < NUM_CHANNELS; i++) - bld->preds[idx][i] = 
lp_build_alloca(bld->base.builder, - vec_type, "predicate"); + bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type, + "predicate"); break; default: @@ -1657,7 +1691,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); src2 = emit_fetch( bld, inst, 2, chan_index ); - tmp1 = lp_build_const_vec(bld->base.type, 0.5); + tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); } @@ -2169,7 +2203,7 @@ emit_instruction( break; case TGSI_OPCODE_ENDLOOP: - lp_exec_endloop(&bld->exec_mask); + lp_exec_endloop(bld->base.gallivm, &bld->exec_mask); break; case TGSI_OPCODE_ENDSUB: @@ -2302,7 +2336,7 @@ emit_instruction( void -lp_build_tgsi_soa(LLVMBuilderRef builder, +lp_build_tgsi_soa(struct gallivm_state *gallivm, const struct tgsi_token *tokens, struct lp_type type, struct lp_build_mask_context *mask, @@ -2331,9 +2365,9 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, /* Setup build context */ memset(&bld, 0, sizeof bld); - lp_build_context_init(&bld.base, builder, type); - lp_build_context_init(&bld.uint_bld, builder, lp_uint_type(type)); - lp_build_context_init(&bld.elem_bld, builder, lp_elem_type(type)); + lp_build_context_init(&bld.base, gallivm, type); + lp_build_context_init(&bld.uint_bld, gallivm, lp_uint_type(type)); + lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type)); bld.mask = mask; bld.pos = pos; bld.inputs = inputs; @@ -2353,17 +2387,19 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, lp_exec_mask_init(&bld.exec_mask, &bld.base); if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { - LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), - info->file_max[TGSI_FILE_TEMPORARY]*4 + 4, 0); - bld.temps_array = lp_build_array_alloca(bld.base.builder, + LLVMValueRef array_size = + lp_build_const_int32(gallivm, + info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4); + bld.temps_array = lp_build_array_alloca(gallivm, bld.base.vec_type, array_size, "temp_array"); } if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) { - LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), - info->file_max[TGSI_FILE_OUTPUT]*4 + 4, 0); - bld.outputs_array = lp_build_array_alloca(bld.base.builder, + LLVMValueRef array_size = + lp_build_const_int32(gallivm, + info->file_max[TGSI_FILE_OUTPUT] * 4 + 4); + bld.outputs_array = lp_build_array_alloca(gallivm, bld.base.vec_type, array_size, "output_array"); } @@ -2373,9 +2409,9 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) { unsigned index, chan; LLVMTypeRef vec_type = bld.base.vec_type; - LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), - info->file_max[TGSI_FILE_INPUT]*4 + 4, 0); - bld.inputs_array = lp_build_array_alloca(bld.base.builder, + LLVMValueRef array_size = + lp_build_const_int32(gallivm, info->file_max[TGSI_FILE_INPUT]*4 + 4); + bld.inputs_array = lp_build_array_alloca(gallivm, vec_type, array_size, "input_array"); @@ -2383,13 +2419,14 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, for (index = 0; index < info->num_inputs; ++index) { for (chan = 0; chan < NUM_CHANNELS; ++chan) { - LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan); + LLVMValueRef lindex = + lp_build_const_int32(gallivm, index * 4 + chan); LLVMValueRef input_ptr = - LLVMBuildGEP(bld.base.builder, bld.inputs_array, + LLVMBuildGEP(gallivm->builder, bld.inputs_array, &lindex, 1, ""); LLVMValueRef value = bld.inputs[index][chan]; 
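/* Illustrative sketch, not part of this patch: the substitution repeated
 * throughout this file.  Constants are built through the gallivm state so
 * they land in that state's LLVMContext rather than LLVM's global context:
 *
 *    // before
 *    LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), i, 0);
 *    // after
 *    LLVMValueRef idx = lp_build_const_int32(gallivm, i);
 *
 * which is expected to expand to roughly
 *    LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), i, 0)
 * (the helper's body is not shown in this diff).
 */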
if (value) - LLVMBuildStore(bld.base.builder, value, input_ptr); + LLVMBuildStore(gallivm->builder, value, input_ptr); } } } @@ -2441,7 +2478,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); for( i = 0; i < size; ++i ) bld.immediates[num_immediates][i] = - lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float); + lp_build_const_vec(gallivm, type, parse.FullToken.FullImmediate.u[i].Float); for( i = size; i < 4; ++i ) bld.immediates[num_immediates][i] = bld.base.undef; num_immediates++; @@ -2478,7 +2515,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, } if (0) { - LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); + LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder); LLVMValueRef function = LLVMGetBasicBlockParent(block); debug_printf("11111111111111111111111111111 \n"); tgsi_dump(tokens, 0); @@ -2489,7 +2526,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, if (0) { LLVMModuleRef module = LLVMGetGlobalParent( - LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder))); + LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); LLVMDumpModule(module); } @@ -2515,24 +2552,25 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, * \return LLVM float array (interpreted as float [][4]) */ LLVMValueRef -lp_build_system_values_array(LLVMBuilderRef builder, +lp_build_system_values_array(struct gallivm_state *gallivm, const struct tgsi_shader_info *info, LLVMValueRef instance_id, LLVMValueRef facing) { - LLVMValueRef size = lp_build_const_int32(4 * info->num_system_values); - LLVMValueRef array = lp_build_array_alloca(builder, LLVMFloatType(), + LLVMValueRef size = lp_build_const_int32(gallivm, 4 * info->num_system_values); + LLVMTypeRef float_t = LLVMFloatTypeInContext(gallivm->context); + LLVMValueRef array = lp_build_array_alloca(gallivm, float_t, size, "sysvals_array"); unsigned i; for (i = 0; i < info->num_system_values; i++) { - LLVMValueRef index = lp_build_const_int32(i * 4); + LLVMValueRef index = lp_build_const_int32(gallivm, i * 4); LLVMValueRef ptr, value; switch (info->system_value_semantic_name[i]) { case TGSI_SEMANTIC_INSTANCEID: /* convert instance ID from int to float */ - value = LLVMBuildSIToFP(builder, instance_id, LLVMFloatType(), + value = LLVMBuildSIToFP(gallivm->builder, instance_id, float_t, "sysval_instanceid"); break; case TGSI_SEMANTIC_FACE: @@ -2541,8 +2579,8 @@ lp_build_system_values_array(LLVMBuilderRef builder, assert(0 && "unexpected semantic in build_system_values_array()"); } - ptr = LLVMBuildGEP(builder, array, &index, 1, ""); - LLVMBuildStore(builder, value, ptr); + ptr = LLVMBuildGEP(gallivm->builder, array, &index, 1, ""); + LLVMBuildStore(gallivm->builder, value, ptr); } return array; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c index 5205c7ada91..c5cf6d4a6c4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c @@ -30,34 +30,35 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" +#include "lp_bld_init.h" LLVMTypeRef -lp_build_elem_type(struct lp_type type) +lp_build_elem_type(struct gallivm_state *gallivm, struct lp_type type) { if (type.floating) { switch(type.width) { case 32: - return LLVMFloatType(); + return LLVMFloatTypeInContext(gallivm->context); break; case 64: - return LLVMDoubleType(); + return LLVMDoubleTypeInContext(gallivm->context); break; default: assert(0); - return LLVMFloatType(); + return LLVMFloatTypeInContext(gallivm->context); } } else { - return 
LLVMIntType(type.width); + return LLVMIntTypeInContext(gallivm->context, type.width); } } LLVMTypeRef -lp_build_vec_type(struct lp_type type) +lp_build_vec_type(struct gallivm_state *gallivm,struct lp_type type) { - LLVMTypeRef elem_type = lp_build_elem_type(type); + LLVMTypeRef elem_type = lp_build_elem_type(gallivm, type); if (type.length == 1) return elem_type; else @@ -149,16 +150,16 @@ lp_check_value(struct lp_type type, LLVMValueRef val) LLVMTypeRef -lp_build_int_elem_type(struct lp_type type) +lp_build_int_elem_type(struct gallivm_state *gallivm, struct lp_type type) { - return LLVMIntType(type.width); + return LLVMIntTypeInContext(gallivm->context, type.width); } LLVMTypeRef -lp_build_int_vec_type(struct lp_type type) +lp_build_int_vec_type(struct gallivm_state *gallivm, struct lp_type type) { - LLVMTypeRef elem_type = lp_build_int_elem_type(type); + LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type); if (type.length == 1) return elem_type; else @@ -170,7 +171,7 @@ lp_build_int_vec_type(struct lp_type type) * Build int32[4] vector type */ LLVMTypeRef -lp_build_int32_vec4_type(void) +lp_build_int32_vec4_type(struct gallivm_state *gallivm) { struct lp_type t; LLVMTypeRef type; @@ -182,7 +183,7 @@ lp_build_int32_vec4_type(void) t.width = 32; /* 32-bit int */ t.length = 4; /* 4 elements per vector */ - type = lp_build_int_elem_type(t); + type = lp_build_int_elem_type(gallivm, t); return LLVMVectorType(type, t.length); } @@ -383,15 +384,15 @@ lp_dump_llvmtype(LLVMTypeRef t) void lp_build_context_init(struct lp_build_context *bld, - LLVMBuilderRef builder, + struct gallivm_state *gallivm, struct lp_type type) { - bld->builder = builder; + bld->gallivm = gallivm; bld->type = type; - bld->int_elem_type = lp_build_int_elem_type(type); + bld->int_elem_type = lp_build_int_elem_type(gallivm, type); if (type.floating) - bld->elem_type = lp_build_elem_type(type); + bld->elem_type = lp_build_elem_type(gallivm, type); else bld->elem_type = bld->int_elem_type; @@ -406,5 +407,5 @@ lp_build_context_init(struct lp_build_context *bld, bld->undef = LLVMGetUndef(bld->vec_type); bld->zero = LLVMConstNull(bld->vec_type); - bld->one = lp_build_one(type); + bld->one = lp_build_one(gallivm, type); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h index a135d0df847..5007e83ac5f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h @@ -120,7 +120,7 @@ struct lp_type { */ struct lp_build_context { - LLVMBuilderRef builder; + struct gallivm_state *gallivm; /** * This not only describes the input/output LLVM types, but also whether @@ -285,11 +285,11 @@ lp_type_ufixed(unsigned width) LLVMTypeRef -lp_build_elem_type(struct lp_type type); +lp_build_elem_type(struct gallivm_state *gallivm, struct lp_type type); LLVMTypeRef -lp_build_vec_type(struct lp_type type); +lp_build_vec_type(struct gallivm_state *gallivm, struct lp_type type); boolean @@ -305,15 +305,15 @@ lp_check_value(struct lp_type type, LLVMValueRef val); LLVMTypeRef -lp_build_int_elem_type(struct lp_type type); +lp_build_int_elem_type(struct gallivm_state *gallivm, struct lp_type type); LLVMTypeRef -lp_build_int_vec_type(struct lp_type type); +lp_build_int_vec_type(struct gallivm_state *gallivm, struct lp_type type); LLVMTypeRef -lp_build_int32_vec4_type(void); +lp_build_int32_vec4_type(struct gallivm_state *gallivm); static INLINE struct lp_type @@ -394,7 +394,7 @@ lp_dump_llvmtype(LLVMTypeRef t); void lp_build_context_init(struct 
lp_build_context *bld, - LLVMBuilderRef builder, + struct gallivm_state *gallivm, struct lp_type type); diff --git a/src/gallium/auxiliary/target-helpers/inline_noop_helper.h b/src/gallium/auxiliary/target-helpers/inline_noop_helper.h new file mode 100644 index 00000000000..77c7cfd0c20 --- /dev/null +++ b/src/gallium/auxiliary/target-helpers/inline_noop_helper.h @@ -0,0 +1,51 @@ + +#ifndef INLINE_DEBUG_HELPER_H +#define INLINE_DEBUG_HELPER_H + +#include "pipe/p_compiler.h" +#include "util/u_debug.h" + + +/* Helper function to wrap a screen with + * one or more debug driver: rbug, trace. + */ + +#ifdef GALLIUM_TRACE +#include "trace/tr_public.h" +#endif + +#ifdef GALLIUM_RBUG +#include "rbug/rbug_public.h" +#endif + +#ifdef GALLIUM_GALAHAD +#include "galahad/glhd_public.h" +#endif + +#ifdef GALLIUM_NOOP +#include "noop/noop_public.h" +#endif + +static INLINE struct pipe_screen * +debug_screen_wrap(struct pipe_screen *screen) +{ +#if defined(GALLIUM_RBUG) + screen = rbug_screen_create(screen); +#endif + +#if defined(GALLIUM_TRACE) + screen = trace_screen_create(screen); +#endif + +#if defined(GALLIUM_GALAHAD) + screen = galahad_screen_create(screen); +#endif + +#if defined(GALLIUM_NOOP) + screen = noop_screen_create(screen); +#endif + + return screen; +} + +#endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 77bde86684e..82cd8eaa969 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -138,7 +138,7 @@ static const char *immediate_type_names[] = }; const char * -tgsi_swizzle_names[] = +tgsi_swizzle_names[4] = { "x", "y", @@ -147,7 +147,7 @@ tgsi_swizzle_names[] = }; const char * -tgsi_texture_names[] = +tgsi_texture_names[TGSI_TEXTURE_COUNT] = { "UNKNOWN", "1D", @@ -160,16 +160,17 @@ tgsi_texture_names[] = "SHADOWRECT" }; -static const char *property_names[] = +const char *tgsi_property_names[TGSI_PROPERTY_COUNT] = { "GS_INPUT_PRIMITIVE", "GS_OUTPUT_PRIMITIVE", "GS_MAX_OUTPUT_VERTICES", "FS_COORD_ORIGIN", - "FS_COORD_PIXEL_CENTER" + "FS_COORD_PIXEL_CENTER", + "FS_COLOR0_WRITES_ALL_CBUFS", }; -static const char *primitive_names[] = +const char *tgsi_primitive_names[PIPE_PRIM_MAX] = { "POINTS", "LINES", @@ -187,13 +188,13 @@ static const char *primitive_names[] = "TRIANGLE_STRIP_ADJACENCY" }; -static const char *fs_coord_origin_names[] = +const char *tgsi_fs_coord_origin_names[2] = { "UPPER_LEFT", "LOWER_LEFT" }; -static const char *fs_coord_pixel_center_names[] = +const char *tgsi_fs_coord_pixel_center_names[2] = { "HALF_INTEGER", "INTEGER" @@ -484,10 +485,10 @@ iter_property( int i; struct dump_ctx *ctx = (struct dump_ctx *)iter; - assert(Elements(property_names) == TGSI_PROPERTY_COUNT); + assert(Elements(tgsi_property_names) == TGSI_PROPERTY_COUNT); TXT( "PROPERTY " ); - ENM(prop->Property.PropertyName, property_names); + ENM(prop->Property.PropertyName, tgsi_property_names); if (prop->Property.NrTokens > 1) TXT(" "); @@ -496,13 +497,13 @@ iter_property( switch (prop->Property.PropertyName) { case TGSI_PROPERTY_GS_INPUT_PRIM: case TGSI_PROPERTY_GS_OUTPUT_PRIM: - ENM(prop->u[i].Data, primitive_names); + ENM(prop->u[i].Data, tgsi_primitive_names); break; case TGSI_PROPERTY_FS_COORD_ORIGIN: - ENM(prop->u[i].Data, fs_coord_origin_names); + ENM(prop->u[i].Data, tgsi_fs_coord_origin_names); break; case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: - ENM(prop->u[i].Data, fs_coord_pixel_center_names); + ENM(prop->u[i].Data, tgsi_fs_coord_pixel_center_names); break; default: SID( prop->u[i].Data ); diff --git 
a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h index fc0429ad8d9..2491e9198a2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.h +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h @@ -29,6 +29,7 @@ #define TGSI_DUMP_H #include "pipe/p_compiler.h" +#include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #if defined __cplusplus @@ -39,10 +40,22 @@ extern const char * tgsi_file_names[TGSI_FILE_COUNT]; extern const char * -tgsi_swizzle_names[]; +tgsi_swizzle_names[4]; extern const char * -tgsi_texture_names[]; +tgsi_texture_names[TGSI_TEXTURE_COUNT]; + +extern const char * +tgsi_property_names[TGSI_PROPERTY_COUNT]; + +extern const char * +tgsi_primitive_names[PIPE_PRIM_MAX]; + +extern const char * +tgsi_fs_coord_origin_names[2]; + +extern const char * +tgsi_fs_coord_pixel_center_names[2]; void tgsi_dump_str( diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index d4df5851764..2aafa2a6e83 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -136,7 +136,8 @@ tgsi_parse_token( static INLINE unsigned tgsi_num_tokens(const struct tgsi_token *tokens) { - struct tgsi_header header = *(const struct tgsi_header *) tokens; + struct tgsi_header header; + memcpy(&header, tokens, sizeof(header)); return header.HeaderSize + header.BodySize; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/tgsi_sanity.h index 73f0f414e3f..707d11ec6dd 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.h +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.h @@ -28,12 +28,14 @@ #ifndef TGSI_SANITY_H #define TGSI_SANITY_H -#include "pipe/p_shader_tokens.h" - #if defined __cplusplus extern "C" { #endif +#include "pipe/p_compiler.h" + +struct tgsi_token; + /* Check the given token stream for errors and common mistakes. * Diagnostic messages are printed out to the debug output, and is * controlled by the debug option TGSI_PRINT_SANITY (default false). 
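(A minimal sketch, not part of the patch: with the name tables above now exported through tgsi_dump.h instead of being duplicated as static arrays, other TGSI code can translate a keyword into its enum index directly — this is exactly what the tgsi_text.c hunks below switch to. The helper name here is illustrative, and it uses a plain case-sensitive strcmp(); tgsi_text.c itself matches case-insensitively via streq_nocase_uprcase().)

#include <string.h>
#include "pipe/p_shader_tokens.h"   /* TGSI_PROPERTY_COUNT */
#include "tgsi/tgsi_dump.h"         /* tgsi_property_names[] (exported by this patch) */

/* Map an upper-case property keyword, e.g. "FS_COORD_ORIGIN",
 * to its TGSI_PROPERTY_* index, or return -1 if unknown. */
static int
lookup_property_name(const char *name)
{
   unsigned i;
   for (i = 0; i < TGSI_PROPERTY_COUNT; i++) {
      if (strcmp(tgsi_property_names[i], name) == 0)
         return (int) i;
   }
   return -1;
}
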
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 9a38c37979c..819b0725a1f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -36,6 +36,7 @@ #include "tgsi_parse.h" #include "tgsi_sanity.h" #include "tgsi_util.h" +#include "tgsi_dump.h" static boolean is_alpha_underscore( const char *cur ) { @@ -1258,42 +1259,6 @@ static boolean parse_immediate( struct translate_ctx *ctx ) return TRUE; } -static const char *property_names[] = -{ - "GS_INPUT_PRIMITIVE", - "GS_OUTPUT_PRIMITIVE", - "GS_MAX_OUTPUT_VERTICES", - "FS_COORD_ORIGIN", - "FS_COORD_PIXEL_CENTER" -}; - -static const char *primitive_names[] = -{ - "POINTS", - "LINES", - "LINE_LOOP", - "LINE_STRIP", - "TRIANGLES", - "TRIANGLE_STRIP", - "TRIANGLE_FAN", - "QUADS", - "QUAD_STRIP", - "POLYGON" -}; - -static const char *fs_coord_origin_names[] = -{ - "UPPER_LEFT", - "LOWER_LEFT" -}; - -static const char *fs_coord_pixel_center_names[] = -{ - "HALF_INTEGER", - "INTEGER" -}; - - static boolean parse_primitive( const char **pcur, uint *primitive ) { @@ -1302,7 +1267,7 @@ parse_primitive( const char **pcur, uint *primitive ) for (i = 0; i < PIPE_PRIM_MAX; i++) { const char *cur = *pcur; - if (str_match_no_case( &cur, primitive_names[i])) { + if (str_match_no_case( &cur, tgsi_primitive_names[i])) { *primitive = i; *pcur = cur; return TRUE; @@ -1316,10 +1281,10 @@ parse_fs_coord_origin( const char **pcur, uint *fs_coord_origin ) { uint i; - for (i = 0; i < sizeof(fs_coord_origin_names) / sizeof(fs_coord_origin_names[0]); i++) { + for (i = 0; i < Elements(tgsi_fs_coord_origin_names); i++) { const char *cur = *pcur; - if (str_match_no_case( &cur, fs_coord_origin_names[i])) { + if (str_match_no_case( &cur, tgsi_fs_coord_origin_names[i])) { *fs_coord_origin = i; *pcur = cur; return TRUE; @@ -1333,10 +1298,10 @@ parse_fs_coord_pixel_center( const char **pcur, uint *fs_coord_pixel_center ) { uint i; - for (i = 0; i < sizeof(fs_coord_pixel_center_names) / sizeof(fs_coord_pixel_center_names[0]); i++) { + for (i = 0; i < Elements(tgsi_fs_coord_pixel_center_names); i++) { const char *cur = *pcur; - if (str_match_no_case( &cur, fs_coord_pixel_center_names[i])) { + if (str_match_no_case( &cur, tgsi_fs_coord_pixel_center_names[i])) { *fs_coord_pixel_center = i; *pcur = cur; return TRUE; @@ -1364,7 +1329,7 @@ static boolean parse_property( struct translate_ctx *ctx ) } for (property_name = 0; property_name < TGSI_PROPERTY_COUNT; ++property_name) { - if (streq_nocase_uprcase(property_names[property_name], id)) { + if (streq_nocase_uprcase(tgsi_property_names[property_name], id)) { break; } } @@ -1398,6 +1363,7 @@ static boolean parse_property( struct translate_ctx *ctx ) return FALSE; } break; + case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: default: if (!parse_uint(&ctx->cur, &values[0] )) { report_error( ctx, "Expected unsigned integer as property!" 
); diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.h b/src/gallium/auxiliary/tgsi/tgsi_text.h index 8eeeeef1402..5fe53cf007d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.h +++ b/src/gallium/auxiliary/tgsi/tgsi_text.h @@ -28,12 +28,14 @@ #ifndef TGSI_TEXT_H #define TGSI_TEXT_H -#include "pipe/p_shader_tokens.h" - #if defined __cplusplus extern "C" { #endif +#include "pipe/p_compiler.h" + +struct tgsi_token; + boolean tgsi_text_translate( const char *text, diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 7d13a17bdbc..02de12d77d5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -148,6 +148,7 @@ struct ureg_program unsigned property_gs_max_vertices; unsigned char property_fs_coord_origin; /* = TGSI_FS_COORD_ORIGIN_* */ unsigned char property_fs_coord_pixel_center; /* = TGSI_FS_COORD_PIXEL_CENTER_* */ + unsigned char property_fs_color0_writes_all_cbufs; /* = TGSI_FS_COLOR0_WRITES_ALL_CBUFS * */ unsigned nr_addrs; unsigned nr_preds; @@ -284,7 +285,12 @@ ureg_property_fs_coord_pixel_center(struct ureg_program *ureg, ureg->property_fs_coord_pixel_center = fs_coord_pixel_center; } - +void +ureg_property_fs_color0_writes_all_cbufs(struct ureg_program *ureg, + unsigned fs_color0_writes_all_cbufs) +{ + ureg->property_fs_color0_writes_all_cbufs = fs_color0_writes_all_cbufs; +} struct ureg_src ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg, @@ -1278,6 +1284,14 @@ static void emit_decls( struct ureg_program *ureg ) ureg->property_fs_coord_pixel_center); } + if (ureg->property_fs_color0_writes_all_cbufs) { + assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); + + emit_property(ureg, + TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, + ureg->property_fs_color0_writes_all_cbufs); + } + if (ureg->processor == TGSI_PROCESSOR_VERTEX) { for (i = 0; i < UREG_MAX_INPUT; i++) { if (ureg->vs_inputs[i/32] & (1 << (i%32))) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index acc463200a6..807128a5e52 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -153,6 +153,10 @@ void ureg_property_fs_coord_pixel_center(struct ureg_program *ureg, unsigned fs_coord_pixel_center); +void +ureg_property_fs_color0_writes_all_cbufs(struct ureg_program *ureg, + unsigned fs_color0_writes_all_cbufs); + /*********************************************************************** * Build shader declarations: */ diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index dfb142b9e1c..c11f7d383db 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -291,7 +291,7 @@ regions_overlap(int srcX0, int srcY0, void util_blit_pixels_writemask(struct blit_state *ctx, struct pipe_resource *src_tex, - struct pipe_subresource srcsub, + unsigned src_level, int srcX0, int srcY0, int srcX1, int srcY1, int srcZ0, @@ -316,13 +316,12 @@ util_blit_pixels_writemask(struct blit_state *ctx, assert(filter == PIPE_TEX_MIPFILTER_NEAREST || filter == PIPE_TEX_MIPFILTER_LINEAR); - assert(srcsub.level <= src_tex->last_level); + assert(src_level <= src_tex->last_level); /* do the regions overlap? 
*/ overlap = src_tex == dst->texture && - dst->face == srcsub.face && - dst->level == srcsub.level && - dst->zslice == srcZ0 && + dst->u.tex.level == src_level && + dst->u.tex.first_layer == srcZ0 && regions_overlap(srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1); @@ -339,16 +338,19 @@ util_blit_pixels_writemask(struct blit_state *ctx, (dstX1 - dstX0) == (srcX1 - srcX0) && (dstY1 - dstY0) == (srcY1 - srcY0) && !overlap) { - struct pipe_subresource subdst; - subdst.face = dst->face; - subdst.level = dst->level; + struct pipe_box src_box; + src_box.x = srcX0; + src_box.y = srcY0; + src_box.z = srcZ0; + src_box.width = srcW; + src_box.height = srcH; + src_box.depth = 1; pipe->resource_copy_region(pipe, - dst->texture, subdst, - dstX0, dstY0, dst->zslice,/* dest */ - src_tex, srcsub, - srcX0, srcY0, srcZ0,/* src */ - srcW, srcH); /* size */ - return; + dst->texture, dst->u.tex.level, + dstX0, dstY0, dst->u.tex.first_layer,/* dest */ + src_tex, src_level, + &src_box); + return; } /* Create a temporary texture when src and dest alias or when src @@ -359,16 +361,16 @@ util_blit_pixels_writemask(struct blit_state *ctx, * This can still be improved upon. */ if ((src_tex == dst->texture && - dst->face == srcsub.face && - dst->level == srcsub.level && - dst->zslice == srcZ0) || + dst->u.tex.level == src_level && + dst->u.tex.first_layer == srcZ0) || (src_tex->target != PIPE_TEXTURE_2D && + src_tex->target != PIPE_TEXTURE_2D && src_tex->target != PIPE_TEXTURE_RECT)) { struct pipe_resource texTemp; struct pipe_resource *tex; struct pipe_sampler_view sv_templ; - struct pipe_subresource texsub; + struct pipe_box src_box; const int srcLeft = MIN2(srcX0, srcX1); const int srcTop = MIN2(srcY0, srcY1); @@ -394,19 +396,23 @@ util_blit_pixels_writemask(struct blit_state *ctx, texTemp.width0 = srcW; texTemp.height0 = srcH; texTemp.depth0 = 1; + texTemp.array_size = 1; texTemp.bind = PIPE_BIND_SAMPLER_VIEW; tex = screen->resource_create(screen, &texTemp); if (!tex) return; - texsub.face = 0; - texsub.level = 0; + src_box.x = srcLeft; + src_box.y = srcTop; + src_box.z = srcZ0; + src_box.width = srcW; + src_box.height = srcH; + src_box.depth = 1; /* load temp texture */ pipe->resource_copy_region(pipe, - tex, texsub, 0, 0, 0, /* dest */ - src_tex, srcsub, srcLeft, srcTop, srcZ0, /* src */ - srcW, srcH); /* size */ + tex, 0, 0, 0, 0, /* dest */ + src_tex, src_level, &src_box); normalized = tex->target != PIPE_TEXTURE_RECT; if(normalized) { @@ -433,7 +439,6 @@ util_blit_pixels_writemask(struct blit_state *ctx, } else { u_sampler_view_default_template(&sv_templ, src_tex, src_tex->format); - sv_templ.first_level = sv_templ.last_level = srcsub.level; sampler_view = pipe->create_sampler_view(pipe, src_tex, &sv_templ); if (!sampler_view) { @@ -447,10 +452,10 @@ util_blit_pixels_writemask(struct blit_state *ctx, normalized = sampler_view->texture->target != PIPE_TEXTURE_RECT; if(normalized) { - s0 /= (float)(u_minify(sampler_view->texture->width0, srcsub.level)); - s1 /= (float)(u_minify(sampler_view->texture->width0, srcsub.level)); - t0 /= (float)(u_minify(sampler_view->texture->height0, srcsub.level)); - t1 /= (float)(u_minify(sampler_view->texture->height0, srcsub.level)); + s0 /= (float)(u_minify(sampler_view->texture->width0, src_level)); + s1 /= (float)(u_minify(sampler_view->texture->width0, src_level)); + t0 /= (float)(u_minify(sampler_view->texture->height0, src_level)); + t1 /= (float)(u_minify(sampler_view->texture->height0, src_level)); } } @@ -489,9 +494,8 @@ util_blit_pixels_writemask(struct 
blit_state *ctx, ctx->sampler.normalized_coords = normalized; ctx->sampler.min_img_filter = filter; ctx->sampler.mag_img_filter = filter; - /* we've limited this already with the sampler view but you never know... */ - ctx->sampler.min_lod = srcsub.level; - ctx->sampler.max_lod = srcsub.level; + ctx->sampler.min_lod = src_level; + ctx->sampler.max_lod = src_level; cso_single_sampler(ctx->cso, 0, &ctx->sampler); cso_single_sampler_done(ctx->cso); @@ -575,7 +579,7 @@ util_blit_pixels_writemask(struct blit_state *ctx, void util_blit_pixels(struct blit_state *ctx, struct pipe_resource *src_tex, - struct pipe_subresource srcsub, + unsigned src_level, int srcX0, int srcY0, int srcX1, int srcY1, int srcZ, @@ -585,7 +589,7 @@ util_blit_pixels(struct blit_state *ctx, float z, uint filter ) { util_blit_pixels_writemask( ctx, src_tex, - srcsub, + src_level, srcX0, srcY0, srcX1, srcY1, srcZ, @@ -662,6 +666,7 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_save_rasterizer(ctx->cso); cso_save_samplers(ctx->cso); cso_save_fragment_sampler_views(ctx->cso); + cso_save_viewport(ctx->cso); cso_save_framebuffer(ctx->cso); cso_save_fragment_shader(ctx->cso); cso_save_vertex_shader(ctx->cso); @@ -729,6 +734,7 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_restore_rasterizer(ctx->cso); cso_restore_samplers(ctx->cso); cso_restore_fragment_sampler_views(ctx->cso); + cso_restore_viewport(ctx->cso); cso_restore_framebuffer(ctx->cso); cso_restore_fragment_shader(ctx->cso); cso_restore_vertex_shader(ctx->cso); diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h index b8a0dfce13f..3009e25eca3 100644 --- a/src/gallium/auxiliary/util/u_blit.h +++ b/src/gallium/auxiliary/util/u_blit.h @@ -42,7 +42,6 @@ struct cso_context; struct pipe_context; struct pipe_resource; struct pipe_sampler_view; -struct pipe_subresource; struct pipe_surface; @@ -55,7 +54,7 @@ util_destroy_blit(struct blit_state *ctx); extern void util_blit_pixels(struct blit_state *ctx, struct pipe_resource *src_tex, - struct pipe_subresource srcsub, + unsigned src_level, int srcX0, int srcY0, int srcX1, int srcY1, int srcZ0, @@ -67,7 +66,7 @@ util_blit_pixels(struct blit_state *ctx, void util_blit_pixels_writemask(struct blit_state *ctx, struct pipe_resource *src_tex, - struct pipe_subresource srcsub, + unsigned src_level, int srcX0, int srcY0, int srcX1, int srcY1, int srcZ0, diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index a163f93cb82..545021d2642 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -63,8 +63,7 @@ struct blitter_context_priv /* Constant state objects. */ /* Vertex shaders. */ - void *vs_col; /**< Vertex shader which passes {pos, color} to the output */ - void *vs_tex; /**< Vertex shader which passes {pos, texcoord} to the output.*/ + void *vs; /**< Vertex shader which passes {pos, generic} to the output.*/ /* Fragment shaders. */ /* The shader at index i outputs color to color buffers 0,1,...,i-1. 
*/ @@ -211,20 +210,12 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) /* fragment shaders are created on-demand */ - /* vertex shaders */ - { - const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_COLOR }; - const uint semantic_indices[] = { 0, 0 }; - ctx->vs_col = - util_make_vertex_passthrough_shader(pipe, 2, semantic_names, - semantic_indices); - } + /* vertex shader */ { const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, TGSI_SEMANTIC_GENERIC }; const uint semantic_indices[] = { 0, 0 }; - ctx->vs_tex = + ctx->vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names, semantic_indices); } @@ -257,8 +248,7 @@ void util_blitter_destroy(struct blitter_context *blitter) pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil); pipe->delete_rasterizer_state(pipe, ctx->rs_state); - pipe->delete_vs_state(pipe, ctx->vs_col); - pipe->delete_vs_state(pipe, ctx->vs_tex); + pipe->delete_vs_state(pipe, ctx->vs); pipe->delete_vertex_elements_state(pipe, ctx->velem_state); for (i = 0; i < PIPE_MAX_TEXTURE_TYPES; i++) { @@ -419,17 +409,17 @@ static void blitter_set_clear_color(struct blitter_context_priv *ctx, } static void get_texcoords(struct pipe_resource *src, - struct pipe_subresource subsrc, - unsigned x1, unsigned y1, - unsigned x2, unsigned y2, - boolean normalized, float out[4]) + unsigned level, + unsigned x1, unsigned y1, + unsigned x2, unsigned y2, + boolean normalized, float out[4]) { if(normalized) { - out[0] = x1 / (float)u_minify(src->width0, subsrc.level); - out[1] = y1 / (float)u_minify(src->height0, subsrc.level); - out[2] = x2 / (float)u_minify(src->width0, subsrc.level); - out[3] = y2 / (float)u_minify(src->height0, subsrc.level); + out[0] = x1 / (float)u_minify(src->width0, level); + out[1] = y1 / (float)u_minify(src->height0, level); + out[2] = x2 / (float)u_minify(src->width0, level); + out[3] = y2 / (float)u_minify(src->height0, level); } else { @@ -458,14 +448,14 @@ static void set_texcoords_in_vertices(const float coord[4], static void blitter_set_texcoords_2d(struct blitter_context_priv *ctx, struct pipe_resource *src, - struct pipe_subresource subsrc, + unsigned level, unsigned x1, unsigned y1, unsigned x2, unsigned y2) { unsigned i; float coord[4]; - get_texcoords(src, subsrc, x1, y1, x2, y2, TRUE, coord); + get_texcoords(src, level, x1, y1, x2, y2, TRUE, coord); set_texcoords_in_vertices(coord, &ctx->vertices[0][1][0], 8); for (i = 0; i < 4; i++) { @@ -476,15 +466,15 @@ static void blitter_set_texcoords_2d(struct blitter_context_priv *ctx, static void blitter_set_texcoords_3d(struct blitter_context_priv *ctx, struct pipe_resource *src, - struct pipe_subresource subsrc, + unsigned level, unsigned zslice, unsigned x1, unsigned y1, unsigned x2, unsigned y2) { int i; - float r = zslice / (float)u_minify(src->depth0, subsrc.level); + float r = zslice / (float)u_minify(src->depth0, level); - blitter_set_texcoords_2d(ctx, src, subsrc, x1, y1, x2, y2); + blitter_set_texcoords_2d(ctx, src, level, x1, y1, x2, y2); for (i = 0; i < 4; i++) ctx->vertices[i][1][2] = r; /*r*/ @@ -492,7 +482,7 @@ static void blitter_set_texcoords_3d(struct blitter_context_priv *ctx, static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx, struct pipe_resource *src, - struct pipe_subresource subsrc, + unsigned level, unsigned face, unsigned x1, unsigned y1, unsigned x2, unsigned y2) { @@ -500,10 +490,10 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx, float coord[4]; float st[4][2]; - 
get_texcoords(src, subsrc, x1, y1, x2, y2, TRUE, coord); + get_texcoords(src, level, x1, y1, x2, y2, TRUE, coord); set_texcoords_in_vertices(coord, &st[0][0], 2); - util_map_texcoords2d_onto_cubemap(subsrc.face, + util_map_texcoords2d_onto_cubemap(face, /* pointer, stride in floats */ &st[0][0], 2, &ctx->vertices[0][1][0], 8); @@ -522,10 +512,13 @@ static void blitter_set_dst_dimensions(struct blitter_context_priv *ctx, static void blitter_draw_quad(struct blitter_context_priv *ctx) { struct pipe_context *pipe = ctx->base.pipe; + struct pipe_box box; /* write vertices and draw them */ - pipe_buffer_write(pipe, ctx->vbuf, - 0, sizeof(ctx->vertices), ctx->vertices); + u_box_1d(0, sizeof(ctx->vertices), &box); + pipe->transfer_inline_write(pipe, ctx->vbuf, 0, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &box, ctx->vertices, sizeof(ctx->vertices), 0); util_draw_vertex_buffer(pipe, ctx->vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, /* verts */ @@ -566,7 +559,9 @@ void *blitter_get_fs_col(struct blitter_context_priv *ctx, unsigned num_cbufs) if (!ctx->fs_col[num_cbufs]) ctx->fs_col[num_cbufs] = - util_make_fragment_clonecolor_shader(pipe, num_cbufs); + util_make_fragment_cloneinput_shader(pipe, num_cbufs, + TGSI_SEMANTIC_GENERIC, + TGSI_INTERPOLATE_LINEAR); return ctx->fs_col[num_cbufs]; } @@ -697,7 +692,7 @@ void util_blitter_clear(struct blitter_context *blitter, pipe->bind_rasterizer_state(pipe, ctx->rs_state); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs)); - pipe->bind_vs_state(pipe, ctx->vs_col); + pipe->bind_vs_state(pipe, ctx->vs); blitter_set_dst_dimensions(ctx, width, height); blitter->draw_rectangle(blitter, 0, 0, width, height, depth, @@ -714,21 +709,22 @@ boolean is_overlap(unsigned sx1, unsigned sx2, unsigned sy1, unsigned sy2, void util_blitter_copy_region(struct blitter_context *blitter, struct pipe_resource *dst, - struct pipe_subresource subdst, + unsigned dstlevel, unsigned dstx, unsigned dsty, unsigned dstz, struct pipe_resource *src, - struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height, + unsigned srclevel, + const struct pipe_box *srcbox, boolean ignore_stencil) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; struct pipe_context *pipe = ctx->base.pipe; struct pipe_screen *screen = pipe->screen; - struct pipe_surface *dstsurf; + struct pipe_surface *dstsurf, surf_templ; struct pipe_framebuffer_state fb_state; struct pipe_sampler_view viewTempl, *view; unsigned bind; + unsigned width = srcbox->width; + unsigned height = srcbox->height; boolean is_stencil, is_depth; boolean normalized; @@ -739,12 +735,15 @@ void util_blitter_copy_region(struct blitter_context *blitter, /* Sanity checks. */ if (dst == src) { - assert(!is_overlap(srcx, srcx + width, srcy, srcy + height, + assert(!is_overlap(srcbox->x, srcbox->x + width, srcbox->y, srcbox->y + height, dstx, dstx + width, dsty, dsty + height)); } else { - assert(dst->format == src->format); + assert(util_is_format_compatible(util_format_description(src->format), + util_format_description(dst->format))); } assert(src->target < PIPE_MAX_TEXTURE_TYPES); + /* XXX should handle 3d regions */ + assert(srcbox->depth == 1); /* Is this a ZS format? 
*/ is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; @@ -762,15 +761,18 @@ void util_blitter_copy_region(struct blitter_context *blitter, dst->nr_samples, bind, 0) || !screen->is_format_supported(screen, src->format, src->target, src->nr_samples, PIPE_BIND_SAMPLER_VIEW, 0)) { - util_resource_copy_region(pipe, dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, width, height); + util_resource_copy_region(pipe, dst, dstlevel, dstx, dsty, dstz, + src, srclevel, srcbox); return; } - /* Get surfaces. */ - dstsurf = screen->get_tex_surface(screen, dst, - subdst.face, subdst.level, dstz, - bind); + /* Get surface. */ + memset(&surf_templ, 0, sizeof(surf_templ)); + u_surface_default_template(&surf_templ, dst, bind); + surf_templ.u.tex.level = dstlevel; + surf_templ.u.tex.first_layer = dstz; + surf_templ.u.tex.last_layer = dstz; + dstsurf = pipe->create_surface(pipe, dst, &surf_templ); /* Check whether the states are properly saved. */ blitter_check_saved_CSOs(ctx); @@ -810,9 +812,9 @@ void util_blitter_copy_region(struct blitter_context *blitter, /* Set rasterizer state, shaders, and textures. */ pipe->bind_rasterizer_state(pipe, ctx->rs_state); - pipe->bind_vs_state(pipe, ctx->vs_tex); + pipe->bind_vs_state(pipe, ctx->vs); pipe->bind_fragment_sampler_states(pipe, 1, - blitter_get_sampler_state(ctx, subsrc.level, normalized)); + blitter_get_sampler_state(ctx, srclevel, normalized)); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); pipe->set_fragment_sampler_views(pipe, 1, &view); pipe->set_framebuffer_state(pipe, &fb_state); @@ -827,8 +829,8 @@ void util_blitter_copy_region(struct blitter_context *blitter, { /* Set texture coordinates. */ float coord[4]; - get_texcoords(src, subsrc, srcx, srcy, - srcx+width, srcy+height, normalized, coord); + get_texcoords(src, srclevel, srcbox->x, srcbox->y, + srcbox->x+width, srcbox->y+height, normalized, coord); /* Draw. */ blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0, @@ -841,11 +843,13 @@ void util_blitter_copy_region(struct blitter_context *blitter, case PIPE_TEXTURE_CUBE: /* Set texture coordinates. */ if (src->target == PIPE_TEXTURE_3D) - blitter_set_texcoords_3d(ctx, src, subsrc, srcz, - srcx, srcy, srcx+width, srcy+height); + blitter_set_texcoords_3d(ctx, src, srclevel, srcbox->z, + srcbox->x, srcbox->y, + srcbox->x + width, srcbox->y + height); else - blitter_set_texcoords_cube(ctx, src, subsrc, - srcx, srcy, srcx+width, srcy+height); + blitter_set_texcoords_cube(ctx, src, srclevel, srcbox->z, + srcbox->x, srcbox->y, + srcbox->x + width, srcbox->y + height); /* Draw. 
*/ blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0); @@ -887,7 +891,7 @@ void util_blitter_clear_render_target(struct blitter_context *blitter, pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); pipe->bind_rasterizer_state(pipe, ctx->rs_state); pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1)); - pipe->bind_vs_state(pipe, ctx->vs_col); + pipe->bind_vs_state(pipe, ctx->vs); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); /* set a framebuffer state */ @@ -947,7 +951,7 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, pipe->bind_rasterizer_state(pipe, ctx->rs_state); pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0)); - pipe->bind_vs_state(pipe, ctx->vs_col); + pipe->bind_vs_state(pipe, ctx->vs); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); /* set a framebuffer state */ @@ -988,7 +992,7 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter, pipe->bind_rasterizer_state(pipe, ctx->rs_state); pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0)); - pipe->bind_vs_state(pipe, ctx->vs_col); + pipe->bind_vs_state(pipe, ctx->vs); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); /* set a framebuffer state */ diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index f9f96f25c77..922a8580ac1 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -164,12 +164,11 @@ void util_blitter_clear(struct blitter_context *blitter, */ void util_blitter_copy_region(struct blitter_context *blitter, struct pipe_resource *dst, - struct pipe_subresource subdst, + unsigned dstlevel, unsigned dstx, unsigned dsty, unsigned dstz, struct pipe_resource *src, - struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height, + unsigned srclevel, + const struct pipe_box *srcbox, boolean ignore_stencil); /** @@ -319,21 +318,13 @@ util_blitter_save_vertex_buffers(struct blitter_context *blitter, int num_vertex_buffers, struct pipe_vertex_buffer *vertex_buffers) { - unsigned i; assert(num_vertex_buffers <= Elements(blitter->saved_vertex_buffers)); - blitter->saved_num_vertex_buffers = num_vertex_buffers; - - for (i = 0; i < num_vertex_buffers; i++) { - if (vertex_buffers[i].buffer) { - pipe_resource_reference(&blitter->saved_vertex_buffers[i].buffer, - vertex_buffers[i].buffer); - } - } - - memcpy(blitter->saved_vertex_buffers, - vertex_buffers, - num_vertex_buffers * sizeof(struct pipe_vertex_buffer)); + blitter->saved_num_vertex_buffers = 0; + util_copy_vertex_buffers(blitter->saved_vertex_buffers, + (unsigned*)&blitter->saved_num_vertex_buffers, + vertex_buffers, + num_vertex_buffers); } #ifdef __cplusplus diff --git a/src/gallium/auxiliary/util/u_box.h b/src/gallium/auxiliary/util/u_box.h index e9c71743fc8..0b28d0f12c3 100644 --- a/src/gallium/auxiliary/util/u_box.h +++ b/src/gallium/auxiliary/util/u_box.h @@ -60,7 +60,6 @@ void u_box_2d_zslice( unsigned x, box->depth = 1; } - static INLINE void u_box_3d( unsigned x, unsigned y, @@ -78,15 +77,4 @@ void u_box_3d( unsigned x, box->depth = d; } - -static INLINE -struct pipe_subresource u_subresource( unsigned face, - unsigned level ) -{ - struct pipe_subresource subresource; - subresource.face = face; - subresource.level = level; - return subresource; -} - #endif diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 504e6d2a18f..f4ad545bee7 100644 --- 
a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -40,12 +40,14 @@ #include "util/u_string.h" #include "util/u_math.h" #include "util/u_tile.h" -#include "util/u_prim.h" +#include "util/u_prim.h" +#include "util/u_surface.h" #include <limits.h> /* CHAR_BIT */ void _debug_vprintf(const char *format, va_list ap) { +#if defined(PIPE_OS_WINDOWS) || defined(PIPE_OS_EMBEDDED) /* We buffer until we find a newline. */ static char buf[4096] = {'\0'}; size_t len = strlen(buf); @@ -54,6 +56,10 @@ void _debug_vprintf(const char *format, va_list ap) os_log_message(buf); buf[0] = '\0'; } +#else + /* Just print as-is to stderr */ + vfprintf(stderr, format, ap); +#endif } @@ -453,9 +459,10 @@ void debug_dump_image(const char *prefix, #endif } +/* FIXME: dump resources, not surfaces... */ void debug_dump_surface(struct pipe_context *pipe, - const char *prefix, - struct pipe_surface *surface) + const char *prefix, + struct pipe_surface *surface) { struct pipe_resource *texture; struct pipe_transfer *transfer; @@ -472,23 +479,23 @@ void debug_dump_surface(struct pipe_context *pipe, */ texture = surface->texture; - transfer = pipe_get_transfer(pipe, texture, surface->face, - surface->level, surface->zslice, - PIPE_TRANSFER_READ, 0, 0, surface->width, - surface->height); - + transfer = pipe_get_transfer(pipe, texture, surface->u.tex.level, + surface->u.tex.first_layer, + PIPE_TRANSFER_READ, + 0, 0, surface->width, surface->height); + data = pipe->transfer_map(pipe, transfer); if(!data) goto error; - - debug_dump_image(prefix, + + debug_dump_image(prefix, texture->format, - util_format_get_blocksize(texture->format), + util_format_get_blocksize(texture->format), util_format_get_nblocksx(texture->format, surface->width), util_format_get_nblocksy(texture->format, surface->height), transfer->stride, data); - + pipe->transfer_unmap(pipe, transfer); error: pipe->transfer_destroy(pipe, transfer); @@ -499,20 +506,18 @@ void debug_dump_texture(struct pipe_context *pipe, const char *prefix, struct pipe_resource *texture) { - struct pipe_surface *surface; - struct pipe_screen *screen; + struct pipe_surface *surface, surf_tmpl; if (!texture) return; - screen = texture->screen; - - /* XXX for now, just dump image for face=0, level=0 */ - surface = screen->get_tex_surface(screen, texture, 0, 0, 0, - PIPE_BIND_SAMPLER_VIEW); + /* XXX for now, just dump image for layer=0, level=0 */ + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + u_surface_default_template(&surf_tmpl, texture, 0 /* no bind flag - not a surface */); + surface = pipe->create_surface(pipe, texture, &surf_tmpl); if (surface) { debug_dump_surface(pipe, prefix, surface); - screen->tex_surface_destroy(surface); + pipe->surface_destroy(pipe, surface); } } @@ -550,17 +555,16 @@ struct bmp_rgb_quad { void debug_dump_surface_bmp(struct pipe_context *pipe, - const char *filename, + const char *filename, struct pipe_surface *surface) { #ifndef PIPE_SUBSYSTEM_WINDOWS_MINIPORT struct pipe_transfer *transfer; struct pipe_resource *texture = surface->texture; - transfer = pipe_get_transfer(pipe, texture, surface->face, - surface->level, surface->zslice, - PIPE_TRANSFER_READ, 0, 0, surface->width, - surface->height); + transfer = pipe_get_transfer(pipe, texture, surface->u.tex.level, + surface->u.tex.first_layer, PIPE_TRANSFER_READ, + 0, 0, surface->width, surface->height); debug_dump_transfer_bmp(pipe, filename, transfer); diff --git a/src/gallium/auxiliary/util/u_debug_describe.c b/src/gallium/auxiliary/util/u_debug_describe.c index 
1c90ff31069..7ed8ee608a8 100644 --- a/src/gallium/auxiliary/util/u_debug_describe.c +++ b/src/gallium/auxiliary/util/u_debug_describe.c @@ -69,7 +69,7 @@ debug_describe_surface(char* buf, const struct pipe_surface *ptr) { char res[128]; debug_describe_resource(res, ptr->texture); - util_sprintf(buf, "pipe_surface<%s,%u,%u,%u>", res, ptr->face, ptr->level, ptr->zslice); + util_sprintf(buf, "pipe_surface<%s,%u,%u,%u>", res, ptr->u.tex.level, ptr->u.tex.first_layer, ptr->u.tex.last_layer); } void diff --git a/src/gallium/auxiliary/util/u_debug_stack.c b/src/gallium/auxiliary/util/u_debug_stack.c index 528a1c394be..24e039fd226 100644 --- a/src/gallium/auxiliary/util/u_debug_stack.c +++ b/src/gallium/auxiliary/util/u_debug_stack.c @@ -48,7 +48,10 @@ debug_backtrace_capture(struct debug_stack_frame *backtrace, if(!nr_frames) return; -#if defined(PIPE_CC_GCC) +#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) + __asm__ __volatile__("mov (%%ebp),%0": "=r" (frame_pointer)); + frame_pointer = (const void **)frame_pointer[0]; +#elif defined(PIPE_CC_GCC) frame_pointer = ((const void **)__builtin_frame_address(1)); #elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) __asm { diff --git a/src/gallium/auxiliary/util/u_debug_symbol.c b/src/gallium/auxiliary/util/u_debug_symbol.c index 332952af88b..44d437747a1 100644 --- a/src/gallium/auxiliary/util/u_debug_symbol.c +++ b/src/gallium/auxiliary/util/u_debug_symbol.c @@ -40,7 +40,7 @@ #include "u_debug_symbol.h" #include "u_hash_table.h" -#if defined(PIPE_SUBSYSTEM_WINDOWS_USER) && defined(PIPE_ARCH_X86) +#if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) #include <windows.h> #include <stddef.h> @@ -165,7 +165,7 @@ debug_symbol_name_glibc(const void *addr, char* buf, unsigned size) void debug_symbol_name(const void *addr, char* buf, unsigned size) { -#if defined(PIPE_SUBSYSTEM_WINDOWS_USER) && defined(PIPE_ARCH_X86) +#if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) debug_symbol_name_imagehlp(addr, buf, size); if(buf[0]) return; diff --git a/src/gallium/auxiliary/util/u_dirty_flags.h b/src/gallium/auxiliary/util/u_dirty_flags.h index 7e1be45ad5a..40539f0b0ea 100644 --- a/src/gallium/auxiliary/util/u_dirty_flags.h +++ b/src/gallium/auxiliary/util/u_dirty_flags.h @@ -24,5 +24,9 @@ #define U_NEW_VERTEX_BUFFER 0x10000 #define U_NEW_QUERY 0x20000 #define U_NEW_DEPTH_STENCIL 0x40000 +#define U_NEW_GS 0x80000 +#define U_NEW_GS_CONSTANTS 0x100000 +#define U_NEW_GS_SAMPLER_VIEW 0x200000 +#define U_NEW_GS_SAMPLER_STATES 0x400000 #endif diff --git a/src/gallium/auxiliary/util/u_dirty_surfaces.h b/src/gallium/auxiliary/util/u_dirty_surfaces.h index fd1bbe5ffdf..f3618d9be74 100644 --- a/src/gallium/auxiliary/util/u_dirty_surfaces.h +++ b/src/gallium/auxiliary/util/u_dirty_surfaces.h @@ -77,7 +77,7 @@ util_dirty_surfaces_use_levels_for_sampling(struct pipe_context *pipe, struct ut struct util_dirty_surface *ds = LIST_ENTRY(struct util_dirty_surface, p, dirty_list); next = p->next; - if(ds->base.level >= first && ds->base.level <= last) + if(ds->base.u.tex.level >= first && ds->base.u.tex.level <= last) flush(pipe, &ds->base); } } @@ -86,7 +86,8 @@ static INLINE void util_dirty_surfaces_use_for_sampling_with(struct pipe_context *pipe, struct util_dirty_surfaces *dss, struct pipe_sampler_view *psv, struct pipe_sampler_state *pss, util_dirty_surface_flush_t flush) { if(!LIST_IS_EMPTY(&dss->dirty_list)) - util_dirty_surfaces_use_levels_for_sampling(pipe, dss, (unsigned)pss->min_lod + psv->first_level, MIN2((unsigned)ceilf(pss->max_lod) + psv->first_level, 
psv->last_level), flush); + util_dirty_surfaces_use_levels_for_sampling(pipe, dss, (unsigned)pss->min_lod + psv->u.tex.first_level, + MIN2((unsigned)ceilf(pss->max_lod) + psv->u.tex.first_level, psv->u.tex.last_level), flush); } static INLINE void diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c index cda5b8ba512..b471d59eebf 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -279,6 +279,10 @@ util_dump_template(struct os_stream *stream, const struct pipe_resource *templat util_dump_uint(stream, templat->depth0); util_dump_member_end(stream); + util_dump_member_begin(stream, "array_size"); + util_dump_uint(stream, templat->array_size); + util_dump_member_end(stream); + util_dump_member(stream, uint, templat, last_level); util_dump_member(stream, uint, templat, usage); util_dump_member(stream, uint, templat, bind); @@ -633,14 +637,12 @@ util_dump_surface(struct os_stream *stream, const struct pipe_surface *state) util_dump_member(stream, uint, state, width); util_dump_member(stream, uint, state, height); - util_dump_member(stream, uint, state, layout); - util_dump_member(stream, uint, state, offset); util_dump_member(stream, uint, state, usage); util_dump_member(stream, ptr, state, texture); - util_dump_member(stream, uint, state, face); - util_dump_member(stream, uint, state, level); - util_dump_member(stream, uint, state, zslice); + util_dump_member(stream, uint, state, u.tex.level); + util_dump_member(stream, uint, state, u.tex.first_layer); + util_dump_member(stream, uint, state, u.tex.last_layer); util_dump_struct_end(stream); } @@ -660,7 +662,7 @@ util_dump_transfer(struct os_stream *stream, const struct pipe_transfer *state) /*util_dump_member(stream, uint, state, box);*/ util_dump_member(stream, uint, state, stride); - util_dump_member(stream, uint, state, slice_stride); + util_dump_member(stream, uint, state, layer_stride); /*util_dump_member(stream, ptr, state, data);*/ diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 1fbd83841c1..26a0eebc544 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -76,6 +76,7 @@ PIPE_FORMAT_B4G4R4X4_UNORM , plain, 1, 1, un4 , un4 , un4 , x4 , zyx1, r PIPE_FORMAT_B5G6R5_UNORM , plain, 1, 1, un5 , un6 , un5 , , zyx1, rgb PIPE_FORMAT_R10G10B10A2_UNORM , plain, 1, 1, un10, un10, un10, un2 , xyzw, rgb PIPE_FORMAT_B10G10R10A2_UNORM , plain, 1, 1, un10, un10, un10, un2 , zyxw, rgb +PIPE_FORMAT_B2G3R3_UNORM , plain, 1, 1, un2 , un3 , un3 , , zyx1, rgb # Luminance/Intensity/Alpha formats PIPE_FORMAT_L8_UNORM , plain, 1, 1, un8 , , , , xxx1, rgb @@ -84,6 +85,9 @@ PIPE_FORMAT_I8_UNORM , plain, 1, 1, un8 , , , , xxxx, r PIPE_FORMAT_L4A4_UNORM , plain, 1, 1, un4 , un4 , , , xxxy, rgb PIPE_FORMAT_L8A8_UNORM , plain, 1, 1, un8 , un8 , , , xxxy, rgb PIPE_FORMAT_L16_UNORM , plain, 1, 1, un16, , , , xxx1, rgb +PIPE_FORMAT_A16_UNORM , plain, 1, 1, un16, , , , 000x, rgb +PIPE_FORMAT_I16_UNORM , plain, 1, 1, un16, , , , xxxx, rgb +PIPE_FORMAT_L16A16_UNORM , plain, 1, 1, un16, un16, , , xxxy, rgb # SRGB formats PIPE_FORMAT_L8_SRGB , plain, 1, 1, un8 , , , , xxx1, srgb diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 6a931a95819..d22ae8b375b 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -48,6 +48,8 @@ #include "util/u_simple_shaders.h" #include 
"util/u_math.h" #include "util/u_texture.h" +#include "util/u_half.h" +#include "util/u_surface.h" #include "cso_cache/cso_context.h" @@ -65,7 +67,7 @@ struct gen_mipmap_state struct pipe_vertex_element velem[2]; void *vs; - void *fs2d, *fsCube; + void *fs1d, *fs2d, *fs3d, *fsCube; struct pipe_resource *vbuf; /**< quad vertices */ unsigned vbuf_slot; @@ -89,24 +91,7 @@ enum dtype }; -typedef ushort half_float; - - -static half_float -float_to_half(float f) -{ - /* XXX fix this */ - return 0; -} - -static float -half_to_float(half_float h) -{ - /* XXX fix this */ - return 0.0f; -} - - +typedef uint16_t half_float; /** @@ -145,7 +130,7 @@ half_to_float(half_float h) rowC[j][e], rowC[k][e], \ rowD[j][e], rowD[k][e]); \ } while(0) - + #define FILTER_F_3D(e) \ do { \ dst[i][e] = (rowA[j][e] + rowA[k][e] \ @@ -156,15 +141,15 @@ half_to_float(half_float h) #define FILTER_HF_3D(e) \ do { \ - const float aj = half_to_float(rowA[j][e]); \ - const float ak = half_to_float(rowA[k][e]); \ - const float bj = half_to_float(rowB[j][e]); \ - const float bk = half_to_float(rowB[k][e]); \ - const float cj = half_to_float(rowC[j][e]); \ - const float ck = half_to_float(rowC[k][e]); \ - const float dj = half_to_float(rowD[j][e]); \ - const float dk = half_to_float(rowD[k][e]); \ - dst[i][e] = float_to_half((aj + ak + bj + bk + cj + ck + dj + dk) \ + const float aj = util_half_to_float(rowA[j][e]); \ + const float ak = util_half_to_float(rowA[k][e]); \ + const float bj = util_half_to_float(rowB[j][e]); \ + const float bk = util_half_to_float(rowB[k][e]); \ + const float cj = util_half_to_float(rowC[j][e]); \ + const float ck = util_half_to_float(rowC[k][e]); \ + const float dj = util_half_to_float(rowD[j][e]); \ + const float dk = util_half_to_float(rowD[k][e]); \ + dst[i][e] = util_float_to_half((aj + ak + bj + bk + cj + ck + dj + dk) \ * 0.125F); \ } while(0) /*@}*/ @@ -343,8 +328,7 @@ do_row(enum dtype datatype, uint comps, int srcWidth, } } -#if 0 - else if (datatype == HALF_DTYPE_FLOAT && comps == 4) { + else if (datatype == DTYPE_HALF_FLOAT && comps == 4) { uint i, j, k, comp; const half_float(*rowA)[4] = (const half_float(*)[4]) srcRowA; const half_float(*rowB)[4] = (const half_float(*)[4]) srcRowB; @@ -353,11 +337,11 @@ do_row(enum dtype datatype, uint comps, int srcWidth, i++, j += colStride, k += colStride) { for (comp = 0; comp < 4; comp++) { float aj, ak, bj, bk; - aj = half_to_float(rowA[j][comp]); - ak = half_to_float(rowA[k][comp]); - bj = half_to_float(rowB[j][comp]); - bk = half_to_float(rowB[k][comp]); - dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F); + aj = util_half_to_float(rowA[j][comp]); + ak = util_half_to_float(rowA[k][comp]); + bj = util_half_to_float(rowB[j][comp]); + bk = util_half_to_float(rowB[k][comp]); + dst[i][comp] = util_float_to_half((aj + ak + bj + bk) * 0.25F); } } } @@ -370,11 +354,11 @@ do_row(enum dtype datatype, uint comps, int srcWidth, i++, j += colStride, k += colStride) { for (comp = 0; comp < 3; comp++) { float aj, ak, bj, bk; - aj = half_to_float(rowA[j][comp]); - ak = half_to_float(rowA[k][comp]); - bj = half_to_float(rowB[j][comp]); - bk = half_to_float(rowB[k][comp]); - dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F); + aj = util_half_to_float(rowA[j][comp]); + ak = util_half_to_float(rowA[k][comp]); + bj = util_half_to_float(rowB[j][comp]); + bk = util_half_to_float(rowB[k][comp]); + dst[i][comp] = util_float_to_half((aj + ak + bj + bk) * 0.25F); } } } @@ -387,11 +371,11 @@ do_row(enum dtype datatype, uint comps, int srcWidth, i++, j += 
colStride, k += colStride) { for (comp = 0; comp < 2; comp++) { float aj, ak, bj, bk; - aj = half_to_float(rowA[j][comp]); - ak = half_to_float(rowA[k][comp]); - bj = half_to_float(rowB[j][comp]); - bk = half_to_float(rowB[k][comp]); - dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F); + aj = util_half_to_float(rowA[j][comp]); + ak = util_half_to_float(rowA[k][comp]); + bj = util_half_to_float(rowB[j][comp]); + bk = util_half_to_float(rowB[k][comp]); + dst[i][comp] = util_float_to_half((aj + ak + bj + bk) * 0.25F); } } } @@ -403,14 +387,13 @@ do_row(enum dtype datatype, uint comps, int srcWidth, for (i = j = 0, k = k0; i < (uint) dstWidth; i++, j += colStride, k += colStride) { float aj, ak, bj, bk; - aj = half_to_float(rowA[j]); - ak = half_to_float(rowA[k]); - bj = half_to_float(rowB[j]); - bk = half_to_float(rowB[k]); - dst[i] = float_to_half((aj + ak + bj + bk) * 0.25F); + aj = util_half_to_float(rowA[j]); + ak = util_half_to_float(rowA[k]); + bj = util_half_to_float(rowB[j]); + bk = util_half_to_float(rowB[k]); + dst[i] = util_float_to_half((aj + ak + bj + bk) * 0.25F); } } -#endif else if (datatype == DTYPE_UINT && comps == 1) { uint i, j, k; @@ -1036,32 +1019,34 @@ reduce_2d(enum pipe_format pformat, static void reduce_3d(enum pipe_format pformat, int srcWidth, int srcHeight, int srcDepth, - int srcRowStride, const ubyte *srcPtr, + int srcRowStride, int srcImageStride, const ubyte *srcPtr, int dstWidth, int dstHeight, int dstDepth, - int dstRowStride, ubyte *dstPtr) + int dstRowStride, int dstImageStride, ubyte *dstPtr) { const int bpt = util_format_get_blocksize(pformat); - const int border = 0; int img, row; - int bytesPerSrcImage, bytesPerDstImage; - int bytesPerSrcRow, bytesPerDstRow; int srcImageOffset, srcRowOffset; enum dtype datatype; uint comps; format_to_type_comps(pformat, &datatype, &comps); - bytesPerSrcImage = srcWidth * srcHeight * bpt; - bytesPerDstImage = dstWidth * dstHeight * bpt; + /* XXX I think we should rather assert those strides */ + if (!srcImageStride) + srcImageStride = srcWidth * srcHeight * bpt; + if (!dstImageStride) + dstImageStride = dstWidth * dstHeight * bpt; - bytesPerSrcRow = srcWidth * bpt; - bytesPerDstRow = dstWidth * bpt; + if (!srcRowStride) + srcRowStride = srcWidth * bpt; + if (!dstRowStride) + dstRowStride = dstWidth * bpt; /* Offset between adjacent src images to be averaged together */ - srcImageOffset = (srcDepth == dstDepth) ? 0 : bytesPerSrcImage; + srcImageOffset = (srcDepth == dstDepth) ? 0 : srcImageStride; /* Offset between adjacent src rows to be averaged together */ - srcRowOffset = (srcHeight == dstHeight) ? 0 : srcWidth * bpt; + srcRowOffset = (srcHeight == dstHeight) ? 0 : srcRowStride; /* * Need to average together up to 8 src pixels for each dest pixel. 
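(A minimal sketch, not part of the patch: the hunks above drop the stubbed half_to_float()/float_to_half() helpers and use util_half_to_float()/util_float_to_half() from util/u_half.h instead. The snippet below shows the 2x2 box-filter step that the FILTER_HF_* macros and the half-float branches of do_row() now perform — convert to float, average, convert back. The wrapper function name is illustrative only.)

#include "util/u_half.h"   /* util_half_to_float(), util_float_to_half() */

/* Average one 2x2 block of half-float texel components. */
static uint16_t
average_half_2x2(uint16_t a, uint16_t b, uint16_t c, uint16_t d)
{
   const float sum = util_half_to_float(a) + util_half_to_float(b) +
                     util_half_to_float(c) + util_half_to_float(d);
   return util_float_to_half(sum * 0.25f);
}
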
@@ -1077,16 +1062,13 @@ reduce_3d(enum pipe_format pformat, */ for (img = 0; img < dstDepth; img++) { - /* first source image pointer, skipping border */ + /* first source image pointer */ const ubyte *imgSrcA = srcPtr - + (bytesPerSrcImage + bytesPerSrcRow + border) * bpt * border - + img * (bytesPerSrcImage + srcImageOffset); - /* second source image pointer, skipping border */ + + img * (srcImageStride + srcImageOffset); + /* second source image pointer */ const ubyte *imgSrcB = imgSrcA + srcImageOffset; - /* address of the dest image, skipping border */ - ubyte *imgDst = dstPtr - + (bytesPerDstImage + bytesPerDstRow + border) * bpt * border - + img * bytesPerDstImage; + /* address of the dest image */ + ubyte *imgDst = dstPtr + img * dstImageStride; /* setup the four source row pointers and the dest row pointer */ const ubyte *srcImgARowA = imgSrcA; @@ -1102,11 +1084,11 @@ reduce_3d(enum pipe_format pformat, dstWidth, dstImgRow); /* advance to next rows */ - srcImgARowA += bytesPerSrcRow + srcRowOffset; - srcImgARowB += bytesPerSrcRow + srcRowOffset; - srcImgBRowA += bytesPerSrcRow + srcRowOffset; - srcImgBRowB += bytesPerSrcRow + srcRowOffset; - dstImgRow += bytesPerDstRow; + srcImgARowA += srcRowStride + srcRowOffset; + srcImgARowB += srcRowStride + srcRowOffset; + srcImgBRowA += srcRowStride + srcRowOffset; + srcImgBRowB += srcRowStride + srcRowOffset; + dstImgRow += dstImageStride; } } } @@ -1117,25 +1099,24 @@ reduce_3d(enum pipe_format pformat, static void make_1d_mipmap(struct gen_mipmap_state *ctx, struct pipe_resource *pt, - uint face, uint baseLevel, uint lastLevel) + uint layer, uint baseLevel, uint lastLevel) { struct pipe_context *pipe = ctx->pipe; - const uint zslice = 0; uint dstLevel; for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { const uint srcLevel = dstLevel - 1; struct pipe_transfer *srcTrans, *dstTrans; void *srcMap, *dstMap; - - srcTrans = pipe_get_transfer(pipe, pt, face, srcLevel, zslice, - PIPE_TRANSFER_READ, 0, 0, - u_minify(pt->width0, srcLevel), - u_minify(pt->height0, srcLevel)); - dstTrans = pipe_get_transfer(pipe, pt, face, dstLevel, zslice, - PIPE_TRANSFER_WRITE, 0, 0, - u_minify(pt->width0, dstLevel), - u_minify(pt->height0, dstLevel)); + + srcTrans = pipe_get_transfer(pipe, pt, srcLevel, layer, + PIPE_TRANSFER_READ, 0, 0, + u_minify(pt->width0, srcLevel), + u_minify(pt->height0, srcLevel)); + dstTrans = pipe_get_transfer(pipe, pt, dstLevel, layer, + PIPE_TRANSFER_WRITE, 0, 0, + u_minify(pt->width0, dstLevel), + u_minify(pt->height0, dstLevel)); srcMap = (ubyte *) pipe->transfer_map(pipe, srcTrans); dstMap = (ubyte *) pipe->transfer_map(pipe, dstTrans); @@ -1156,12 +1137,11 @@ make_1d_mipmap(struct gen_mipmap_state *ctx, static void make_2d_mipmap(struct gen_mipmap_state *ctx, struct pipe_resource *pt, - uint face, uint baseLevel, uint lastLevel) + uint layer, uint baseLevel, uint lastLevel) { struct pipe_context *pipe = ctx->pipe; - const uint zslice = 0; uint dstLevel; - + assert(util_format_get_blockwidth(pt->format) == 1); assert(util_format_get_blockheight(pt->format) == 1); @@ -1169,15 +1149,15 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, const uint srcLevel = dstLevel - 1; struct pipe_transfer *srcTrans, *dstTrans; ubyte *srcMap, *dstMap; - - srcTrans = pipe_get_transfer(pipe, pt, face, srcLevel, zslice, - PIPE_TRANSFER_READ, 0, 0, - u_minify(pt->width0, srcLevel), - u_minify(pt->height0, srcLevel)); - dstTrans = pipe_get_transfer(pipe, pt, face, dstLevel, zslice, - PIPE_TRANSFER_WRITE, 0, 0, - u_minify(pt->width0, 
dstLevel), - u_minify(pt->height0, dstLevel)); + + srcTrans = pipe_get_transfer(pipe, pt, srcLevel, layer, + PIPE_TRANSFER_READ, 0, 0, + u_minify(pt->width0, srcLevel), + u_minify(pt->height0, srcLevel)); + dstTrans = pipe_get_transfer(pipe, pt, dstLevel, layer, + PIPE_TRANSFER_WRITE, 0, 0, + u_minify(pt->width0, dstLevel), + u_minify(pt->height0, dstLevel)); srcMap = (ubyte *) pipe->transfer_map(pipe, srcTrans); dstMap = (ubyte *) pipe->transfer_map(pipe, dstTrans); @@ -1197,41 +1177,49 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, } +/* XXX looks a bit more like it could work now but need to test */ static void make_3d_mipmap(struct gen_mipmap_state *ctx, struct pipe_resource *pt, uint face, uint baseLevel, uint lastLevel) { -#if 0 struct pipe_context *pipe = ctx->pipe; - struct pipe_screen *screen = pipe->screen; - uint dstLevel, zslice = 0; + uint dstLevel; + struct pipe_box src_box, dst_box; assert(util_format_get_blockwidth(pt->format) == 1); assert(util_format_get_blockheight(pt->format) == 1); + src_box.x = src_box.y = src_box.z = 0; + dst_box.x = dst_box.y = dst_box.z = 0; + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { const uint srcLevel = dstLevel - 1; struct pipe_transfer *srcTrans, *dstTrans; ubyte *srcMap, *dstMap; - - srcTrans = pipe->get_transfer(pipe, pt, face, srcLevel, zslice, - PIPE_TRANSFER_READ, 0, 0, - u_minify(pt->width0, srcLevel), - u_minify(pt->height0, srcLevel)); - dstTrans = pipe->get_transfer(pipe, pt, face, dstLevel, zslice, - PIPE_TRANSFER_WRITE, 0, 0, - u_minify(pt->width0, dstLevel), - u_minify(pt->height0, dstLevel)); + struct pipe_box src_box, dst_box; + src_box.width = u_minify(pt->width0, srcLevel); + src_box.height = u_minify(pt->height0, srcLevel); + src_box.depth = u_minify(pt->depth0, srcLevel); + dst_box.width = u_minify(pt->width0, dstLevel); + dst_box.height = u_minify(pt->height0, dstLevel); + dst_box.depth = u_minify(pt->depth0, dstLevel); + + srcTrans = pipe->get_transfer(pipe, pt, srcLevel, + PIPE_TRANSFER_READ, + &src_box); + dstTrans = pipe->get_transfer(pipe, pt, dstLevel, + PIPE_TRANSFER_WRITE, + &dst_box); srcMap = (ubyte *) pipe->transfer_map(pipe, srcTrans); dstMap = (ubyte *) pipe->transfer_map(pipe, dstTrans); reduce_3d(pt->format, - srcTrans->width, srcTrans->height, - srcTrans->stride, srcMap, - dstTrans->width, dstTrans->height, - dstTrans->stride, dstMap); + srcTrans->box.width, srcTrans->box.height, srcTrans->box.depth, + srcTrans->stride, srcTrans->layer_stride, srcMap, + dstTrans->box.width, dstTrans->box.height, dstTrans->box.depth, + dstTrans->stride, dstTrans->layer_stride, dstMap); pipe->transfer_unmap(pipe, srcTrans); pipe->transfer_unmap(pipe, dstTrans); @@ -1239,28 +1227,25 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, pipe->transfer_destroy(pipe, srcTrans); pipe->transfer_destroy(pipe, dstTrans); } -#else - (void) reduce_3d; -#endif } static void fallback_gen_mipmap(struct gen_mipmap_state *ctx, struct pipe_resource *pt, - uint face, uint baseLevel, uint lastLevel) + uint layer, uint baseLevel, uint lastLevel) { switch (pt->target) { case PIPE_TEXTURE_1D: - make_1d_mipmap(ctx, pt, face, baseLevel, lastLevel); + make_1d_mipmap(ctx, pt, layer, baseLevel, lastLevel); break; case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_CUBE: - make_2d_mipmap(ctx, pt, face, baseLevel, lastLevel); + make_2d_mipmap(ctx, pt, layer, baseLevel, lastLevel); break; case PIPE_TEXTURE_3D: - make_3d_mipmap(ctx, pt, face, baseLevel, lastLevel); + make_3d_mipmap(ctx, pt, layer, baseLevel, lastLevel); 
break; default: assert(0); @@ -1328,8 +1313,12 @@ util_create_gen_mipmap(struct pipe_context *pipe, } /* fragment shader */ + ctx->fs1d = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_1D, + TGSI_INTERPOLATE_LINEAR); ctx->fs2d = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D, TGSI_INTERPOLATE_LINEAR); + ctx->fs3d = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_3D, + TGSI_INTERPOLATE_LINEAR); ctx->fsCube = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_CUBE, TGSI_INTERPOLATE_LINEAR); @@ -1371,7 +1360,7 @@ get_next_slot(struct gen_mipmap_state *ctx) static unsigned set_vertex_data(struct gen_mipmap_state *ctx, enum pipe_texture_target tex_target, - uint face) + uint layer, float r) { unsigned offset; @@ -1397,26 +1386,26 @@ set_vertex_data(struct gen_mipmap_state *ctx, {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f} }; - util_map_texcoords2d_onto_cubemap(face, &st[0][0], 2, + util_map_texcoords2d_onto_cubemap(layer, &st[0][0], 2, &ctx->vertices[0][1][0], 8); } else { - /* 1D/2D */ + /* 1D/2D/3D */ ctx->vertices[0][1][0] = 0.0f; /*s*/ ctx->vertices[0][1][1] = 0.0f; /*t*/ - ctx->vertices[0][1][2] = 0.0f; /*r*/ + ctx->vertices[0][1][2] = r; /*r*/ ctx->vertices[1][1][0] = 1.0f; ctx->vertices[1][1][1] = 0.0f; - ctx->vertices[1][1][2] = 0.0f; + ctx->vertices[1][1][2] = r; ctx->vertices[2][1][0] = 1.0f; ctx->vertices[2][1][1] = 1.0f; - ctx->vertices[2][1][2] = 0.0f; + ctx->vertices[2][1][2] = r; ctx->vertices[3][1][0] = 0.0f; ctx->vertices[3][1][1] = 1.0f; - ctx->vertices[3][1][2] = 0.0f; + ctx->vertices[3][1][2] = r; } offset = get_next_slot( ctx ); @@ -1437,9 +1426,11 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx) { struct pipe_context *pipe = ctx->pipe; - pipe->delete_vs_state(pipe, ctx->vs); - pipe->delete_fs_state(pipe, ctx->fs2d); pipe->delete_fs_state(pipe, ctx->fsCube); + pipe->delete_fs_state(pipe, ctx->fs3d); + pipe->delete_fs_state(pipe, ctx->fs2d); + pipe->delete_fs_state(pipe, ctx->fs1d); + pipe->delete_vs_state(pipe, ctx->vs); pipe_resource_reference(&ctx->vbuf, NULL); @@ -1478,9 +1469,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, struct pipe_screen *screen = pipe->screen; struct pipe_framebuffer_state fb; struct pipe_resource *pt = psv->texture; - void *fs = (pt->target == PIPE_TEXTURE_CUBE) ? 
ctx->fsCube : ctx->fs2d; + void *fs; uint dstLevel; - uint zslice = 0; uint offset; /* The texture object should have room for the levels which we're @@ -1494,8 +1484,28 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, assert(filter == PIPE_TEX_FILTER_LINEAR || filter == PIPE_TEX_FILTER_NEAREST); + switch (pt->target) { + case PIPE_TEXTURE_1D: + fs = ctx->fs1d; + break; + case PIPE_TEXTURE_2D: + fs = ctx->fs2d; + break; + case PIPE_TEXTURE_3D: + fs = ctx->fs3d; + break; + case PIPE_TEXTURE_CUBE: + fs = ctx->fsCube; + break; + case PIPE_TEXTURE_1D_ARRAY: + case PIPE_TEXTURE_2D_ARRAY: + default: + assert(0); + fs = ctx->fs2d; + } + /* check if we can render in the texture's format */ - if (!screen->is_format_supported(screen, psv->format, PIPE_TEXTURE_2D, + if (!screen->is_format_supported(screen, psv->format, pt->target, pt->nr_samples, PIPE_BIND_RENDER_TARGET, 0)) { fallback_gen_mipmap(ctx, pt, face, baseLevel, lastLevel); return; @@ -1539,60 +1549,85 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { const uint srcLevel = dstLevel - 1; struct pipe_viewport_state vp; - - struct pipe_surface *surf = - screen->get_tex_surface(screen, pt, face, dstLevel, zslice, - PIPE_BIND_RENDER_TARGET); - - /* - * Setup framebuffer / dest surface - */ - fb.cbufs[0] = surf; - fb.width = u_minify(pt->width0, dstLevel); - fb.height = u_minify(pt->height0, dstLevel); - cso_set_framebuffer(ctx->cso, &fb); - - /* viewport */ - vp.scale[0] = 0.5f * fb.width; - vp.scale[1] = 0.5f * fb.height; - vp.scale[2] = 1.0f; - vp.scale[3] = 1.0f; - vp.translate[0] = 0.5f * fb.width; - vp.translate[1] = 0.5f * fb.height; - vp.translate[2] = 0.0f; - vp.translate[3] = 0.0f; - cso_set_viewport(ctx->cso, &vp); - - /* - * Setup sampler state - * Note: we should only have to set the min/max LOD clamps to ensure - * we grab texels from the right mipmap level. But some hardware - * has trouble with min clamping so we also set the lod_bias to - * try to work around that. - */ - ctx->sampler.min_lod = ctx->sampler.max_lod = (float) srcLevel; - ctx->sampler.lod_bias = (float) srcLevel; - cso_single_sampler(ctx->cso, 0, &ctx->sampler); - cso_single_sampler_done(ctx->cso); - - cso_set_fragment_sampler_views(ctx->cso, 1, &psv); - - /* quad coords in clip coords */ - offset = set_vertex_data(ctx, - pt->target, - face); - - util_draw_vertex_buffer(ctx->pipe, - ctx->vbuf, - offset, - PIPE_PRIM_TRIANGLE_FAN, - 4, /* verts */ - 2); /* attribs/vert */ - - pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); - - /* need to signal that the texture has changed _after_ rendering to it */ - pipe_surface_reference( &surf, NULL ); + unsigned nr_layers, layer, i; + float rcoord = 0.0f; + + if (pt->target == PIPE_TEXTURE_3D) + nr_layers = u_minify(pt->depth0, dstLevel); + else + nr_layers = 1; + + for (i = 0; i < nr_layers; i++) { + struct pipe_surface *surf, surf_templ; + if (pt->target == PIPE_TEXTURE_3D) { + /* in theory with geom shaders and driver with full layer support + could do that in one go. */ + layer = i; + offset = 1.0f / (float)(nr_layers * 2); + /* XXX hmm really? 
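
A note on the arithmetic in the 3D case here: the r coordinate works out to rcoord = layer / nr_layers + 1 / (2 * nr_layers) = (layer + 0.5) / nr_layers, i.e. the centre of destination slice 'layer' in normalized texture coordinates. With nr_layers = 4, for instance, the slices are sampled at r = 0.125, 0.375, 0.625 and 0.875; each sample point lands midway between the centres of source slices 2*layer and 2*layer + 1, so LINEAR filtering averages the two parent slices. The floating-point value stored into 'offset' just before the r computation appears to be dead: 'offset' is the unsigned vertex-buffer offset that set_vertex_data() assigns further down.
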
*/ + rcoord = (float)layer / (float)nr_layers + 1.0f / (float)(nr_layers * 2); + } + else + layer = face; + + memset(&surf_templ, 0, sizeof(surf_templ)); + u_surface_default_template(&surf_templ, pt, PIPE_BIND_RENDER_TARGET); + surf_templ.u.tex.level = dstLevel; + surf_templ.u.tex.first_layer = layer; + surf_templ.u.tex.last_layer = layer; + surf = pipe->create_surface(pipe, pt, &surf_templ); + + /* + * Setup framebuffer / dest surface + */ + fb.cbufs[0] = surf; + fb.width = u_minify(pt->width0, dstLevel); + fb.height = u_minify(pt->height0, dstLevel); + cso_set_framebuffer(ctx->cso, &fb); + + /* viewport */ + vp.scale[0] = 0.5f * fb.width; + vp.scale[1] = 0.5f * fb.height; + vp.scale[2] = 1.0f; + vp.scale[3] = 1.0f; + vp.translate[0] = 0.5f * fb.width; + vp.translate[1] = 0.5f * fb.height; + vp.translate[2] = 0.0f; + vp.translate[3] = 0.0f; + cso_set_viewport(ctx->cso, &vp); + + /* + * Setup sampler state + * Note: we should only have to set the min/max LOD clamps to ensure + * we grab texels from the right mipmap level. But some hardware + * has trouble with min clamping so we also set the lod_bias to + * try to work around that. + */ + ctx->sampler.min_lod = ctx->sampler.max_lod = (float) srcLevel; + ctx->sampler.lod_bias = (float) srcLevel; + cso_single_sampler(ctx->cso, 0, &ctx->sampler); + cso_single_sampler_done(ctx->cso); + + cso_set_fragment_sampler_views(ctx->cso, 1, &psv); + + /* quad coords in clip coords */ + offset = set_vertex_data(ctx, + pt->target, + face, + rcoord); + + util_draw_vertex_buffer(ctx->pipe, + ctx->vbuf, + offset, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ + + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + + /* need to signal that the texture has changed _after_ rendering to it */ + pipe_surface_reference( &surf, NULL ); + } } /* restore state we changed */ diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.h b/src/gallium/auxiliary/util/u_gen_mipmap.h index a7502b9982b..a10b6a4aba9 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.h +++ b/src/gallium/auxiliary/util/u_gen_mipmap.h @@ -60,7 +60,7 @@ util_gen_mipmap_flush( struct gen_mipmap_state *ctx ); extern void util_gen_mipmap(struct gen_mipmap_state *ctx, struct pipe_sampler_view *psv, - uint face, uint baseLevel, uint lastLevel, uint filter); + uint layer, uint baseLevel, uint lastLevel, uint filter); #ifdef __cplusplus diff --git a/src/gallium/auxiliary/util/u_index_modify.c b/src/gallium/auxiliary/util/u_index_modify.c index 65b079ed537..f2c9db3caf1 100644 --- a/src/gallium/auxiliary/util/u_index_modify.c +++ b/src/gallium/auxiliary/util/u_index_modify.c @@ -24,26 +24,70 @@ #include "util/u_index_modify.h" #include "util/u_inlines.h" +/* Ubyte indices. 
*/ + +void util_shorten_ubyte_elts_to_userptr(struct pipe_context *context, + struct pipe_resource *elts, + int index_bias, + unsigned start, + unsigned count, + void *out) +{ + struct pipe_transfer *src_transfer; + unsigned char *in_map; + unsigned short *out_map = out; + unsigned i; + + in_map = pipe_buffer_map(context, elts, PIPE_TRANSFER_READ, &src_transfer); + in_map += start; + + for (i = 0; i < count; i++) { + *out_map = (unsigned short)(*in_map + index_bias); + in_map++; + out_map++; + } + + pipe_buffer_unmap(context, src_transfer); +} + void util_shorten_ubyte_elts(struct pipe_context *context, struct pipe_resource **elts, int index_bias, unsigned start, unsigned count) { - struct pipe_screen* screen = context->screen; struct pipe_resource* new_elts; - unsigned char *in_map; unsigned short *out_map; - struct pipe_transfer *src_transfer, *dst_transfer; - unsigned i; + struct pipe_transfer *dst_transfer; - new_elts = pipe_buffer_create(screen, + new_elts = pipe_buffer_create(context->screen, PIPE_BIND_INDEX_BUFFER, 2 * count); - in_map = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer); - out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &dst_transfer); + out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, + &dst_transfer); + util_shorten_ubyte_elts_to_userptr(context, *elts, index_bias, + start, count, out_map); + pipe_buffer_unmap(context, dst_transfer); + + *elts = new_elts; +} + + +/* Ushort indices. */ +void util_rebuild_ushort_elts_to_userptr(struct pipe_context *context, + struct pipe_resource *elts, + int index_bias, + unsigned start, unsigned count, + void *out) +{ + struct pipe_transfer *in_transfer = NULL; + unsigned short *in_map; + unsigned short *out_map = out; + unsigned i; + + in_map = pipe_buffer_map(context, elts, PIPE_TRANSFER_READ, &in_transfer); in_map += start; for (i = 0; i < count; i++) { @@ -52,10 +96,7 @@ void util_shorten_ubyte_elts(struct pipe_context *context, out_map++; } - pipe_buffer_unmap(context, *elts, src_transfer); - pipe_buffer_unmap(context, new_elts, dst_transfer); - - *elts = new_elts; + pipe_buffer_unmap(context, in_transfer); } void util_rebuild_ushort_elts(struct pipe_context *context, @@ -63,33 +104,47 @@ void util_rebuild_ushort_elts(struct pipe_context *context, int index_bias, unsigned start, unsigned count) { - struct pipe_transfer *in_transfer = NULL; struct pipe_transfer *out_transfer = NULL; struct pipe_resource *new_elts; - unsigned short *in_map; unsigned short *out_map; - unsigned i; new_elts = pipe_buffer_create(context->screen, PIPE_BIND_INDEX_BUFFER, 2 * count); - in_map = pipe_buffer_map(context, *elts, - PIPE_TRANSFER_READ, &in_transfer); out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &out_transfer); + util_rebuild_ushort_elts_to_userptr(context, *elts, index_bias, + start, count, out_map); + pipe_buffer_unmap(context, out_transfer); + + *elts = new_elts; +} + + +/* Uint indices. 
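
A minimal sketch of how a driver might use the new user-pointer variant added above: translate a ubyte index buffer into a temporary ushort array, for example before handing it to an upload manager. The wrapper below is hypothetical and not part of this change:

#include "pipe/p_context.h"
#include "util/u_index_modify.h"
#include "util/u_memory.h"

/* Returns a MALLOC'd array of 'count' 16-bit indices (the caller FREEs it),
 * or NULL on allocation failure. */
static unsigned short *
translate_ubyte_indices(struct pipe_context *pipe, struct pipe_resource *elts,
                        int index_bias, unsigned start, unsigned count)
{
   unsigned short *tmp = MALLOC(count * sizeof(unsigned short));

   if (tmp)
      util_shorten_ubyte_elts_to_userptr(pipe, elts, index_bias,
                                         start, count, tmp);
   return tmp;
}
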
*/ +void util_rebuild_uint_elts_to_userptr(struct pipe_context *context, + struct pipe_resource *elts, + int index_bias, + unsigned start, unsigned count, + void *out) +{ + struct pipe_transfer *in_transfer = NULL; + unsigned int *in_map; + unsigned int *out_map = out; + unsigned i; + + in_map = pipe_buffer_map(context, elts, PIPE_TRANSFER_READ, &in_transfer); in_map += start; + for (i = 0; i < count; i++) { - *out_map = (unsigned short)(*in_map + index_bias); + *out_map = (unsigned int)(*in_map + index_bias); in_map++; out_map++; } - pipe_buffer_unmap(context, *elts, in_transfer); - pipe_buffer_unmap(context, new_elts, out_transfer); - - *elts = new_elts; + pipe_buffer_unmap(context, in_transfer); } void util_rebuild_uint_elts(struct pipe_context *context, @@ -97,31 +152,19 @@ void util_rebuild_uint_elts(struct pipe_context *context, int index_bias, unsigned start, unsigned count) { - struct pipe_transfer *in_transfer = NULL; struct pipe_transfer *out_transfer = NULL; struct pipe_resource *new_elts; - unsigned int *in_map; unsigned int *out_map; - unsigned i; new_elts = pipe_buffer_create(context->screen, PIPE_BIND_INDEX_BUFFER, 2 * count); - in_map = pipe_buffer_map(context, *elts, - PIPE_TRANSFER_READ, &in_transfer); out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &out_transfer); - - in_map += start; - for (i = 0; i < count; i++) { - *out_map = (unsigned int)(*in_map + index_bias); - in_map++; - out_map++; - } - - pipe_buffer_unmap(context, *elts, in_transfer); - pipe_buffer_unmap(context, new_elts, out_transfer); + util_rebuild_uint_elts_to_userptr(context, *elts, index_bias, + start, count, out_map); + pipe_buffer_unmap(context, out_transfer); *elts = new_elts; } diff --git a/src/gallium/auxiliary/util/u_index_modify.h b/src/gallium/auxiliary/util/u_index_modify.h index 01a6cae94fc..1e9de3dfac8 100644 --- a/src/gallium/auxiliary/util/u_index_modify.h +++ b/src/gallium/auxiliary/util/u_index_modify.h @@ -23,19 +23,46 @@ #ifndef UTIL_INDEX_MODIFY_H #define UTIL_INDEX_MODIFY_H +struct pipe_context; +struct pipe_resource; + +void util_shorten_ubyte_elts_to_userptr(struct pipe_context *context, + struct pipe_resource *elts, + int index_bias, + unsigned start, + unsigned count, + void *out); + void util_shorten_ubyte_elts(struct pipe_context *context, struct pipe_resource **elts, int index_bias, unsigned start, unsigned count); + + +void util_rebuild_ushort_elts_to_userptr(struct pipe_context *context, + struct pipe_resource *elts, + int index_bias, + unsigned start, unsigned count, + void *out); + void util_rebuild_ushort_elts(struct pipe_context *context, struct pipe_resource **elts, int index_bias, unsigned start, unsigned count); + + +void util_rebuild_uint_elts_to_userptr(struct pipe_context *context, + struct pipe_resource *elts, + int index_bias, + unsigned start, unsigned count, + void *out); + void util_rebuild_uint_elts(struct pipe_context *context, struct pipe_resource **elts, int index_bias, unsigned start, unsigned count); + #endif diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index 6ed39561fbe..b4870bce981 100644 --- a/src/gallium/auxiliary/util/u_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -109,7 +109,7 @@ pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf) if (pipe_reference_described(&(*ptr)->reference, &surf->reference, (debug_reference_descriptor)debug_describe_surface)) - old_surf->texture->screen->tex_surface_destroy(old_surf); + 
old_surf->context->surface_destroy(old_surf->context, old_surf); *ptr = surf; } @@ -136,26 +136,28 @@ pipe_sampler_view_reference(struct pipe_sampler_view **ptr, struct pipe_sampler_ } static INLINE void -pipe_surface_reset(struct pipe_surface* ps, struct pipe_resource *pt, - unsigned face, unsigned level, unsigned zslice, unsigned flags) +pipe_surface_reset(struct pipe_context *ctx, struct pipe_surface* ps, + struct pipe_resource *pt, unsigned level, unsigned layer, + unsigned flags) { pipe_resource_reference(&ps->texture, pt); ps->format = pt->format; ps->width = u_minify(pt->width0, level); ps->height = u_minify(pt->height0, level); ps->usage = flags; - ps->face = face; - ps->level = level; - ps->zslice = zslice; + ps->u.tex.level = level; + ps->u.tex.first_layer = ps->u.tex.last_layer = layer; + ps->context = ctx; } static INLINE void -pipe_surface_init(struct pipe_surface* ps, struct pipe_resource *pt, - unsigned face, unsigned level, unsigned zslice, unsigned flags) +pipe_surface_init(struct pipe_context *ctx, struct pipe_surface* ps, + struct pipe_resource *pt, unsigned level, unsigned layer, + unsigned flags) { ps->texture = 0; pipe_reference_init(&ps->reference, 1); - pipe_surface_reset(ps, pt, face, level, zslice, flags); + pipe_surface_reset(ctx, ps, pt, level, layer, flags); } /* @@ -177,6 +179,7 @@ pipe_buffer_create( struct pipe_screen *screen, buffer.width0 = size; buffer.height0 = 1; buffer.depth0 = 1; + buffer.array_size = 1; return screen->resource_create(screen, &buffer); } @@ -202,15 +205,15 @@ pipe_buffer_map_range(struct pipe_context *pipe, assert(offset < buffer->width0); assert(offset + length <= buffer->width0); assert(length); - + u_box_1d(offset, length, &box); *transfer = pipe->get_transfer( pipe, - buffer, - u_subresource(0, 0), - usage, - &box); - + buffer, + 0, + usage, + &box); + if (*transfer == NULL) return NULL; @@ -231,7 +234,7 @@ static INLINE void * pipe_buffer_map(struct pipe_context *pipe, struct pipe_resource *buffer, unsigned usage, - struct pipe_transfer **transfer) + struct pipe_transfer **transfer) { return pipe_buffer_map_range(pipe, buffer, 0, buffer->width0, usage, transfer); } @@ -239,8 +242,7 @@ pipe_buffer_map(struct pipe_context *pipe, static INLINE void pipe_buffer_unmap(struct pipe_context *pipe, - struct pipe_resource *buf, - struct pipe_transfer *transfer) + struct pipe_transfer *transfer) { if (transfer) { pipe->transfer_unmap(pipe, transfer); @@ -250,7 +252,7 @@ pipe_buffer_unmap(struct pipe_context *pipe, static INLINE void pipe_buffer_flush_mapped_range(struct pipe_context *pipe, - struct pipe_transfer *transfer, + struct pipe_transfer *transfer, unsigned offset, unsigned length) { @@ -266,7 +268,7 @@ pipe_buffer_flush_mapped_range(struct pipe_context *pipe, * mapped range. 
*/ transfer_offset = offset - transfer->box.x; - + u_box_1d(transfer_offset, length, &box); pipe->transfer_flush_region(pipe, transfer, &box); @@ -276,7 +278,7 @@ static INLINE void pipe_buffer_write(struct pipe_context *pipe, struct pipe_resource *buf, unsigned offset, - unsigned size, + unsigned size, const void *data) { struct pipe_box box; @@ -284,13 +286,13 @@ pipe_buffer_write(struct pipe_context *pipe, u_box_1d(offset, size, &box); pipe->transfer_inline_write( pipe, - buf, - u_subresource(0,0), - PIPE_TRANSFER_WRITE, - &box, - data, - size, - 0); + buf, + 0, + PIPE_TRANSFER_WRITE, + &box, + data, + size, + 0); } /** @@ -309,21 +311,21 @@ pipe_buffer_write_nooverlap(struct pipe_context *pipe, u_box_1d(offset, size, &box); - pipe->transfer_inline_write(pipe, - buf, - u_subresource(0,0), - (PIPE_TRANSFER_WRITE | - PIPE_TRANSFER_NOOVERWRITE), - &box, - data, - 0, 0); + pipe->transfer_inline_write(pipe, + buf, + 0, + (PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_NOOVERWRITE), + &box, + data, + 0, 0); } static INLINE void pipe_buffer_read(struct pipe_context *pipe, struct pipe_resource *buf, unsigned offset, - unsigned size, + unsigned size, void *data) { struct pipe_transfer *src_transfer; @@ -338,25 +340,24 @@ pipe_buffer_read(struct pipe_context *pipe, if (map) memcpy(data, map + offset, size); - pipe_buffer_unmap(pipe, buf, src_transfer); + pipe_buffer_unmap(pipe, src_transfer); } static INLINE struct pipe_transfer * pipe_get_transfer( struct pipe_context *context, - struct pipe_resource *resource, - unsigned face, unsigned level, - unsigned zslice, - enum pipe_transfer_usage usage, - unsigned x, unsigned y, - unsigned w, unsigned h) + struct pipe_resource *resource, + unsigned level, unsigned layer, + enum pipe_transfer_usage usage, + unsigned x, unsigned y, + unsigned w, unsigned h) { struct pipe_box box; - u_box_2d_zslice( x, y, zslice, w, h, &box ); + u_box_2d_zslice( x, y, layer, w, h, &box ); return context->get_transfer( context, - resource, - u_subresource(face, level), - usage, - &box ); + resource, + level, + usage, + &box ); } static INLINE void * @@ -376,7 +377,7 @@ pipe_transfer_unmap( struct pipe_context *context, static INLINE void pipe_transfer_destroy( struct pipe_context *context, - struct pipe_transfer *transfer ) + struct pipe_transfer *transfer ) { context->transfer_destroy(context, transfer); } @@ -399,6 +400,34 @@ static INLINE boolean util_get_offset( } } +/** + * This function is used to copy an array of pipe_vertex_buffer structures, + * while properly referencing the pipe_vertex_buffer::buffer member. + * + * \sa util_copy_framebuffer_state + */ +static INLINE void util_copy_vertex_buffers(struct pipe_vertex_buffer *dst, + unsigned *dst_count, + const struct pipe_vertex_buffer *src, + unsigned src_count) +{ + unsigned i; + + /* Reference the buffers of 'src' in 'dst'. */ + for (i = 0; i < src_count; i++) { + pipe_resource_reference(&dst[i].buffer, src[i].buffer); + } + /* Unreference the rest of the buffers in 'dst'. */ + for (; i < *dst_count; i++) { + pipe_resource_reference(&dst[i].buffer, NULL); + } + + /* Update the size of 'dst' and copy over the other members + * of pipe_vertex_buffer. 
*/ + *dst_count = src_count; + memcpy(dst, src, src_count * sizeof(struct pipe_vertex_buffer)); +} + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/util/u_resource.c b/src/gallium/auxiliary/util/u_resource.c index 9e6474b83de..443c9f8067e 100644 --- a/src/gallium/auxiliary/util/u_resource.c +++ b/src/gallium/auxiliary/util/u_resource.c @@ -10,85 +10,85 @@ u_resource( struct pipe_resource *res ) } boolean u_resource_get_handle_vtbl(struct pipe_screen *screen, - struct pipe_resource *resource, - struct winsys_handle *handle) + struct pipe_resource *resource, + struct winsys_handle *handle) { struct u_resource *ur = u_resource(resource); return ur->vtbl->resource_get_handle(screen, resource, handle); } void u_resource_destroy_vtbl(struct pipe_screen *screen, - struct pipe_resource *resource) + struct pipe_resource *resource) { struct u_resource *ur = u_resource(resource); ur->vtbl->resource_destroy(screen, resource); } unsigned u_is_resource_referenced_vtbl( struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned face, unsigned level) + struct pipe_resource *resource, + unsigned level, int layer) { struct u_resource *ur = u_resource(resource); - return ur->vtbl->is_resource_referenced(pipe, resource, face, level); + return ur->vtbl->is_resource_referenced(pipe, resource, level, layer); } struct pipe_transfer *u_get_transfer_vtbl(struct pipe_context *context, - struct pipe_resource *resource, - struct pipe_subresource sr, - enum pipe_transfer_usage usage, - const struct pipe_box *box) + struct pipe_resource *resource, + unsigned level, + enum pipe_transfer_usage usage, + const struct pipe_box *box) { struct u_resource *ur = u_resource(resource); - return ur->vtbl->get_transfer(context, resource, sr, usage, box); + return ur->vtbl->get_transfer(context, resource, level, usage, box); } void u_transfer_destroy_vtbl(struct pipe_context *pipe, - struct pipe_transfer *transfer) + struct pipe_transfer *transfer) { struct u_resource *ur = u_resource(transfer->resource); ur->vtbl->transfer_destroy(pipe, transfer); } void *u_transfer_map_vtbl( struct pipe_context *pipe, - struct pipe_transfer *transfer ) + struct pipe_transfer *transfer ) { struct u_resource *ur = u_resource(transfer->resource); return ur->vtbl->transfer_map(pipe, transfer); } void u_transfer_flush_region_vtbl( struct pipe_context *pipe, - struct pipe_transfer *transfer, - const struct pipe_box *box) + struct pipe_transfer *transfer, + const struct pipe_box *box) { struct u_resource *ur = u_resource(transfer->resource); ur->vtbl->transfer_flush_region(pipe, transfer, box); } void u_transfer_unmap_vtbl( struct pipe_context *pipe, - struct pipe_transfer *transfer ) + struct pipe_transfer *transfer ) { struct u_resource *ur = u_resource(transfer->resource); ur->vtbl->transfer_unmap(pipe, transfer); } void u_transfer_inline_write_vtbl( struct pipe_context *pipe, - struct pipe_resource *resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box, - const void *data, - unsigned stride, - unsigned slice_stride) + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) { struct u_resource *ur = u_resource(resource); - ur->vtbl->transfer_inline_write(pipe, - resource, - sr, - usage, - box, - data, - stride, - slice_stride); + ur->vtbl->transfer_inline_write(pipe, + resource, + level, + usage, + box, + data, + stride, + layer_stride); } diff --git 
a/src/gallium/auxiliary/util/u_sampler.c b/src/gallium/auxiliary/util/u_sampler.c index e77f562ea22..bb26099b7e1 100644 --- a/src/gallium/auxiliary/util/u_sampler.c +++ b/src/gallium/auxiliary/util/u_sampler.c @@ -40,8 +40,11 @@ default_template(struct pipe_sampler_view *view, */ view->format = format; - view->first_level = 0; - view->last_level = texture->last_level; + view->u.tex.first_level = 0; + view->u.tex.last_level = texture->last_level; + view->u.tex.first_layer = 0; + view->u.tex.last_layer = texture->target == PIPE_TEXTURE_3D ? + texture->depth0 - 1 : texture->array_size - 1; view->swizzle_r = PIPE_SWIZZLE_RED; view->swizzle_g = PIPE_SWIZZLE_GREEN; view->swizzle_b = PIPE_SWIZZLE_BLUE; diff --git a/src/gallium/auxiliary/util/u_simple_screen.h b/src/gallium/auxiliary/util/u_simple_screen.h index b52232f025c..7139aaabc56 100644 --- a/src/gallium/auxiliary/util/u_simple_screen.h +++ b/src/gallium/auxiliary/util/u_simple_screen.h @@ -57,7 +57,8 @@ struct pipe_winsys * displayed, eg copy fake frontbuffer. */ void (*flush_frontbuffer)( struct pipe_winsys *ws, - struct pipe_surface *surf, + struct pipe_resource *resource, + unsigned level, unsigned layer, void *context_private ); diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index 58ef68377fc..b0f2dd8aa29 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -204,7 +204,8 @@ util_make_fragment_tex_shader_writedepth(struct pipe_context *pipe, void * util_make_fragment_passthrough_shader(struct pipe_context *pipe) { - return util_make_fragment_clonecolor_shader(pipe, 1); + return util_make_fragment_cloneinput_shader(pipe, 1, TGSI_SEMANTIC_COLOR, + TGSI_INTERPOLATE_PERSPECTIVE); } @@ -212,7 +213,9 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe) * Make a fragment shader that copies the input color to N output colors. */ void * -util_make_fragment_clonecolor_shader(struct pipe_context *pipe, int num_cbufs) +util_make_fragment_cloneinput_shader(struct pipe_context *pipe, int num_cbufs, + int input_semantic, + int input_interpolate) { struct ureg_program *ureg; struct ureg_src src; @@ -225,8 +228,8 @@ util_make_fragment_clonecolor_shader(struct pipe_context *pipe, int num_cbufs) if (ureg == NULL) return NULL; - src = ureg_DECL_fs_input( ureg, TGSI_SEMANTIC_COLOR, 0, - TGSI_INTERPOLATE_PERSPECTIVE ); + src = ureg_DECL_fs_input( ureg, input_semantic, 0, + input_interpolate ); for (i = 0; i < num_cbufs; i++) dst[i] = ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, i ); diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h index 4aa34bc4757..1bfec183e34 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.h +++ b/src/gallium/auxiliary/util/u_simple_shaders.h @@ -71,7 +71,9 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe); extern void * -util_make_fragment_clonecolor_shader(struct pipe_context *pipe, int num_cbufs); +util_make_fragment_cloneinput_shader(struct pipe_context *pipe, int num_cbufs, + int input_semantic, + int input_interpolate); #ifdef __cplusplus } diff --git a/src/gallium/auxiliary/util/u_mempool.c b/src/gallium/auxiliary/util/u_slab.c index 1f336b39a1a..21bf2d735ac 100644 --- a/src/gallium/auxiliary/util/u_mempool.c +++ b/src/gallium/auxiliary/util/u_slab.c @@ -20,7 +20,7 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. 
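
To illustrate the new first_layer/last_layer defaults in u_sampler.c above, here is a small hypothetical sketch of creating a sampler view that covers every layer of a texture. It assumes the public u_sampler_view_default_template() wrapper from u_sampler.h (not shown in this hunk) around the static helper being modified:

#include <string.h>

#include "pipe/p_context.h"
#include "util/u_sampler.h"

static struct pipe_sampler_view *
create_default_view(struct pipe_context *pipe, struct pipe_resource *tex)
{
   struct pipe_sampler_view templ;

   memset(&templ, 0, sizeof(templ));
   /* fills in format, level range and, with this change, the layer range */
   u_sampler_view_default_template(&templ, tex, tex->format);

   return pipe->create_sampler_view(pipe, tex, &templ);
}
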
*/ -#include "util/u_mempool.h" +#include "util/u_slab.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -28,13 +28,13 @@ #include <stdio.h> -#define UTIL_MEMPOOL_MAGIC 0xcafe4321 +#define UTIL_SLAB_MAGIC 0xcafe4321 /* The block is either allocated memory or free space. */ -struct util_mempool_block { +struct util_slab_block { /* The header. */ /* The first next free block. */ - struct util_mempool_block *next_free; + struct util_slab_block *next_free; intptr_t magic; @@ -42,19 +42,19 @@ struct util_mempool_block { * The allocated size is always larger than this structure. */ }; -static struct util_mempool_block * -util_mempool_get_block(struct util_mempool *pool, - struct util_mempool_page *page, unsigned index) +static struct util_slab_block * +util_slab_get_block(struct util_slab_mempool *pool, + struct util_slab_page *page, unsigned index) { - return (struct util_mempool_block*) - ((uint8_t*)page + sizeof(struct util_mempool_page) + + return (struct util_slab_block*) + ((uint8_t*)page + sizeof(struct util_slab_page) + (pool->block_size * index)); } -static void util_mempool_add_new_page(struct util_mempool *pool) +static void util_slab_add_new_page(struct util_slab_mempool *pool) { - struct util_mempool_page *page; - struct util_mempool_block *block; + struct util_slab_page *page; + struct util_slab_block *block; int i; page = MALLOC(pool->page_size); @@ -62,15 +62,15 @@ static void util_mempool_add_new_page(struct util_mempool *pool) /* Mark all blocks as free. */ for (i = 0; i < pool->num_blocks-1; i++) { - block = util_mempool_get_block(pool, page, i); - block->next_free = util_mempool_get_block(pool, page, i+1); - block->magic = UTIL_MEMPOOL_MAGIC; + block = util_slab_get_block(pool, page, i); + block->next_free = util_slab_get_block(pool, page, i+1); + block->magic = UTIL_SLAB_MAGIC; } - block = util_mempool_get_block(pool, page, pool->num_blocks-1); + block = util_slab_get_block(pool, page, pool->num_blocks-1); block->next_free = pool->first_free; - block->magic = UTIL_MEMPOOL_MAGIC; - pool->first_free = util_mempool_get_block(pool, page, 0); + block->magic = UTIL_SLAB_MAGIC; + pool->first_free = util_slab_get_block(pool, page, 0); pool->num_pages++; #if 0 @@ -78,74 +78,74 @@ static void util_mempool_add_new_page(struct util_mempool *pool) #endif } -static void *util_mempool_malloc_st(struct util_mempool *pool) +static void *util_slab_alloc_st(struct util_slab_mempool *pool) { - struct util_mempool_block *block; + struct util_slab_block *block; if (!pool->first_free) - util_mempool_add_new_page(pool); + util_slab_add_new_page(pool); block = pool->first_free; - assert(block->magic == UTIL_MEMPOOL_MAGIC); + assert(block->magic == UTIL_SLAB_MAGIC); pool->first_free = block->next_free; - return (uint8_t*)block + sizeof(struct util_mempool_block); + return (uint8_t*)block + sizeof(struct util_slab_block); } -static void util_mempool_free_st(struct util_mempool *pool, void *ptr) +static void util_slab_free_st(struct util_slab_mempool *pool, void *ptr) { - struct util_mempool_block *block = - (struct util_mempool_block*) - ((uint8_t*)ptr - sizeof(struct util_mempool_block)); + struct util_slab_block *block = + (struct util_slab_block*) + ((uint8_t*)ptr - sizeof(struct util_slab_block)); - assert(block->magic == UTIL_MEMPOOL_MAGIC); + assert(block->magic == UTIL_SLAB_MAGIC); block->next_free = pool->first_free; pool->first_free = block; } -static void *util_mempool_malloc_mt(struct util_mempool *pool) +static void *util_slab_alloc_mt(struct util_slab_mempool *pool) { void *mem; 
pipe_mutex_lock(pool->mutex); - mem = util_mempool_malloc_st(pool); + mem = util_slab_alloc_st(pool); pipe_mutex_unlock(pool->mutex); return mem; } -static void util_mempool_free_mt(struct util_mempool *pool, void *ptr) +static void util_slab_free_mt(struct util_slab_mempool *pool, void *ptr) { pipe_mutex_lock(pool->mutex); - util_mempool_free_st(pool, ptr); + util_slab_free_st(pool, ptr); pipe_mutex_unlock(pool->mutex); } -void util_mempool_set_thread_safety(struct util_mempool *pool, - enum util_mempool_threading threading) +void util_slab_set_thread_safety(struct util_slab_mempool *pool, + enum util_slab_threading threading) { pool->threading = threading; if (threading) { - pool->malloc = util_mempool_malloc_mt; - pool->free = util_mempool_free_mt; + pool->alloc = util_slab_alloc_mt; + pool->free = util_slab_free_mt; } else { - pool->malloc = util_mempool_malloc_st; - pool->free = util_mempool_free_st; + pool->alloc = util_slab_alloc_st; + pool->free = util_slab_free_st; } } -void util_mempool_create(struct util_mempool *pool, - unsigned item_size, - unsigned num_blocks, - enum util_mempool_threading threading) +void util_slab_create(struct util_slab_mempool *pool, + unsigned item_size, + unsigned num_blocks, + enum util_slab_threading threading) { item_size = align(item_size, sizeof(intptr_t)); pool->num_pages = 0; pool->num_blocks = num_blocks; - pool->block_size = sizeof(struct util_mempool_block) + item_size; + pool->block_size = sizeof(struct util_slab_block) + item_size; pool->block_size = align(pool->block_size, sizeof(intptr_t)); - pool->page_size = sizeof(struct util_mempool_page) + + pool->page_size = sizeof(struct util_slab_page) + num_blocks * pool->block_size; pool->first_free = NULL; @@ -153,12 +153,12 @@ void util_mempool_create(struct util_mempool *pool, pipe_mutex_init(pool->mutex); - util_mempool_set_thread_safety(pool, threading); + util_slab_set_thread_safety(pool, threading); } -void util_mempool_destroy(struct util_mempool *pool) +void util_slab_destroy(struct util_slab_mempool *pool) { - struct util_mempool_page *page, *temp; + struct util_slab_page *page, *temp; foreach_s(page, temp, &pool->list) { remove_from_list(page); diff --git a/src/gallium/auxiliary/util/u_mempool.h b/src/gallium/auxiliary/util/u_slab.h index a5b5d6a9b7c..6b9718d08a7 100644 --- a/src/gallium/auxiliary/util/u_mempool.h +++ b/src/gallium/auxiliary/util/u_slab.h @@ -22,66 +22,66 @@ /** * @file - * Simple memory pool for equally sized memory allocations. - * util_mempool_malloc and util_mempool_free are in O(1). + * Simple slab allocator for equally sized memory allocations. + * util_slab_alloc and util_slab_free have time complexity in O(1). * * Good for allocations which have very low lifetime and are allocated - * and freed very often. Use a profiler first! + * and freed very often. Use a profiler first to know if it's worth using it! * * Candidates: get_transfer, user_buffer_create * * @author Marek Olšák */ -#ifndef U_MEMPOOL_H -#define U_MEMPOOL_H +#ifndef U_SLAB_H +#define U_SLAB_H #include "os/os_thread.h" -enum util_mempool_threading { - UTIL_MEMPOOL_SINGLETHREADED = FALSE, - UTIL_MEMPOOL_MULTITHREADED = TRUE +enum util_slab_threading { + UTIL_SLAB_SINGLETHREADED = FALSE, + UTIL_SLAB_MULTITHREADED = TRUE }; /* The page is an array of blocks (allocations). */ -struct util_mempool_page { +struct util_slab_page { /* The header (linked-list pointers). 
*/ - struct util_mempool_page *prev, *next; + struct util_slab_page *prev, *next; /* Memory after the last member is dedicated to the page itself. * The allocated size is always larger than this structure. */ }; -struct util_mempool { +struct util_slab_mempool { /* Public members. */ - void *(*malloc)(struct util_mempool *pool); - void (*free)(struct util_mempool *pool, void *ptr); + void *(*alloc)(struct util_slab_mempool *pool); + void (*free)(struct util_slab_mempool *pool, void *ptr); /* Private members. */ - struct util_mempool_block *first_free; + struct util_slab_block *first_free; - struct util_mempool_page list; + struct util_slab_page list; unsigned block_size; unsigned page_size; unsigned num_blocks; unsigned num_pages; - enum util_mempool_threading threading; + enum util_slab_threading threading; pipe_mutex mutex; }; -void util_mempool_create(struct util_mempool *pool, - unsigned item_size, - unsigned num_blocks, - enum util_mempool_threading threading); +void util_slab_create(struct util_slab_mempool *pool, + unsigned item_size, + unsigned num_blocks, + enum util_slab_threading threading); -void util_mempool_destroy(struct util_mempool *pool); +void util_slab_destroy(struct util_slab_mempool *pool); -void util_mempool_set_thread_safety(struct util_mempool *pool, - enum util_mempool_threading threading); +void util_slab_set_thread_safety(struct util_slab_mempool *pool, + enum util_slab_threading threading); -#define util_mempool_malloc(pool) (pool)->malloc(pool) -#define util_mempool_free(pool, ptr) (pool)->free(pool, ptr) +#define util_slab_alloc(pool) (pool)->alloc(pool) +#define util_slab_free(pool, ptr) (pool)->free(pool, ptr) #endif diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c index c5d68f8df86..b6bf241a22a 100644 --- a/src/gallium/auxiliary/util/u_staging.c +++ b/src/gallium/auxiliary/util/u_staging.c @@ -41,6 +41,7 @@ util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigne template->width0 = width; template->height0 = height; template->depth0 = depth; + template->array_size = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; template->bind = 0; @@ -51,7 +52,7 @@ util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigne struct util_staging_transfer * util_staging_transfer_init(struct pipe_context *pipe, struct pipe_resource *pt, - struct pipe_subresource sr, + unsigned level, unsigned usage, const struct pipe_box *box, bool direct, struct util_staging_transfer *tx) @@ -61,7 +62,7 @@ util_staging_transfer_init(struct pipe_context *pipe, struct pipe_resource staging_resource_template; pipe_resource_reference(&tx->base.resource, pt); - tx->base.sr = sr; + tx->base.level = level; tx->base.usage = usage; tx->base.box = *box; @@ -82,12 +83,20 @@ util_staging_transfer_init(struct pipe_context *pipe, if (usage & PIPE_TRANSFER_READ) { - struct pipe_subresource dstsr; + /* XXX this looks wrong dst is always the same but looping over src z? 
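
Putting the renamed slab interface above together, a hypothetical usage sketch (not part of this change): a single-threaded pool of equally sized objects, allocated and freed in O(1). The struct and sizes are made up for illustration:

#include "util/u_slab.h"

struct my_item {
   int id;
   char payload[64];
};

static void
slab_example(void)
{
   struct util_slab_mempool pool;
   struct my_item *item;

   /* each page of the pool holds 64 blocks of sizeof(struct my_item) */
   util_slab_create(&pool, sizeof(struct my_item), 64,
                    UTIL_SLAB_SINGLETHREADED);

   item = util_slab_alloc(&pool);   /* O(1) */
   item->id = 0;
   util_slab_free(&pool, item);     /* O(1) */

   util_slab_destroy(&pool);
}
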
*/ unsigned zi; - dstsr.face = 0; - dstsr.level = 0; - for(zi = 0; zi < box->depth; ++zi) - pipe->resource_copy_region(pipe, tx->staging_resource, dstsr, 0, 0, 0, tx->base.resource, sr, box->x, box->y, box->z + zi, box->width, box->height); + struct pipe_box sbox; + sbox.x = box->x; + sbox.y = box->y; + sbox.z = box->z; + sbox.width = box->width; + sbox.height = box->height; + sbox.depth = 1; + for(zi = 0; zi < box->depth; ++zi) { + sbox.z = sbox.z + zi; + pipe->resource_copy_region(pipe, tx->staging_resource, 0, 0, 0, 0, + tx->base.resource, level, &sbox); + } } return tx; @@ -101,12 +110,18 @@ util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *p if (tx->staging_resource != tx->base.resource) { if(tx->base.usage & PIPE_TRANSFER_WRITE) { - struct pipe_subresource srcsr; + /* XXX this looks wrong src is always the same but looping over dst z? */ unsigned zi; - srcsr.face = 0; - srcsr.level = 0; + struct pipe_box sbox; + sbox.x = 0; + sbox.y = 0; + sbox.z = 0; + sbox.width = tx->base.box.width; + sbox.height = tx->base.box.height; + sbox.depth = 1; for(zi = 0; zi < tx->base.box.depth; ++zi) - pipe->resource_copy_region(pipe, tx->base.resource, tx->base.sr, tx->base.box.x, tx->base.box.y, tx->base.box.z + zi, tx->staging_resource, srcsr, 0, 0, 0, tx->base.box.width, tx->base.box.height); + pipe->resource_copy_region(pipe, tx->base.resource, tx->base.level, tx->base.box.x, tx->base.box.y, tx->base.box.z + zi, + tx->staging_resource, 0, &sbox); } pipe_resource_reference(&tx->staging_resource, NULL); diff --git a/src/gallium/auxiliary/util/u_staging.h b/src/gallium/auxiliary/util/u_staging.h index 1aab78cc881..49839d25439 100644 --- a/src/gallium/auxiliary/util/u_staging.h +++ b/src/gallium/auxiliary/util/u_staging.h @@ -52,7 +52,7 @@ struct util_staging_transfer { struct util_staging_transfer * util_staging_transfer_init(struct pipe_context *pipe, struct pipe_resource *pt, - struct pipe_subresource sr, + unsigned level, unsigned usage, const struct pipe_box *box, bool direct, struct util_staging_transfer *tx); diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index f78b6838a72..4eddd3f519e 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -42,6 +42,18 @@ #include "util/u_surface.h" #include "util/u_pack_color.h" +void +u_surface_default_template(struct pipe_surface *view, + const struct pipe_resource *texture, + unsigned bind) +{ + view->format = texture->format; + view->u.tex.level = 0; + view->u.tex.first_layer = 0; + view->u.tex.last_layer = 0; + /* XXX should filter out all non-rt/ds bind flags ? */ + view->usage = bind; +} /** * Helper to quickly create an RGBA rendering surface of a certain size. 
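
The new u_surface_default_template() helper above replaces the old get_tex_surface() parameter list with a pipe_surface template. A minimal, hypothetical sketch of creating a render-target view of one mip level and layer with it (not part of this change):

#include <string.h>

#include "pipe/p_context.h"
#include "util/u_surface.h"

static struct pipe_surface *
create_rt_view(struct pipe_context *pipe, struct pipe_resource *tex,
               unsigned level, unsigned layer)
{
   struct pipe_surface templ;

   memset(&templ, 0, sizeof(templ));
   u_surface_default_template(&templ, tex, PIPE_BIND_RENDER_TARGET);
   templ.u.tex.level = level;
   templ.u.tex.first_layer = templ.u.tex.last_layer = layer;

   return pipe->create_surface(pipe, tex, &templ);
}
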
@@ -50,9 +62,9 @@ * \return TRUE for success, FALSE if failure */ boolean -util_create_rgba_surface(struct pipe_screen *screen, +util_create_rgba_surface(struct pipe_context *pipe, uint width, uint height, - uint bind, + uint bind, struct pipe_resource **textureOut, struct pipe_surface **surfaceOut) { @@ -65,6 +77,8 @@ util_create_rgba_surface(struct pipe_screen *screen, const uint target = PIPE_TEXTURE_2D; enum pipe_format format = PIPE_FORMAT_NONE; struct pipe_resource templ; + struct pipe_surface surf_templ; + struct pipe_screen *screen = pipe->screen; uint i; /* Choose surface format */ @@ -86,17 +100,20 @@ util_create_rgba_surface(struct pipe_screen *screen, templ.width0 = width; templ.height0 = height; templ.depth0 = 1; + templ.array_size = 1; templ.bind = bind; *textureOut = screen->resource_create(screen, &templ); if (!*textureOut) return FALSE; + /* create surface */ + memset(&surf_templ, 0, sizeof(surf_templ)); + u_surface_default_template(&surf_templ, *textureOut, bind); /* create surface / view into texture */ - *surfaceOut = screen->get_tex_surface(screen, - *textureOut, - 0, 0, 0, - bind); + *surfaceOut = pipe->create_surface(pipe, + *textureOut, + &surf_templ); if (!*surfaceOut) { pipe_resource_reference(textureOut, NULL); return FALSE; @@ -126,17 +143,18 @@ util_destroy_rgba_surface(struct pipe_resource *texture, void util_resource_copy_region(struct pipe_context *pipe, struct pipe_resource *dst, - struct pipe_subresource subdst, + unsigned dst_level, unsigned dst_x, unsigned dst_y, unsigned dst_z, struct pipe_resource *src, - struct pipe_subresource subsrc, - unsigned src_x, unsigned src_y, unsigned src_z, - unsigned w, unsigned h) + unsigned src_level, + const struct pipe_box *src_box) { struct pipe_transfer *src_trans, *dst_trans; void *dst_map; const void *src_map; enum pipe_format src_format, dst_format; + unsigned w = src_box->width; + unsigned h = src_box->height; assert(src && dst); if (!src || !dst) @@ -146,20 +164,18 @@ util_resource_copy_region(struct pipe_context *pipe, dst_format = dst->format; src_trans = pipe_get_transfer(pipe, - src, - subsrc.face, - subsrc.level, - src_z, - PIPE_TRANSFER_READ, - src_x, src_y, w, h); + src, + src_level, + src_box->z, + PIPE_TRANSFER_READ, + src_box->x, src_box->y, w, h); dst_trans = pipe_get_transfer(pipe, - dst, - subdst.face, - subdst.level, - src_z, - PIPE_TRANSFER_WRITE, - dst_x, dst_y, w, h); + dst, + dst_level, + dst_z, + PIPE_TRANSFER_WRITE, + dst_x, dst_y, w, h); assert(util_format_get_blocksize(dst_format) == util_format_get_blocksize(src_format)); assert(util_format_get_blockwidth(dst_format) == util_format_get_blockwidth(src_format)); @@ -216,14 +232,13 @@ util_clear_render_target(struct pipe_context *pipe, assert(dst->texture); if (!dst->texture) return; - + /* XXX: should handle multiple layers */ dst_trans = pipe_get_transfer(pipe, - dst->texture, - dst->face, - dst->level, - dst->zslice, - PIPE_TRANSFER_WRITE, - dstx, dsty, width, height); + dst->texture, + dst->u.tex.level, + dst->u.tex.first_layer, + PIPE_TRANSFER_WRITE, + dstx, dsty, width, height); dst_map = pipe->transfer_map(pipe, dst_trans); @@ -271,9 +286,8 @@ util_clear_depth_stencil(struct pipe_context *pipe, return; dst_trans = pipe_get_transfer(pipe, dst->texture, - dst->face, - dst->level, - dst->zslice, + dst->u.tex.level, + dst->u.tex.first_layer, (need_rmw ? 
PIPE_TRANSFER_READ_WRITE : PIPE_TRANSFER_WRITE), dstx, dsty, width, height); diff --git a/src/gallium/auxiliary/util/u_surface.h b/src/gallium/auxiliary/util/u_surface.h index 6cd12af3a8b..6a7cc82b055 100644 --- a/src/gallium/auxiliary/util/u_surface.h +++ b/src/gallium/auxiliary/util/u_surface.h @@ -33,8 +33,18 @@ #include "pipe/p_state.h" +#ifdef __cplusplus +extern "C" { +#endif + + +extern void +u_surface_default_template(struct pipe_surface *view, + const struct pipe_resource *texture, + unsigned bind); + extern boolean -util_create_rgba_surface(struct pipe_screen *screen, +util_create_rgba_surface(struct pipe_context *ctx, uint width, uint height, uint bind, struct pipe_resource **textureOut, struct pipe_surface **surfaceOut); @@ -49,12 +59,11 @@ util_destroy_rgba_surface(struct pipe_resource *texture, extern void util_resource_copy_region(struct pipe_context *pipe, struct pipe_resource *dst, - struct pipe_subresource subdst, + unsigned dst_level, unsigned dst_x, unsigned dst_y, unsigned dst_z, struct pipe_resource *src, - struct pipe_subresource subsrc, - unsigned src_x, unsigned src_y, unsigned src_z, - unsigned w, unsigned h); + unsigned src_level, + const struct pipe_box *src_box); extern void util_clear_render_target(struct pipe_context *pipe, @@ -73,4 +82,9 @@ util_clear_depth_stencil(struct pipe_context *pipe, unsigned width, unsigned height); +#ifdef __cplusplus +} +#endif + + #endif /* U_SURFACE_H */ diff --git a/src/gallium/auxiliary/util/u_surfaces.c b/src/gallium/auxiliary/util/u_surfaces.c index 404e1219952..b0cfec2a826 100644 --- a/src/gallium/auxiliary/util/u_surfaces.c +++ b/src/gallium/auxiliary/util/u_surfaces.c @@ -29,8 +29,11 @@ #include "util/u_inlines.h" #include "util/u_memory.h" -struct pipe_surface * -util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags) +boolean +util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, + struct pipe_context *ctx, struct pipe_resource *pt, + unsigned level, unsigned layer, unsigned flags, + struct pipe_surface **res) { struct pipe_surface *ps; @@ -39,7 +42,7 @@ util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, str if(!us->u.hash) us->u.hash = cso_hash_create(); - ps = cso_hash_iter_data(cso_hash_find(us->u.hash, ((zslice + face) << 8) | level)); + ps = cso_hash_iter_data(cso_hash_find(us->u.hash, (layer << 8) | level)); } else { @@ -48,25 +51,29 @@ util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, str ps = us->u.array[level]; } - if(ps) + if(ps && ps->context == ctx) { p_atomic_inc(&ps->reference.count); - return ps; + *res = ps; + return FALSE; } ps = (struct pipe_surface *)CALLOC(1, surface_struct_size); if(!ps) - return NULL; + { + *res = NULL; + return FALSE; + } - pipe_surface_init(ps, pt, face, level, zslice, flags); - ps->offset = ~0; + pipe_surface_init(ctx, ps, pt, level, layer, flags); if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) - cso_hash_insert(us->u.hash, ((zslice + face) << 8) | level, ps); + cso_hash_insert(us->u.hash, (layer << 8) | level, ps); else us->u.array[level] = ps; - return ps; + *res = ps; + return TRUE; } void @@ -75,10 +82,10 @@ util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps) struct pipe_resource *pt = ps->texture; if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) { /* or 2D array */ - cso_hash_erase(us->u.hash, 
cso_hash_find(us->u.hash, ((ps->zslice + ps->face) << 8) | ps->level)); + cso_hash_erase(us->u.hash, cso_hash_find(us->u.hash, (ps->u.tex.first_layer << 8) | ps->u.tex.level)); } else - us->u.array[ps->level] = 0; + us->u.array[ps->u.tex.level] = 0; } void diff --git a/src/gallium/auxiliary/util/u_surfaces.h b/src/gallium/auxiliary/util/u_surfaces.h index 17d8a5d3a5b..9581feda7c8 100644 --- a/src/gallium/auxiliary/util/u_surfaces.h +++ b/src/gallium/auxiliary/util/u_surfaces.h @@ -42,33 +42,42 @@ struct util_surfaces } u; }; -struct pipe_surface *util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags); +/* Return value indicates if the pipe surface result is new */ +boolean +util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, + struct pipe_context *ctx, struct pipe_resource *pt, + unsigned level, unsigned layer, unsigned flags, + struct pipe_surface **res); /* fast inline path for the very common case */ -static INLINE struct pipe_surface * -util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size, struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags) +static INLINE boolean +util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size, + struct pipe_context *ctx, struct pipe_resource *pt, + unsigned level, unsigned layer, unsigned flags, + struct pipe_surface **res) { if(likely((pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT) && us->u.array)) { struct pipe_surface *ps = us->u.array[level]; - if(ps) + if(ps && ps->context == ctx) { p_atomic_inc(&ps->reference.count); - return ps; + *res = ps; + return FALSE; } } - return util_surfaces_do_get(us, surface_struct_size, pscreen, pt, face, level, zslice, flags); + return util_surfaces_do_get(us, surface_struct_size, ctx, pt, level, layer, flags, res); } static INLINE struct pipe_surface * -util_surfaces_peek(struct util_surfaces *us, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice) +util_surfaces_peek(struct util_surfaces *us, struct pipe_resource *pt, unsigned level, unsigned layer) { if(!us->u.pv) return 0; if(unlikely(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)) - return cso_hash_iter_data(cso_hash_find(us->u.hash, ((zslice + face) << 8) | level)); + return cso_hash_iter_data(cso_hash_find(us->u.hash, (layer << 8) | level)); else return us->u.array[level]; } @@ -80,7 +89,7 @@ util_surfaces_detach(struct util_surfaces *us, struct pipe_surface *ps) { if(likely(ps->texture->target == PIPE_TEXTURE_2D || ps->texture->target == PIPE_TEXTURE_RECT)) { - us->u.array[ps->level] = 0; + us->u.array[ps->u.tex.level] = 0; return; } diff --git a/src/gallium/auxiliary/util/u_transfer.c b/src/gallium/auxiliary/util/u_transfer.c index 69f6fab9504..e2828cfd99e 100644 --- a/src/gallium/auxiliary/util/u_transfer.c +++ b/src/gallium/auxiliary/util/u_transfer.c @@ -8,22 +8,24 @@ * pointer. XXX: strides?? 
*/ void u_default_transfer_inline_write( struct pipe_context *pipe, - struct pipe_resource *resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box, - const void *data, - unsigned stride, - unsigned slice_stride) + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) { struct pipe_transfer *transfer = NULL; uint8_t *map = NULL; - - transfer = pipe->get_transfer(pipe, - resource, - sr, - usage, - box ); + const uint8_t *src_data = data; + unsigned i; + + transfer = pipe->get_transfer(pipe, + resource, + level, + usage, + box ); if (transfer == NULL) goto out; @@ -31,17 +33,19 @@ void u_default_transfer_inline_write( struct pipe_context *pipe, if (map == NULL) goto out; - assert(box->depth == 1); /* XXX: fix me */ - - util_copy_rect(map, - resource->format, - transfer->stride, /* bytes */ - 0, 0, - box->width, - box->height, - data, - stride, /* bytes */ - 0, 0); + for (i = 0; i < box->depth; i++) { + util_copy_rect(map, + resource->format, + transfer->stride, /* bytes */ + 0, 0, + box->width, + box->height, + src_data, + stride, /* bytes */ + 0, 0); + map += transfer->layer_stride; + src_data += layer_stride; + } out: if (map) @@ -53,8 +57,8 @@ out: boolean u_default_resource_get_handle(struct pipe_screen *screen, - struct pipe_resource *resource, - struct winsys_handle *handle) + struct pipe_resource *resource, + struct winsys_handle *handle) { return FALSE; } @@ -62,32 +66,32 @@ boolean u_default_resource_get_handle(struct pipe_screen *screen, void u_default_transfer_flush_region( struct pipe_context *pipe, - struct pipe_transfer *transfer, - const struct pipe_box *box) + struct pipe_transfer *transfer, + const struct pipe_box *box) { /* This is a no-op implementation, nothing to do. 
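
Since u_default_transfer_inline_write() above now iterates over box->depth, callers can hand it a full 3D sub-box whose source data is laid out as consecutive layers. A small hypothetical wrapper, shown only to document the expected data layout (not part of this change):

#include "pipe/p_context.h"
#include "pipe/p_state.h"

/* 'data' holds box->depth layers, each box->height rows of 'stride' bytes,
 * with consecutive layers 'layer_stride' bytes apart. */
static void
write_3d_box(struct pipe_context *pipe, struct pipe_resource *tex,
             unsigned level, const struct pipe_box *box,
             const void *data, unsigned stride, unsigned layer_stride)
{
   pipe->transfer_inline_write(pipe, tex, level, PIPE_TRANSFER_WRITE,
                               box, data, stride, layer_stride);
}
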
*/ } unsigned u_default_is_resource_referenced( struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned face, unsigned level) + struct pipe_resource *resource, + unsigned level, int layer) { return 0; } struct pipe_transfer * u_default_get_transfer(struct pipe_context *context, - struct pipe_resource *resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box) + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box) { struct pipe_transfer *transfer = CALLOC_STRUCT(pipe_transfer); if (transfer == NULL) return NULL; transfer->resource = resource; - transfer->sr = sr; + transfer->level = level; transfer->usage = usage; transfer->box = *box; @@ -98,12 +102,12 @@ struct pipe_transfer * u_default_get_transfer(struct pipe_context *context, } void u_default_transfer_unmap( struct pipe_context *pipe, - struct pipe_transfer *transfer ) + struct pipe_transfer *transfer ) { } void u_default_transfer_destroy(struct pipe_context *pipe, - struct pipe_transfer *transfer) + struct pipe_transfer *transfer) { FREE(transfer); } diff --git a/src/gallium/auxiliary/util/u_transfer.h b/src/gallium/auxiliary/util/u_transfer.h index e3a38730f21..3412e13c3cc 100644 --- a/src/gallium/auxiliary/util/u_transfer.h +++ b/src/gallium/auxiliary/util/u_transfer.h @@ -11,37 +11,37 @@ struct pipe_context; struct winsys_handle; boolean u_default_resource_get_handle(struct pipe_screen *screen, - struct pipe_resource *resource, - struct winsys_handle *handle); + struct pipe_resource *resource, + struct winsys_handle *handle); void u_default_transfer_inline_write( struct pipe_context *pipe, - struct pipe_resource *resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box, - const void *data, - unsigned stride, - unsigned slice_stride); + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride); void u_default_transfer_flush_region( struct pipe_context *pipe, - struct pipe_transfer *transfer, - const struct pipe_box *box); + struct pipe_transfer *transfer, + const struct pipe_box *box); unsigned u_default_is_resource_referenced( struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned face, unsigned level); + struct pipe_resource *resource, + unsigned level, int layer); struct pipe_transfer * u_default_get_transfer(struct pipe_context *context, - struct pipe_resource *resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box); + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box); void u_default_transfer_unmap( struct pipe_context *pipe, - struct pipe_transfer *transfer ); + struct pipe_transfer *transfer ); void u_default_transfer_destroy(struct pipe_context *pipe, - struct pipe_transfer *transfer); + struct pipe_transfer *transfer); @@ -51,43 +51,43 @@ void u_default_transfer_destroy(struct pipe_context *pipe, struct u_resource_vtbl { boolean (*resource_get_handle)(struct pipe_screen *, - struct pipe_resource *tex, - struct winsys_handle *handle); + struct pipe_resource *tex, + struct winsys_handle *handle); void (*resource_destroy)(struct pipe_screen *, - struct pipe_resource *pt); + struct pipe_resource *pt); unsigned (*is_resource_referenced)(struct pipe_context *pipe, - struct pipe_resource *texture, - unsigned face, unsigned level); + struct pipe_resource *texture, + unsigned level, int layer); struct pipe_transfer 
*(*get_transfer)(struct pipe_context *, - struct pipe_resource *resource, - struct pipe_subresource, - unsigned usage, - const struct pipe_box *); + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *); void (*transfer_destroy)(struct pipe_context *, - struct pipe_transfer *); + struct pipe_transfer *); void *(*transfer_map)( struct pipe_context *, - struct pipe_transfer *transfer ); + struct pipe_transfer *transfer ); void (*transfer_flush_region)( struct pipe_context *, - struct pipe_transfer *transfer, - const struct pipe_box *); + struct pipe_transfer *transfer, + const struct pipe_box *); void (*transfer_unmap)( struct pipe_context *, - struct pipe_transfer *transfer ); + struct pipe_transfer *transfer ); void (*transfer_inline_write)( struct pipe_context *pipe, - struct pipe_resource *resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box, - const void *data, - unsigned stride, - unsigned slice_stride); + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride); }; @@ -98,43 +98,43 @@ struct u_resource { boolean u_resource_get_handle_vtbl(struct pipe_screen *screen, - struct pipe_resource *resource, - struct winsys_handle *handle); + struct pipe_resource *resource, + struct winsys_handle *handle); void u_resource_destroy_vtbl(struct pipe_screen *screen, - struct pipe_resource *resource); + struct pipe_resource *resource); unsigned u_is_resource_referenced_vtbl( struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned face, unsigned level); + struct pipe_resource *resource, + unsigned level, int layer); struct pipe_transfer *u_get_transfer_vtbl(struct pipe_context *context, - struct pipe_resource *resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box); + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box); void u_transfer_destroy_vtbl(struct pipe_context *pipe, - struct pipe_transfer *transfer); + struct pipe_transfer *transfer); void *u_transfer_map_vtbl( struct pipe_context *pipe, - struct pipe_transfer *transfer ); + struct pipe_transfer *transfer ); void u_transfer_flush_region_vtbl( struct pipe_context *pipe, - struct pipe_transfer *transfer, - const struct pipe_box *box); + struct pipe_transfer *transfer, + const struct pipe_box *box); void u_transfer_unmap_vtbl( struct pipe_context *rm_ctx, - struct pipe_transfer *transfer ); + struct pipe_transfer *transfer ); void u_transfer_inline_write_vtbl( struct pipe_context *rm_ctx, - struct pipe_resource *resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box, - const void *data, - unsigned stride, - unsigned slice_stride); + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride); diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c index af229e61a00..3b3d5b418fe 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -41,22 +41,23 @@ struct u_upload_mgr { struct pipe_context *pipe; - unsigned default_size; - unsigned alignment; - unsigned usage; - - /* The active buffer: - */ - struct pipe_resource *buffer; - unsigned size; - unsigned offset; + unsigned default_size; /* Minimum size of the upload buffer, in bytes. 
*/ + unsigned alignment; /* Alignment of each sub-allocation. */ + unsigned bind; /* Bitmask of PIPE_BIND_* flags. */ + + struct pipe_resource *buffer; /* Upload buffer. */ + struct pipe_transfer *transfer; /* Transfer object for the upload buffer. */ + uint8_t *map; /* Pointer to the mapped upload buffer. */ + unsigned size; /* Actual size of the upload buffer. */ + unsigned offset; /* Aligned offset to the upload buffer, pointing + * at the first unused byte. */ }; struct u_upload_mgr *u_upload_create( struct pipe_context *pipe, unsigned default_size, unsigned alignment, - unsigned usage ) + unsigned bind ) { struct u_upload_mgr *upload = CALLOC_STRUCT( u_upload_mgr ); if (!upload) @@ -65,54 +66,12 @@ struct u_upload_mgr *u_upload_create( struct pipe_context *pipe, upload->pipe = pipe; upload->default_size = default_size; upload->alignment = alignment; - upload->usage = usage; + upload->bind = bind; upload->buffer = NULL; return upload; } -/* Slightly specialized version of buffer_write designed to maximize - * chances of the driver consolidating successive writes into a single - * upload. - * - * dirty_size may be slightly greater than size to cope with - * alignment. We don't want to leave holes between succesively mapped - * regions as that may prevent the driver from consolidating uploads. - * - * Note that the 'data' pointer has probably come from the application - * and we cannot read even a byte past its end without risking - * segfaults, or at least complaints from valgrind.. - */ -static INLINE enum pipe_error -my_buffer_write(struct pipe_context *pipe, - struct pipe_resource *buf, - unsigned offset, unsigned size, unsigned dirty_size, - const void *data) -{ - struct pipe_transfer *transfer = NULL; - uint8_t *map; - - assert(offset < buf->width0); - assert(offset + size <= buf->width0); - assert(dirty_size >= size); - assert(size); - - map = pipe_buffer_map_range(pipe, buf, offset, dirty_size, - PIPE_TRANSFER_WRITE | - PIPE_TRANSFER_FLUSH_EXPLICIT | - PIPE_TRANSFER_DISCARD | - PIPE_TRANSFER_UNSYNCHRONIZED, - &transfer); - if (map == NULL) - return PIPE_ERROR_OUT_OF_MEMORY; - - memcpy(map + offset, data, size); - pipe_buffer_flush_mapped_range(pipe, transfer, offset, dirty_size); - pipe_buffer_unmap(pipe, buf, transfer); - - return PIPE_OK; -} - /* Release old buffer. * * This must usually be called prior to firing the command stream @@ -124,6 +83,11 @@ my_buffer_write(struct pipe_context *pipe, */ void u_upload_flush( struct u_upload_mgr *upload ) { + /* Unmap and unreference the upload buffer. */ + if (upload->transfer) { + pipe_transfer_unmap(upload->pipe, upload->transfer); + upload->transfer = NULL; + } pipe_resource_reference( &upload->buffer, NULL ); upload->size = 0; } @@ -142,7 +106,7 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload, { unsigned size; - /* Release old buffer, if present: + /* Release the old buffer, if present: */ u_upload_flush( upload ); @@ -151,10 +115,14 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload, size = align(MAX2(upload->default_size, min_size), 4096); upload->buffer = pipe_buffer_create( upload->pipe->screen, - upload->usage, + upload->bind, size ); if (upload->buffer == NULL) goto fail; + + /* Map the new buffer. 
*/ + upload->map = pipe_buffer_map(upload->pipe, upload->buffer, + PIPE_TRANSFER_WRITE, &upload->transfer); upload->size = size; @@ -168,38 +136,62 @@ fail: return PIPE_ERROR_OUT_OF_MEMORY; } - -enum pipe_error u_upload_data( struct u_upload_mgr *upload, - unsigned size, - const void *data, - unsigned *out_offset, - struct pipe_resource **outbuf ) +enum pipe_error u_upload_alloc( struct u_upload_mgr *upload, + unsigned min_out_offset, + unsigned size, + unsigned *out_offset, + struct pipe_resource **outbuf, + boolean *flushed, + void **ptr ) { unsigned alloc_size = align( size, upload->alignment ); - enum pipe_error ret = PIPE_OK; + unsigned alloc_offset = align(min_out_offset, upload->alignment); + unsigned offset; - if (upload->offset + alloc_size > upload->size) { - ret = u_upload_alloc_buffer( upload, alloc_size ); + /* Make sure we have enough space in the upload buffer + * for the sub-allocation. */ + if (MAX2(upload->offset, alloc_offset) + alloc_size > upload->size) { + enum pipe_error ret = u_upload_alloc_buffer(upload, + alloc_offset + alloc_size); if (ret) return ret; + + *flushed = TRUE; + } else { + *flushed = FALSE; } - /* Copy the data, using map_range if available: - */ - ret = my_buffer_write( upload->pipe, - upload->buffer, - upload->offset, - size, - alloc_size, - data ); + offset = MAX2(upload->offset, alloc_offset); + + assert(offset < upload->buffer->width0); + assert(offset + size <= upload->buffer->width0); + assert(size); + + /* Emit the return values: */ + *ptr = upload->map + offset; + pipe_resource_reference( outbuf, upload->buffer ); + *out_offset = offset; + + upload->offset = offset + alloc_size; + return PIPE_OK; +} + +enum pipe_error u_upload_data( struct u_upload_mgr *upload, + unsigned min_out_offset, + unsigned size, + const void *data, + unsigned *out_offset, + struct pipe_resource **outbuf, + boolean *flushed ) +{ + uint8_t *ptr; + enum pipe_error ret = u_upload_alloc(upload, min_out_offset, size, + out_offset, outbuf, flushed, + (void**)&ptr); if (ret) return ret; - /* Emit the return values: - */ - pipe_resource_reference( outbuf, upload->buffer ); - *out_offset = upload->offset; - upload->offset += alloc_size; + memcpy(ptr, data, size); return PIPE_OK; } @@ -210,11 +202,13 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload, * renders or DrawElements calls. */ enum pipe_error u_upload_buffer( struct u_upload_mgr *upload, + unsigned min_out_offset, unsigned offset, unsigned size, struct pipe_resource *inbuf, unsigned *out_offset, - struct pipe_resource **outbuf ) + struct pipe_resource **outbuf, + boolean *flushed ) { enum pipe_error ret = PIPE_OK; struct pipe_transfer *transfer = NULL; @@ -233,17 +227,16 @@ enum pipe_error u_upload_buffer( struct u_upload_mgr *upload, if (0) debug_printf("upload ptr %p ofs %d sz %d\n", map, offset, size); - ret = u_upload_data( upload, + ret = u_upload_data( upload, + min_out_offset, size, map + offset, out_offset, - outbuf ); - if (ret) - goto done; + outbuf, flushed ); done: if (map) - pipe_buffer_unmap( upload->pipe, inbuf, transfer ); + pipe_buffer_unmap( upload->pipe, transfer ); return ret; } diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h index de016df02e0..c9a2ffeb572 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.h +++ b/src/gallium/auxiliary/util/u_upload_mgr.h @@ -32,15 +32,28 @@ #ifndef U_UPLOAD_MGR_H #define U_UPLOAD_MGR_H +#include "pipe/p_compiler.h" + struct pipe_context; struct pipe_resource; +/** + * Create the upload manager. 
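A minimal sketch of how a driver is expected to use the reworked upload path above, assuming an existing manager from u_upload_create(); the wrapper name and variables below are invented for illustration and are not part of this change:

    /* Upload user vertex data through the persistently mapped upload buffer. */
    static enum pipe_error
    upload_user_vertices(struct u_upload_mgr *upload,
                         const void *verts, unsigned size,
                         unsigned *out_offset, struct pipe_resource **out_buf)
    {
       boolean flushed;
       /* min_out_offset = 0: no constraint on where the sub-allocation starts. */
       enum pipe_error ret = u_upload_data(upload, 0, size, verts,
                                           out_offset, out_buf, &flushed);
       if (ret != PIPE_OK)
          return ret;
       /* If 'flushed' is TRUE the manager replaced its upload buffer, so any
        * driver state caching the previous buffer/offset pair should be
        * revalidated before the next draw. */
       return PIPE_OK;
    }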
+ * + * \param pipe Pipe driver. + * \param default_size Minimum size of the upload buffer, in bytes. + * \param alignment Alignment of each suballocation in the upload buffer. + * \param bind Bitmask of PIPE_BIND_* flags. + */ struct u_upload_mgr *u_upload_create( struct pipe_context *pipe, unsigned default_size, unsigned alignment, - unsigned usage ); + unsigned bind ); +/** + * Destroy the upload manager. + */ void u_upload_destroy( struct u_upload_mgr *upload ); /* Unmap and release old buffer. @@ -53,20 +66,55 @@ void u_upload_destroy( struct u_upload_mgr *upload ); */ void u_upload_flush( struct u_upload_mgr *upload ); +/** + * Sub-allocate new memory from the upload buffer. + * + * \param upload Upload manager + * \param min_out_offset Minimum offset that should be returned in out_offset. + * \param size Size of the allocation. + * \param out_offset Pointer to where the new buffer offset will be returned. + * \param outbuf Pointer to where the upload buffer will be returned. + * \param flushed Whether the upload buffer was flushed. + * \param ptr Pointer to the allocated memory that is returned. + */ +enum pipe_error u_upload_alloc( struct u_upload_mgr *upload, + unsigned min_out_offset, + unsigned size, + unsigned *out_offset, + struct pipe_resource **outbuf, + boolean *flushed, + void **ptr ); + +/** + * Allocate and write data to the upload buffer. + * + * Same as u_upload_alloc, but in addition to that, it copies "data" + * to the pointer returned from u_upload_alloc. + */ enum pipe_error u_upload_data( struct u_upload_mgr *upload, + unsigned min_out_offset, unsigned size, const void *data, unsigned *out_offset, - struct pipe_resource **outbuf ); + struct pipe_resource **outbuf, + boolean *flushed ); +/** + * Allocate and copy an input buffer to the upload buffer. + * + * Same as u_upload_data, except that the input data comes from a buffer + * instead of a user pointer. + */ enum pipe_error u_upload_buffer( struct u_upload_mgr *upload, + unsigned min_out_offset, unsigned offset, unsigned size, struct pipe_resource *inbuf, unsigned *out_offset, - struct pipe_resource **outbuf ); + struct pipe_resource **outbuf, + boolean *flushed ); diff --git a/src/gallium/docs/d3d11ddi.txt b/src/gallium/docs/d3d11ddi.txt index f8155c828b1..11e77190895 100644 --- a/src/gallium/docs/d3d11ddi.txt +++ b/src/gallium/docs/d3d11ddi.txt @@ -162,8 +162,8 @@ CreateDepthStencilState -> create_depth_stencil_alpha_state + Gallium has per-face writemask/valuemasks, D3D11 uses the same value for back and front + Gallium supports the alpha test, which D3D11 lacks -CreateDepthStencilView -> get_tex_surface -CreateRenderTargetView -> get_tex_surface +CreateDepthStencilView -> create_surface +CreateRenderTargetView -> create_surface ! Gallium merges depthstencil and rendertarget views into pipe_surface, which also doubles as a 2D surface abstraction - lack of texture array support - lack of render-to-buffer support @@ -221,7 +221,6 @@ CreateResource -> texture_create or buffer_create ! D3D11 specifies mapping flags (i.e. read/write/discard);:it's unclear what they are used for here - D3D11 supports odd things in the D3D10_DDI_RESOURCE_MISC_FLAG enum (D3D10_DDI_RESOURCE_MISC_DISCARD_ON_PRESENT, D3D11_DDI_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS, D3D11_DDI_RESOURCE_MISC_BUFFER_STRUCTURED) - Gallium does not support indirect draw call parameter buffers - - Gallium lacks array textures ! 
D3D11 supports specifying hardware modes and other stuff here for scanout resources + Gallium allows specifying minimum buffer alignment ! D3D11 implements cube maps as 2D array textures diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index e09a1304c4d..c33cf7c5738 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -84,7 +84,14 @@ in the result register. For example, ``swizzle_r`` specifies what is going to be placed in first component of result register. The ``first_level`` and ``last_level`` fields of sampler view template specify -the LOD range the texture is going to be constrained to. +the LOD range the texture is going to be constrained to. Note that these +values are in addition to the respective min_lod, max_lod values in the +pipe_sampler_state (that is if min_lod is 2.0, and first_level 3, the first mip +level used for sampling from the resource is effectively the fifth). + +The ``first_layer`` and ``last_layer`` fields specify the layer range the +texture is going to be constrained to. Similar to the LOD range, this is added +to the array index which is used for sampling. * ``set_fragment_sampler_views`` binds an array of sampler views to fragment shader stage. Every binding point acquires a reference @@ -103,6 +110,22 @@ the LOD range the texture is going to be constrained to. * ``sampler_view_destroy`` destroys a sampler view and releases its reference to associated texture. +Surfaces +^^^^^^^^ + +These are the means to use resources as color render targets or depthstencil +attachments. To create one, specify the mip level, the range of layers, and +the bind flags (either PIPE_BIND_DEPTH_STENCIL or PIPE_BIND_RENDER_TARGET). +Note that layer values are in addition to what is indicated by the geometry +shader output variable XXX_FIXME (that is if first_layer is 3 and geometry +shader indicates index 2, the 5th layer of the resource will be used). These +first_layer and last_layer parameters will only be used for 1d array, 2d array, +cube, and 3d textures otherwise they are 0. + +* ``create_surface`` creates a new surface. + +* ``surface_destroy`` destroys a surface and releases its reference to the + associated resource. Clearing ^^^^^^^^ @@ -118,8 +141,7 @@ used by GL), and always clears the whole surfaces (no scissoring as used by GL clear or explicit rectangles like d3d9 uses). It can, however, also clear only depth or stencil in a combined depth/stencil surface, if the driver supports PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE. -If a surface includes several layers/slices (XXX: not yet...) then all layers -will be cleared. +If a surface includes several layers then all layers will be cleared. ``clear_render_target`` clears a single color rendertarget with the specified color value. While it is only possible to clear one surface at a time (which can @@ -271,12 +293,12 @@ These methods operate directly on ``pipe_resource`` objects, and stand apart from any 3D state in the context. Blitting functionality may be moved to a separate abstraction at some point in the future. -``resource_copy_region`` blits a region of a subresource of a resource to a -region of another subresource of a resource, provided that both resources have -the same format, or compatible formats, i.e., formats for which copying the -bytes from the source resource unmodified to the destination resource will -achieve the same effect of a textured quad blitter. 
The source and destination -may be the same resource, but overlapping blits are not permitted. +``resource_copy_region`` blits a region of a resource to a region of another +resource, provided that both resources have the same format, or compatible +formats, i.e., formats for which copying the bytes from the source resource +unmodified to the destination resource will achieve the same effect of a +textured quad blitter. The source and destination may be the same resource, +but overlapping blits are not permitted. ``resource_resolve`` resolves a multisampled resource into a non-multisampled one. Formats and dimensions must match. This function must be present if a driver diff --git a/src/gallium/docs/source/glossary.rst b/src/gallium/docs/source/glossary.rst index acde56eafc4..c749d0c9556 100644 --- a/src/gallium/docs/source/glossary.rst +++ b/src/gallium/docs/source/glossary.rst @@ -22,6 +22,14 @@ Glossary Level of Detail. Also spelled "LoD." The value that determines when the switches between mipmaps occur during texture sampling. + layer + This term is used as the name of the "3rd coordinate" of a resource. + 3D textures have zslices, cube maps have faces, 1D and 2D array textures + have array members (other resources do not have multiple layers). + Since the functions only take one parameter no matter what type of + resource is used, use the term "layer" instead of a resource type + specific one. + GLSL GL Shading Language. The official, common high-level shader language used in GL 2.0 and above. diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index a62520061ea..ab90097add6 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -259,25 +259,33 @@ resource_create Create a new resource from a template. The following fields of the pipe_resource must be specified in the template: -target +**target** one of the pipe_texture_target enums. +Note that PIPE_BUFFER and PIPE_TEXTURE_X are not really fundamentally different. +Modern APIs allow using buffers as shader resources. -format +**format** one of the pipe_format enums. -width0 +**width0** the width of the base mip level of the texture or size of the buffer. -height0 +**height0** the height of the base mip level of the texture +(1 for 1D or 1D array textures). -depth0 +**depth0** the depth of the base mip level of the texture +(1 for everything else). -last_level +**array_size** the array size for 1D and 2D array textures. +For cube maps this must be 6, for other textures 1. -nr_samples +**last_level** the last mip map level present. -usage +**nr_samples** the nr of msaa samples. 0 (or 1) specifies a resource +which isn't multisampled. -bind +**usage** one of the PIPE_USAGE flags. -flags +**bind** bitmask of the PIPE_BIND flags. + +**flags** bitmask of PIPE_RESOURCE_FLAG flags. diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index d99ed7c6d6b..d986e6601e6 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -577,17 +577,45 @@ This instruction replicates its result. .. opcode:: TEX - Texture Lookup - TBD +.. math:: + + coord = src0 + + bias = 0.0 + + dst = texture_sample(unit, coord, bias) .. opcode:: TXD - Texture Lookup with Derivatives - TBD +.. math:: + + coord = src0 + + ddx = src1 + + ddy = src2 + + bias = 0.0 + + dst = texture_sample_deriv(unit, coord, bias, ddx, ddy) .. opcode:: TXP - Projective Texture Lookup - TBD +..
math:: + + coord.x = src0.x / src.w + + coord.y = src0.y / src.w + + coord.z = src0.z / src.w + + coord.w = src0.w + + bias = 0.0 + + dst = texture_sample(unit, coord, bias) .. opcode:: UP2H - Unpack Two 16-Bit Floats @@ -729,7 +757,19 @@ This instruction replicates its result. .. opcode:: TXB - Texture Lookup With Bias - TBD +.. math:: + + coord.x = src.x + + coord.y = src.y + + coord.z = src.z + + coord.w = 1.0 + + bias = src.z + + dst = texture_sample(unit, coord, bias) .. opcode:: NRM - 3-component Vector Normalise @@ -767,9 +807,21 @@ This instruction replicates its result. dst = src0.x \times src1.x + src0.y \times src1.y -.. opcode:: TXL - Texture Lookup With LOD +.. opcode:: TXL - Texture Lookup With explicit LOD - TBD +.. math:: + + coord.x = src0.x + + coord.y = src0.y + + coord.z = src0.z + + coord.w = 1.0 + + lod = src0.w + + dst = texture_sample(unit, coord, lod) .. opcode:: BRK - Break @@ -1464,6 +1516,11 @@ GL_ARB_fragment_coord_conventions extension. DirectX 9 uses INTEGER. DirectX 10 uses HALF_INTEGER. +FS_COLOR0_WRITES_ALL_CBUFS +"""""""""""""""""""""""""" +Specifies that writes to the fragment shader color 0 are replicated to all +bound cbufs. This facilitates OpenGL's fragColor output vs fragData[0] where +fragData is directed to a single color buffer, but fragColor is broadcast. Texture Sampling and Texture Formats diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 143eca848f1..f9b83c8666c 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -37,6 +37,7 @@ #include "pipe/p_format.h" #include "util/u_memory.h" #include "pipe/p_screen.h" +#include "util/u_inlines.h" #include "draw/draw_context.h" #include "draw/draw_private.h" @@ -61,6 +62,11 @@ static void cell_destroy_context( struct pipe_context *pipe ) { struct cell_context *cell = cell_context(pipe); + unsigned i; + + for (i = 0; i < cell->num_vertex_buffers; i++) { + pipe_resource_reference(&cell->vertex_buffer[i].buffer, NULL); + } util_delete_keymap(cell->fragment_ops_cache, NULL); @@ -100,8 +106,8 @@ static const struct debug_named_value cell_debug_flags[] = { static unsigned int cell_is_resource_referenced( struct pipe_context *pipe, - struct pipe_resource *texture, - unsigned face, unsigned level) + struct pipe_resource *texture, + unsigned level, int layer) { /** * FIXME: Optimize. 
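The cell and galahad hunks that follow switch from screen->get_tex_surface() to the per-context create_surface()/surface_destroy() pair driven by a pipe_surface template. A minimal sketch of that calling pattern, assuming the usual gallium headers; the helper name, the render-target bind flag and the single-layer choice are illustrative only:

    /* View one mip level / one layer of a resource for rendering. "layer"
     * stands in for the old face/zslice pair: a cube face, a 3D z-slice or
     * an array member, depending on the resource target. */
    static struct pipe_surface *
    create_layer_surface(struct pipe_context *pipe,
                         struct pipe_resource *resource,
                         unsigned level, unsigned layer)
    {
       struct pipe_surface surf_tmpl;

       memset(&surf_tmpl, 0, sizeof(surf_tmpl));
       surf_tmpl.format = resource->format;
       surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; /* or PIPE_BIND_DEPTH_STENCIL */
       surf_tmpl.u.tex.level = level;
       surf_tmpl.u.tex.first_layer = layer;
       surf_tmpl.u.tex.last_layer = layer;

       /* Surfaces are context objects now; release with pipe->surface_destroy(). */
       return pipe->create_surface(pipe, resource, &surf_tmpl);
    }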
diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index a065d68b5a6..eb22a09a913 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -82,8 +82,9 @@ cell_set_vertex_buffers(struct pipe_context *pipe, assert(count <= PIPE_MAX_ATTRIBS); - memcpy(cell->vertex_buffer, buffers, count * sizeof(buffers[0])); - cell->num_vertex_buffers = count; + util_copy_vertex_buffers(cell->vertex_buffer, + &cell->num_vertex_buffers, + buffers, count); cell->dirty |= CELL_NEW_VERTEX; diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index b3042df7792..946a7050e5f 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -304,47 +304,34 @@ untwiddle_image_uint(uint w, uint h, uint tile_size, uint *dst, static struct pipe_surface * -cell_get_tex_surface(struct pipe_screen *screen, - struct pipe_resource *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned usage) +cell_create_surface(struct pipe_context *ctx, + struct pipe_resource *pt, + const struct pipe_surface *surf_tmpl) { struct cell_resource *ct = cell_resource(pt); struct pipe_surface *ps; + assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); ps = CALLOC_STRUCT(pipe_surface); if (ps) { pipe_reference_init(&ps->reference, 1); pipe_resource_reference(&ps->texture, pt); - ps->format = pt->format; - ps->width = u_minify(pt->width0, level); - ps->height = u_minify(pt->height0, level); - ps->offset = ct->level_offset[level]; + ps->format = surf_tmpl->format; + ps->context = ctx; + ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level); + ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level); /* XXX may need to override usage flags (see sp_texture.c) */ - ps->usage = usage; - ps->face = face; - ps->level = level; - ps->zslice = zslice; - - if (pt->target == PIPE_TEXTURE_CUBE) { - unsigned h_tile = align(ps->height, TILE_SIZE); - ps->offset += face * util_format_get_nblocksy(ps->format, h_tile) * ct->stride[level]; - } - else if (pt->target == PIPE_TEXTURE_3D) { - unsigned h_tile = align(ps->height, TILE_SIZE); - ps->offset += zslice * util_format_get_nblocksy(ps->format, h_tile) * ct->stride[level]; - } - else { - assert(face == 0); - assert(zslice == 0); - } + ps->usage = surf_tmpl->usage; + ps->u.tex.level = surf_tmpl->u.tex.level; + ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer; } return ps; } static void -cell_tex_surface_destroy(struct pipe_surface *surf) +cell_surface_destroy(struct pipe_context *ctx, struct pipe_surface *surf) { pipe_resource_reference(&surf->texture, NULL); FREE(surf); @@ -358,44 +345,39 @@ cell_tex_surface_destroy(struct pipe_surface *surf) */ static struct pipe_transfer * cell_get_transfer(struct pipe_context *ctx, - struct pipe_resource *resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box) + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box) { struct cell_resource *ct = cell_resource(resource); struct cell_transfer *ctrans; enum pipe_format format = resource->format; assert(resource); - assert(sr.level <= resource->last_level); + assert(level <= resource->last_level); /* make sure the requested region is in the image bounds */ - assert(box->x + box->width <= u_minify(resource->width0, sr.level)); - assert(box->y + box->height <= 
u_minify(resource->height0, sr.level)); - assert(box->z + box->depth <= u_minify(resource->depth0, sr.level)); + assert(box->x + box->width <= u_minify(resource->width0, level)); + assert(box->y + box->height <= u_minify(resource->height0, level)); + assert(box->z + box->depth <= (u_minify(resource->depth0, level) + resource->array_size - 1)); ctrans = CALLOC_STRUCT(cell_transfer); if (ctrans) { struct pipe_transfer *pt = &ctrans->base; pipe_resource_reference(&pt->resource, resource); - pt->sr = sr; + pt->level = level; pt->usage = usage; pt->box = *box; - pt->stride = ct->stride[sr.level]; + pt->stride = ct->stride[level]; - ctrans->offset = ct->level_offset[sr.level]; + ctrans->offset = ct->level_offset[level]; - if (resource->target == PIPE_TEXTURE_CUBE) { - unsigned h_tile = align(u_minify(resource->height0, sr.level), TILE_SIZE); - ctrans->offset += sr.face * util_format_get_nblocksy(format, h_tile) * pt->stride; - } - else if (resource->target == PIPE_TEXTURE_3D) { - unsigned h_tile = align(u_minify(resource->height0, sr.level), TILE_SIZE); + if (resource->target == PIPE_TEXTURE_CUBE || resource->target == PIPE_TEXTURE_3D) { + unsigned h_tile = align(u_minify(resource->height0, level), TILE_SIZE); ctrans->offset += box->z * util_format_get_nblocksy(format, h_tile) * pt->stride; } else { - assert(sr.face == 0); assert(box->z == 0); } @@ -439,7 +421,7 @@ cell_transfer_map(struct pipe_context *ctx, struct pipe_transfer *transfer) /* Better test would be resource->is_linear */ if (transfer->resource->target != PIPE_BUFFER) { - const uint level = ctrans->base.sr.level; + const uint level = ctrans->base.level; const uint texWidth = u_minify(pt->width0, level); const uint texHeight = u_minify(pt->height0, level); unsigned size; @@ -500,7 +482,7 @@ cell_transfer_unmap(struct pipe_context *ctx, struct cell_transfer *ctrans = cell_transfer(transfer); struct pipe_resource *pt = transfer->resource; struct cell_resource *ct = cell_resource(pt); - const uint level = ctrans->base.sr.level; + const uint level = ctrans->base.level; const uint texWidth = u_minify(pt->width0, level); const uint texHeight = u_minify(pt->height0, level); const uint stride = ct->stride[level]; @@ -548,12 +530,13 @@ cell_transfer_unmap(struct pipe_context *ctx, */ static void cell_flush_frontbuffer(struct pipe_screen *_screen, - struct pipe_surface *surface, + struct pipe_resource *resource, + unsigned level, unsigned layer, void *context_private) { struct cell_screen *screen = cell_screen(_screen); struct sw_winsys *winsys = screen->winsys; - struct cell_resource *ct = cell_resource(surface->texture); + struct cell_resource *ct = cell_resource(resource); if (!ct->dt) return; @@ -564,10 +547,10 @@ cell_flush_frontbuffer(struct pipe_screen *_screen, unsigned *map = winsys->displaytarget_map(winsys, ct->dt, (PIPE_TRANSFER_READ | PIPE_TRANSFER_WRITE)); - unsigned *src = (unsigned *)(ct->data + ct->level_offset[surface->level]); + unsigned *src = (unsigned *)(ct->data + ct->level_offset[level]); - untwiddle_image_uint(surface->width, - surface->height, + untwiddle_image_uint(u_minify(resource->width0, level), + u_minify(resource->height0, level), TILE_SIZE, map, ct->dt_stride, @@ -605,6 +588,7 @@ cell_user_buffer_create(struct pipe_screen *screen, buffer->base.width0 = bytes; buffer->base.height0 = 1; buffer->base.depth0 = 1; + buffer->base.array_size = 1; buffer->userBuffer = TRUE; buffer->data = ptr; @@ -641,9 +625,6 @@ cell_init_screen_texture_funcs(struct pipe_screen *screen) screen->resource_get_handle = 
cell_resource_get_handle; screen->user_buffer_create = cell_user_buffer_create; - screen->get_tex_surface = cell_get_tex_surface; - screen->tex_surface_destroy = cell_tex_surface_destroy; - screen->flush_frontbuffer = cell_flush_frontbuffer; } @@ -657,4 +638,7 @@ cell_init_texture_transfer_funcs(struct cell_context *cell) cell->pipe.transfer_flush_region = u_default_transfer_flush_region; cell->pipe.transfer_inline_write = u_default_transfer_inline_write; + + cell->pipe.create_surface = cell_create_surface; + cell->pipe.surface_destroy = cell_surface_destroy; } diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index ec3609291e9..d60718d9716 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -29,6 +29,7 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" #include "pipe/p_context.h" +#include "util/u_inlines.h" #include "fo_context.h" #include "fo_winsys.h" @@ -38,6 +39,11 @@ static void failover_destroy( struct pipe_context *pipe ) { struct failover_context *failover = failover_context( pipe ); + unsigned i; + + for (i = 0; i < failover->num_vertex_buffers; i++) { + pipe_resource_reference(&failover->vertex_buffers[i].buffer, NULL); + } FREE( failover ); } @@ -89,14 +95,14 @@ static void failover_draw_vbo( struct pipe_context *pipe, static unsigned int failover_is_resource_referenced( struct pipe_context *_pipe, - struct pipe_resource *resource, - unsigned face, unsigned level) + struct pipe_resource *resource, + unsigned level, int layer) { struct failover_context *failover = failover_context( _pipe ); struct pipe_context *pipe = (failover->mode == FO_HW) ? failover->hw : failover->sw; - return pipe->is_resource_referenced(pipe, resource, face, level); + return pipe->is_resource_referenced(pipe, resource, level, layer); } struct pipe_context *failover_create( struct pipe_context *hw, @@ -137,10 +143,10 @@ struct pipe_context *failover_create( struct pipe_context *hw, failover->pipe.resource_copy_region = hw->resource_copy_region; #if 0 - failover->pipe.texture_create = hw->texture_create; - failover->pipe.texture_destroy = hw->texture_destroy; - failover->pipe.get_tex_surface = hw->get_tex_surface; - failover->pipe.texture_update = hw->texture_update; + failover->pipe.resource_create = hw->resource_create; + failover->pipe.resource_destroy = hw->resource_destroy; + failover->pipe.create_surface = hw->create_surface; + failover->pipe.surface_destroy = hw->surface_destroy; #endif failover->pipe.flush = hw->flush; diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index c265f381b67..af1fd953aaf 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -574,10 +574,10 @@ failover_set_vertex_buffers(struct pipe_context *pipe, { struct failover_context *failover = failover_context(pipe); - memcpy(failover->vertex_buffers, vertex_buffers, - count * sizeof(vertex_buffers[0])); + util_copy_vertex_buffers(failover->vertex_buffers, + &failover->num_vertex_buffers, + vertex_buffers, count); failover->dirty |= FO_NEW_VERTEX_BUFFER; - failover->num_vertex_buffers = count; failover->sw->set_vertex_buffers( failover->sw, count, vertex_buffers ); failover->hw->set_vertex_buffers( failover->hw, count, vertex_buffers ); } diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c index 50f66079c2a..8cbf0b1de4a 100644 --- a/src/gallium/drivers/galahad/glhd_context.c +++ 
b/src/gallium/drivers/galahad/glhd_context.c @@ -381,6 +381,8 @@ galahad_create_vertex_elements_state(struct pipe_context *_pipe, struct galahad_context *glhd_pipe = galahad_context(_pipe); struct pipe_context *pipe = glhd_pipe->pipe; + /* XXX check if stride lines up with element size, at least for floats */ + return pipe->create_vertex_elements_state(pipe, num_elements, vertex_elements); @@ -662,17 +664,13 @@ galahad_set_index_buffer(struct pipe_context *_pipe, static void galahad_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *_dst, - struct pipe_subresource subdst, + unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, struct pipe_resource *_src, - struct pipe_subresource subsrc, - unsigned srcx, - unsigned srcy, - unsigned srcz, - unsigned width, - unsigned height) + unsigned src_level, + const struct pipe_box *src_box) { struct galahad_context *glhd_pipe = galahad_context(_pipe); struct galahad_resource *glhd_resource_dst = galahad_resource(_dst); @@ -689,17 +687,13 @@ galahad_resource_copy_region(struct pipe_context *_pipe, pipe->resource_copy_region(pipe, dst, - subdst, + dst_level, dstx, dsty, dstz, src, - subsrc, - srcx, - srcy, - srcz, - width, - height); + src_level, + src_box); } static void @@ -781,8 +775,8 @@ galahad_flush(struct pipe_context *_pipe, static unsigned int galahad_is_resource_referenced(struct pipe_context *_pipe, struct pipe_resource *_resource, - unsigned face, - unsigned level) + unsigned level, + int layer) { struct galahad_context *glhd_pipe = galahad_context(_pipe); struct galahad_resource *glhd_resource = galahad_resource(_resource); @@ -791,8 +785,8 @@ galahad_is_resource_referenced(struct pipe_context *_pipe, return pipe->is_resource_referenced(pipe, resource, - face, - level); + level, + layer); } static struct pipe_sampler_view * @@ -823,10 +817,40 @@ galahad_context_sampler_view_destroy(struct pipe_context *_pipe, galahad_sampler_view(_view)); } +static struct pipe_surface * +galahad_context_create_surface(struct pipe_context *_pipe, + struct pipe_resource *_resource, + const struct pipe_surface *templ) +{ + struct galahad_context *glhd_context = galahad_context(_pipe); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_context *pipe = glhd_context->pipe; + struct pipe_resource *resource = glhd_resource->resource; + struct pipe_surface *result; + + result = pipe->create_surface(pipe, + resource, + templ); + + if (result) + return galahad_surface_create(glhd_context, glhd_resource, result); + return NULL; +} + +static void +galahad_context_surface_destroy(struct pipe_context *_pipe, + struct pipe_surface *_surface) +{ + galahad_surface_destroy(galahad_context(_pipe), + galahad_surface(_surface)); +} + + + static struct pipe_transfer * galahad_context_get_transfer(struct pipe_context *_context, struct pipe_resource *_resource, - struct pipe_subresource sr, + unsigned level, unsigned usage, const struct pipe_box *box) { @@ -838,7 +862,7 @@ galahad_context_get_transfer(struct pipe_context *_context, result = context->get_transfer(context, resource, - sr, + level, usage, box); @@ -915,7 +939,7 @@ galahad_context_transfer_unmap(struct pipe_context *_context, static void galahad_context_transfer_inline_write(struct pipe_context *_context, struct pipe_resource *_resource, - struct pipe_subresource sr, + unsigned level, unsigned usage, const struct pipe_box *box, const void *data, @@ -929,7 +953,7 @@ galahad_context_transfer_inline_write(struct pipe_context *_context, 
context->transfer_inline_write(context, resource, - sr, + level, usage, box, data, @@ -1004,6 +1028,8 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) glhd_pipe->base.is_resource_referenced = galahad_is_resource_referenced; glhd_pipe->base.create_sampler_view = galahad_context_create_sampler_view; glhd_pipe->base.sampler_view_destroy = galahad_context_sampler_view_destroy; + glhd_pipe->base.create_surface = galahad_context_create_surface; + glhd_pipe->base.surface_destroy = galahad_context_surface_destroy; glhd_pipe->base.get_transfer = galahad_context_get_transfer; glhd_pipe->base.transfer_destroy = galahad_context_transfer_destroy; glhd_pipe->base.transfer_map = galahad_context_transfer_map; diff --git a/src/gallium/drivers/galahad/glhd_objects.c b/src/gallium/drivers/galahad/glhd_objects.c index 6c5a21ae704..b50d85655e8 100644 --- a/src/gallium/drivers/galahad/glhd_objects.c +++ b/src/gallium/drivers/galahad/glhd_objects.c @@ -71,7 +71,8 @@ galahad_resource_destroy(struct galahad_resource *glhd_resource) struct pipe_surface * -galahad_surface_create(struct galahad_resource *glhd_resource, +galahad_surface_create(struct galahad_context *glhd_context, + struct galahad_resource *glhd_resource, struct pipe_surface *surface) { struct galahad_surface *glhd_surface; @@ -100,10 +101,11 @@ error: } void -galahad_surface_destroy(struct galahad_surface *glhd_surface) +galahad_surface_destroy(struct galahad_context *glhd_context, + struct galahad_surface *glhd_surface) { pipe_resource_reference(&glhd_surface->base.texture, NULL); - pipe_surface_reference(&glhd_surface->surface, NULL); + glhd_context->pipe->surface_destroy(glhd_context->pipe, glhd_surface->surface); FREE(glhd_surface); } diff --git a/src/gallium/drivers/galahad/glhd_objects.h b/src/gallium/drivers/galahad/glhd_objects.h index dc74c5bebc9..13dc7485887 100644 --- a/src/gallium/drivers/galahad/glhd_objects.h +++ b/src/gallium/drivers/galahad/glhd_objects.h @@ -149,11 +149,13 @@ void galahad_resource_destroy(struct galahad_resource *glhd_resource); struct pipe_surface * -galahad_surface_create(struct galahad_resource *glhd_resource, +galahad_surface_create(struct galahad_context *glhd_context, + struct galahad_resource *glhd_resource, struct pipe_surface *surface); void -galahad_surface_destroy(struct galahad_surface *glhd_surface); +galahad_surface_destroy(struct galahad_context *glhd_context, + struct galahad_surface *glhd_surface); struct pipe_sampler_view * galahad_sampler_view_create(struct galahad_context *glhd_context, diff --git a/src/gallium/drivers/galahad/glhd_screen.c b/src/gallium/drivers/galahad/glhd_screen.c index b6cc41d908b..b4825bef66d 100644 --- a/src/gallium/drivers/galahad/glhd_screen.c +++ b/src/gallium/drivers/galahad/glhd_screen.c @@ -223,39 +223,6 @@ galahad_screen_resource_destroy(struct pipe_screen *screen, galahad_resource_destroy(galahad_resource(_resource)); } -static struct pipe_surface * -galahad_screen_get_tex_surface(struct pipe_screen *_screen, - struct pipe_resource *_resource, - unsigned face, - unsigned level, - unsigned zslice, - unsigned usage) -{ - struct galahad_screen *glhd_screen = galahad_screen(_screen); - struct galahad_resource *glhd_resource = galahad_resource(_resource); - struct pipe_screen *screen = glhd_screen->screen; - struct pipe_resource *resource = glhd_resource->resource; - struct pipe_surface *result; - - result = screen->get_tex_surface(screen, - resource, - face, - level, - zslice, - usage); - - if (result) - return 
galahad_surface_create(glhd_resource, result); - return NULL; -} - -static void -galahad_screen_tex_surface_destroy(struct pipe_surface *_surface) -{ - galahad_surface_destroy(galahad_surface(_surface)); -} - - static struct pipe_resource * galahad_screen_user_buffer_create(struct pipe_screen *_screen, @@ -281,16 +248,18 @@ galahad_screen_user_buffer_create(struct pipe_screen *_screen, static void galahad_screen_flush_frontbuffer(struct pipe_screen *_screen, - struct pipe_surface *_surface, + struct pipe_resource *_resource, + unsigned level, unsigned layer, void *context_private) { struct galahad_screen *glhd_screen = galahad_screen(_screen); - struct galahad_surface *glhd_surface = galahad_surface(_surface); + struct galahad_resource *glhd_resource = galahad_resource(_resource); struct pipe_screen *screen = glhd_screen->screen; - struct pipe_surface *surface = glhd_surface->surface; + struct pipe_resource *resource = glhd_resource->resource; screen->flush_frontbuffer(screen, - surface, + resource, + level, layer, context_private); } @@ -360,8 +329,6 @@ galahad_screen_create(struct pipe_screen *screen) glhd_screen->base.resource_from_handle = galahad_screen_resource_from_handle; glhd_screen->base.resource_get_handle = galahad_screen_resource_get_handle; glhd_screen->base.resource_destroy = galahad_screen_resource_destroy; - glhd_screen->base.get_tex_surface = galahad_screen_get_tex_surface; - glhd_screen->base.tex_surface_destroy = galahad_screen_tex_surface_destroy; glhd_screen->base.user_buffer_create = galahad_screen_user_buffer_create; glhd_screen->base.flush_frontbuffer = galahad_screen_flush_frontbuffer; glhd_screen->base.fence_reference = galahad_screen_fence_reference; diff --git a/src/gallium/drivers/i915/TODO b/src/gallium/drivers/i915/TODO new file mode 100644 index 00000000000..94c428bebf8 --- /dev/null +++ b/src/gallium/drivers/i915/TODO @@ -0,0 +1,25 @@ +Random list of problems with i915g: + +- Dies with BadDrawable on GLXFBconfig changes/destruction. Makes piglit totally + unusable :( Upgrading xserver helped here, it doesn't crash anymore. Still + broken, it doesn't update the viewport/get new buffers. + +- Tends to hang the chip after a few minutes of openarena. Looks tiling related, + as the last frame rendered has tiling corruption over the complete frame. + +- Kills the chip in 3D_PRIMITIVE LINELIST with mesa-demos/fbotexture in + wireframe mode. + +- Tiling is funny: If unlucky, it renders/samples all black. No clue yet what's + going on. Seems to depend on tiny details like whether the sampler + relocation is fenced/unfenced (broken _with_ fenced reloc using tiling bits!). + +- Y-tiling is even more fun. i915c doesn't use it, maybe there's a reason? + Texture sampling from Y-tiled buffers seems to work, though (save above + problems). + +- Need to validate buffers before usage. Currently do_exec on the batchbuffer + can fail with -ENOSPC.
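The i915_batch.h and i915_batchbuffer.h hunks below make the dword and write helpers assert instead of silently dropping data, so callers are expected to reserve space up front. A hedged sketch of that reserve-then-emit pattern, mirroring the fill-blit emission in i915_blit.c; the dword/reloc counts, the NULL fence and the CMD dword are assumptions made for the example:

    /* Make sure the whole packet fits, otherwise flush the batch first. */
    if (!i915_winsys_batchbuffer_check(i915->batch, 6 /* dwords */, 1 /* relocs */))
       FLUSH_BATCH(NULL);

    OUT_BATCH(CMD);                 /* blit opcode dword */
    OUT_BATCH(BR13);
    OUT_BATCH((y << 16) | x);
    OUT_BATCH(((y + h) << 16) | (x + w));
    OUT_RELOC_FENCED(dst_buffer, I915_USAGE_2D_TARGET, dst_offset);
    OUT_BATCH(color);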
+ +Other bugs can be found here: +https://bugs.freedesktop.org/buglist.cgi?bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&component=Drivers/Gallium/i915g diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h index c411b84ccd4..6e93da76209 100644 --- a/src/gallium/drivers/i915/i915_batch.h +++ b/src/gallium/drivers/i915/i915_batch.h @@ -38,7 +38,10 @@ i915_winsys_batchbuffer_dword(i915->batch, dword) #define OUT_RELOC(buf, usage, offset) \ - i915_winsys_batchbuffer_reloc(i915->batch, buf, usage, offset) + i915_winsys_batchbuffer_reloc(i915->batch, buf, usage, offset, false) + +#define OUT_RELOC_FENCED(buf, usage, offset) \ + i915_winsys_batchbuffer_reloc(i915->batch, buf, usage, offset, true) #define FLUSH_BATCH(fence) \ i915_flush(i915, fence) diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h index c1cd314e7b8..d92b2ccb31e 100644 --- a/src/gallium/drivers/i915/i915_batchbuffer.h +++ b/src/gallium/drivers/i915/i915_batchbuffer.h @@ -29,42 +29,47 @@ #define I915_BATCHBUFFER_H #include "i915_winsys.h" +#include "util/u_debug.h" struct i915_context; +static INLINE size_t +i915_winsys_batchbuffer_space(struct i915_winsys_batchbuffer *batch) +{ + return batch->size - (batch->ptr - batch->map); +} + static INLINE boolean i915_winsys_batchbuffer_check(struct i915_winsys_batchbuffer *batch, size_t dwords, size_t relocs) { - return dwords * 4 <= batch->size - (batch->ptr - batch->map) && + return dwords * 4 <= i915_winsys_batchbuffer_space(batch) && relocs <= (batch->max_relocs - batch->relocs); } -static INLINE size_t -i915_winsys_batchbuffer_space(struct i915_winsys_batchbuffer *batch) +static INLINE void +i915_winsys_batchbuffer_dword_unchecked(struct i915_winsys_batchbuffer *batch, + unsigned dword) { - return batch->size - (batch->ptr - batch->map); + *(unsigned *)batch->ptr = dword; + batch->ptr += 4; } static INLINE void i915_winsys_batchbuffer_dword(struct i915_winsys_batchbuffer *batch, unsigned dword) { - if (i915_winsys_batchbuffer_space(batch) < 4) - return; - - *(unsigned *)batch->ptr = dword; - batch->ptr += 4; + assert (i915_winsys_batchbuffer_space(batch) >= 4); + i915_winsys_batchbuffer_dword_unchecked(batch, dword); } static INLINE void i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch, - void *data, - size_t size) + void *data, + size_t size) { - if (i915_winsys_batchbuffer_space(batch) < size) - return; + assert (i915_winsys_batchbuffer_space(batch) >= size); memcpy(data, batch->ptr, size); batch->ptr += size; @@ -74,9 +79,9 @@ static INLINE int i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch, struct i915_winsys_buffer *buffer, enum i915_winsys_buffer_usage usage, - size_t offset) + size_t offset, bool fenced) { - return batch->iws->batchbuffer_reloc(batch, buffer, usage, offset); + return batch->iws->batchbuffer_reloc(batch, buffer, usage, offset, fenced); } #endif diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c index cdf20c0055a..97c25665156 100644 --- a/src/gallium/drivers/i915/i915_blit.c +++ b/src/gallium/drivers/i915/i915_blit.c @@ -74,7 +74,7 @@ i915_fill_blit(struct i915_context *i915, OUT_BATCH(BR13); OUT_BATCH((y << 16) | x); OUT_BATCH(((y + h) << 16) | (x + w)); - OUT_RELOC(dst_buffer, I915_USAGE_2D_TARGET, dst_offset); + OUT_RELOC_FENCED(dst_buffer, I915_USAGE_2D_TARGET, dst_offset); OUT_BATCH(color); } @@ -138,8 +138,8 @@ i915_copy_blit(struct i915_context *i915, OUT_BATCH(BR13); 
OUT_BATCH((dst_y << 16) | dst_x); OUT_BATCH((dst_y2 << 16) | dst_x2); - OUT_RELOC(dst_buffer, I915_USAGE_2D_TARGET, dst_offset); + OUT_RELOC_FENCED(dst_buffer, I915_USAGE_2D_TARGET, dst_offset); OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(((int) src_pitch & 0xffff)); - OUT_RELOC(src_buffer, I915_USAGE_2D_SOURCE, src_offset); + OUT_RELOC_FENCED(src_buffer, I915_USAGE_2D_SOURCE, src_offset); } diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 847dd6dd47e..9be31619254 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -107,6 +107,10 @@ static void i915_destroy(struct pipe_context *pipe) if(i915->batch) i915->iws->batchbuffer_destroy(i915->batch); + for (i = 0; i < i915->num_vertex_buffers; i++) { + pipe_resource_reference(&i915->vertex_buffer[i].buffer, NULL); + } + /* unbind framebuffer */ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { pipe_surface_reference(&i915->framebuffer.cbufs[i], NULL); diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 3ae61d0ea70..d15e1723d83 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -193,8 +193,7 @@ struct i915_velems_state { }; -struct i915_context -{ +struct i915_context { struct pipe_context base; struct i915_winsys *iws; @@ -273,7 +272,7 @@ struct i915_context #define I915_HW_PROGRAM (1<<I915_CACHE_PROGRAM) #define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS) #define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0)) -#define I915_HW_INVARIENT (1<<(I915_MAX_CACHE+1)) +#define I915_HW_INVARIANT (1<<(I915_MAX_CACHE+1)) /*********************************************************************** diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c index 57d3390dea3..87c435a2f36 100644 --- a/src/gallium/drivers/i915/i915_debug.c +++ b/src/gallium/drivers/i915/i915_debug.c @@ -46,10 +46,12 @@ static const struct debug_named_value debug_options[] = { }; unsigned i915_debug = 0; +boolean i915_tiling = TRUE; void i915_debug_init(struct i915_screen *screen) { i915_debug = debug_get_flags_option("I915_DEBUG", debug_options, 0); + i915_tiling = !debug_get_bool_option("I915_NO_TILING", FALSE); } @@ -974,7 +976,7 @@ i915_dump_hardware_dirty(struct i915_context *i915, const char *func) {I915_HW_PROGRAM, "program"}, {I915_HW_CONSTANTS, "constants"}, {I915_HW_IMMEDIATE, "immediate"}, - {I915_HW_INVARIENT, "invarient"}, + {I915_HW_INVARIANT, "invariant"}, {0, NULL}, }; int i; diff --git a/src/gallium/drivers/i915/i915_debug.h b/src/gallium/drivers/i915/i915_debug.h index fa60799d0c5..11af7662f0a 100644 --- a/src/gallium/drivers/i915/i915_debug.h +++ b/src/gallium/drivers/i915/i915_debug.h @@ -46,6 +46,7 @@ struct i915_winsys_batchbuffer; #define DBG_CONSTANTS 0x20 extern unsigned i915_debug; +extern boolean i915_tiling; #ifdef DEBUG static INLINE boolean diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 25c53210be8..9e20010c4a1 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -924,6 +924,14 @@ i915_translate_instructions(struct i915_fp_compile *p, tgsi_parse_token( &parse ); switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_PROPERTY: + /* + * We only support one cbuf, but we still need to ignore the property + * correctly so we don't hit the assert at the end of the switch case. 
+ */ + assert(parse.FullToken.FullProperty.Property.PropertyName == + TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS); + break; case TGSI_TOKEN_TYPE_DECLARATION: if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) { diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c index bd046bd9058..baebbc7bae3 100644 --- a/src/gallium/drivers/i915/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -172,6 +172,7 @@ i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size) * * Side effects: * Updates hw_offset, sw_offset, index and allocates a new buffer. + * Will set i915->vbo to null on buffer allocation. */ static void i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) @@ -179,8 +180,16 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) struct i915_context *i915 = i915_render->i915; struct i915_winsys *iws = i915->iws; - if (i915_render->vbo) + if (i915_render->vbo) { iws->buffer_destroy(iws, i915_render->vbo); + /* + * XXX If buffers were referenced then this should be done in + * update_vbo_state but since they aren't and malloc likes to reuse + * memory we need to set it to null + */ + i915->vbo = NULL; + i915_render->vbo = NULL; + } i915->vbo_flushed = 0; @@ -198,7 +207,7 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) #endif i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, - 64, I915_NEW_VERTEX); + I915_NEW_VERTEX); } /** @@ -726,7 +735,7 @@ i915_vbuf_render_create(struct i915_context *i915) i915_render->pool_fifo = u_fifo_create(6); for (i = 0; i < 6; i++) u_fifo_add(i915_render->pool_fifo, - iws->buffer_create(iws, i915_render->pool_buffer_size, 64, + iws->buffer_create(iws, i915_render->pool_buffer_size, I915_NEW_VERTEX)); #else (void)i; diff --git a/src/gallium/drivers/i915/i915_reg.h b/src/gallium/drivers/i915/i915_reg.h index cc28891e4ac..5e4e80ddf6b 100644 --- a/src/gallium/drivers/i915/i915_reg.h +++ b/src/gallium/drivers/i915/i915_reg.h @@ -753,7 +753,7 @@ #define MT_COMPRESS_DXT1_RGB (4<<3) #define MS3_USE_FENCE_REGS (1<<2) #define MS3_TILED_SURFACE (1<<1) -#define MS3_TILE_WALK (1<<0) +#define MS3_TILE_WALK_Y (1<<0) #define MS4_PITCH_SHIFT 21 #define MS4_CUBE_FACE_ENA_NEGX (1<<20) @@ -851,6 +851,7 @@ #define MI_FLUSH ((0<<29)|(4<<23)) #define FLUSH_MAP_CACHE (1<<0) #define INHIBIT_FLUSH_RENDER_CACHE (1<<2) +#define MI_NOOP 0 #define CMD_3D (0x3<<29) diff --git a/src/gallium/drivers/i915/i915_resource.h b/src/gallium/drivers/i915/i915_resource.h index 753bd266b12..86620e6a123 100644 --- a/src/gallium/drivers/i915/i915_resource.h +++ b/src/gallium/drivers/i915/i915_resource.h @@ -49,6 +49,10 @@ struct i915_buffer { #define I915_MAX_TEXTURE_3D_LEVELS 8 /* max 128x128x128 */ +struct offset_pair { + unsigned short nblocksx; + unsigned short nblocksy; +}; struct i915_texture { struct u_resource b; @@ -63,14 +67,18 @@ struct i915_texture { /* Explicitly store the offset of each image for each cube face or * depth value. + * + * Array [depth] of offsets.
*/ - unsigned *image_offset[I915_MAX_TEXTURE_2D_LEVELS]; /**< array [depth] of offsets */ + struct offset_pair *image_offset[I915_MAX_TEXTURE_2D_LEVELS]; /* The data is held here: */ struct i915_winsys_buffer *buffer; }; +unsigned i915_texture_offset(struct i915_texture *tex, + unsigned level, unsigned layer); void i915_init_screen_resource_functions(struct i915_screen *is); void i915_init_resource_functions(struct i915_context *i915); diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c b/src/gallium/drivers/i915/i915_resource_buffer.c index 0d379497dfc..450203d60a9 100644 --- a/src/gallium/drivers/i915/i915_resource_buffer.c +++ b/src/gallium/drivers/i915/i915_resource_buffer.c @@ -62,7 +62,7 @@ i915_buffer_destroy(struct pipe_screen *screen, static void * i915_buffer_transfer_map( struct pipe_context *pipe, - struct pipe_transfer *transfer ) + struct pipe_transfer *transfer ) { struct i915_buffer *buffer = i915_buffer(transfer->resource); return buffer->data + transfer->box.x; @@ -71,19 +71,19 @@ i915_buffer_transfer_map( struct pipe_context *pipe, static void i915_buffer_transfer_inline_write( struct pipe_context *rm_ctx, - struct pipe_resource *resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box, - const void *data, - unsigned stride, - unsigned slice_stride) + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) { struct i915_buffer *buffer = i915_buffer(resource); memcpy(buffer->data + box->x, - data, - box->width); + data, + box->width); } @@ -115,7 +115,7 @@ i915_buffer_create(struct pipe_screen *screen, buf->b.vtbl = &i915_buffer_vtbl; pipe_reference_init(&buf->b.b.reference, 1); buf->b.b.screen = screen; - + buf->data = MALLOC(template->width0); buf->free_on_destroy = TRUE; @@ -135,7 +135,7 @@ struct pipe_resource * i915_user_buffer_create(struct pipe_screen *screen, void *ptr, unsigned bytes, - unsigned bind) + unsigned bind) { struct i915_buffer *buf = CALLOC_STRUCT(i915_buffer); @@ -152,6 +152,7 @@ i915_user_buffer_create(struct pipe_screen *screen, buf->b.b.width0 = bytes; buf->b.b.height0 = 1; buf->b.b.depth0 = 1; + buf->b.b.array_size = 1; buf->data = ptr; buf->free_on_destroy = FALSE; diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c index d45346b32ad..f19106f3414 100644 --- a/src/gallium/drivers/i915/i915_resource_texture.c +++ b/src/gallium/drivers/i915/i915_resource_texture.c @@ -106,6 +106,23 @@ get_pot_stride(enum pipe_format format, unsigned width) return util_next_power_of_two(util_format_get_stride(format, width)); } +static INLINE const char* +get_tiling_string(enum i915_winsys_buffer_tile tile) +{ + switch(tile) { + case I915_TILE_NONE: + return "none"; + case I915_TILE_X: + return "x"; + case I915_TILE_Y: + return "y"; + default: + assert(FALSE); + return "?"; + } +} + + /* * More advanced helper funcs */ @@ -120,28 +137,56 @@ i915_texture_set_level_info(struct i915_texture *tex, assert(!tex->image_offset[level]); tex->nr_images[level] = nr_images; - tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned)); - tex->image_offset[level][0] = 0; + tex->image_offset[level] = MALLOC(nr_images * sizeof(struct offset_pair)); + tex->image_offset[level][0].nblocksx = 0; + tex->image_offset[level][0].nblocksy = 0; +} + +INLINE unsigned i915_texture_offset(struct i915_texture *tex, + unsigned level, unsigned layer) +{ + unsigned x, y; + x = 
tex->image_offset[level][layer].nblocksx + * util_format_get_blocksize(tex->b.b.format); + y = tex->image_offset[level][layer].nblocksy; + + return y * tex->stride + x; } static void i915_texture_set_image_offset(struct i915_texture *tex, unsigned level, unsigned img, - unsigned x, unsigned y) + unsigned nblocksx, unsigned nblocksy) { /* for the first image and level make sure offset is zero */ - assert(!(img == 0 && level == 0) || (x == 0 && y == 0)); + assert(!(img == 0 && level == 0) || (nblocksx == 0 && nblocksy == 0)); assert(img < tex->nr_images[level]); - tex->image_offset[level][img] = y * tex->stride + x * util_format_get_blocksize(tex->b.b.format); + tex->image_offset[level][img].nblocksx = nblocksx; + tex->image_offset[level][img].nblocksy = nblocksy; #if DEBUG_TEXTURES - debug_printf("%s: %p level %u, img %u (%u, %u) %p\n", __FUNCTION__, - tex, level, img, x, y, - (void*)(uintptr_t)tex->image_offset[level][img]); + debug_printf("%s: %p level %u, img %u (%u, %u)\n", __FUNCTION__, + tex, level, img, x, y); #endif } +static enum i915_winsys_buffer_tile +i915_texture_tiling(struct pipe_resource *pt) +{ + if (!i915_tiling) + return I915_TILE_NONE; + + if (pt->target == PIPE_TEXTURE_1D) + return I915_TILE_NONE; + + if (util_format_is_s3tc(pt->format)) + /* XXX X-tiling might make sense */ + return I915_TILE_NONE; + + return I915_TILE_X; +} + /* * Shared layout functions @@ -163,9 +208,10 @@ i9x5_scanout_layout(struct i915_texture *tex) i915_texture_set_image_offset(tex, 0, 0, 0, 0); if (pt->width0 >= 240) { - tex->stride = get_pot_stride(pt->format, pt->width0); + tex->stride = align(util_format_get_stride(pt->format, pt->width0), 64); tex->total_nblocksy = align_nblocksy(pt->format, pt->height0, 8); tex->tiling = I915_TILE_X; + /* special case for cursors */ } else if (pt->width0 == 64 && pt->height0 == 64) { tex->stride = get_pot_stride(pt->format, pt->width0); tex->total_nblocksy = align_nblocksy(pt->format, pt->height0, 8); @@ -200,7 +246,7 @@ i9x5_display_target_layout(struct i915_texture *tex) i915_texture_set_level_info(tex, 0, 1); i915_texture_set_image_offset(tex, 0, 0, 0, 0); - tex->stride = get_pot_stride(pt->format, pt->width0); + tex->stride = align(util_format_get_stride(pt->format, pt->width0), 64); tex->total_nblocksy = align_nblocksy(pt->format, pt->height0, 8); tex->tiling = I915_TILE_X; @@ -357,6 +403,8 @@ i915_texture_layout(struct i915_texture * tex) { struct pipe_resource *pt = &tex->b.b; + tex->tiling = i915_texture_tiling(pt); + switch (pt->target) { case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: @@ -603,6 +651,8 @@ i945_texture_layout(struct i915_texture * tex) { struct pipe_resource *pt = &tex->b.b; + tex->tiling = i915_texture_tiling(pt); + switch (pt->target) { case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: @@ -650,7 +700,7 @@ i915_texture_get_handle(struct pipe_screen * screen, static void i915_texture_destroy(struct pipe_screen *screen, - struct pipe_resource *pt) + struct pipe_resource *pt) { struct i915_texture *tex = i915_texture(pt); struct i915_winsys *iws = i915_screen(screen)->iws; @@ -667,10 +717,10 @@ i915_texture_destroy(struct pipe_screen *screen, static struct pipe_transfer * i915_texture_get_transfer(struct pipe_context *context, - struct pipe_resource *resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box) + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box) { struct i915_texture *tex = i915_texture(resource); struct pipe_transfer *transfer = 
CALLOC_STRUCT(pipe_transfer); @@ -678,37 +728,31 @@ i915_texture_get_transfer(struct pipe_context *context, return NULL; transfer->resource = resource; - transfer->sr = sr; + transfer->level = level; transfer->usage = usage; transfer->box = *box; transfer->stride = tex->stride; + /* FIXME: layer_stride */ return transfer; } - static void * i915_texture_transfer_map(struct pipe_context *pipe, - struct pipe_transfer *transfer) + struct pipe_transfer *transfer) { struct pipe_resource *resource = transfer->resource; struct i915_texture *tex = i915_texture(resource); struct i915_winsys *iws = i915_screen(pipe->screen)->iws; - struct pipe_subresource sr = transfer->sr; struct pipe_box *box = &transfer->box; enum pipe_format format = resource->format; unsigned offset; char *map; - if (resource->target == PIPE_TEXTURE_CUBE) { - offset = tex->image_offset[sr.level][sr.face]; - } else if (resource->target == PIPE_TEXTURE_3D) { - offset = tex->image_offset[sr.level][box->z]; - } else { - offset = tex->image_offset[sr.level][0]; - assert(sr.face == 0); + if (resource->target != PIPE_TEXTURE_3D && + resource->target != PIPE_TEXTURE_CUBE) assert(box->z == 0); - } + offset = i915_texture_offset(tex, transfer->level, box->z); map = iws->buffer_map(iws, tex->buffer, (transfer->usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE); @@ -754,7 +798,6 @@ i915_texture_create(struct pipe_screen *screen, struct i915_screen *is = i915_screen(screen); struct i915_winsys *iws = is->iws; struct i915_texture *tex = CALLOC_STRUCT(i915_texture); - size_t tex_size; unsigned buf_usage = 0; if (!tex) @@ -773,8 +816,6 @@ i915_texture_create(struct pipe_screen *screen, goto fail; } - tex_size = tex->stride * tex->total_nblocksy; - /* for scanouts and cursors, cursors arn't scanouts */ /* XXX: use a custom flag for cursors, don't rely on magically @@ -785,27 +826,15 @@ i915_texture_create(struct pipe_screen *screen, else buf_usage = I915_NEW_TEXTURE; - tex->buffer = iws->buffer_create(iws, tex_size, 64, buf_usage); + tex->buffer = iws->buffer_create_tiled(iws, &tex->stride, tex->total_nblocksy, + &tex->tiling, buf_usage); if (!tex->buffer) goto fail; - /* setup any hw fences */ - if (tex->tiling) { - iws->buffer_set_fence_reg(iws, tex->buffer, tex->stride, tex->tiling); - } - - -#if 0 - void *ptr = ws->buffer_map(ws, tex->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE); - memset(ptr, 0x80, tex_size); - ws->buffer_unmap(ws, tex->buffer); -#endif - - I915_DBG(DBG_TEXTURE, "%s: %p size %u, stride %u, blocks (%u, %u)\n", __func__, - tex, (unsigned int)tex_size, tex->stride, + I915_DBG(DBG_TEXTURE, "%s: %p stride %u, blocks (%u, %u) tiling %s\n", __func__, + tex, tex->stride, tex->stride / util_format_get_blocksize(tex->b.b.format), - tex->total_nblocksy); + tex->total_nblocksy, get_tiling_string(tex->tiling)); return &tex->b.b; @@ -824,10 +853,11 @@ i915_texture_from_handle(struct pipe_screen * screen, struct i915_winsys *iws = is->iws; struct i915_winsys_buffer *buffer; unsigned stride; + enum i915_winsys_buffer_tile tiling; assert(screen); - buffer = iws->buffer_from_handle(iws, whandle, &stride); + buffer = iws->buffer_from_handle(iws, whandle, &tiling, &stride); /* Only supports one type */ if ((template->target != PIPE_TEXTURE_2D && @@ -847,6 +877,7 @@ i915_texture_from_handle(struct pipe_screen * screen, tex->b.b.screen = screen; tex->stride = stride; + tex->tiling = tiling; tex->total_nblocksy = align_nblocksy(tex->b.b.format, tex->b.b.height0, 8); i915_texture_set_level_info(tex, 0, 1); @@ -854,10 +885,10 @@ i915_texture_from_handle(struct 
pipe_screen * screen, tex->buffer = buffer; - I915_DBG(DBG_TEXTURE, "%s: %p stride %u, blocks (%ux%u)\n", __func__, + I915_DBG(DBG_TEXTURE, "%s: %p stride %u, blocks (%u, %u) tiling %s\n", __func__, tex, tex->stride, tex->stride / util_format_get_blocksize(tex->b.b.format), - tex->total_nblocksy); + tex->total_nblocksy, get_tiling_string(tex->tiling)); return &tex->b.b; } diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index a3c51138008..bdbc08e8086 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -35,7 +35,6 @@ #include "i915_debug.h" #include "i915_context.h" #include "i915_screen.h" -#include "i915_surface.h" #include "i915_resource.h" #include "i915_winsys.h" #include "i915_public.h" @@ -387,7 +386,6 @@ i915_screen_create(struct i915_winsys *iws) is->base.fence_finish = i915_fence_finish; i915_init_screen_resource_functions(is); - i915_init_screen_surface_functions(is); i915_debug_init(is); diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index bbfcff6bc4d..f5b60ed5942 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -760,8 +760,9 @@ static void i915_set_vertex_buffers(struct pipe_context *pipe, */ draw_flush(i915->draw); - memcpy(i915->vertex_buffer, buffers, count * sizeof(buffers[0])); - i915->num_vertex_buffers = count; + util_copy_vertex_buffers(i915->vertex_buffer, + &i915->num_vertex_buffers, + buffers, count); /* pass-through to draw module */ draw_set_vertex_buffers(i915->draw, count, buffers); diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 49dff1f775c..86c02976495 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -86,6 +86,22 @@ framebuffer_size(const struct pipe_framebuffer_state *fb, } } +static inline uint32_t +buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling) +{ + uint32_t tiling_bits = 0; + + switch (tiling) { + case I915_TILE_Y: + tiling_bits |= BUF_3D_TILE_WALK_Y; + case I915_TILE_X: + tiling_bits |= BUF_3D_TILED_SURFACE; + case I915_TILE_NONE: + break; + } + + return tiling_bits; +} /* Push the state into the sarea and/or texture memory. */ @@ -126,7 +142,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) save_relocs = i915->batch->relocs; /* 14 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_INVARIENT) + if (i915->hardware_dirty & I915_HW_INVARIANT) { OUT_BATCH(_3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | @@ -220,7 +236,6 @@ i915_emit_hardware_state(struct i915_context *i915 ) struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; if (cbuf_surface) { - unsigned ctile = BUF_3D_USE_FENCE; struct i915_texture *tex = i915_texture(cbuf_surface->texture); assert(tex); @@ -228,30 +243,32 @@ i915_emit_hardware_state(struct i915_context *i915 ) OUT_BATCH(BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ - ctile); + buf_3d_tiling_bits(tex->tiling)); OUT_RELOC(tex->buffer, I915_USAGE_RENDER, - cbuf_surface->offset); + 0); } /* What happens if no zbuf?? 
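The buf_3d_tiling_bits() helper added in the i915_state_emit.c hunk above relies on deliberate switch fall-through: I915_TILE_Y sets both BUF_3D_TILE_WALK_Y and BUF_3D_TILED_SURFACE, while I915_TILE_X sets only the tiled-surface bit. Below is a minimal standalone sketch of the same mapping written without fall-through so the intent is explicit; the enum and the bit values are placeholders for illustration, not the real i915 defines.

#include <stdint.h>
#include <stdio.h>

enum tile { TILE_NONE, TILE_X, TILE_Y };      /* stands in for i915_winsys_buffer_tile */

#define TILED_SURFACE_BIT (1u << 0)           /* placeholder for BUF_3D_TILED_SURFACE */
#define TILE_WALK_Y_BIT   (1u << 1)           /* placeholder for BUF_3D_TILE_WALK_Y */

static uint32_t tiling_bits(enum tile t)
{
   /* Same result as the fall-through switch: Y implies "tiled" plus "walk Y",
    * X implies only "tiled", NONE sets nothing. */
   switch (t) {
   case TILE_Y:  return TILE_WALK_Y_BIT | TILED_SURFACE_BIT;
   case TILE_X:  return TILED_SURFACE_BIT;
   default:      return 0;
   }
}

int main(void)
{
   printf("none=0x%x x=0x%x y=0x%x\n",
          tiling_bits(TILE_NONE), tiling_bits(TILE_X), tiling_bits(TILE_Y));
   return 0;
}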
*/ if (depth_surface) { - unsigned ztile = BUF_3D_USE_FENCE; struct i915_texture *tex = i915_texture(depth_surface->texture); + unsigned offset = i915_texture_offset(tex, depth_surface->u.tex.level, + depth_surface->u.tex.first_layer); assert(tex); + assert(offset == 0); OUT_BATCH(_3DSTATE_BUF_INFO_CMD); assert(tex); OUT_BATCH(BUF_3D_ID_DEPTH | BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ - ztile); + buf_3d_tiling_bits(tex->tiling)); OUT_RELOC(tex->buffer, I915_USAGE_RENDER, - depth_surface->offset); + 0); } { @@ -293,12 +310,11 @@ i915_emit_hardware_state(struct i915_context *i915 ) if (enabled & (1 << unit)) { struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); struct i915_winsys_buffer *buf = texture->buffer; - uint offset = 0; assert(buf); count++; - OUT_RELOC(buf, I915_USAGE_SAMPLER, offset); + OUT_RELOC(buf, I915_USAGE_SAMPLER, 0); OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ } @@ -391,18 +407,33 @@ i915_emit_hardware_state(struct i915_context *i915 ) #if 01 /* drawing surface size */ /* 6 dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_STATIC) { uint w, h; - boolean k = framebuffer_size(&i915->framebuffer, &w, &h); - (void)k; - assert(k); + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + struct i915_texture *tex = i915_texture(cbuf_surface->texture); + unsigned x, y; + int layer; + uint32_t draw_offset; + boolean ret; + + ret = framebuffer_size(&i915->framebuffer, &w, &h); + assert(ret); + layer = cbuf_surface->u.tex.first_layer; + + x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx; + y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy; + + draw_offset = x | (y << 16); + + /* XXX flush only required when the draw_offset changes! 
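The drawing-rectangle hunk above packs the render target's per-layer block offset into the new draw offset: the low 16 bits carry x (nblocksx) and the high 16 bits carry y (nblocksy), and the same offset shifts the rectangle's maximum corner. A small worked example of that packing follows; the offset and framebuffer values are made up purely for illustration.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   unsigned x = 0, y = 128;      /* example block offset of the bound layer */
   unsigned w = 640, h = 480;    /* example framebuffer size */

   uint32_t draw_offset = x | (y << 16);
   uint32_t draw_max    = (w - 1 + x) | ((h - 1 + y) << 16);

   /* With these values: draw_offset = 0x00800000, draw_max = 0x025f027f */
   printf("draw_offset=0x%08x draw_max=0x%08x\n", draw_offset, draw_max);
   return 0;
}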
*/ + OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(((w - 1) & 0xffff) | ((h - 1) << 16)); - OUT_BATCH(0); - OUT_BATCH(0); + OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS); + OUT_BATCH(draw_offset); + OUT_BATCH((w - 1 + x) | ((h - 1 + y) << 16)); + OUT_BATCH(draw_offset); } #endif diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c index 9771274ca11..916cb767536 100644 --- a/src/gallium/drivers/i915/i915_state_sampler.c +++ b/src/gallium/drivers/i915/i915_state_sampler.c @@ -243,6 +243,23 @@ static uint translate_texture_format(enum pipe_format pipeFormat) } } +static inline uint32_t +ms3_tiling_bits(enum i915_winsys_buffer_tile tiling) +{ + uint32_t tiling_bits = 0; + + switch (tiling) { + case I915_TILE_Y: + tiling_bits |= MS3_TILE_WALK_Y; + case I915_TILE_X: + tiling_bits |= MS3_TILED_SURFACE; + case I915_TILE_NONE: + break; + } + + return tiling_bits; +} + static void update_map(struct i915_context *i915, uint unit, const struct i915_texture *tex, @@ -254,7 +271,6 @@ static void update_map(struct i915_context *i915, const uint width = pt->width0, height = pt->height0, depth = pt->depth0; const uint num_levels = pt->last_level; unsigned max_lod = num_levels * 4; - unsigned tiled = MS3_USE_FENCE_REGS; assert(tex); assert(width); @@ -272,7 +288,7 @@ static void update_map(struct i915_context *i915, (((height - 1) << MS3_HEIGHT_SHIFT) | ((width - 1) << MS3_WIDTH_SHIFT) | format - | tiled); + | ms3_tiling_bits(tex->tiling)); /* * XXX When min_filter != mag_filter and there's just one mipmap level, diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c index f40876e708e..becc6e93c2d 100644 --- a/src/gallium/drivers/i915/i915_surface.c +++ b/src/gallium/drivers/i915/i915_surface.c @@ -43,11 +43,10 @@ */ static void i915_surface_copy(struct pipe_context *pipe, - struct pipe_resource *dst, struct pipe_subresource subdst, + struct pipe_resource *dst, unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height) + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) { struct i915_texture *dst_tex = i915_texture(dst); struct i915_texture *src_tex = i915_texture(src); @@ -55,29 +54,17 @@ i915_surface_copy(struct pipe_context *pipe, struct pipe_resource *spt = &src_tex->b.b; unsigned dst_offset, src_offset; /* in bytes */ - if (dst->target == PIPE_TEXTURE_CUBE) { - dst_offset = dst_tex->image_offset[subdst.level][subdst.face]; - } - else if (dst->target == PIPE_TEXTURE_3D) { - dst_offset = dst_tex->image_offset[subdst.level][dstz]; - } - else { - dst_offset = dst_tex->image_offset[subdst.level][0]; - assert(subdst.face == 0); + /* XXX cannot copy 3d regions at this time */ + assert(src_box->depth == 1); + if (dst->target != PIPE_TEXTURE_CUBE && + dst->target != PIPE_TEXTURE_3D) assert(dstz == 0); - } - if (src->target == PIPE_TEXTURE_CUBE) { - src_offset = src_tex->image_offset[subsrc.level][subsrc.face]; - } - else if (src->target == PIPE_TEXTURE_3D) { - src_offset = src_tex->image_offset[subsrc.level][srcz]; - } - else { - src_offset = src_tex->image_offset[subsrc.level][0]; - assert(subsrc.face == 0); - assert(srcz == 0); - } + dst_offset = i915_texture_offset(dst_tex, dst_level, dstz); + if (src->target != PIPE_TEXTURE_CUBE && + src->target != PIPE_TEXTURE_3D) + 
assert(src_box->z == 0); + src_offset = i915_texture_offset(src_tex, src_level, src_box->z); assert( dst != src ); assert( util_format_get_blocksize(dpt->format) == util_format_get_blocksize(spt->format) ); @@ -90,7 +77,8 @@ i915_surface_copy(struct pipe_context *pipe, util_format_get_blocksize(dpt->format), (unsigned short) src_tex->stride, src_tex->buffer, src_offset, (unsigned short) dst_tex->stride, dst_tex->buffer, dst_offset, - (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height ); + (short) src_box->x, (short) src_box->y, (short) dstx, (short) dsty, + (short) src_box->width, (short) src_box->height ); } @@ -104,6 +92,7 @@ i915_clear_render_target(struct pipe_context *pipe, struct i915_texture *tex = i915_texture(dst->texture); struct pipe_resource *pt = &tex->b.b; union util_color uc; + unsigned offset = i915_texture_offset(tex, dst->u.tex.level, dst->u.tex.first_layer); assert(util_format_get_blockwidth(pt->format) == 1); assert(util_format_get_blockheight(pt->format) == 1); @@ -113,7 +102,7 @@ i915_clear_render_target(struct pipe_context *pipe, util_format_get_blocksize(pt->format), XY_COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB, (unsigned short) tex->stride, - tex->buffer, dst->offset, + tex->buffer, offset, (short) dstx, (short) dsty, (short) width, (short) height, uc.ui ); @@ -132,6 +121,7 @@ i915_clear_depth_stencil(struct pipe_context *pipe, struct pipe_resource *pt = &tex->b.b; unsigned packedds; unsigned mask = 0; + unsigned offset = i915_texture_offset(tex, dst->u.tex.level, dst->u.tex.first_layer); assert(util_format_get_blockwidth(pt->format) == 1); assert(util_format_get_blockheight(pt->format) == 1); @@ -151,7 +141,7 @@ i915_clear_depth_stencil(struct pipe_context *pipe, util_format_get_blocksize(pt->format), mask, (unsigned short) tex->stride, - tex->buffer, dst->offset, + tex->buffer, offset, (short) dstx, (short) dsty, (short) width, (short) height, packedds ); @@ -163,42 +153,37 @@ i915_clear_depth_stencil(struct pipe_context *pipe, static struct pipe_surface * -i915_get_tex_surface(struct pipe_screen *screen, - struct pipe_resource *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned flags) +i915_create_surface(struct pipe_context *ctx, + struct pipe_resource *pt, + const struct pipe_surface *surf_tmpl) { - struct i915_texture *tex = i915_texture(pt); struct pipe_surface *ps; - unsigned offset; /* in bytes */ - if (pt->target == PIPE_TEXTURE_CUBE) { - offset = tex->image_offset[level][face]; - } - else if (pt->target == PIPE_TEXTURE_3D) { - offset = tex->image_offset[level][zslice]; - } - else { - offset = tex->image_offset[level][0]; - assert(face == 0); - assert(zslice == 0); - } + assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); + if (pt->target != PIPE_TEXTURE_CUBE && + pt->target != PIPE_TEXTURE_3D) + assert(surf_tmpl->u.tex.first_layer == 0); ps = CALLOC_STRUCT(pipe_surface); if (ps) { + /* could subclass pipe_surface and store offset as it used to do */ pipe_reference_init(&ps->reference, 1); pipe_resource_reference(&ps->texture, pt); - ps->format = pt->format; - ps->width = u_minify(pt->width0, level); - ps->height = u_minify(pt->height0, level); - ps->offset = offset; - ps->usage = flags; + ps->format = surf_tmpl->format; + ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level); + ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level); + ps->u.tex.level = surf_tmpl->u.tex.level; + ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + ps->u.tex.last_layer = 
surf_tmpl->u.tex.last_layer; + ps->usage = surf_tmpl->usage; + ps->context = ctx; } return ps; } static void -i915_tex_surface_destroy(struct pipe_surface *surf) +i915_surface_destroy(struct pipe_context *ctx, + struct pipe_surface *surf) { pipe_resource_reference(&surf->texture, NULL); FREE(surf); @@ -211,13 +196,6 @@ i915_init_surface_functions(struct i915_context *i915) i915->base.resource_copy_region = i915_surface_copy; i915->base.clear_render_target = i915_clear_render_target; i915->base.clear_depth_stencil = i915_clear_depth_stencil; -} - -/* No good reason for these to be in the screen. - */ -void -i915_init_screen_surface_functions(struct i915_screen *is) -{ - is->base.get_tex_surface = i915_get_tex_surface; - is->base.tex_surface_destroy = i915_tex_surface_destroy; + i915->base.create_surface = i915_create_surface; + i915->base.surface_destroy = i915_surface_destroy; } diff --git a/src/gallium/drivers/i915/i915_surface.h b/src/gallium/drivers/i915/i915_surface.h index 448106d5662..70b61de80f2 100644 --- a/src/gallium/drivers/i915/i915_surface.h +++ b/src/gallium/drivers/i915/i915_surface.h @@ -32,7 +32,6 @@ struct i915_context; struct i915_screen; void i915_init_surface_functions( struct i915_context *i915 ); -void i915_init_screen_surface_functions( struct i915_screen *is ); #endif /* I915_SCREEN_H */ diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h index 5385e403d22..24ea416f015 100644 --- a/src/gallium/drivers/i915/i915_winsys.h +++ b/src/gallium/drivers/i915/i915_winsys.h @@ -53,6 +53,7 @@ enum i915_winsys_buffer_type I915_NEW_VERTEX }; +/* These need to be in sync with the definitions of libdrm-intel! */ enum i915_winsys_buffer_tile { I915_TILE_NONE, @@ -106,7 +107,7 @@ struct i915_winsys { int (*batchbuffer_reloc)(struct i915_winsys_batchbuffer *batch, struct i915_winsys_buffer *reloc, enum i915_winsys_buffer_usage usage, - unsigned offset); + unsigned offset, bool fenced); /** * Flush a bufferbatch. @@ -130,10 +131,24 @@ struct i915_winsys { */ struct i915_winsys_buffer * (*buffer_create)(struct i915_winsys *iws, - unsigned size, unsigned alignment, + unsigned size, enum i915_winsys_buffer_type type); /** + * Create a tiled buffer. + * + * *stride, height are in bytes. The winsys tries to allocate the buffer with + * the tiling mode provide in *tiling. If tiling is no possible, *tiling will + * be set to I915_TILE_NONE. The calculated stride (incorporateing hw/kernel + * requirements) is always returned in *stride. + */ + struct i915_winsys_buffer * + (*buffer_create_tiled)(struct i915_winsys *iws, + unsigned *stride, unsigned height, + enum i915_winsys_buffer_tile *tiling, + enum i915_winsys_buffer_type type); + + /** * Creates a buffer from a handle. * Used to implement pipe_screen::resource_from_handle. * Also provides the stride information needed for the @@ -142,6 +157,7 @@ struct i915_winsys { struct i915_winsys_buffer * (*buffer_from_handle)(struct i915_winsys *iws, struct winsys_handle *whandle, + enum i915_winsys_buffer_tile *tiling, unsigned *stride); /** @@ -154,15 +170,6 @@ struct i915_winsys { unsigned stride); /** - * Fence a buffer with a fence reg. - * Not to be confused with pipe_fence_handle. - */ - int (*buffer_set_fence_reg)(struct i915_winsys *iws, - struct i915_winsys_buffer *buffer, - unsigned stride, - enum i915_winsys_buffer_tile tile); - - /** * Map a buffer. 
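The new buffer_create_tiled() entry point documented above takes the caller's preferred stride and tiling by pointer and lets the winsys override both, falling back to I915_TILE_NONE and returning the stride it actually used; this is how i915_texture_create() now allocates its buffer in the hunk earlier in this patch. The sketch below only illustrates that in/out contract with a stub allocator: the 512-byte stride rule and the refusal of Y tiling are invented here to show the caller-side pattern, and none of this is winsys code.

#include <stdio.h>

enum tile { TILE_NONE, TILE_X, TILE_Y };   /* stands in for i915_winsys_buffer_tile */

struct stub_buffer { unsigned stride; unsigned height; enum tile tiling; };

/* Same shape as buffer_create_tiled(): *stride and *tiling are both inputs
 * (the caller's preference) and outputs (what was really used). */
static struct stub_buffer
stub_buffer_create_tiled(unsigned *stride, unsigned height, enum tile *tiling)
{
   struct stub_buffer b;

   if (*tiling == TILE_Y)                  /* pretend Y tiling is unavailable */
      *tiling = TILE_NONE;
   *stride = (*stride + 511u) & ~511u;     /* pretend a 512-byte stride rule */

   b.stride = *stride;
   b.height = height;
   b.tiling = *tiling;
   return b;
}

int main(void)
{
   unsigned stride = 300;                  /* caller's minimum stride */
   enum tile tiling = TILE_Y;              /* caller's preferred tiling */
   struct stub_buffer b = stub_buffer_create_tiled(&stride, 64, &tiling);

   printf("stride=%u tiling=%d\n", b.stride, b.tiling);   /* stride=512 tiling=0 */
   return 0;
}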
*/ void *(*buffer_map)(struct i915_winsys *iws, diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index b0b09703384..a0331f80581 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -33,6 +33,7 @@ C_SOURCES = \ brw_pipe_flush.c \ brw_pipe_misc.c \ brw_pipe_sampler.c \ + brw_pipe_surface.c \ brw_pipe_vertex.c \ brw_pipe_clear.c \ brw_pipe_rast.c \ @@ -66,7 +67,6 @@ C_SOURCES = \ brw_resource_buffer.c \ brw_resource_texture.c \ brw_resource_texture_layout.c \ - brw_screen_surface.c \ brw_batchbuffer.c \ brw_winsys_debug.c \ intel_decode.c diff --git a/src/gallium/drivers/i965/SConscript b/src/gallium/drivers/i965/SConscript index 019af682f68..3ef6c880305 100644 --- a/src/gallium/drivers/i965/SConscript +++ b/src/gallium/drivers/i965/SConscript @@ -36,6 +36,8 @@ i965 = env.ConvenienceLibrary( 'brw_pipe_query.c', 'brw_pipe_rast.c', 'brw_pipe_sampler.c', + 'brw_pipe_surface.c', + 'brw_pipe_surface.c', 'brw_pipe_shader.c', 'brw_pipe_vertex.c', 'brw_resource.c', @@ -43,7 +45,6 @@ i965 = env.ConvenienceLibrary( 'brw_resource_texture.c', 'brw_resource_texture_layout.c', 'brw_screen.c', - 'brw_screen_surface.c', 'brw_structs_dump.c', 'brw_sf.c', 'brw_sf_emit.c', diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index e80067f3b19..3c935792465 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -64,13 +64,11 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch) } struct brw_batchbuffer * -brw_batchbuffer_alloc(struct brw_winsys_screen *sws, - struct brw_chipset chipset) +brw_batchbuffer_alloc(struct brw_winsys_screen *sws) { struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer); batch->sws = sws; - batch->chipset = chipset; brw_batchbuffer_reset(batch); return batch; diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h index 6ca9f617f5e..6ecb91857dd 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.h +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -26,7 +26,6 @@ struct brw_batchbuffer { struct brw_winsys_screen *sws; struct brw_winsys_buffer *buf; - struct brw_chipset chipset; /** * Values exported to speed up the writing the batchbuffer, @@ -47,8 +46,8 @@ struct brw_batchbuffer { /*@}*/ }; -struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws, - struct brw_chipset chipset ); +struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws ); + void brw_batchbuffer_free(struct brw_batchbuffer *batch); diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index ccba205e8c7..66b13ea58e2 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -66,16 +66,14 @@ compile_clip_prog( struct brw_context *brw, c.func.single_program_flow = 1; - c.chipset = brw->chipset; c.key = *key; - c.need_ff_sync = c.chipset.is_igdng; /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.header_position_offset = ATTR_SIZE; - if (c.chipset.is_igdng) + if (brw->gen == 5) delta = 3 * REG_SIZE; else delta = REG_SIZE; @@ -97,7 +95,7 @@ compile_clip_prog( struct brw_context *brw, if (c.key.output_edgeflag != BRW_OUTPUT_NOT_PRESENT) c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) c.nr_regs = (c.key.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? 
*/ else c.nr_regs = (c.key.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h index 80e3a11a370..f123b73c063 100644 --- a/src/gallium/drivers/i965/brw_clip.h +++ b/src/gallium/drivers/i965/brw_clip.h @@ -125,12 +125,10 @@ struct brw_clip_compile { GLuint last_tmp; GLboolean need_direction; - struct brw_chipset chipset; GLuint last_mrf; GLuint header_position_offset; - GLboolean need_ff_sync; GLuint nr_color_attrs; GLuint offset_color0; diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c index 66caadc4d53..4ed7362171b 100644 --- a/src/gallium/drivers/i965/brw_clip_line.c +++ b/src/gallium/drivers/i965/brw_clip_line.c @@ -32,6 +32,7 @@ #include "util/u_debug.h" #include "brw_defines.h" +#include "brw_context.h" #include "brw_eu.h" #include "brw_clip.h" @@ -41,7 +42,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) { GLuint i = 0,j; - + struct brw_context *brw = c->func.brw; /* Register usage is static, precompute here: */ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; @@ -79,7 +80,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) i++; } - if (c->need_ff_sync) { + if (brw->needs_ff_sync) { c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; } @@ -120,6 +121,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) static void clip_and_emit_line( struct brw_clip_compile *c ) { struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; struct brw_indirect vtx0 = brw_indirect(0, 0); struct brw_indirect vtx1 = brw_indirect(1, 0); struct brw_indirect newvtx0 = brw_indirect(2, 0); @@ -146,7 +148,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_clip_init_clipmask(c); /* -ve rhw workaround */ - if (c->chipset.is_965) { + if (brw->has_negative_rhw_bug) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); @@ -183,7 +185,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) * Both can be negative on GM965/G965 due to RHW workaround * if so, this object should be rejected. */ - if (c->chipset.is_965) { + if (brw->has_negative_rhw_bug) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0)); is_neg2 = brw_IF(p, BRW_EXECUTE_1); { @@ -208,7 +210,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) /* If both are positive, do nothing */ /* Only on GM965/G965 */ - if (c->chipset.is_965) { + if (brw->has_negative_rhw_bug) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); is_neg2 = brw_IF(p, BRW_EXECUTE_1); } @@ -223,7 +225,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_set_predicate_control(p, BRW_PREDICATE_NONE); } - if (c->chipset.is_965) { + if (brw->has_negative_rhw_bug) { brw_ENDIF(p, is_neg2); } } diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index 5c3ccfd8d0d..f56edf3177c 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -109,7 +109,7 @@ clip_unit_create_from_key(struct brw_context *brw, /* Although up to 16 concurrent Clip threads are allowed on IGDNG, * only 2 threads can output VUEs at a time. 
*/ - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) clip.thread4.max_threads = 16 - 1; else clip.thread4.max_threads = 2 - 1; @@ -134,7 +134,7 @@ clip_unit_create_from_key(struct brw_context *brw, clip.clip5.api_mode = BRW_CLIP_API_OGL; clip.clip5.clip_mode = key->clip_mode; - if (BRW_IS_G4X(brw)) + if (brw->is_g4x) clip.clip5.negative_w_clip_test = 1; clip.clip6.clipper_viewport_state_ptr = 0; diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c index 069524bc14f..7d400e6028b 100644 --- a/src/gallium/drivers/i965/brw_clip_tri.c +++ b/src/gallium/drivers/i965/brw_clip_tri.c @@ -30,6 +30,7 @@ */ #include "brw_defines.h" +#include "brw_context.h" #include "brw_eu.h" #include "brw_clip.h" @@ -43,6 +44,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, GLuint nr_verts ) { GLuint i = 0,j; + struct brw_context *brw = c->func.brw; /* Register usage is static, precompute here: */ @@ -69,7 +71,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, for (j = 0; j < 3; j++) { GLuint delta = c->key.nr_attrs*16 + 32; - if (c->chipset.is_igdng) + if (brw->gen == 5) delta = c->key.nr_attrs * 16 + 32 * 3; brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); @@ -110,7 +112,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, i++; } - if (c->need_ff_sync) { + if (brw->needs_ff_sync) { c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; } @@ -563,7 +565,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) /* if -ve rhw workaround bit is set, do cliptest */ - if (c->chipset.is_965) { + if (p->brw->has_negative_rhw_bug) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c index 23e51ee9bcd..5713f25da7c 100644 --- a/src/gallium/drivers/i965/brw_clip_util.c +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -31,6 +31,7 @@ #include "brw_defines.h" +#include "brw_context.h" #include "brw_eu.h" #include "brw_clip.h" @@ -126,6 +127,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, GLboolean force_edgeflag) { struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; struct brw_reg tmp = get_tmp(c); GLuint i; @@ -142,7 +144,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, for (i = 0; i < c->key.nr_attrs; i++) { GLuint delta = i*16 + 32; - if (c->chipset.is_igdng) + if (brw->gen == 5) delta = i * 16 + 32 * 3; if (delta == c->offset_edgeflag) { @@ -176,7 +178,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, if (i & 1) { GLuint delta = i*16 + 32; - if (c->chipset.is_igdng) + if (brw->gen == 5) delta = i * 16 + 32 * 3; brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); @@ -350,7 +352,8 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c ) void brw_clip_ff_sync(struct brw_clip_compile *c) { - if (c->need_ff_sync) { + struct brw_context *brw = c->func.brw; + if (brw->needs_ff_sync) { struct brw_compile *p = &c->func; struct brw_instruction *need_ff_sync; @@ -379,7 +382,8 @@ void brw_clip_ff_sync(struct brw_clip_compile *c) void brw_clip_init_ff_sync(struct brw_clip_compile *c) { - if (c->need_ff_sync) { + struct brw_context *brw = c->func.brw; + if (brw->needs_ff_sync) { struct brw_compile *p = &c->func; brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0)); diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index 227bc790deb..41a468a32f0 100644 --- 
a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -107,7 +107,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen, void *priv) { struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); - + struct brw_screen *brs = brw_screen(screen); if (!brw) { debug_printf("%s: failed to alloc context\n", __FUNCTION__); return NULL; @@ -117,7 +117,10 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen, brw->base.priv = priv; brw->base.destroy = brw_destroy_context; brw->sws = brw_screen(screen)->sws; - brw->chipset = brw_screen(screen)->chipset; + brw->is_g4x = brs->is_g4x; + brw->needs_ff_sync = brs->needs_ff_sync; + brw->has_negative_rhw_bug = brs->has_negative_rhw_bug; + brw->gen = brs->gen; brw_init_resource_functions( brw ); brw_pipe_blend_init( brw ); @@ -131,6 +134,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen, brw_pipe_shader_init( brw ); brw_pipe_vertex_init( brw ); brw_pipe_clear_init( brw ); + brw_pipe_surface_init( brw ); brw_hw_cc_init( brw ); @@ -144,7 +148,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen, make_empty_list(&brw->query.active_head); - brw->batch = brw_batchbuffer_alloc( brw->sws, brw->chipset ); + brw->batch = brw_batchbuffer_alloc( brw->sws ); if (brw->batch == NULL) goto fail; diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 56d351f97d1..45fc26dd7d8 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -529,7 +529,14 @@ struct brw_query_object { struct brw_context { struct pipe_context base; - struct brw_chipset chipset; + int gen; + boolean has_negative_rhw_bug; + boolean needs_ff_sync; + boolean is_g4x; + + int urb_size; + int vs_max_threads; + int wm_max_threads; struct brw_winsys_screen *sws; @@ -821,6 +828,7 @@ void brw_pipe_sampler_cleanup( struct brw_context *brw ); void brw_pipe_shader_cleanup( struct brw_context *brw ); void brw_pipe_vertex_cleanup( struct brw_context *brw ); void brw_pipe_clear_cleanup( struct brw_context *brw ); +void brw_pipe_surface_init( struct brw_context *brw ); void brw_hw_cc_init( struct brw_context *brw ); void brw_hw_cc_cleanup( struct brw_context *brw ); @@ -853,11 +861,5 @@ brw_context( struct pipe_context *ctx ) return (struct brw_context *)ctx; } - -#define BRW_IS_965(brw) ((brw)->chipset.is_965) -#define BRW_IS_IGDNG(brw) ((brw)->chipset.is_igdng) -#define BRW_IS_G4X(brw) ((brw)->chipset.is_g4x) - - #endif diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h index e201ce4d7ce..7547eae97c7 100644 --- a/src/gallium/drivers/i965/brw_defines.h +++ b/src/gallium/drivers/i965/brw_defines.h @@ -463,6 +463,13 @@ #define BRW_COMPRESSION_2NDHALF 1 #define BRW_COMPRESSION_COMPRESSED 2 +#define GEN6_COMPRESSION_1Q 0 +#define GEN6_COMPRESSION_2Q 1 +#define GEN6_COMPRESSION_3Q 2 +#define GEN6_COMPRESSION_4Q 3 +#define GEN6_COMPRESSION_1H 0 +#define GEN6_COMPRESSION_2H 2 + #define BRW_CONDITIONAL_NONE 0 #define BRW_CONDITIONAL_Z 1 #define BRW_CONDITIONAL_NZ 2 @@ -502,6 +509,27 @@ #define BRW_MASK_ENABLE 0 #define BRW_MASK_DISABLE 1 +/** @{ + * + * Gen6 has replaced "mask enable/disable" with WECtrl, which is + * effectively the same but much simpler to think about. Now, there + * are two contributors ANDed together to whether channels are + * executed: The predication on the instruction, and the channel write + * enable. + */ +/** + * This is the default value. 
It means that a channel's write enable is set + * if the per-channel IP is pointing at this instruction. + */ +#define BRW_WE_NORMAL 0 +/** + * This is used like BRW_MASK_DISABLE, and causes all channels to have + * their write enable set. Note that predication still contributes to + * whether the channel actually gets written. + */ +#define BRW_WE_ALL 1 +/** @} */ + #define BRW_OPCODE_MOV 1 #define BRW_OPCODE_SEL 2 #define BRW_OPCODE_NOT 4 @@ -531,6 +559,8 @@ #define BRW_OPCODE_POP 47 #define BRW_OPCODE_WAIT 48 #define BRW_OPCODE_SEND 49 +#define BRW_OPCODE_SENDC 50 +#define BRW_OPCODE_MATH 56 #define BRW_OPCODE_ADD 64 #define BRW_OPCODE_MUL 65 #define BRW_OPCODE_AVG 66 @@ -550,6 +580,7 @@ #define BRW_OPCODE_DP2 87 #define BRW_OPCODE_DPA2 88 #define BRW_OPCODE_LINE 89 +#define BRW_OPCODE_PLN 90 #define BRW_OPCODE_NOP 126 #define BRW_PREDICATE_NONE 0 @@ -599,6 +630,8 @@ #define BRW_ARF_NOTIFICATION_COUNT 0x90 #define BRW_ARF_IP 0xA0 +#define BRW_MRF_COMPR4 (1 << 7) + #define BRW_AMASK 0 #define BRW_IMASK 1 #define BRW_LMASK 2 @@ -645,13 +678,14 @@ #define BRW_POLYGON_FACING_BACK 1 #define BRW_MESSAGE_TARGET_NULL 0 -#define BRW_MESSAGE_TARGET_MATH 1 +#define BRW_MESSAGE_TARGET_MATH 1 /* reserved on GEN6 */ #define BRW_MESSAGE_TARGET_SAMPLER 2 #define BRW_MESSAGE_TARGET_GATEWAY 3 -#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 -#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 /* sampler cache on GEN6 */ +#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 /* render cache on Gen6 */ #define BRW_MESSAGE_TARGET_URB 6 #define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 +#define BRW_MESSAGE_TARGET_CONST_CACHE 9 /* GEN6 */ #define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 #define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 @@ -674,20 +708,15 @@ #define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 #define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3 - -/* for IGDNG only */ +#define BRW_SAMPLER_MESSAGE_SAMPLE_GEN5 0 +#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5 1 +#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_GEN5 2 +#define BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5 3 +#define BRW_SAMPLER_MESSAGE_SAMPLE_DERIVS_GEN5 4 +#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5 5 +#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE_GEN5 6 + +/* for GEN5 only */ #define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 #define BRW_SAMPLER_SIMD_MODE_SIMD8 1 #define BRW_SAMPLER_SIMD_MODE_SIMD16 2 @@ -705,10 +734,24 @@ #define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 #define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 +/* This one stays the same across generations. 
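The comment block above describes the Gen6 execution rule as two conditions ANDed together: the instruction's predication and the per-channel write enable, where BRW_WE_NORMAL ties the write enable to the per-channel IP and BRW_WE_ALL forces it on for every channel. A tiny illustrative sketch of that rule follows; the function and parameter names are ad hoc, not driver code.

#include <stdbool.h>
#include <stdio.h>

/* A channel is written iff its predicate passes AND its write enable is set;
 * "we_all" models BRW_WE_ALL, "ip_at_insn" models the BRW_WE_NORMAL case. */
static bool channel_writes(bool predicate_pass, bool we_all, bool ip_at_insn)
{
   bool write_enable = we_all || ip_at_insn;
   return predicate_pass && write_enable;
}

int main(void)
{
   printf("%d %d %d\n",
          channel_writes(true,  false, false),  /* WE_NORMAL, IP elsewhere -> 0 */
          channel_writes(true,  true,  false),  /* WE_ALL                  -> 1 */
          channel_writes(false, true,  true));  /* predication still gates -> 0 */
   return 0;
}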
*/ #define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +/* GEN4 */ #define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 -#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 +#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2 #define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 +/* G45, GEN5 */ +#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 +#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 +#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3 +#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 +#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 +/* GEN6 */ +#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 +#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 +#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 +#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5 +#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 #define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 #define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 @@ -728,6 +771,16 @@ #define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 #define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 +/* GEN6 */ +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE_GEN6 7 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE_GEN6 8 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE_GEN6 9 +#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE_GEN6 10 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORLD_SCATTERED_WRITE_GEN6 11 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6 12 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE_GEN6 13 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE_GEN6 14 + #define BRW_MATH_FUNCTION_INV 1 #define BRW_MATH_FUNCTION_LOG 2 #define BRW_MATH_FUNCTION_EXP 3 @@ -736,7 +789,8 @@ #define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ #define BRW_MATH_FUNCTION_COS 7 /* was 8 */ #define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ -#define BRW_MATH_FUNCTION_TAN 9 +#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */ +#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */ #define BRW_MATH_FUNCTION_POW 10 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 @@ -787,17 +841,33 @@ #define CMD_PIPELINED_STATE_POINTERS 0x7800 #define CMD_BINDING_TABLE_PTRS 0x7801 +# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8) +# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) +# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12) + +#define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */ +# define PS_SAMPLER_STATE_CHANGE (1 << 12) +# define GS_SAMPLER_STATE_CHANGE (1 << 9) +# define VS_SAMPLER_STATE_CHANGE (1 << 8) +/* DW1: VS */ +/* DW2: GS */ +/* DW3: PS */ #define CMD_VERTEX_BUFFER 0x7808 # define BRW_VB0_INDEX_SHIFT 27 +# define GEN6_VB0_INDEX_SHIFT 26 # define BRW_VB0_ACCESS_VERTEXDATA (0 << 26) # define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26) +# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20) +# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20) # define BRW_VB0_PITCH_SHIFT 0 #define CMD_VERTEX_ELEMENT 0x7809 # define BRW_VE0_INDEX_SHIFT 27 +# define GEN6_VE0_INDEX_SHIFT 26 # define BRW_VE0_FORMAT_SHIFT 16 # define BRW_VE0_VALID (1 << 26) +# define GEN6_VE0_VALID (1 << 25) # define BRW_VE0_SRC_OFFSET_SHIFT 0 # define BRW_VE1_COMPONENT_NOSTORE 0 # define BRW_VE1_COMPONENT_STORE_SRC 1 @@ -816,6 +886,236 @@ #define CMD_INDEX_BUFFER 0x780a #define CMD_VF_STATISTICS_965 0x780b #define CMD_VF_STATISTICS_GM45 0x680b +#define CMD_3D_CC_STATE_POINTERS 0x780e /* GEN6+ */ + +#define CMD_URB 0x7805 /* GEN6+ */ +# define GEN6_URB_VS_SIZE_SHIFT 16 +# define 
GEN6_URB_VS_ENTRIES_SHIFT 0 +# define GEN6_URB_GS_ENTRIES_SHIFT 8 +# define GEN6_URB_GS_SIZE_SHIFT 0 + +#define CMD_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */ +# define GEN6_CC_VIEWPORT_MODIFY (1 << 12) +# define GEN6_SF_VIEWPORT_MODIFY (1 << 11) +# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10) + +#define CMD_3D_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */ + +#define CMD_3D_VS_STATE 0x7810 /* GEN6+ */ +/* DW2 */ +# define GEN6_VS_SPF_MODE (1 << 31) +# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_VS_SAMPLER_COUNT_SHIFT 27 +# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16) +/* DW4 */ +# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20 +# define GEN6_VS_URB_READ_LENGTH_SHIFT 11 +# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW5 */ +# define GEN6_VS_MAX_THREADS_SHIFT 25 +# define GEN6_VS_STATISTICS_ENABLE (1 << 10) +# define GEN6_VS_CACHE_DISABLE (1 << 1) +# define GEN6_VS_ENABLE (1 << 0) + +#define CMD_3D_GS_STATE 0x7811 /* GEN6+ */ +/* DW2 */ +# define GEN6_GS_SPF_MODE (1 << 31) +# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_GS_SAMPLER_COUNT_SHIFT 27 +# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW4 */ +# define GEN6_GS_URB_READ_LENGTH_SHIFT 11 +# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4 +# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0 +/* DW5 */ +# define GEN6_GS_MAX_THREADS_SHIFT 25 +# define GEN6_GS_STATISTICS_ENABLE (1 << 10) +# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9) +# define GEN6_GS_RENDERING_ENABLE (1 << 8) +/* DW6 */ +# define GEN6_GS_ENABLE (1 << 15) + +#define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */ +/* DW1 */ +# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10) +/** + * Just does cheap culling based on the clip distance. Bits must be + * disjoint with USER_CLIP_CLIP_DISTANCE bits. + */ +# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT 0 +/* DW2 */ +# define GEN6_CLIP_ENABLE (1 << 31) +# define GEN6_CLIP_API_OGL (0 << 30) +# define GEN6_CLIP_API_D3D (1 << 30) +# define GEN6_CLIP_XY_TEST (1 << 28) +# define GEN6_CLIP_Z_TEST (1 << 27) +# define GEN6_CLIP_GB_TEST (1 << 26) +/** 8-bit field of which user clip distances to clip aganist. 
*/ +# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT 16 +# define GEN6_CLIP_MODE_NORMAL (0 << 13) +# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13) +# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13) +# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9) +# define GEN6_CLIP_BARYCENTRIC_ENABLE (1 << 8) +# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4 +# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2 +# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0 +/* DW3 */ +# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17 +# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6 +# define GEN6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5) + +#define CMD_3D_SF_STATE 0x7813 /* GEN6+ */ +/* DW1 */ +# define GEN6_SF_NUM_OUTPUTS_SHIFT 22 +# define GEN6_SF_SWIZZLE_ENABLE (1 << 21) +# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20) +# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2 */ +# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11) +# define GEN6_SF_STATISTICS_ENABLE (1 << 10) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7) +# define GEN6_SF_FRONT_SOLID (0 << 5) +# define GEN6_SF_FRONT_WIREFRAME (1 << 5) +# define GEN6_SF_FRONT_POINT (2 << 5) +# define GEN6_SF_BACK_SOLID (0 << 3) +# define GEN6_SF_BACK_WIREFRAME (1 << 3) +# define GEN6_SF_BACK_POINT (2 << 3) +# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1) +# define GEN6_SF_WINDING_CCW (1 << 0) +/* DW3 */ +# define GEN6_SF_LINE_AA_ENABLE (1 << 31) +# define GEN6_SF_CULL_BOTH (0 << 29) +# define GEN6_SF_CULL_NONE (1 << 29) +# define GEN6_SF_CULL_FRONT (2 << 29) +# define GEN6_SF_CULL_BACK (3 << 29) +# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */ +# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16) +# define GEN6_SF_SCISSOR_ENABLE (1 << 11) +# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 8) +# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8) +# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8) +# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8) +/* DW4 */ +# define GEN6_SF_TRI_PROVOKE_SHIFT 29 +# define GEN6_SF_LINE_PROVOKE_SHIFT 27 +# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25 +# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14) +# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14) +# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12) +# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12) +# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11) +# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */ +/* DW5: depth offset constant */ +/* DW6: depth offset scale */ +/* DW7: depth offset clamp */ +/* DW8 */ +# define ATTRIBUTE_1_OVERRIDE_W (1 << 31) +# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30) +# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29) +# define ATTRIBUTE_1_OVERRIDE_X (1 << 28) +# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25 +# define ATTRIBUTE_1_SWIZZLE_SHIFT 22 +# define ATTRIBUTE_1_SOURCE_SHIFT 16 +# define ATTRIBUTE_0_OVERRIDE_W (1 << 15) +# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14) +# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13) +# define ATTRIBUTE_0_OVERRIDE_X (1 << 12) +# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9 +# define ATTRIBUTE_0_SWIZZLE_SHIFT 6 +# define ATTRIBUTE_0_SOURCE_SHIFT 0 + +# define ATTRIBUTE_SWIZZLE_INPUTATTR 0 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3 +# define ATTRIBUTE_SWIZZLE_SHIFT 6 + +/* DW16: Point sprite texture coordinate enables */ +/* DW17: Constant 
interpolation enables */ +/* DW18: attr 0-7 wrap shortest enables */ +/* DW19: attr 8-16 wrap shortest enables */ + +#define CMD_3D_WM_STATE 0x7814 /* GEN6+ */ +/* DW1: kernel pointer */ +/* DW2 */ +# define GEN6_WM_SPF_MODE (1 << 31) +# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_WM_SAMPLER_COUNT_SHIFT 27 +# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16) +/* DW3: scratch space */ +/* DW4 */ +# define GEN6_WM_STATISTICS_ENABLE (1 << 31) +# define GEN6_WM_DEPTH_CLEAR (1 << 30) +# define GEN6_WM_DEPTH_RESOLVE (1 << 28) +# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16 +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8 +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0 +/* DW5 */ +# define GEN6_WM_MAX_THREADS_SHIFT 25 +# define GEN6_WM_KILL_ENABLE (1 << 22) +# define GEN6_WM_COMPUTED_DEPTH (1 << 21) +# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20) +# define GEN6_WM_DISPATCH_ENABLE (1 << 19) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16) +# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14) +# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14) +# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14) +# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14) +# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13) +# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 11) +# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9) +# define GEN6_WM_USES_SOURCE_W (1 << 8) +# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7) +# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2) +# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1) +# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20 +# define GEN6_WM_POSOFFSET_NONE (0 << 18) +# define GEN6_WM_POSOFFSET_CENTROID (2 << 18) +# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18) +# define GEN6_WM_POSITION_ZW_PIXEL (0 << 16) +# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16) +# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16) +# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) +# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) +# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) +# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) +# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) +# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) +# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9) +# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1) +# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1) +# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1) +# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1) +# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0) +/* DW7: kernel 1 pointer */ +/* DW8: kernel 2 pointer */ + +#define CMD_3D_CONSTANT_VS_STATE 0x7815 /* GEN6+ */ +#define CMD_3D_CONSTANT_GS_STATE 0x7816 /* GEN6+ */ +#define CMD_3D_CONSTANT_PS_STATE 0x7817 /* GEN6+ */ +# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15) +# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14) +# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13) +# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12) + +#define CMD_3D_SAMPLE_MASK 0x7818 /* GEN6+ */ #define CMD_DRAW_RECT 0x7900 #define CMD_BLEND_CONSTANT_COLOR 0x7901 @@ -827,6 +1127,25 @@ #define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 #define CMD_AA_LINE_PARAMETERS 0x790a +#define CMD_GS_SVB_INDEX 0x790b /* CTG+ */ +/* DW1 */ +# define SVB_INDEX_SHIFT 29 +# 
define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */ +/* DW2: SVB index */ +/* DW3: SVB maximum index */ + +#define CMD_3D_MULTISAMPLE 0x790d /* SNB+ */ +/* DW1 */ +# define MS_PIXEL_LOCATION_CENTER (0 << 4) +# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4) +# define MS_NUMSAMPLES_1 (0 << 1) +# define MS_NUMSAMPLES_4 (2 << 1) +# define MS_NUMSAMPLES_8 (3 << 1) + +#define CMD_3D_CLEAR_PARAMS 0x7910 /* ILK+ */ +# define DEPTH_CLEAR_VALID (1 << 15) +/* DW1: depth clear value */ + #define CMD_PIPE_CONTROL 0x7a00 #define CMD_3D_PRIM 0x7b00 @@ -839,8 +1158,8 @@ #define R02_PRIM_END 0x1 #define R02_PRIM_START 0x2 -#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \ - (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */ +#define URB_SIZES(brw) (brw->gen == 5 ? 1024 : \ + (brw->is_g4x ? 384 : 256)) /* 512 bit units */ diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c index 28c83515ba9..b093569f0cf 100644 --- a/src/gallium/drivers/i965/brw_disasm.c +++ b/src/gallium/drivers/i965/brw_disasm.c @@ -49,12 +49,14 @@ struct { [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, @@ -69,13 +71,14 @@ struct { [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 }, [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 }, - [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 }, [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 }, [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, @@ -144,7 +147,6 @@ char *chan_sel[4] = { }; char *dest_condmod[16] = { - [0] = NULL }; char *debug_ctrl[2] = { @@ -157,6 +159,16 @@ char *saturate[2] = { [1] = ".sat" }; +char *accwr[2] = { + [0] = "", + [1] = "AccWrEnable" +}; + +char *wectrl[2] = { + [0] = "WE_normal", + [1] = "WE_all" +}; + char *exec_size[8] = { [0] = "1", [1] = "2", @@ -204,6 +216,7 @@ char *compr_ctrl[4] = { [0] = "", [1] = "sechalf", [2] = "compr", + [3] = "compr4", }; char *dep_ctrl[4] = { @@ -233,6 +246,16 @@ char *reg_encoding[8] = { [7] = "F" }; +int reg_type_size[8] = { + [0] = 4, + [1] = 4, + [2] = 2, + [3] = 2, + [4] = 1, + [5] = 1, + [7] = 4 +}; + char 
*imm_encoding[8] = { [0] = "UD", [1] = "D", @@ -321,6 +344,11 @@ char *math_precision[2] = { [1] = "partial_precision" }; +char *urb_opcode[2] = { + [0] = "urb_write", + [1] = "ff_sync", +}; + char *urb_swizzle[4] = { [BRW_URB_SWIZZLE_NONE] = "", [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave", @@ -416,6 +444,11 @@ static int print_opcode (FILE *file, int id) static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) { int err = 0; + + /* Clear the Compr4 instruction compression bit. */ + if (_reg_file == BRW_MESSAGE_REGISTER_FILE) + _reg_nr &= ~(1 << 7); + if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) { switch (_reg_nr & 0xf0) { case BRW_ARF_NULL: @@ -427,6 +460,9 @@ static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) case BRW_ARF_ACCUMULATOR: format (file, "acc%d", _reg_nr & 0x0f); break; + case BRW_ARF_FLAG: + format (file, "f%d", _reg_nr & 0x0f); + break; case BRW_ARF_MASK: format (file, "mask%d", _reg_nr & 0x0f); break; @@ -457,7 +493,7 @@ static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) return err; } -static int dest (FILE *file, const struct brw_instruction *inst) +static int dest (FILE *file, struct brw_instruction *inst) { int err = 0; @@ -469,7 +505,8 @@ static int dest (FILE *file, const struct brw_instruction *inst) if (err == -1) return 0; if (inst->bits1.da1.dest_subreg_nr) - format (file, ".%d", inst->bits1.da1.dest_subreg_nr); + format (file, ".%d", inst->bits1.da1.dest_subreg_nr / + reg_type_size[inst->bits1.da1.dest_reg_type]); format (file, "<%d>", inst->bits1.da1.dest_horiz_stride); err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL); } @@ -477,7 +514,8 @@ static int dest (FILE *file, const struct brw_instruction *inst) { string (file, "g[a0"); if (inst->bits1.ia1.dest_subreg_nr) - format (file, ".%d", inst->bits1.ia1.dest_subreg_nr); + format (file, ".%d", inst->bits1.ia1.dest_subreg_nr / + reg_type_size[inst->bits1.ia1.dest_reg_type]); if (inst->bits1.ia1.dest_indirect_offset) format (file, " %d", inst->bits1.ia1.dest_indirect_offset); string (file, "]"); @@ -493,7 +531,8 @@ static int dest (FILE *file, const struct brw_instruction *inst) if (err == -1) return 0; if (inst->bits1.da16.dest_subreg_nr) - format (file, ".%d", inst->bits1.da16.dest_subreg_nr); + format (file, ".%d", inst->bits1.da16.dest_subreg_nr / + reg_type_size[inst->bits1.da16.dest_reg_type]); string (file, "<1>"); err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL); err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL); @@ -534,7 +573,7 @@ static int src_da1 (FILE *file, GLuint type, GLuint _reg_file, if (err == -1) return 0; if (sub_reg_num) - format (file, ".%d", sub_reg_num); + format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */ src_align1_region (file, _vert_stride, _width, _horiz_stride); err |= control (file, "src reg encoding", reg_encoding, type, NULL); return err; @@ -588,11 +627,12 @@ static int src_da16 (FILE *file, if (err == -1) return 0; if (_subreg_nr) - format (file, ".%d", _subreg_nr); + /* bit4 for subreg number byte addressing. Make this same meaning as + in da1 case, so output looks consistent. 
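Several of the disassembler hunks above divide the encoded subregister number by reg_type_size[] before printing it: the instruction stores the subregister as a byte offset, and dividing by the element size of the register type yields the element index that the assembly syntax uses. A small worked example of that conversion follows; the table copies the sizes from the new reg_type_size[] array, and type 7 is the float ("F") encoding per reg_encoding[].

#include <stdio.h>

/* Bytes per element for each register type encoding, as in reg_type_size[]. */
static const int reg_type_size[8] = { 4, 4, 2, 2, 1, 1, 0, 4 };

int main(void)
{
   int type = 7;                /* "F": 4-byte floats */
   int subreg_byte = 8;         /* byte offset encoded in the instruction */

   /* A float register at byte offset 8 prints as subregister .2 */
   printf(".%d\n", subreg_byte / reg_type_size[type]);
   return 0;
}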
*/ + format (file, ".%d", 16 / reg_type_size[_reg_type]); string (file, "<"); err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); - string (file, ",1,1>"); - err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL); + string (file, ",4,1>"); /* * Three kinds of swizzle display: * identity - nothing printed @@ -619,11 +659,12 @@ static int src_da16 (FILE *file, err |= control (file, "channel select", chan_sel, swz_z, NULL); err |= control (file, "channel select", chan_sel, swz_w, NULL); } + err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL); return err; } -static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) { +static int imm (FILE *file, GLuint type, struct brw_instruction *inst) { switch (type) { case BRW_REGISTER_TYPE_UD: format (file, "0x%08xUD", inst->bits3.ud); @@ -652,7 +693,7 @@ static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) { return 0; } -static int src0 (FILE *file, const struct brw_instruction *inst) +static int src0 (FILE *file, struct brw_instruction *inst) { if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE) return imm (file, inst->bits1.da1.src0_reg_type, @@ -712,7 +753,7 @@ static int src0 (FILE *file, const struct brw_instruction *inst) } } -static int src1 (FILE *file, const struct brw_instruction *inst) +static int src1 (FILE *file, struct brw_instruction *inst) { if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) return imm (file, inst->bits1.da1.src1_reg_type, @@ -772,7 +813,7 @@ static int src1 (FILE *file, const struct brw_instruction *inst) } } -int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) +int brw_disasm_insn (FILE *file, struct brw_instruction *inst, int gen) { int err = 0; int space = 0; @@ -822,7 +863,8 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) err |= src1 (file, inst); } - if (inst->header.opcode == BRW_OPCODE_SEND) { + if (inst->header.opcode == BRW_OPCODE_SEND || + inst->header.opcode == BRW_OPCODE_SENDC) { newline (file); pad (file, 16); space = 0; @@ -842,24 +884,70 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) inst->bits3.math.precision, &space); break; case BRW_MESSAGE_TARGET_SAMPLER: - format (file, " (%d, %d, ", - inst->bits3.sampler.binding_table_index, - inst->bits3.sampler.sampler); - err |= control (file, "sampler target format", sampler_target_format, - inst->bits3.sampler.return_format, NULL); - string (file, ")"); + if (gen >= 5) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.sampler_gen5.binding_table_index, + inst->bits3.sampler_gen5.sampler, + inst->bits3.sampler_gen5.msg_type, + inst->bits3.sampler_gen5.simd_mode); + } else if (0 /* FINISHME: is_g4x */) { + format (file, " (%d, %d)", + inst->bits3.sampler_g4x.binding_table_index, + inst->bits3.sampler_g4x.sampler); + } else { + format (file, " (%d, %d, ", + inst->bits3.sampler.binding_table_index, + inst->bits3.sampler.sampler); + err |= control (file, "sampler target format", sampler_target_format, + inst->bits3.sampler.return_format, NULL); + string (file, ")"); + } + break; + case BRW_MESSAGE_TARGET_DATAPORT_READ: + if (gen >= 6) { + format (file, " (%d, %d, %d, %d, %d, %d)", + inst->bits3.dp_render_cache.binding_table_index, + inst->bits3.dp_render_cache.msg_control, + inst->bits3.dp_render_cache.msg_type, + inst->bits3.dp_render_cache.send_commit_msg, + inst->bits3.dp_render_cache.msg_length, + inst->bits3.dp_render_cache.response_length); + } else if (gen >= 5 /* FINISHME: || 
is_g4x */) { + format (file, " (%d, %d, %d)", + inst->bits3.dp_read_gen5.binding_table_index, + inst->bits3.dp_read_gen5.msg_control, + inst->bits3.dp_read_gen5.msg_type); + } else { + format (file, " (%d, %d, %d)", + inst->bits3.dp_read.binding_table_index, + inst->bits3.dp_read.msg_control, + inst->bits3.dp_read.msg_type); + } break; case BRW_MESSAGE_TARGET_DATAPORT_WRITE: - format (file, " (%d, %d, %d, %d)", - inst->bits3.dp_write.binding_table_index, - (inst->bits3.dp_write.pixel_scoreboard_clear << 3) | - inst->bits3.dp_write.msg_control, - inst->bits3.dp_write.msg_type, - inst->bits3.dp_write.send_commit_msg); + if (gen >= 6) { + format (file, " (%d, %d, %d, %d, %d, %d)", + inst->bits3.dp_render_cache.binding_table_index, + inst->bits3.dp_render_cache.msg_control, + inst->bits3.dp_render_cache.msg_type, + inst->bits3.dp_render_cache.send_commit_msg, + inst->bits3.dp_render_cache.msg_length, + inst->bits3.dp_render_cache.response_length); + } else { + format (file, " (%d, %d, %d, %d)", + inst->bits3.dp_write.binding_table_index, + (inst->bits3.dp_write.pixel_scoreboard_clear << 3) | + inst->bits3.dp_write.msg_control, + inst->bits3.dp_write.msg_type, + inst->bits3.dp_write.send_commit_msg); + } break; case BRW_MESSAGE_TARGET_URB: - format (file, " %d", inst->bits3.urb.offset); - space = 1; + if (gen >= 5) { + format (file, " %d", inst->bits3.urb_gen5.offset); + } else { + format (file, " %d", inst->bits3.urb.offset); + } err |= control (file, "urb swizzle", urb_swizzle, inst->bits3.urb.swizzle_control, &space); err |= control (file, "urb allocate", urb_allocate, @@ -868,6 +956,11 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) inst->bits3.urb.used, &space); err |= control (file, "urb complete", urb_complete, inst->bits3.urb.complete, &space); + if (gen >= 5) { + format (file, " mlen %d, rlen %d\n", + inst->bits3.urb_gen5.msg_length, + inst->bits3.urb_gen5.response_length); + } break; case BRW_MESSAGE_TARGET_THREAD_SPAWNER: break; @@ -877,10 +970,17 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) } if (space) string (file, " "); - format (file, "mlen %d", - inst->bits3.generic.msg_length); - format (file, " rlen %d", - inst->bits3.generic.response_length); + if (gen >= 5) { + format (file, "mlen %d", + inst->bits3.generic_gen5.msg_length); + format (file, " rlen %d", + inst->bits3.generic_gen5.response_length); + } else { + format (file, "mlen %d", + inst->bits3.generic.msg_length); + format (file, " rlen %d", + inst->bits3.generic.response_length); + } } pad (file, 64); if (inst->header.opcode != BRW_OPCODE_NOP) { @@ -891,7 +991,8 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space); err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space); err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space); - if (inst->header.opcode == BRW_OPCODE_SEND) + if (inst->header.opcode == BRW_OPCODE_SEND || + inst->header.opcode == BRW_OPCODE_SENDC) err |= control (file, "end of thread", end_of_thread, inst->bits3.generic.end_of_thread, &space); if (space) @@ -905,13 +1006,13 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) int brw_disasm (FILE *file, - const struct brw_instruction *inst, - unsigned count) + struct brw_instruction *inst, + unsigned count, int gen) { int i, err; for (i = 0; i < count; i++) { - err = brw_disasm_insn(stderr, 
&inst[i]); + err = brw_disasm_insn(stderr, &inst[i], gen); if (err) return err; } diff --git a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h index ba5b109c483..ce451ed5a06 100644 --- a/src/gallium/drivers/i965/brw_disasm.h +++ b/src/gallium/drivers/i965/brw_disasm.h @@ -27,10 +27,10 @@ struct brw_instruction; -int brw_disasm_insn (FILE *file, const struct brw_instruction *inst); +int brw_disasm_insn (FILE *file, struct brw_instruction *inst, int gen); int brw_disasm (FILE *file, - const struct brw_instruction *inst, - unsigned count); + struct brw_instruction *inst, + unsigned count, int gen); #endif diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index ebeb1e146aa..04ec5c81a6b 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -89,13 +89,16 @@ static int brw_prepare_vertices(struct brw_context *brw) vb->buffer->width0 - vb->buffer_offset : MAX2(vb->buffer->width0 - vb->buffer_offset, vb->stride * (max_index + 1 - min_index))); + boolean flushed; - ret = u_upload_buffer( brw->vb.upload_vertex, + ret = u_upload_buffer( brw->vb.upload_vertex, + 0, vb->buffer_offset + min_index * vb->stride, size, vb->buffer, &offset, - &upload_buf ); + &upload_buf, + &flushed ); if (ret) return ret; @@ -167,7 +170,7 @@ static int brw_emit_vertex_buffers( struct brw_context *brw ) OUT_RELOC(brw->vb.vb[i].bo, BRW_USAGE_VERTEX, brw->vb.vb[i].offset); - if (BRW_IS_IGDNG(brw)) { + if (brw->gen == 5) { OUT_RELOC(brw->vb.vb[i].bo, BRW_USAGE_VERTEX, brw->vb.vb[i].bo->size - 1); @@ -251,13 +254,16 @@ static int brw_prepare_indices(struct brw_context *brw) /* Turn userbuffer into a proper hardware buffer? */ if (brw_buffer_is_user_buffer(index_buffer)) { + boolean flushed; ret = u_upload_buffer( brw->vb.upload_index, + 0, index_offset, ib_size, index_buffer, &offset, - &upload_buf ); + &upload_buf, + &flushed ); if (ret) return ret; diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c index 00d8eaccbc4..ba1159e4c32 100644 --- a/src/gallium/drivers/i965/brw_eu_emit.c +++ b/src/gallium/drivers/i965/brw_eu_emit.c @@ -255,19 +255,19 @@ static void brw_set_math_message( struct brw_context *brw, { brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { - insn->bits3.math_igdng.function = function; - insn->bits3.math_igdng.int_type = integer_type; - insn->bits3.math_igdng.precision = low_precision; - insn->bits3.math_igdng.saturate = saturate; - insn->bits3.math_igdng.data_type = dataType; - insn->bits3.math_igdng.snapshot = 0; - insn->bits3.math_igdng.header_present = 0; - insn->bits3.math_igdng.response_length = response_length; - insn->bits3.math_igdng.msg_length = msg_length; - insn->bits3.math_igdng.end_of_thread = 0; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH; - insn->bits2.send_igdng.end_of_thread = 0; + if (brw->gen == 5) { + insn->bits3.math_gen5.function = function; + insn->bits3.math_gen5.int_type = integer_type; + insn->bits3.math_gen5.precision = low_precision; + insn->bits3.math_gen5.saturate = saturate; + insn->bits3.math_gen5.data_type = dataType; + insn->bits3.math_gen5.snapshot = 0; + insn->bits3.math_gen5.header_present = 0; + insn->bits3.math_gen5.response_length = response_length; + insn->bits3.math_gen5.msg_length = msg_length; + insn->bits3.math_gen5.end_of_thread = 0; + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH; + insn->bits2.send_gen5.end_of_thread = 0; } else { insn->bits3.math.function = 
function; insn->bits3.math.int_type = integer_type; @@ -295,18 +295,18 @@ static void brw_set_ff_sync_message( struct brw_context *brw, { brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.urb_igdng.opcode = 1; - insn->bits3.urb_igdng.offset = offset; - insn->bits3.urb_igdng.swizzle_control = swizzle_control; - insn->bits3.urb_igdng.allocate = allocate; - insn->bits3.urb_igdng.used = used; - insn->bits3.urb_igdng.complete = complete; - insn->bits3.urb_igdng.header_present = 1; - insn->bits3.urb_igdng.response_length = response_length; - insn->bits3.urb_igdng.msg_length = msg_length; - insn->bits3.urb_igdng.end_of_thread = end_of_thread; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; - insn->bits2.send_igdng.end_of_thread = end_of_thread; + insn->bits3.urb_gen5.opcode = 1; + insn->bits3.urb_gen5.offset = offset; + insn->bits3.urb_gen5.swizzle_control = swizzle_control; + insn->bits3.urb_gen5.allocate = allocate; + insn->bits3.urb_gen5.used = used; + insn->bits3.urb_gen5.complete = complete; + insn->bits3.urb_gen5.header_present = 1; + insn->bits3.urb_gen5.response_length = response_length; + insn->bits3.urb_gen5.msg_length = msg_length; + insn->bits3.urb_gen5.end_of_thread = end_of_thread; + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_gen5.end_of_thread = end_of_thread; } static void brw_set_urb_message( struct brw_context *brw, @@ -322,19 +322,19 @@ static void brw_set_urb_message( struct brw_context *brw, { brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { - insn->bits3.urb_igdng.opcode = 0; /* ? */ - insn->bits3.urb_igdng.offset = offset; - insn->bits3.urb_igdng.swizzle_control = swizzle_control; - insn->bits3.urb_igdng.allocate = allocate; - insn->bits3.urb_igdng.used = used; /* ? */ - insn->bits3.urb_igdng.complete = complete; - insn->bits3.urb_igdng.header_present = 1; - insn->bits3.urb_igdng.response_length = response_length; - insn->bits3.urb_igdng.msg_length = msg_length; - insn->bits3.urb_igdng.end_of_thread = end_of_thread; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; - insn->bits2.send_igdng.end_of_thread = end_of_thread; + if (brw->gen == 5) { + insn->bits3.urb_gen5.opcode = 0; /* ? */ + insn->bits3.urb_gen5.offset = offset; + insn->bits3.urb_gen5.swizzle_control = swizzle_control; + insn->bits3.urb_gen5.allocate = allocate; + insn->bits3.urb_gen5.used = used; /* ? */ + insn->bits3.urb_gen5.complete = complete; + insn->bits3.urb_gen5.header_present = 1; + insn->bits3.urb_gen5.response_length = response_length; + insn->bits3.urb_gen5.msg_length = msg_length; + insn->bits3.urb_gen5.end_of_thread = end_of_thread; + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_gen5.end_of_thread = end_of_thread; } else { insn->bits3.urb.opcode = 0; /* ? 
*/ insn->bits3.urb.offset = offset; @@ -361,18 +361,18 @@ static void brw_set_dp_write_message( struct brw_context *brw, { brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { - insn->bits3.dp_write_igdng.binding_table_index = binding_table_index; - insn->bits3.dp_write_igdng.msg_control = msg_control; - insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear; - insn->bits3.dp_write_igdng.msg_type = msg_type; - insn->bits3.dp_write_igdng.send_commit_msg = 0; - insn->bits3.dp_write_igdng.header_present = 1; - insn->bits3.dp_write_igdng.response_length = response_length; - insn->bits3.dp_write_igdng.msg_length = msg_length; - insn->bits3.dp_write_igdng.end_of_thread = end_of_thread; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; - insn->bits2.send_igdng.end_of_thread = end_of_thread; + if (brw->gen == 5) { + insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; + insn->bits3.dp_write_gen5.msg_control = msg_control; + insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write_gen5.msg_type = msg_type; + insn->bits3.dp_write_gen5.send_commit_msg = 0; + insn->bits3.dp_write_gen5.header_present = 1; + insn->bits3.dp_write_gen5.response_length = response_length; + insn->bits3.dp_write_gen5.msg_length = msg_length; + insn->bits3.dp_write_gen5.end_of_thread = end_of_thread; + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits2.send_gen5.end_of_thread = end_of_thread; } else { insn->bits3.dp_write.binding_table_index = binding_table_index; insn->bits3.dp_write.msg_control = msg_control; @@ -398,18 +398,18 @@ static void brw_set_dp_read_message( struct brw_context *brw, { brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { - insn->bits3.dp_read_igdng.binding_table_index = binding_table_index; - insn->bits3.dp_read_igdng.msg_control = msg_control; - insn->bits3.dp_read_igdng.msg_type = msg_type; - insn->bits3.dp_read_igdng.target_cache = target_cache; - insn->bits3.dp_read_igdng.header_present = 1; - insn->bits3.dp_read_igdng.response_length = response_length; - insn->bits3.dp_read_igdng.msg_length = msg_length; - insn->bits3.dp_read_igdng.pad1 = 0; - insn->bits3.dp_read_igdng.end_of_thread = end_of_thread; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; - insn->bits2.send_igdng.end_of_thread = end_of_thread; + if (brw->gen == 5) { + insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; + insn->bits3.dp_read_gen5.msg_control = msg_control; + insn->bits3.dp_read_gen5.msg_type = msg_type; + insn->bits3.dp_read_gen5.target_cache = target_cache; + insn->bits3.dp_read_gen5.header_present = 1; + insn->bits3.dp_read_gen5.response_length = response_length; + insn->bits3.dp_read_gen5.msg_length = msg_length; + insn->bits3.dp_read_gen5.pad1 = 0; + insn->bits3.dp_read_gen5.end_of_thread = end_of_thread; + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; + insn->bits2.send_gen5.end_of_thread = end_of_thread; } else { insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ @@ -437,18 +437,18 @@ static void brw_set_sampler_message(struct brw_context *brw, assert(eot == 0); brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { - insn->bits3.sampler_igdng.binding_table_index = binding_table_index; - insn->bits3.sampler_igdng.sampler = sampler; - insn->bits3.sampler_igdng.msg_type = msg_type; - insn->bits3.sampler_igdng.simd_mode = simd_mode; - 
insn->bits3.sampler_igdng.header_present = header_present; - insn->bits3.sampler_igdng.response_length = response_length; - insn->bits3.sampler_igdng.msg_length = msg_length; - insn->bits3.sampler_igdng.end_of_thread = eot; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER; - insn->bits2.send_igdng.end_of_thread = eot; - } else if (BRW_IS_G4X(brw)) { + if (brw->gen == 5) { + insn->bits3.sampler_gen5.binding_table_index = binding_table_index; + insn->bits3.sampler_gen5.sampler = sampler; + insn->bits3.sampler_gen5.msg_type = msg_type; + insn->bits3.sampler_gen5.simd_mode = simd_mode; + insn->bits3.sampler_gen5.header_present = header_present; + insn->bits3.sampler_gen5.response_length = response_length; + insn->bits3.sampler_gen5.msg_length = msg_length; + insn->bits3.sampler_gen5.end_of_thread = eot; + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER; + insn->bits2.send_gen5.end_of_thread = eot; + } else if (brw->is_g4x) { insn->bits3.sampler_g4x.binding_table_index = binding_table_index; insn->bits3.sampler_g4x.sampler = sampler; insn->bits3.sampler_g4x.msg_type = msg_type; @@ -478,7 +478,7 @@ static struct brw_instruction *next_insn( struct brw_compile *p, if (0 && (BRW_DEBUG & DEBUG_DISASSEM)) { if (p->nr_insn) - brw_disasm_insn(stderr, &p->store[p->nr_insn-1]); + brw_disasm_insn(stderr, &p->store[p->nr_insn-1], p->brw->gen); } assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); @@ -658,7 +658,7 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, struct brw_instruction *insn; GLuint br = 1; - if (BRW_IS_IGDNG(p->brw)) + if (p->brw->gen == 5) br = 2; if (p->single_program_flow) { @@ -699,7 +699,7 @@ void brw_ENDIF(struct brw_compile *p, { GLuint br = 1; - if (BRW_IS_IGDNG(p->brw)) + if (p->brw->gen == 5) br = 2; if (p->single_program_flow) { @@ -813,7 +813,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *insn; GLuint br = 1; - if (BRW_IS_IGDNG(p->brw)) + if (p->brw->gen == 5) br = 2; if (p->single_program_flow) @@ -856,7 +856,7 @@ void brw_land_fwd_jump(struct brw_compile *p, struct brw_instruction *landing = &p->store[p->nr_insn]; GLuint jmpi = 1; - if (BRW_IS_IGDNG(p->brw)) + if (p->brw->gen == 5) jmpi = 2; assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c index 06826635a8a..2a8165b83ee 100644 --- a/src/gallium/drivers/i965/brw_gs.c +++ b/src/gallium/drivers/i965/brw_gs.c @@ -51,13 +51,13 @@ static enum pipe_error compile_gs_prog( struct brw_context *brw, memset(&c, 0, sizeof(c)); c.key = *key; - c.need_ff_sync = BRW_IS_IGDNG(brw); + c.need_ff_sync = brw->gen == 5; /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.nr_attrs = c.key.nr_attrs; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ else c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? 
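[Editorial aside] The pattern running through brw_eu_emit.c above (and the rest of this commit) replaces the BRW_IS_IGDNG()/BRW_IS_G4X() chipset macros with an integer generation number plus an is_g4x flag kept on the screen/context. A minimal sketch of the idea, using a stand-in struct rather than the driver's real brw_context; the doubled branch offset mirrors the "br = 2" / "jmpi = 2" hunks, which suggest Ironlake counts jump distances in instruction halves:

/* Stand-in context; the real driver keeps gen/is_g4x on brw_screen. */
struct ctx_example {
   int gen;        /* 4 = 965/G4x, 5 = Ironlake, 6 = Sandybridge */
   int is_g4x;
};

static int branch_jump_scale(const struct ctx_example *c)
{
   /* Offsets for IF/ELSE/WHILE/JMPI are doubled on gen5 relative to gen4,
    * matching the br/jmpi hunks above. */
   return c->gen == 5 ? 2 : 1;
}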
*/ diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c index b64ec286cea..6e070f6d756 100644 --- a/src/gallium/drivers/i965/brw_gs_state.c +++ b/src/gallium/drivers/i965/brw_gs_state.c @@ -103,7 +103,7 @@ gs_unit_create_from_key(struct brw_context *brw, else gs.thread4.max_threads = 0; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) gs.thread4.rendering_enable = 1; if (BRW_DEBUG & DEBUG_STATS) diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index b5029ceb69f..d53ce6ccfd4 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -239,7 +239,7 @@ static int prepare_depthbuffer(struct brw_context *brw) static int emit_depthbuffer(struct brw_context *brw) { struct pipe_surface *surface = brw->curr.fb.zsbuf; - unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5; + unsigned int len = (brw->is_g4x || brw->gen == 5) ? 6 : 5; if (surface == NULL) { BEGIN_BATCH(len, IGNORE_CLIPRECTS); @@ -250,7 +250,7 @@ static int emit_depthbuffer(struct brw_context *brw) OUT_BATCH(0); OUT_BATCH(0); - if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + if (brw->is_g4x || brw->gen == 5) OUT_BATCH(0); ADVANCE_BATCH(); @@ -287,17 +287,18 @@ static int emit_depthbuffer(struct brw_context *brw) OUT_BATCH(((pitch * cpp) - 1) | (format << 18) | (BRW_TILEWALK_YMAJOR << 26) | - ((surface->layout != PIPE_SURFACE_LAYOUT_LINEAR) << 27) | + /* always linear ? + ((surface->layout != PIPE_SURFACE_LAYOUT_LINEAR) << 27) |*/ (BRW_SURFACE_2D << 29)); OUT_RELOC(bo, BRW_USAGE_DEPTH_BUFFER, - surface->offset); + brw_surface(surface)->offset); OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | ((pitch - 1) << 6) | ((surface->height - 1) << 19)); OUT_BATCH(0); - if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + if (brw->is_g4x || brw->gen == 5) OUT_BATCH(0); ADVANCE_BATCH(); @@ -362,10 +363,10 @@ const struct brw_tracked_state brw_line_stipple = { /*********************************************************************** - * Misc invarient state packets + * Misc invariant state packets */ -static int upload_invarient_state( struct brw_context *brw ) +static int upload_invariant_state( struct brw_context *brw ) { { /* 0x61040000 Pipeline Select */ @@ -373,7 +374,7 @@ static int upload_invarient_state( struct brw_context *brw ) struct brw_pipeline_select ps; memset(&ps, 0, sizeof(ps)); - if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + if (brw->is_g4x || brw->gen == 5) ps.header.opcode = CMD_PIPELINE_SELECT_GM45; else ps.header.opcode = CMD_PIPELINE_SELECT_965; @@ -412,7 +413,7 @@ static int upload_invarient_state( struct brw_context *brw ) struct brw_vf_statistics vfs; memset(&vfs, 0, sizeof(vfs)); - if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + if (brw->is_g4x || brw->gen == 5) vfs.opcode = CMD_VF_STATISTICS_GM45; else vfs.opcode = CMD_VF_STATISTICS_965; @@ -423,7 +424,7 @@ static int upload_invarient_state( struct brw_context *brw ) BRW_BATCH_STRUCT(brw, &vfs); } - if (!BRW_IS_965(brw)) + if (!(brw->gen == 4)) { struct brw_aa_line_parameters balp; @@ -438,7 +439,7 @@ static int upload_invarient_state( struct brw_context *brw ) { struct brw_polygon_stipple_offset bpso; - /* This is invarient state in gallium: + /* This is invariant state in gallium: */ memset(&bpso, 0, sizeof(bpso)); bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; @@ -452,13 +453,13 @@ static int upload_invarient_state( struct brw_context *brw ) return 0; } -const struct brw_tracked_state brw_invarient_state = { +const struct brw_tracked_state 
brw_invariant_state = { .dirty = { .mesa = 0, .brw = BRW_NEW_CONTEXT, .cache = 0 }, - .emit = upload_invarient_state + .emit = upload_invariant_state }; @@ -479,7 +480,7 @@ static int upload_state_base_address( struct brw_context *brw ) /* Output the structure (brw_state_base_address) directly to the * batchbuffer, so we can emit relocations inline. */ - if (BRW_IS_IGDNG(brw)) { + if (brw->gen == 5) { BEGIN_BATCH(8, IGNORE_CLIPRECTS); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); OUT_BATCH(1); /* General state base address */ diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c index d5cff338a66..7bf3ea6994a 100644 --- a/src/gallium/drivers/i965/brw_pipe_clear.c +++ b/src/gallium/drivers/i965/brw_pipe_clear.c @@ -64,7 +64,7 @@ try_clear( struct brw_context *brw, debug_printf("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __FUNCTION__, (void *)surface->bo, pitch * cpp, - surface->base.offset, + surface->offset, x1, y1, x2 - x1, y2 - y1); BR13 = 0xf0 << 16; @@ -99,7 +99,7 @@ try_clear( struct brw_context *brw, OUT_BATCH((y2 << 16) | x2); OUT_RELOC(surface->bo, BRW_USAGE_BLIT_DEST, - surface->base.offset); + surface->offset); OUT_BATCH(value); ADVANCE_BATCH(); diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c index 4c1a6d7dcdf..c86681d1495 100644 --- a/src/gallium/drivers/i965/brw_pipe_rast.c +++ b/src/gallium/drivers/i965/brw_pipe_rast.c @@ -35,7 +35,7 @@ calculate_clip_key_rast( const struct brw_context *brw, { memset(key, 0, sizeof *key); - if (brw->chipset.is_igdng) + if (brw->gen == 5) key->clip_mode = BRW_CLIPMODE_KERNEL_CLIP; else key->clip_mode = BRW_CLIPMODE_NORMAL; diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_pipe_surface.c index f288fdbcd37..58a610089e2 100644 --- a/src/gallium/drivers/i965/brw_screen_surface.c +++ b/src/gallium/drivers/i965/brw_pipe_surface.c @@ -35,6 +35,7 @@ #include "pipe/p_screen.h" #include "brw_screen.h" +#include "brw_context.h" #include "brw_defines.h" #include "brw_resource.h" #include "brw_winsys.h" @@ -108,9 +109,10 @@ void brw_update_texture( struct brw_screen *brw_screen, * where it would be illegal (perhaps due to tiling constraints) to do * this in-place. * - * Currently not implmented, not sure if it's needed. + * Currently not implemented, not sure if it's needed. */ static struct brw_surface *create_linear_view( struct brw_screen *brw_screen, + struct pipe_context *pipe, struct brw_texture *tex, union brw_surface_id id, unsigned usage ) @@ -123,9 +125,10 @@ static struct brw_surface *create_linear_view( struct brw_screen *brw_screen, * texture's storage. 
*/ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen, - struct brw_texture *tex, - union brw_surface_id id, - unsigned usage ) + struct pipe_context *pipe, + struct brw_texture *tex, + union brw_surface_id id, + unsigned usage ) { struct brw_surface *surface; @@ -137,17 +140,18 @@ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen, /* XXX: ignoring render-to-slice-of-3d-texture */ - assert(id.bits.zslice == 0); + assert(tex->b.b.target != PIPE_TEXTURE_3D || id.bits.layer == 0); + surface->base.context = pipe; surface->base.format = tex->b.b.format; surface->base.width = u_minify(tex->b.b.width0, id.bits.level); surface->base.height = u_minify(tex->b.b.height0, id.bits.level); - surface->base.offset = tex->image_offset[id.bits.level][id.bits.face]; surface->base.usage = usage; - surface->base.zslice = id.bits.zslice; - surface->base.face = id.bits.face; - surface->base.level = id.bits.level; + surface->base.u.tex.first_layer = id.bits.layer; + surface->base.u.tex.last_layer = surface->base.u.tex.first_layer; + surface->base.u.tex.level = id.bits.level; surface->id = id; + surface->offset = tex->image_offset[id.bits.level][id.bits.layer]; surface->cpp = tex->cpp; surface->pitch = tex->pitch; surface->tiling = tex->tiling; @@ -159,26 +163,21 @@ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen, surface->ss.ss0.surface_type = BRW_SURFACE_2D; if (tex->tiling == BRW_TILING_NONE) { - surface->ss.ss1.base_addr = surface->base.offset; + surface->ss.ss1.base_addr = surface->offset; } else { - uint32_t tile_offset = surface->base.offset % 4096; - - surface->ss.ss1.base_addr = surface->base.offset - tile_offset; - - if (brw_screen->chipset.is_g4x) { - if (tex->tiling == BRW_TILING_X) { - /* Note that the low bits of these fields are missing, so - * there's the possibility of getting in trouble. - */ - surface->ss.ss5.x_offset = (tile_offset % 512) / tex->cpp / 4; - surface->ss.ss5.y_offset = tile_offset / 512 / 2; - } else { - surface->ss.ss5.x_offset = (tile_offset % 128) / tex->cpp / 4; + uint32_t tile_offset = surface->offset % 4096; + + surface->ss.ss1.base_addr = surface->offset - tile_offset; + + if (tex->tiling == BRW_TILING_X) { + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. 
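[Editorial aside] The tiled-surface path above keeps the surface-state base address 4 KiB aligned and folds the remainder of the surface offset into the x_offset/y_offset fields. A small worked sketch of the X-tiling case, assuming the usual 512-byte tile rows and the field units used in the hunk (x in 4-pixel units, y in 2-row units); cpp is bytes per pixel:

#include <stdint.h>

struct tile_xy_example { uint32_t x_px4, y_rows2; };

static struct tile_xy_example xtile_offset(uint32_t surf_offset, uint32_t cpp)
{
   uint32_t tile_offset = surf_offset % 4096;    /* bytes into the 4 KiB tile */
   struct tile_xy_example r;
   r.x_px4   = (tile_offset % 512) / cpp / 4;    /* as in ss5.x_offset */
   r.y_rows2 = tile_offset / 512 / 2;            /* as in ss5.y_offset */
   return r;
}

The Y-tiled branch in the hunk is the same idea with 128-byte tile rows.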
+ */ + surface->ss.ss5.x_offset = (tile_offset % 512) / tex->cpp / 4; + surface->ss.ss5.y_offset = tile_offset / 512 / 2; + } else { + surface->ss.ss5.x_offset = (tile_offset % 128) / tex->cpp / 4; surface->ss.ss5.y_offset = tile_offset / 128 / 2; - } - } - else { - assert(tile_offset == 0); } } @@ -198,23 +197,21 @@ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen, /* Get a surface which is view into a texture */ -static struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen, - struct pipe_resource *pt, - unsigned face, unsigned level, - unsigned zslice, - unsigned usage ) +static struct pipe_surface *brw_create_surface(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *surf_tmpl) { struct brw_texture *tex = brw_texture(pt); - struct brw_screen *bscreen = brw_screen(screen); + struct brw_screen *bscreen = brw_screen(pipe->screen); struct brw_surface *surface; union brw_surface_id id; int type; - id.bits.face = face; - id.bits.level = level; - id.bits.zslice = zslice; + assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); + id.bits.level = surf_tmpl->u.tex.level; + id.bits.layer = surf_tmpl->u.tex.first_layer; - if (need_linear_view(bscreen, tex, id, usage)) + if (need_linear_view(bscreen, tex, id, surf_tmpl->usage)) type = BRW_VIEW_LINEAR; else type = BRW_VIEW_IN_PLACE; @@ -227,10 +224,10 @@ static struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen, switch (type) { case BRW_VIEW_LINEAR: - surface = create_linear_view( bscreen, tex, id, usage ); + surface = create_linear_view( bscreen, pipe, tex, id, surf_tmpl->usage ); break; case BRW_VIEW_IN_PLACE: - surface = create_in_place_view( bscreen, tex, id, usage ); + surface = create_in_place_view( bscreen, pipe, tex, id, surf_tmpl->usage ); break; } @@ -239,7 +236,8 @@ static struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen, } -static void brw_tex_surface_destroy( struct pipe_surface *surf ) +static void brw_surface_destroy( struct pipe_context *pipe, + struct pipe_surface *surf ) { struct brw_surface *surface = brw_surface(surf); @@ -249,13 +247,12 @@ static void brw_tex_surface_destroy( struct pipe_surface *surf ) bo_reference(&surface->bo, NULL); pipe_resource_reference( &surface->base.texture, NULL ); - FREE(surface); } -void brw_screen_tex_surface_init( struct brw_screen *brw_screen ) +void brw_pipe_surface_init( struct brw_context *brw ) { - brw_screen->base.get_tex_surface = brw_get_tex_surface; - brw_screen->base.tex_surface_destroy = brw_tex_surface_destroy; + brw->base.create_surface = brw_create_surface; + brw->base.surface_destroy = brw_surface_destroy; } diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c index 007239efc40..b23454b5808 100644 --- a/src/gallium/drivers/i965/brw_pipe_vertex.c +++ b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -203,7 +203,7 @@ static void brw_translate_vertex_elements(struct brw_context *brw, brw_velems->ve[i].ve1.vfcomponent2 = comp2; brw_velems->ve[i].ve1.vfcomponent3 = comp3; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) brw_velems->ve[i].ve1.dst_offset = 0; else brw_velems->ve[i].ve1.dst_offset = i * 4; @@ -248,7 +248,6 @@ static void brw_set_vertex_buffers(struct pipe_context *pipe, const struct pipe_vertex_buffer *buffers) { struct brw_context *brw = brw_context(pipe); - unsigned i; /* Check for no change */ if (count == brw->curr.num_vertex_buffers && @@ -257,18 +256,9 @@ static void brw_set_vertex_buffers(struct 
pipe_context *pipe, count * sizeof buffers[0]) == 0) return; - /* Adjust refcounts */ - for (i = 0; i < count; i++) - pipe_resource_reference(&brw->curr.vertex_buffer[i].buffer, - buffers[i].buffer); - - for ( ; i < brw->curr.num_vertex_buffers; i++) - pipe_resource_reference(&brw->curr.vertex_buffer[i].buffer, - NULL); - - /* Copy remaining data */ - memcpy(brw->curr.vertex_buffer, buffers, count * sizeof buffers[0]); - brw->curr.num_vertex_buffers = count; + util_copy_vertex_buffers(brw->curr.vertex_buffer, + &brw->curr.num_vertex_buffers, + buffers, count); brw->state.dirty.mesa |= PIPE_NEW_VERTEX_BUFFER; } @@ -318,9 +308,13 @@ brw_pipe_vertex_init( struct brw_context *brw ) void brw_pipe_vertex_cleanup( struct brw_context *brw ) { + unsigned i; /* Release bound pipe vertex_buffers */ + for (i = 0; i < brw->curr.num_vertex_buffers; i++) { + pipe_resource_reference(&brw->curr.vertex_buffer[i].buffer, NULL); + } /* Release some other stuff */ diff --git a/src/gallium/drivers/i965/brw_reg.h b/src/gallium/drivers/i965/brw_reg.h index ba10f9d5df1..53c7c435713 100644 --- a/src/gallium/drivers/i965/brw_reg.h +++ b/src/gallium/drivers/i965/brw_reg.h @@ -93,18 +93,54 @@ #define PCI_CHIP_G45_G 0x2E22 #define PCI_CHIP_G41_G 0x2E32 #define PCI_CHIP_B43_G 0x2E42 +#define PCI_CHIP_B43_G1 0x2E92 #define PCI_CHIP_ILD_G 0x0042 #define PCI_CHIP_ILM_G 0x0046 -struct brw_chipset { - unsigned pci_id:16; - unsigned is_965:1; - unsigned is_igdng:1; - unsigned is_g4x:1; - unsigned pad:13; -}; - +#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 /* Desktop */ +#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112 +#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122 +#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 /* Mobile */ +#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116 +#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126 +#define PCI_CHIP_SANDYBRIDGE_S 0x010A /* Server */ + +#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ + devid == PCI_CHIP_Q45_G || \ + devid == PCI_CHIP_G45_G || \ + devid == PCI_CHIP_G41_G || \ + devid == PCI_CHIP_B43_G || \ + devid == PCI_CHIP_B43_G1) +#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) +#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) + +#define IS_GEN4(devid) (devid == PCI_CHIP_I965_G || \ + devid == PCI_CHIP_I965_Q || \ + devid == PCI_CHIP_I965_G_1 || \ + devid == PCI_CHIP_I965_GM || \ + devid == PCI_CHIP_I965_GME || \ + devid == PCI_CHIP_I946_GZ || \ + IS_G4X(devid)) + +#define IS_ILD(devid) (devid == PCI_CHIP_ILD_G) +#define IS_ILM(devid) (devid == PCI_CHIP_ILM_G) +#define IS_GEN5(devid) (IS_ILD(devid) || IS_ILM(devid)) + +#define IS_IRONLAKE(devid) IS_GEN5(devid) + +#define IS_GEN6(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \ + devid == PCI_CHIP_SANDYBRIDGE_GT2 || \ + devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \ + devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \ + devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \ + devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \ + devid == PCI_CHIP_SANDYBRIDGE_S) + +#define IS_965(devid) (IS_GEN4(devid) || \ + IS_G4X(devid) || \ + IS_GEN5(devid) || \ + IS_GEN6(devid)) /* XXX: hacks */ diff --git a/src/gallium/drivers/i965/brw_resource_buffer.c b/src/gallium/drivers/i965/brw_resource_buffer.c index 5f9e8a87c99..afb96ee3e7f 100644 --- a/src/gallium/drivers/i965/brw_resource_buffer.c +++ b/src/gallium/drivers/i965/brw_resource_buffer.c @@ -92,9 +92,9 @@ brw_buffer_transfer_unmap( struct pipe_context *pipe, static unsigned brw_buffer_is_referenced( struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned face, - unsigned level) + struct pipe_resource *resource, + unsigned 
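[Editorial aside] brw_set_vertex_buffers() here, like draw_set_vertex_buffers() earlier in the commit, switches from hand-rolled memcpy/refcount code to util_copy_vertex_buffers(). Roughly, such a helper has to reference the incoming buffers, unreference any previously bound slots beyond the new count, and update the bound count. The sketch below shows that contract with stand-in types and a toy reference helper, not Gallium's real pipe_resource_reference():

struct buf_example { int refcount; };

static void buf_reference(struct buf_example **dst, struct buf_example *src)
{
   if (src)
      src->refcount++;
   if (*dst && --(*dst)->refcount == 0) {
      /* the real helper would destroy the buffer here */
   }
   *dst = src;
}

struct vbuf_example {
   struct buf_example *buffer;
   unsigned stride;
   unsigned buffer_offset;
};

static void copy_vertex_buffers(struct vbuf_example *dst, unsigned *dst_count,
                                const struct vbuf_example *src, unsigned src_count)
{
   unsigned i;
   for (i = 0; i < src_count; i++) {
      buf_reference(&dst[i].buffer, src[i].buffer);
      dst[i].stride = src[i].stride;
      dst[i].buffer_offset = src[i].buffer_offset;
   }
   for (; i < *dst_count; i++)
      buf_reference(&dst[i].buffer, NULL);   /* drop slots no longer bound */
   *dst_count = src_count;
}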
level, + int layer) { struct brw_context *brw = brw_context(pipe); struct brw_winsys_buffer *batch_bo = brw->batch->buf; @@ -194,6 +194,7 @@ brw_user_buffer_create(struct pipe_screen *screen, buf->b.b.width0 = bytes; buf->b.b.height0 = 1; buf->b.b.depth0 = 1; + buf->b.b.array_size = 1; buf->user_buffer = ptr; diff --git a/src/gallium/drivers/i965/brw_resource_texture.c b/src/gallium/drivers/i965/brw_resource_texture.c index 3860d18a7a2..0cb895f35de 100644 --- a/src/gallium/drivers/i965/brw_resource_texture.c +++ b/src/gallium/drivers/i965/brw_resource_texture.c @@ -229,8 +229,8 @@ static void brw_texture_destroy(struct pipe_screen *screen, static unsigned brw_texture_is_referenced( struct pipe_context *pipe, struct pipe_resource *texture, - unsigned face, - unsigned level ) + unsigned level, + int layer ) { struct brw_context *brw = brw_context(pipe); struct brw_screen *bscreen = brw_screen(pipe->screen); @@ -246,7 +246,7 @@ static unsigned brw_texture_is_referenced( struct pipe_context *pipe, if (bscreen->sws->bo_references( batch_bo, tex->bo )) return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; - /* Find any view on this texture for this face/level and see if it + /* Find any view on this texture for this level/layer and see if it * is referenced: */ for (i = 0; i < 2; i++) { @@ -254,7 +254,7 @@ static unsigned brw_texture_is_referenced( struct pipe_context *pipe, if (surf->bo == tex->bo) continue; - if (surf->id.bits.face != face || + if (!(layer == -1 || surf->id.bits.layer == layer) || surf->id.bits.level != level) continue; @@ -274,10 +274,10 @@ static unsigned brw_texture_is_referenced( struct pipe_context *pipe, static struct pipe_transfer * brw_texture_get_transfer(struct pipe_context *context, - struct pipe_resource *resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box) + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box) { struct brw_texture *tex = brw_texture(resource); struct pipe_transfer *transfer = CALLOC_STRUCT(pipe_transfer); @@ -285,10 +285,11 @@ brw_texture_get_transfer(struct pipe_context *context, return NULL; transfer->resource = resource; - transfer->sr = sr; + transfer->level = level; transfer->usage = usage; transfer->box = *box; transfer->stride = tex->pitch * tex->cpp; + /* FIXME: layer_stride */ return transfer; } @@ -301,24 +302,16 @@ brw_texture_transfer_map(struct pipe_context *pipe, struct pipe_resource *resource = transfer->resource; struct brw_texture *tex = brw_texture(transfer->resource); struct brw_winsys_screen *sws = brw_screen(pipe->screen)->sws; - struct pipe_subresource sr = transfer->sr; struct pipe_box *box = &transfer->box; enum pipe_format format = resource->format; unsigned usage = transfer->usage; unsigned offset; char *map; - if (resource->target == PIPE_TEXTURE_CUBE) { - offset = tex->image_offset[sr.level][sr.face]; - } - else if (resource->target == PIPE_TEXTURE_3D) { - offset = tex->image_offset[sr.level][box->z]; - } - else { - offset = tex->image_offset[sr.level][0]; - assert(sr.face == 0); + if (resource->target != PIPE_TEXTURE_3D && + resource->target != PIPE_TEXTURE_CUBE) assert(box->z == 0); - } + offset = tex->image_offset[transfer->level][box->z]; map = sws->bo_map(tex->bo, BRW_DATA_OTHER, @@ -399,7 +392,7 @@ brw_texture_create( struct pipe_screen *screen, if (tex->compressed == 0 && !bscreen->no_tiling) { - if (bscreen->chipset.is_965 && + if (bscreen->gen < 5 && util_format_is_depth_or_stencil(template->format)) tex->tiling = 
BRW_TILING_Y; else diff --git a/src/gallium/drivers/i965/brw_resource_texture_layout.c b/src/gallium/drivers/i965/brw_resource_texture_layout.c index 2187bdd82ce..afecc77e312 100644 --- a/src/gallium/drivers/i965/brw_resource_texture_layout.c +++ b/src/gallium/drivers/i965/brw_resource_texture_layout.c @@ -388,7 +388,7 @@ GLboolean brw_texture_layout(struct brw_screen *brw_screen, { switch (tex->b.b.target) { case PIPE_TEXTURE_CUBE: - if (brw_screen->chipset.is_igdng) + if (brw_screen->gen == 5) brw_layout_cubemap_idgng( tex ); else brw_layout_3d_cube( tex ); diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 29486f5b815..bf805fd080c 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -97,7 +97,7 @@ brw_get_name(struct pipe_screen *screen) static char buffer[128]; const char *chipset; - switch (brw_screen(screen)->chipset.pci_id) { + switch (brw_screen(screen)->pci_id) { case PCI_CHIP_I965_G: chipset = "I965_G"; break; @@ -405,8 +405,6 @@ struct pipe_screen * brw_screen_create(struct brw_winsys_screen *sws) { struct brw_screen *bscreen; - struct brw_chipset chipset; - #ifdef DEBUG BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0); BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0); @@ -415,46 +413,30 @@ brw_screen_create(struct brw_winsys_screen *sws) BRW_DUMP = debug_get_flags_option("BRW_DUMP", dump_names, 0); #endif - memset(&chipset, 0, sizeof chipset); - - chipset.pci_id = sws->pci_id; - - switch (chipset.pci_id) { - case PCI_CHIP_I965_G: - case PCI_CHIP_I965_Q: - case PCI_CHIP_I965_G_1: - case PCI_CHIP_I946_GZ: - case PCI_CHIP_I965_GM: - case PCI_CHIP_I965_GME: - chipset.is_965 = TRUE; - break; - - case PCI_CHIP_GM45_GM: - case PCI_CHIP_IGD_E_G: - case PCI_CHIP_Q45_G: - case PCI_CHIP_G45_G: - case PCI_CHIP_G41_G: - case PCI_CHIP_B43_G: - chipset.is_g4x = TRUE; - break; - - case PCI_CHIP_ILD_G: - case PCI_CHIP_ILM_G: - chipset.is_igdng = TRUE; - break; + bscreen = CALLOC_STRUCT(brw_screen); + if (!bscreen) + return NULL; - default: + bscreen->pci_id = sws->pci_id; + if (IS_GEN6(sws->pci_id)) { + bscreen->gen = 6; + bscreen->needs_ff_sync = TRUE; + } else if (IS_GEN5(sws->pci_id)) { + bscreen->gen = 5; + bscreen->needs_ff_sync = TRUE; + } else if (IS_965(sws->pci_id)) { + bscreen->gen = 4; + if (IS_G4X(sws->pci_id)) { + bscreen->is_g4x = true; + } + } else { debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", - __FUNCTION__, chipset.pci_id); + __FUNCTION__, sws->pci_id); + free(bscreen); return NULL; } - - bscreen = CALLOC_STRUCT(brw_screen); - if (!bscreen) - return NULL; - - bscreen->chipset = chipset; + sws->gen = bscreen->gen; bscreen->sws = sws; bscreen->base.winsys = NULL; bscreen->base.destroy = brw_destroy_screen; @@ -470,7 +452,6 @@ brw_screen_create(struct brw_winsys_screen *sws) bscreen->base.fence_finish = brw_fence_finish; brw_init_screen_resource_functions(bscreen); - brw_screen_tex_surface_init(bscreen); bscreen->no_tiling = debug_get_option("BRW_NO_TILING", FALSE) != NULL; diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index 522a3bf8995..a62e1afc405 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -43,7 +43,11 @@ struct brw_winsys_screen; struct brw_screen { struct pipe_screen base; - struct brw_chipset chipset; + int gen; + boolean has_negative_rhw_bug; + boolean needs_ff_sync; + boolean is_g4x; + int pci_id; struct brw_winsys_screen *sws; 
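[Editorial aside] brw_screen_create() now derives the generation number and feature flags directly from the PCI ID via the IS_GEN4/IS_GEN5/IS_GEN6/IS_G4X macros added to brw_reg.h. A condensed sketch of that classification, listing only a few of the device IDs visible in this hunk (the real macros cover the full set):

#include <stdbool.h>

struct screen_info_example { int gen; bool is_g4x; bool needs_ff_sync; };

static bool classify_devid(unsigned devid, struct screen_info_example *info)
{
   info->gen = 0;
   info->is_g4x = false;
   info->needs_ff_sync = false;

   switch (devid) {
   case 0x0102: case 0x0106: case 0x010A:   /* Sandybridge GT1 / mobile / server */
      info->gen = 6; info->needs_ff_sync = true; return true;
   case 0x0042: case 0x0046:                /* Ironlake ILD / ILM */
      info->gen = 5; info->needs_ff_sync = true; return true;
   case 0x2E22: case 0x2E32: case 0x2E42: case 0x2E92:   /* G45/G41/B43 (G4x) */
      info->gen = 4; info->is_g4x = true; return true;
   default:                                 /* remaining gen4 IDs omitted here */
      return false;
   }
}

As in the driver, an unrecognized ID makes screen creation fail rather than guessing a generation.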
boolean no_tiling; }; @@ -52,9 +56,8 @@ struct brw_screen union brw_surface_id { struct { - unsigned face:3; - unsigned zslice:13; unsigned level:16; + unsigned layer:16; } bits; unsigned value; }; @@ -63,8 +66,9 @@ union brw_surface_id { struct brw_surface { struct pipe_surface base; - + union brw_surface_id id; + unsigned offset; unsigned cpp; unsigned pitch; unsigned draw_offset; @@ -96,7 +100,5 @@ brw_surface(struct pipe_surface *surface) unsigned brw_surface_pitch( const struct pipe_surface *surface ); -void brw_screen_tex_surface_init( struct brw_screen *brw_screen ); - #endif /* BRW_SCREEN_H */ diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c index 497634ec9ed..901c3341642 100644 --- a/src/gallium/drivers/i965/brw_sf_emit.c +++ b/src/gallium/drivers/i965/brw_sf_emit.c @@ -161,7 +161,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) if (c->key.primitive == SF_UNFILLED_TRIS) return; - if (BRW_IS_IGDNG(p->brw)) + if (p->brw->gen == 5) jmpi = 2; brw_push_insn_state(p); @@ -205,7 +205,7 @@ static void do_flatshade_line( struct brw_sf_compile *c ) if (c->key.primitive == SF_UNFILLED_TRIS) return; - if (BRW_IS_IGDNG(p->brw)) + if (p->brw->gen == 5) jmpi = 2; brw_push_insn_state(p); diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c index 6c299a86b49..eec024650ce 100644 --- a/src/gallium/drivers/i965/brw_sf_state.c +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -148,7 +148,7 @@ sf_unit_create_from_key(struct brw_context *brw, sf.thread3.dispatch_grf_start_reg = 3; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) sf.thread3.urb_entry_read_offset = 3; else sf.thread3.urb_entry_read_offset = 1; @@ -161,7 +161,7 @@ sf_unit_create_from_key(struct brw_context *brw, /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or * 48(IGDNG) threads */ - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) chipset_max_threads = 48; else chipset_max_threads = 24; diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index d2bbd0123d1..380d511f9bb 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -56,7 +56,7 @@ const struct brw_tracked_state brw_clip_prog; const struct brw_tracked_state brw_clip_unit; const struct brw_tracked_state brw_curbe_buffer; const struct brw_tracked_state brw_curbe_offsets; -const struct brw_tracked_state brw_invarient_state; +const struct brw_tracked_state brw_invariant_state; const struct brw_tracked_state brw_gs_prog; const struct brw_tracked_state brw_gs_unit; const struct brw_tracked_state brw_line_stipple; diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c index f8b91eff816..cdbf270e06a 100644 --- a/src/gallium/drivers/i965/brw_state_upload.c +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -69,7 +69,7 @@ const struct brw_tracked_state *atoms[] = /* Command packets: */ - &brw_invarient_state, + &brw_invariant_state, &brw_state_base_address, &brw_binding_table_pointers, diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h index e97ddeb5e1c..b0d75b4f828 100644 --- a/src/gallium/drivers/i965/brw_structs.h +++ b/src/gallium/drivers/i965/brw_structs.h @@ -279,7 +279,7 @@ struct brw_aa_line_parameters struct header header; struct { - GLuint aa_coverage_scope:8; + GLuint aa_coverage_slope:8; GLuint pad0:8; GLuint aa_coverage_bias:8; GLuint pad1:8; @@ -659,7 +659,105 @@ struct brw_clip_unit_state 
GLfloat viewport_ymax; }; +struct gen6_blend_state +{ + struct { + GLuint dest_blend_factor:5; + GLuint source_blend_factor:5; + GLuint pad3:1; + GLuint blend_func:3; + GLuint pad2:1; + GLuint ia_dest_blend_factor:5; + GLuint ia_source_blend_factor:5; + GLuint pad1:1; + GLuint ia_blend_func:3; + GLuint pad0:1; + GLuint ia_blend_enable:1; + GLuint blend_enable:1; + } blend0; + + struct { + GLuint post_blend_clamp_enable:1; + GLuint pre_blend_clamp_enable:1; + GLuint clamp_range:2; + GLuint pad0:4; + GLuint x_dither_offset:2; + GLuint y_dither_offset:2; + GLuint dither_enable:1; + GLuint alpha_test_func:3; + GLuint alpha_test_enable:1; + GLuint pad1:1; + GLuint logic_op_func:4; + GLuint logic_op_enable:1; + GLuint pad2:1; + GLuint write_disable_b:1; + GLuint write_disable_g:1; + GLuint write_disable_r:1; + GLuint write_disable_a:1; + GLuint pad3:1; + GLuint alpha_to_coverage_dither:1; + GLuint alpha_to_one:1; + GLuint alpha_to_coverage:1; + } blend1; +}; +struct gen6_color_calc_state +{ + struct { + GLuint alpha_test_format:1; + GLuint pad0:14; + GLuint round_disable:1; + GLuint bf_stencil_ref:8; + GLuint stencil_ref:8; + } cc0; + + union { + GLfloat alpha_ref_f; + struct { + GLuint ui:8; + GLuint pad0:24; + } alpha_ref_fi; + } cc1; + + GLfloat constant_r; + GLfloat constant_g; + GLfloat constant_b; + GLfloat constant_a; +}; + +struct gen6_depth_stencil_state +{ + struct { + GLuint pad0:3; + GLuint bf_stencil_pass_depth_pass_op:3; + GLuint bf_stencil_pass_depth_fail_op:3; + GLuint bf_stencil_fail_op:3; + GLuint bf_stencil_func:3; + GLuint bf_stencil_enable:1; + GLuint pad1:2; + GLuint stencil_write_enable:1; + GLuint stencil_pass_depth_pass_op:3; + GLuint stencil_pass_depth_fail_op:3; + GLuint stencil_fail_op:3; + GLuint stencil_func:3; + GLuint stencil_enable:1; + } ds0; + + struct { + GLuint bf_stencil_write_mask:8; + GLuint bf_stencil_test_mask:8; + GLuint stencil_write_mask:8; + GLuint stencil_test_mask:8; + } ds1; + + struct { + GLuint pad0:26; + GLuint depth_write_enable:1; + GLuint depth_test_func:3; + GLuint pad1:1; + GLuint depth_test_enable:1; + } ds2; +}; struct brw_cc_unit_state { @@ -814,6 +912,13 @@ struct brw_sf_unit_state }; +struct gen6_scissor_rect +{ + GLuint xmin:16; + GLuint ymin:16; + GLuint xmax:16; + GLuint ymax:16; +}; struct brw_gs_unit_state { @@ -825,7 +930,7 @@ struct brw_gs_unit_state struct { GLuint pad0:8; - GLuint rendering_enable:1; /* for IGDNG */ + GLuint rendering_enable:1; /* for Ironlake */ GLuint pad4:1; GLuint stats_enable:1; GLuint nr_urb_entries:7; @@ -935,7 +1040,7 @@ struct brw_wm_unit_state GLfloat global_depth_offset_constant; GLfloat global_depth_offset_scale; - /* for IGDNG only */ + /* for Ironlake only */ struct { GLuint pad0:1; GLuint grf_reg_count_1:3; @@ -962,6 +1067,15 @@ struct brw_sampler_default_color { GLfloat color[4]; }; +struct gen5_sampler_default_color { + uint8_t ub[4]; + float f[4]; + uint16_t hf[4]; + uint16_t us[4]; + int16_t s[4]; + uint8_t b[4]; +}; + struct brw_sampler_state { @@ -973,7 +1087,7 @@ struct brw_sampler_state GLuint mag_filter:3; GLuint mip_filter:2; GLuint base_level:5; - GLuint pad:1; + GLuint min_mag_neq:1; GLuint lod_preclamp:1; GLuint default_color_mode:1; GLuint pad0:1; @@ -985,7 +1099,8 @@ struct brw_sampler_state GLuint r_wrap_mode:3; GLuint t_wrap_mode:3; GLuint s_wrap_mode:3; - GLuint pad:3; + GLuint cube_control_mode:1; + GLuint pad:2; GLuint max_lod:10; GLuint min_lod:10; } ss1; @@ -999,7 +1114,9 @@ struct brw_sampler_state struct brw_ss3 { - GLuint pad:19; + GLuint non_normalized_coord:1; + 
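[Editorial aside] The new gen6_* structures above describe hardware state packets as C bitfields, one struct field per hardware field, packed into 32-bit dwords. A compile-time size check is a cheap way to catch layout mistakes in such structs; the example re-declares the two-dword gen6_scissor_rect and asserts its size (the typedef trick stands in for a static_assert, which this pre-C11 code base cannot use):

#include <stdint.h>

struct gen6_scissor_rect_example {
   uint32_t xmin:16;
   uint32_t ymin:16;
   uint32_t xmax:16;
   uint32_t ymax:16;
};

/* fails to compile if the struct is not exactly two dwords */
typedef char scissor_rect_is_two_dwords
   [sizeof(struct gen6_scissor_rect_example) == 8 ? 1 : -1];

Note that the driver relies on the compiler packing bitfields from the least significant bit, as GCC does on little-endian targets.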
GLuint pad:12; + GLuint address_round:6; GLuint max_aniso:3; GLuint chroma_key_mode:1; GLuint chroma_key_index:2; @@ -1044,6 +1161,15 @@ struct brw_sf_viewport } scissor; }; +struct gen6_sf_viewport { + GLfloat m00; + GLfloat m11; + GLfloat m22; + GLfloat m30; + GLfloat m31; + GLfloat m32; +}; + /* Documented in the subsystem/shared-functions/sampler chapter... */ struct brw_surface_state @@ -1055,7 +1181,12 @@ struct brw_surface_state GLuint cube_neg_y:1; GLuint cube_pos_x:1; GLuint cube_neg_x:1; - GLuint pad:4; + GLuint pad:2; + /* Required on gen6 for surfaces accessed through render cache messages. + */ + GLuint render_cache_read_write:1; + /* Ironlake and newer: instead of replicating one of the texels */ + GLuint cube_corner_average:1; GLuint mipmap_layout_mode:1; GLuint vert_line_stride_ofs:1; GLuint vert_line_stride:1; @@ -1202,7 +1333,8 @@ struct brw_instruction GLuint predicate_inverse:1; GLuint execution_size:3; GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */ - GLuint pad0:2; + GLuint acc_wr_control:1; + GLuint cmpt_control:1; GLuint debug_control:1; GLuint saturate:1; } header; @@ -1250,7 +1382,7 @@ struct brw_instruction GLuint dest_writemask:4; GLuint dest_subreg_nr:1; GLuint dest_reg_nr:8; - GLuint pad1:2; + GLuint dest_horiz_stride:2; GLuint dest_address_mode:1; } da16; @@ -1264,9 +1396,21 @@ struct brw_instruction GLuint dest_writemask:4; GLint dest_indirect_offset:6; GLuint dest_subreg_nr:3; - GLuint pad1:2; + GLuint dest_horiz_stride:2; GLuint dest_address_mode:1; } ia16; + + struct { + GLuint dest_reg_file:2; + GLuint dest_reg_type:3; + GLuint src0_reg_file:2; + GLuint src0_reg_type:3; + GLuint src1_reg_file:2; + GLuint src1_reg_type:3; + GLuint pad:1; + + GLint jump_count:16; + } branch_gen6; } bits1; @@ -1339,7 +1483,7 @@ struct brw_instruction GLuint end_of_thread:1; GLuint pad1:1; GLuint sfid:4; - } send_igdng; /* for IGDNG only */ + } send_gen5; /* for Ironlake only */ } bits2; @@ -1413,6 +1557,21 @@ struct brw_instruction GLuint pad0:12; } if_else; + struct + { + /* Signed jump distance to the ip to jump to if all channels + * are disabled after the break or continue. It should point + * to the end of the innermost control flow block, as that's + * where some channel could get re-enabled. + */ + int jip:16; + + /* Signed jump distance to the location to resume execution + * of this channel if it's enabled for the break or continue. 
+ */ + int uip:16; + } break_cont; + struct { GLuint function:4; GLuint int_type:1; @@ -1440,7 +1599,7 @@ struct brw_instruction GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } math_igdng; + } math_gen5; struct { GLuint binding_table_index:8; @@ -1476,7 +1635,7 @@ struct brw_instruction GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } sampler_igdng; + } sampler_gen5; struct brw_urb_immediate urb; @@ -1494,7 +1653,7 @@ struct brw_instruction GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } urb_igdng; + } urb_gen5; struct { GLuint binding_table_index:8; @@ -1510,6 +1669,18 @@ struct brw_instruction struct { GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint msg_type:3; + GLuint target_cache:2; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } dp_read_g4x; + + struct { + GLuint binding_table_index:8; GLuint msg_control:3; GLuint msg_type:3; GLuint target_cache:2; @@ -1519,7 +1690,7 @@ struct brw_instruction GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } dp_read_igdng; + } dp_read_gen5; struct { GLuint binding_table_index:8; @@ -1546,10 +1717,38 @@ struct brw_instruction GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } dp_write_igdng; + } dp_write_gen5; + + /* Sandybridge DP for sample cache, constant cache, render cache */ + struct { + GLuint binding_table_index:8; + GLuint msg_control:5; + GLuint msg_type:3; + GLuint pad0:3; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } dp_sampler_const_cache; + + struct { + GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint slot_group_select:1; + GLuint pixel_scoreboard_clear:1; + GLuint msg_type:4; + GLuint send_commit_msg:1; + GLuint pad0:1; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } dp_render_cache; struct { - GLuint pad:16; + GLuint function_control:16; GLuint response_length:4; GLuint msg_length:4; GLuint msg_target:4; @@ -1557,14 +1756,15 @@ struct brw_instruction GLuint end_of_thread:1; } generic; + /* Of this struct, only end_of_thread is not present for gen6. 
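[Editorial aside] The generic vs. generic_gen5 layouts in this hunk are why the disassembler above decodes mlen/rlen differently per generation. A sketch of the same decode done with explicit shifts, assuming the LSB-first bitfield packing the driver depends on; the bit positions are derived from the field widths declared here:

#include <stdint.h>

static void decode_mlen_rlen(uint32_t bits3, int gen,
                             unsigned *mlen, unsigned *rlen)
{
   if (gen >= 5) {
      /* generic_gen5: function_control:19, header_present:1,
       * response_length:5, msg_length:4, ... */
      *rlen = (bits3 >> 20) & 0x1f;
      *mlen = (bits3 >> 25) & 0xf;
   } else {
      /* generic: function_control:16, response_length:4, msg_length:4, ... */
      *rlen = (bits3 >> 16) & 0xf;
      *mlen = (bits3 >> 20) & 0xf;
   }
}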
*/ struct { - GLuint pad:19; + GLuint function_control:19; GLuint header_present:1; GLuint response_length:5; GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } generic_igdng; + } generic_gen5; GLint d; GLuint ud; diff --git a/src/gallium/drivers/i965/brw_structs_dump.c b/src/gallium/drivers/i965/brw_structs_dump.c index cd40fc6d618..f3de2f995b3 100644 --- a/src/gallium/drivers/i965/brw_structs_dump.c +++ b/src/gallium/drivers/i965/brw_structs_dump.c @@ -72,7 +72,7 @@ brw_dump_aa_line_parameters(const struct brw_aa_line_parameters *ptr) { debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); - debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", (*ptr).bits0.aa_coverage_scope); + debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", (*ptr).bits0.aa_coverage_slope); debug_printf("\t\t.bits0.aa_coverage_bias = 0x%x\n", (*ptr).bits0.aa_coverage_bias); debug_printf("\t\t.bits1.aa_coverage_endcap_slope = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_slope); debug_printf("\t\t.bits1.aa_coverage_endcap_bias = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_bias); diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c index 907ec56c6ca..b630752809e 100644 --- a/src/gallium/drivers/i965/brw_urb.c +++ b/src/gallium/drivers/i965/brw_urb.c @@ -147,7 +147,7 @@ static int recalculate_urb_fence( struct brw_context *brw ) brw->urb.constrained = 0; - if (BRW_IS_IGDNG(brw)) { + if (brw->gen == 5) { brw->urb.nr_vs_entries = 128; brw->urb.nr_sf_entries = 48; if (check_urb_layout(brw)) { @@ -157,7 +157,7 @@ static int recalculate_urb_fence( struct brw_context *brw ) brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; } - } else if (BRW_IS_G4X(brw)) { + } else if (brw->is_g4x) { brw->urb.nr_vs_entries = 64; if (check_urb_layout(brw)) { goto done; diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h index 944d88c84cc..b6d1091618e 100644 --- a/src/gallium/drivers/i965/brw_vs.h +++ b/src/gallium/drivers/i965/brw_vs.h @@ -56,7 +56,6 @@ struct brw_vs_compile { struct brw_compile func; struct brw_vs_prog_key key; struct brw_vs_prog_data prog_data; - struct brw_chipset chipset; struct brw_vertex_shader *vp; diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 5dcbd597ddc..559f0c61d8d 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -116,6 +116,7 @@ static boolean find_output_slot( struct brw_vs_compile *c, */ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { + struct brw_context *brw = c->func.brw; GLuint i, reg = 0, subreg = 0, mrf; int attributes_in_vue; @@ -218,7 +219,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ c->nr_outputs = c->prog_data.nr_outputs; - if (c->chipset.is_igdng) + if (brw->gen == 5) mrf = 8; else mrf = 4; @@ -333,7 +334,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs); - if (c->chipset.is_igdng) + if (brw->gen == 5) c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; else c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; @@ -1124,6 +1125,7 @@ static struct brw_reg get_dst( struct brw_vs_compile *c, static void emit_vertex_write( struct brw_vs_compile *c) { struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; struct brw_reg m0 = brw_message_reg(0); struct 
brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS]; struct brw_reg ndc; @@ -1143,7 +1145,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) */ if (c->prog_data.writes_psiz || c->key.nr_userclip || - c->chipset.is_965) + brw->has_negative_rhw_bug) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -1174,7 +1176,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) * Later, clipping will detect ucp[6] and ensure the primitive is * clipped against all fixed planes. */ - if (c->chipset.is_965) { + if (brw->has_negative_rhw_bug) { brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_L, @@ -1202,7 +1204,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) brw_set_access_mode(p, BRW_ALIGN_1); brw_MOV(p, offset(m0, 2), ndc); - if (c->chipset.is_igdng) { + if (brw->gen == 5) { /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */ brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */ /* m4, m5 contain the distances from vertex to the user clip planeXXX. @@ -1339,6 +1341,7 @@ static void emit_insn(struct brw_vs_compile *c, unsigned opcode = inst->Instruction.Opcode; unsigned label = inst->Label.Label; struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; struct brw_reg args[3], dst; GLuint i; @@ -1514,7 +1517,7 @@ static void emit_insn(struct brw_vs_compile *c, c->loop_depth--; - if (c->chipset.is_igdng) + if (brw->gen == 5) br = 2; inst0 = inst1 = brw_WHILE(p, c->loop_inst[c->loop_depth]); @@ -1652,6 +1655,6 @@ void brw_vs_emit(struct brw_vs_compile *c) if (BRW_DEBUG & DEBUG_VS) { debug_printf("vs-native:\n"); - brw_disasm(stderr, p->store, p->nr_insn); + brw_disasm(stderr, p->store, p->nr_insn, p->brw->gen); } } diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c index dadbb622e4d..6d2ccfd6d98 100644 --- a/src/gallium/drivers/i965/brw_vs_state.c +++ b/src/gallium/drivers/i965/brw_vs_state.c @@ -100,7 +100,7 @@ vs_unit_create_from_key(struct brw_context *brw, */ vs.thread1.single_program_flow = 0; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) vs.thread1.binding_table_entry_count = 0; /* hardware requirement */ else vs.thread1.binding_table_entry_count = key->nr_surfaces; @@ -111,16 +111,16 @@ vs_unit_create_from_key(struct brw_context *brw, vs.thread3.urb_entry_read_offset = 0; vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2; else vs.thread4.nr_urb_entries = key->nr_urb_entries; vs.thread4.urb_entry_allocation_size = key->urb_size - 1; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) chipset_max_threads = 72; - else if (BRW_IS_G4X(brw)) + else if (brw->is_g4x) chipset_max_threads = 32; else chipset_max_threads = 16; diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index a06f8bb7d61..038f6f788a0 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -148,7 +148,7 @@ static INLINE void make_reloc(struct brw_winsys_reloc *reloc, struct brw_winsys_screen { unsigned pci_id; - + int gen; /** * Buffer functions. 
*/ @@ -282,7 +282,7 @@ void brw_dump_data( unsigned pci_id, enum brw_buffer_data_type data_type, unsigned offset, const void *data, - size_t size ); + size_t size, int gen ); #endif diff --git a/src/gallium/drivers/i965/brw_winsys_debug.c b/src/gallium/drivers/i965/brw_winsys_debug.c index f8f6a539bc9..b66b1cfccb6 100644 --- a/src/gallium/drivers/i965/brw_winsys_debug.c +++ b/src/gallium/drivers/i965/brw_winsys_debug.c @@ -9,7 +9,7 @@ void brw_dump_data( unsigned pci_id, enum brw_buffer_data_type data_type, unsigned offset, const void *data, - size_t size ) + size_t size, int gen ) { if (BRW_DUMP & DUMP_ASM) { switch (data_type) { @@ -18,7 +18,7 @@ void brw_dump_data( unsigned pci_id, case BRW_DATA_GS_VS_PROG: case BRW_DATA_GS_GS_PROG: case BRW_DATA_GS_CLIP_PROG: - brw_disasm( stderr, data, size / sizeof(struct brw_instruction) ); + brw_disasm( stderr, (struct brw_instruction *)data, size / sizeof(struct brw_instruction), gen ); break; default: break; @@ -77,7 +77,7 @@ void brw_dump_data( unsigned pci_id, if (BRW_DUMP & DUMP_BATCH) { switch (data_type) { case BRW_DATA_BATCH_BUFFER: - intel_decode(data, size / 4, offset, pci_id); + intel_decode(data, size / 4, offset, pci_id, 0); break; default: break; diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index 8f983a60ae8..6301062fd79 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -848,11 +848,11 @@ static void emit_tex( struct brw_wm_compile *c, responseLength = 8; /* always */ - if (BRW_IS_IGDNG(p->brw)) { + if (p->brw->gen == 5) { if (shadow) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG; + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5; else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG; + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5; } else { if (shadow) msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; @@ -917,8 +917,8 @@ static void emit_txb( struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(8), coord[3]); msgLength = 9; - if (BRW_IS_IGDNG(p->brw)) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG; + if (p->brw->gen == 5) + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; else msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; @@ -1516,6 +1516,6 @@ void brw_wm_emit( struct brw_wm_compile *c ) if (BRW_DEBUG & DEBUG_WM) { debug_printf("wm-native:\n"); - brw_disasm(stderr, p->store, p->nr_insn); + brw_disasm(stderr, p->store, p->nr_insn, p->brw->gen); } } diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index f7ee55cc1c8..a65e16edec0 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -812,7 +812,7 @@ static void precalc_tex( struct brw_wm_compile *c, } /* XXX: add GL_EXT_texture_swizzle support to gallium -- by - * generating shader varients in mesa state tracker. + * generating shader variants in mesa state tracker. */ /* Release this temp if we ended up allocating it: diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index 3b3afc39d3c..fb8e40d928e 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -1607,7 +1607,7 @@ static void emit_txb(struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(5), src[3]); /* bias */ brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */ - if (BRW_IS_IGDNG(p->brw)) { + if (p->brw->gen == 5) { msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG; } else { /* Does it work well on SIMD8? 
*/ @@ -1688,7 +1688,7 @@ static void emit_tex(struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */ } - if (BRW_IS_IGDNG(p->brw)) { + if (p->brw->gen == 5) { if (shadow) msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG; else @@ -1970,7 +1970,7 @@ static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_ struct brw_instruction *inst0, *inst1; GLuint br = 1; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) br = 2; loop_depth--; diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c index efc2d96be13..a690003ecbd 100644 --- a/src/gallium/drivers/i965/brw_wm_state.c +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -70,9 +70,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) key->max_threads = 1; else { /* WM maximum threads is number of EUs times number of threads per EU. */ - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) key->max_threads = 12 * 6; - else if (BRW_IS_G4X(brw)) + else if (brw->is_g4x) key->max_threads = 10 * 5; else key->max_threads = 8 * 4; @@ -155,7 +155,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread1.depth_coef_urb_read_offset = 1; wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) wm.thread1.binding_table_entry_count = 0; /* hardware requirement */ else wm.thread1.binding_table_entry_count = key->nr_surfaces; @@ -174,7 +174,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) wm.wm4.sampler_count = 0; /* hardware requirement */ else wm.wm4.sampler_count = (key->sampler_count + 1) / 4; @@ -277,7 +277,7 @@ static enum pipe_error upload_wm_unit( struct brw_context *brw ) grf_reg_count = (align(key.total_grf, 16) / 16 - 1); per_thread_scratch_space = key.total_scratch / 1024 - 1; stats_enable = (BRW_DEBUG & DEBUG_STATS) || key.stats_wm; - sampler_count = BRW_IS_IGDNG(brw) ? 0 :(key.sampler_count + 1) / 4; + sampler_count = brw->gen == 5 ? 
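The wm_unit_populate_key hunk above replaces the IGDNG/G4X chipset macros with a numeric generation plus an is_g4x flag when sizing the WM thread pool (number of EUs times threads per EU, per the comment in that hunk). A tiny sketch of the same selection as a hypothetical helper, not driver code:

/* Hypothetical helper, mirroring the max-thread table above. */
int wm_max_threads(int gen, int is_g4x)
{
   if (gen == 5)
      return 12 * 6;     /* Ironlake: 12 EUs, 6 threads per EU */
   if (is_g4x)
      return 10 * 5;
   return 8 * 4;         /* original 965-class parts */
}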
0 :(key.sampler_count + 1) / 4; /* Emit WM program relocation */ make_reloc(&reloc[nr_reloc++], diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c index 36c04a31655..1abe869f1ae 100644 --- a/src/gallium/drivers/i965/intel_decode.c +++ b/src/gallium/drivers/i965/intel_decode.c @@ -42,10 +42,11 @@ #include "util/u_memory.h" #include "util/u_string.h" + #include "intel_decode.h" +#include "brw_reg.h" /*#include "intel_chipset.h"*/ -#define IS_965(x) 1 /* XXX */ #define IS_9XX(x) 1 /* XXX */ #define BUFFER_FAIL(_count, _len, _name) do { \ @@ -99,10 +100,11 @@ decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures) } opcodes_mi[] = { { 0x08, 0, 1, 1, "MI_ARB_ON_OFF" }, { 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" }, + { 0x30, 0x3f, 3, 3, "MI_BATCH_BUFFER" }, { 0x31, 0x3f, 2, 2, "MI_BATCH_BUFFER_START" }, { 0x14, 0x3f, 3, 3, "MI_DISPLAY_BUFFER_INFO" }, { 0x04, 0, 1, 1, "MI_FLUSH" }, - { 0x22, 0, 3, 3, "MI_LOAD_REGISTER_IMM" }, + { 0x22, 0x1f, 3, 3, "MI_LOAD_REGISTER_IMM" }, { 0x13, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" }, { 0x12, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_INCL" }, { 0x00, 0, 1, 1, "MI_NOOP" }, @@ -116,6 +118,11 @@ decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures) { 0x03, 0, 1, 1, "MI_WAIT_FOR_EVENT" }, }; + switch ((data[0] & 0x1f800000) >> 23) { + case 0x0a: + instr_out(data, hw_offset, 0, "MI_BATCH_BUFFER_END\n"); + return -1; + } for (opcode = 0; opcode < Elements(opcodes_mi); opcode++) { if ((data[0] & 0x1f800000) >> 23 == opcodes_mi[opcode].opcode) { @@ -308,9 +315,13 @@ decode_2d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) static int decode_3d_1c(const uint32_t *data, int count, uint32_t hw_offset, int *failures) { - switch ((data[0] & 0x00f80000) >> 19) { + uint32_t opcode; + + opcode = (data[0] & 0x00f80000) >> 19; + + switch (opcode) { case 0x11: - instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISALBE\n"); + instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n"); return 1; case 0x10: instr_out(data, hw_offset, 0, "3DSTATE_SCISSOR_ENABLE\n"); @@ -326,7 +337,8 @@ decode_3d_1c(const uint32_t *data, int count, uint32_t hw_offset, int *failures) return 1; } - instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_1c opcode = 0x%x\n", + opcode); (*failures)++; return 1; } @@ -384,7 +396,7 @@ i915_get_instruction_dst(const uint32_t *data, int i, char *dstname, int do_mask sprintf(dstname, "oD%s%s", dstmask, sat); break; case 6: - if (dst_nr > 2) + if (dst_nr > 3) fprintf(out, "bad destination reg U%d\n", dst_nr); sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat); break; @@ -455,7 +467,7 @@ i915_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name) break; case 6: sprintf(name, "U%d", src_nr); - if (src_nr > 2) + if (src_nr > 3) fprintf(out, "bad src reg %s\n", name); break; default: @@ -800,10 +812,14 @@ i915_decode_instruction(const uint32_t *data, uint32_t hw_offset, } static int -decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830) +decode_3d_1d(const uint32_t *data, int count, + uint32_t hw_offset, + uint32_t devid, + int *failures) { - unsigned int len, i, c, opcode, word, map, sampler, instr; + unsigned int len, i, c, idx, word, map, sampler, instr; char *format; + uint32_t opcode; struct { uint32_t opcode; @@ -814,7 +830,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, } opcodes_3d_1d[] = { { 0x8e, 0, 3, 3, 
"3DSTATE_BUFFER_INFO" }, { 0x86, 0, 4, 4, "3DSTATE_CHROMA_KEY" }, - { 0x9c, 0, 1, 1, "3DSTATE_CLEAR_PARAMETERS" }, + { 0x9c, 0, 7, 7, "3DSTATE_CLEAR_PARAMETERS" }, { 0x88, 0, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" }, { 0x99, 0, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" }, { 0x9a, 0, 2, 2, "3DSTATE_DEFAULT_SPECULAR" }, @@ -822,7 +838,6 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, { 0x97, 0, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" }, { 0x85, 0, 2, 2, "3DSTATE_DEST_BUFFER_VARIABLES" }, { 0x80, 0, 5, 5, "3DSTATE_DRAWING_RECTANGLE" }, - { 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" }, { 0x9d, 0, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" }, { 0x9e, 0, 4, 4, "3DSTATE_MONO_FILTER" }, { 0x89, 0, 4, 4, "3DSTATE_FOG_MODE" }, @@ -834,9 +849,11 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, { 0x8d, 1, 3, 3, "3DSTATE_W_STATE_I830" }, { 0x01, 1, 2, 2, "3DSTATE_COLOR_FACTOR_I830" }, { 0x02, 1, 2, 2, "3DSTATE_MAP_COORD_SETBIND_I830" }, - }; + }, *opcode_3d_1d; - switch ((data[0] & 0x00ff0000) >> 16) { + opcode = (data[0] & 0x00ff0000) >> 16; + + switch (opcode) { case 0x07: /* This instruction is unusual. A 0 length means just 1 DWORD instead of * 2. The 0 length is specified in one place to be unsupported, but @@ -891,26 +908,56 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n"); len = (data[0] & 0x0000000f) + 2; i = 1; - for (word = 0; word <= 7; word++) { + for (word = 0; word <= 8; word++) { if (data[0] & (1 << (4 + word))) { if (i >= count) BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_1"); /* save vertex state for decode */ - if (word == 2) { - saved_s2_set = 1; - saved_s2 = data[i]; - } - if (word == 4) { - saved_s4_set = 1; - saved_s4 = data[i]; + if (IS_9XX(devid)) { + if (word == 2) { + saved_s2_set = 1; + saved_s2 = data[i]; + } + if (word == 4) { + saved_s4_set = 1; + saved_s4 = data[i]; + } } instr_out(data, hw_offset, i++, "S%d\n", word); } } if (len != i) { - fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n"); + fprintf(out, "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_1\n"); + (*failures)++; + } + return len; + case 0x03: + instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_2\n"); + len = (data[0] & 0x0000000f) + 2; + i = 1; + for (word = 6; word <= 14; word++) { + if (data[0] & (1 << word)) { + if (i >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_2"); + + if (word == 6) + instr_out(data, hw_offset, i++, "TBCF\n"); + else if (word >= 7 && word <= 10) { + instr_out(data, hw_offset, i++, "TB%dC\n", word - 7); + instr_out(data, hw_offset, i++, "TB%dA\n", word - 7); + } else if (word >= 11 && word <= 14) { + instr_out(data, hw_offset, i++, "TM%dS0\n", word - 11); + instr_out(data, hw_offset, i++, "TM%dS1\n", word - 11); + instr_out(data, hw_offset, i++, "TM%dS2\n", word - 11); + instr_out(data, hw_offset, i++, "TM%dS3\n", word - 11); + instr_out(data, hw_offset, i++, "TM%dS4\n", word - 11); + } + } + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_2\n"); (*failures)++; } return len; @@ -922,11 +969,27 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, i = 2; for (map = 0; map <= 15; map++) { if (data[1] & (1 << map)) { + int width, height, pitch, dword; + const char *tiling; + if (i + 3 >= count) BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE"); instr_out(data, hw_offset, i++, "map %d MS2\n", map); - instr_out(data, hw_offset, i++, "map %d MS3\n", map); 
- instr_out(data, hw_offset, i++, "map %d MS4\n", map); + + dword = data[i]; + width = ((dword >> 10) & ((1 << 11) - 1))+1; + height = ((dword >> 21) & ((1 << 11) - 1))+1; + + tiling = "none"; + if (dword & (1 << 2)) + tiling = "fenced"; + else if (dword & (1 << 1)) + tiling = dword & (1 << 0) ? "Y" : "X"; + instr_out(data, hw_offset, i++, "map %d MS3 [width=%d, height=%d, tiling=%s]\n", map, width, height, tiling); + + dword = data[i]; + pitch = 4*(((dword >> 21) & ((1 << 11) - 1))+1); + instr_out(data, hw_offset, i++, "map %d MS4 [pitch=%d]\n", map, pitch); } } if (len != i) { @@ -982,8 +1045,8 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, } return len; case 0x01: - if (i830) - break; + if (!IS_9XX(devid)) + break; instr_out(data, hw_offset, 0, "3DSTATE_SAMPLER_STATE\n"); instr_out(data, hw_offset, 1, "mask\n"); len = (data[0] & 0x0000003f) + 2; @@ -1034,30 +1097,61 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, format, (data[1] & (1 << 31)) ? "en" : "dis"); return len; + + case 0x8e: + { + const char *name, *tiling; + + len = (data[0] & 0x0000000f) + 2; + if (len != 3) + fprintf(out, "Bad count in 3DSTATE_BUFFER_INFO\n"); + if (count < 3) + BUFFER_FAIL(count, len, "3DSTATE_BUFFER_INFO"); + + switch((data[1] >> 24) & 0x7) { + case 0x3: name = "color"; break; + case 0x7: name = "depth"; break; + default: name = "unknown"; break; + } + + tiling = "none"; + if (data[1] & (1 << 23)) + tiling = "fenced"; + else if (data[1] & (1 << 22)) + tiling = data[1] & (1 << 21) ? "Y" : "X"; + + instr_out(data, hw_offset, 0, "3DSTATE_BUFFER_INFO\n"); + instr_out(data, hw_offset, 1, "%s, tiling = %s, pitch=%d\n", name, tiling, data[1]&0xffff); + + instr_out(data, hw_offset, 2, "address\n"); + return len; + } } - for (opcode = 0; opcode < Elements(opcodes_3d_1d); opcode++) { - if (opcodes_3d_1d[opcode].i830_only && !i830) + for (idx = 0; idx < Elements(opcodes_3d_1d); idx++) + { + opcode_3d_1d = &opcodes_3d_1d[idx]; + if (opcode_3d_1d->i830_only && IS_9XX(devid)) continue; - if (((data[0] & 0x00ff0000) >> 16) == opcodes_3d_1d[opcode].opcode) { + if (((data[0] & 0x00ff0000) >> 16) == opcode_3d_1d->opcode) { len = 1; - instr_out(data, hw_offset, 0, "%s\n", opcodes_3d_1d[opcode].name); - if (opcodes_3d_1d[opcode].max_len > 1) { + instr_out(data, hw_offset, 0, "%s\n", opcode_3d_1d->name); + if (opcode_3d_1d->max_len > 1) { len = (data[0] & 0x0000ffff) + 2; - if (len < opcodes_3d_1d[opcode].min_len || - len > opcodes_3d_1d[opcode].max_len) + if (len < opcode_3d_1d->min_len || + len > opcode_3d_1d->max_len) { fprintf(out, "Bad count in %s\n", - opcodes_3d_1d[opcode].name); + opcode_3d_1d->name); (*failures)++; } } for (i = 1; i < len; i++) { if (i >= count) - BUFFER_FAIL(count, len, opcodes_3d_1d[opcode].name); + BUFFER_FAIL(count, len, opcode_3d_1d->name); instr_out(data, hw_offset, i, "dword %d\n", i); } @@ -1065,7 +1159,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, } } - instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_1d opcode = 0x%x\n", opcode); (*failures)++; return 1; } @@ -1075,8 +1169,10 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, int *failures) { char immediate = (data[0] & (1 << 23)) == 0; - unsigned int len, i; + unsigned int len, i, ret; char *primtype; + int original_s2 = saved_s2; + int original_s4 = saved_s4; switch ((data[0] >> 18) & 0xf) { case 0x0: primtype = "TRILIST"; break; @@ -1089,7 +1185,7 @@ 
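The 3DSTATE_MAP_STATE decode above turns the opaque MS3/MS4 dumps into readable width, height, tiling and pitch values. A scalar model of those bit positions follows; the helper name, standalone framing and demo dwords are mine, and only the fields the hunk decodes are shown:

#include <stdint.h>
#include <stdio.h>

void decode_map_state(uint32_t ms3, uint32_t ms4)
{
   unsigned width  = ((ms3 >> 10) & 0x7ff) + 1;   /* 11-bit minus-one fields */
   unsigned height = ((ms3 >> 21) & 0x7ff) + 1;
   unsigned pitch  = 4 * (((ms4 >> 21) & 0x7ff) + 1);
   const char *tiling = "none";

   if (ms3 & (1 << 2))
      tiling = "fenced";
   else if (ms3 & (1 << 1))
      tiling = (ms3 & 1) ? "Y" : "X";

   printf("width=%u height=%u tiling=%s pitch=%u\n",
          width, height, tiling, pitch);
}

int main(void)
{
   /* arbitrary demo dwords, not captured hardware state */
   decode_map_state((63u << 10) | (63u << 21), 31u << 21);
   return 0;
}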
decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, case 0x7: primtype = "RECTLIST"; break; case 0x8: primtype = "POINTLIST"; break; case 0x9: primtype = "DIB"; break; - case 0xa: primtype = "CLEAR_RECT"; break; + case 0xa: primtype = "CLEAR_RECT"; saved_s4 = 3 << 6; saved_s2 = ~0; break; default: primtype = "unknown"; break; } @@ -1193,6 +1289,8 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, vertex++; } } + + ret = len; } else { /* indirect vertices */ len = data[0] & 0x0000ffff; /* index count */ @@ -1210,13 +1308,15 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, if ((data[i] & 0xffff) == 0xffff) { instr_out(data, hw_offset, i, " indices: (terminator)\n"); - return i; + ret = i; + goto out; } else if ((data[i] >> 16) == 0xffff) { instr_out(data, hw_offset, i, " indices: 0x%04x, " "(terminator)\n", data[i] & 0xffff); - return i; + ret = i; + goto out; } else { instr_out(data, hw_offset, i, " indices: 0x%04x, 0x%04x\n", @@ -1226,7 +1326,8 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, fprintf(out, "3DPRIMITIVE: no terminator found in index buffer\n"); (*failures)++; - return count; + ret = count; + goto out; } else { /* fixed size vertex index buffer */ for (i = 0; i < len; i += 2) { @@ -1241,7 +1342,8 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, } } } - return (len + 1) / 2 + 1; + ret = (len + 1) / 2 + 1; + goto out; } else { /* sequential vertex access */ if (count < 2) @@ -1250,17 +1352,22 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, "3DPRIMITIVE sequential indirect %s, %d starting from " "%d\n", primtype, len, data[1] & 0xffff); instr_out(data, hw_offset, 1, " start\n"); - return 2; + ret = 2; + goto out; } } - return len; +out: + saved_s2 = original_s2; + saved_s4 = original_s4; + return ret; } static int -decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) +decode_3d(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures) { - unsigned int opcode; + uint32_t opcode; + unsigned int idx; struct { uint32_t opcode; @@ -1277,41 +1384,44 @@ decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) { 0x0d, 1, 1, "3DSTATE_MODES_4" }, { 0x0c, 1, 1, "3DSTATE_MODES_5" }, { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" }, - }; + }, *opcode_3d; + + opcode = (data[0] & 0x1f000000) >> 24; - switch ((data[0] & 0x1f000000) >> 24) { + switch (opcode) { case 0x1f: return decode_3d_primitive(data, count, hw_offset, failures); case 0x1d: - return decode_3d_1d(data, count, hw_offset, failures, 0); + return decode_3d_1d(data, count, hw_offset, devid, failures); case 0x1c: return decode_3d_1c(data, count, hw_offset, failures); } - for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) { - if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) { + for (idx = 0; idx < Elements(opcodes_3d); idx++) { + opcode_3d = &opcodes_3d[idx]; + if (opcode == opcode_3d->opcode) { unsigned int len = 1, i; - instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name); - if (opcodes_3d[opcode].max_len > 1) { + instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name); + if (opcode_3d->max_len > 1) { len = (data[0] & 0xff) + 2; - if (len < opcodes_3d[opcode].min_len || - len > opcodes_3d[opcode].max_len) + if (len < opcode_3d->min_len || + len > opcode_3d->max_len) { - fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name); + fprintf(out, "Bad count in %s\n", opcode_3d->name); 
} } for (i = 1; i < len; i++) { if (i >= count) - BUFFER_FAIL(count, len, opcodes_3d[opcode].name); + BUFFER_FAIL(count, len, opcode_3d->name); instr_out(data, hw_offset, i, "dword %d\n", i); } return len; } } - instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d opcode = 0x%x\n", opcode); (*failures)++; return 1; } @@ -1401,12 +1511,87 @@ get_965_prim_type(uint32_t data) default: return "fail"; } } +static int +i965_decode_urb_fence(const uint32_t *data, uint32_t hw_offset, int len, int count, + int *failures) +{ + uint32_t vs_fence, clip_fence, gs_fence, sf_fence, vfe_fence, cs_fence; + + if (len != 3) + fprintf(out, "Bad count in URB_FENCE\n"); + if (count < 3) + BUFFER_FAIL(count, len, "URB_FENCE"); + + vs_fence = data[1] & 0x3ff; + gs_fence = (data[1] >> 10) & 0x3ff; + clip_fence = (data[1] >> 20) & 0x3ff; + sf_fence = data[2] & 0x3ff; + vfe_fence = (data[2] >> 10) & 0x3ff; + cs_fence = (data[2] >> 20) & 0x7ff; + + instr_out(data, hw_offset, 0, "URB_FENCE: %s%s%s%s%s%s\n", + (data[0] >> 13) & 1 ? "cs " : "", + (data[0] >> 12) & 1 ? "vfe " : "", + (data[0] >> 11) & 1 ? "sf " : "", + (data[0] >> 10) & 1 ? "clip " : "", + (data[0] >> 9) & 1 ? "gs " : "", + (data[0] >> 8) & 1 ? "vs " : ""); + instr_out(data, hw_offset, 1, + "vs fence: %d, clip_fence: %d, gs_fence: %d\n", + vs_fence, clip_fence, gs_fence); + instr_out(data, hw_offset, 2, + "sf fence: %d, vfe_fence: %d, cs_fence: %d\n", + sf_fence, vfe_fence, cs_fence); + if (gs_fence < vs_fence) + fprintf(out, "gs fence < vs fence!\n"); + if (clip_fence < gs_fence) + fprintf(out, "clip fence < gs fence!\n"); + if (sf_fence < clip_fence) + fprintf(out, "sf fence < clip fence!\n"); + if (cs_fence < sf_fence) + fprintf(out, "cs fence < sf fence!\n"); + + return len; +} + +static void +state_base_out(const uint32_t *data, uint32_t hw_offset, unsigned int index, + char *name) +{ + if (data[index] & 1) { + instr_out(data, hw_offset, index, "%s state base address 0x%08x\n", + name, data[index] & ~1); + } else { + instr_out(data, hw_offset, index, "%s state base not updated\n", + name); + } +} + +static void +state_max_out(const uint32_t *data, uint32_t hw_offset, unsigned int index, + char *name) +{ + if (data[index] & 1) { + if (data[index] == 1) { + instr_out(data, hw_offset, index, + "%s state upper bound disabled\n", name); + } else { + instr_out(data, hw_offset, index, "%s state upper bound 0x%08x\n", + name, data[index] & ~1); + } + } else { + instr_out(data, hw_offset, index, "%s state upper bound not updated\n", + name); + } +} static int -decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures) +decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures) { - unsigned int opcode, len; - int i; + uint32_t opcode; + unsigned int idx, len; + int i, sba_len; + char *desc1 = NULL; struct { uint32_t opcode; @@ -1435,51 +1620,78 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" }, { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" }, { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, + { 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" }, { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, + { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" }, + { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" }, { 0x7b00, 6, 6, "3DPRIMITIVE" }, - }; + { 0x7802, 4, 4, "3DSTATE_SAMPLER_STATE_POINTERS" }, + { 0x7805, 3, 3, "3DSTATE_URB" }, + { 0x780e, 4, 4, "3DSTATE_CC_STATE_POINTERS" }, + { 0x7810, 6, 6, "3DSTATE_VS_STATE" 
}, + { 0x7811, 7, 7, "3DSTATE_GS_STATE" }, + { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" }, + { 0x7813, 20, 20, "3DSTATE_SF_STATE" }, + { 0x7814, 9, 9, "3DSTATE_WM_STATE" }, + { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" }, + { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" }, + { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" }, + { 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" }, + { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" }, + }, *opcode_3d; len = (data[0] & 0x0000ffff) + 2; - switch ((data[0] & 0xffff0000) >> 16) { + opcode = (data[0] & 0xffff0000) >> 16; + switch (opcode) { + case 0x6000: + len = (data[0] & 0x000000ff) + 2; + return i965_decode_urb_fence(data, hw_offset, len, count, failures); + case 0x6001: + instr_out(data, hw_offset, 0, "CS_URB_STATE\n"); + instr_out(data, hw_offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n", + (data[1] >> 4) & 0x1f, + (((data[1] >> 4) & 0x1f) + 1) * 64, + data[1] & 0x7); + return len; + case 0x6002: + len = (data[0] & 0x000000ff) + 2; + instr_out(data, hw_offset, 0, "CONSTANT_BUFFER: %s\n", + (data[0] >> 8) & 1 ? "valid" : "invalid"); + instr_out(data, hw_offset, 1, "offset: 0x%08x, length: %d bytes\n", + data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64); + return len; case 0x6101: - if (len != 6) + if (IS_GEN6(devid)) + sba_len = 10; + else if (IS_IRONLAKE(devid)) + sba_len = 8; + else + sba_len = 6; + if (len != sba_len) fprintf(out, "Bad count in STATE_BASE_ADDRESS\n"); - if (count < 6) + if (len != sba_len) BUFFER_FAIL(count, len, "STATE_BASE_ADDRESS"); + i = 0; instr_out(data, hw_offset, 0, "STATE_BASE_ADDRESS\n"); - - if (data[1] & 1) { - instr_out(data, hw_offset, 1, "General state at 0x%08x\n", - data[1] & ~1); - } else - instr_out(data, hw_offset, 1, "General state not updated\n"); - - if (data[2] & 1) { - instr_out(data, hw_offset, 2, "Surface state at 0x%08x\n", - data[2] & ~1); - } else - instr_out(data, hw_offset, 2, "Surface state not updated\n"); - - if (data[3] & 1) { - instr_out(data, hw_offset, 3, "Indirect state at 0x%08x\n", - data[3] & ~1); - } else - instr_out(data, hw_offset, 3, "Indirect state not updated\n"); - - if (data[4] & 1) { - instr_out(data, hw_offset, 4, "General state upper bound 0x%08x\n", - data[4] & ~1); - } else - instr_out(data, hw_offset, 4, "General state not updated\n"); - - if (data[5] & 1) { - instr_out(data, hw_offset, 5, "Indirect state upper bound 0x%08x\n", - data[5] & ~1); - } else - instr_out(data, hw_offset, 5, "Indirect state not updated\n"); + i++; + + state_base_out(data, hw_offset, i++, "general"); + state_base_out(data, hw_offset, i++, "surface"); + if (IS_GEN6(devid)) + state_base_out(data, hw_offset, i++, "dynamic"); + state_base_out(data, hw_offset, i++, "indirect"); + if (IS_IRONLAKE(devid) || IS_GEN6(devid)) + state_base_out(data, hw_offset, i++, "instruction"); + + state_max_out(data, hw_offset, i++, "general"); + if (IS_GEN6(devid)) + state_max_out(data, hw_offset, i++, "dynamic"); + state_max_out(data, hw_offset, i++, "indirect"); + if (IS_IRONLAKE(devid) || IS_GEN6(devid)) + state_max_out(data, hw_offset, i++, "instruction"); return len; case 0x7800: @@ -1498,18 +1710,33 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures instr_out(data, hw_offset, 6, "CC state\n"); return len; case 0x7801: - if (len != 6) + len = (data[0] & 0x000000ff) + 2; + if (len != 6 && len != 4) fprintf(out, "Bad count in 3DSTATE_BINDING_TABLE_POINTERS\n"); - if (count < 6) - BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS"); + if (len == 6) { + if (count < 6) + BUFFER_FAIL(count, len, 
"3DSTATE_BINDING_TABLE_POINTERS"); + instr_out(data, hw_offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS\n"); + instr_out(data, hw_offset, 1, "VS binding table\n"); + instr_out(data, hw_offset, 2, "GS binding table\n"); + instr_out(data, hw_offset, 3, "Clip binding table\n"); + instr_out(data, hw_offset, 4, "SF binding table\n"); + instr_out(data, hw_offset, 5, "WM binding table\n"); + } else { + if (count < 4) + BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS"); - instr_out(data, hw_offset, 0, - "3DSTATE_BINDING_TABLE_POINTERS\n"); - instr_out(data, hw_offset, 1, "VS binding table\n"); - instr_out(data, hw_offset, 2, "GS binding table\n"); - instr_out(data, hw_offset, 3, "Clip binding table\n"); - instr_out(data, hw_offset, 4, "SF binding table\n"); - instr_out(data, hw_offset, 5, "WM binding table\n"); + instr_out(data, hw_offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS: VS mod %d, " + "GS mod %d, PS mod %d\n", + (data[0] & (1 << 8)) != 0, + (data[0] & (1 << 9)) != 0, + (data[0] & (1 << 10)) != 0); + instr_out(data, hw_offset, 1, "VS binding table\n"); + instr_out(data, hw_offset, 2, "GS binding table\n"); + instr_out(data, hw_offset, 3, "WM binding table\n"); + } return len; @@ -1560,6 +1787,18 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures } return len; + case 0x780d: + len = (data[0] & 0xff) + 2; + if (len != 4) + fprintf(out, "Bad count in 3DSTATE_VIEWPORT_STATE_POINTERS\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DSTATE_VIEWPORT_STATE_POINTERS"); + instr_out(data, hw_offset, 0, "3DSTATE_VIEWPORT_STATE_POINTERS\n"); + instr_out(data, hw_offset, 1, "clip\n"); + instr_out(data, hw_offset, 2, "sf\n"); + instr_out(data, hw_offset, 3, "cc\n"); + return len; + case 0x780a: len = (data[0] & 0xff) + 2; if (len != 3) @@ -1592,7 +1831,7 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures return len; case 0x7905: - if (len != 5 && len != 6) + if (len < 5 || len > 7) fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n"); if (count < len) BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER"); @@ -1609,9 +1848,36 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures ((data[3] & 0x0007ffc0) >> 6) + 1, ((data[3] & 0xfff80000) >> 19) + 1); instr_out(data, hw_offset, 4, "volume depth\n"); - if (len == 6) + if (len >= 6) instr_out(data, hw_offset, 5, "\n"); + if (len >= 7) + instr_out(data, hw_offset, 6, "render target view extent\n"); + + return len; + case 0x7a00: + len = (data[0] & 0xff) + 2; + if (len != 4) + fprintf(out, "Bad count in PIPE_CONTROL\n"); + if (count < len) + BUFFER_FAIL(count, len, "PIPE_CONTROL"); + + switch ((data[0] >> 14) & 0x3) { + case 0: desc1 = "no write"; break; + case 1: desc1 = "qword write"; break; + case 2: desc1 = "PS_DEPTH_COUNT write"; break; + case 3: desc1 = "TIMESTAMP write"; break; + } + instr_out(data, hw_offset, 0, + "PIPE_CONTROL: %s, %sdepth stall, %sRC write flush, " + "%sinst flush\n", + desc1, + data[0] & (1 << 13) ? "" : "no ", + data[0] & (1 << 12) ? "" : "no ", + data[0] & (1 << 11) ? 
"" : "no "); + instr_out(data, hw_offset, 1, "destination address\n"); + instr_out(data, hw_offset, 2, "immediate dword low\n"); + instr_out(data, hw_offset, 3, "immediate dword high\n"); return len; case 0x7b00: @@ -1633,39 +1899,41 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures return len; } - for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) { - if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) { + for (idx = 0; idx < Elements(opcodes_3d); idx++) { + opcode_3d = &opcodes_3d[idx]; + if ((data[0] & 0xffff0000) >> 16 == opcode_3d->opcode) { unsigned int i; len = 1; - instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name); - if (opcodes_3d[opcode].max_len > 1) { + instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name); + if (opcode_3d->max_len > 1) { len = (data[0] & 0xff) + 2; - if (len < opcodes_3d[opcode].min_len || - len > opcodes_3d[opcode].max_len) + if (len < opcode_3d->min_len || + len > opcode_3d->max_len) { - fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name); + fprintf(out, "Bad count in %s\n", opcode_3d->name); } } for (i = 1; i < len; i++) { if (i >= count) - BUFFER_FAIL(count, len, opcodes_3d[opcode].name); + BUFFER_FAIL(count, len, opcode_3d->name); instr_out(data, hw_offset, i, "dword %d\n", i); } return len; } } - instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_965 opcode = 0x%x\n", opcode); (*failures)++; return 1; } static int -decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failures) +decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures) { - unsigned int opcode; + unsigned int idx; + uint32_t opcode; struct { uint32_t opcode; @@ -1689,41 +1957,44 @@ decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failure { 0x0f, 1, 1, "3DSTATE_MODES_2" }, { 0x15, 1, 1, "3DSTATE_FOG_COLOR" }, { 0x16, 1, 1, "3DSTATE_MODES_4" }, - }; + }, *opcode_3d; + + opcode = (data[0] & 0x1f000000) >> 24; - switch ((data[0] & 0x1f000000) >> 24) { + switch (opcode) { case 0x1f: return decode_3d_primitive(data, count, hw_offset, failures); case 0x1d: - return decode_3d_1d(data, count, hw_offset, failures, 1); + return decode_3d_1d(data, count, hw_offset, devid, failures); case 0x1c: return decode_3d_1c(data, count, hw_offset, failures); } - for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) { - if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) { + for (idx = 0; idx < Elements(opcodes_3d); idx++) { + opcode_3d = &opcodes_3d[idx]; + if ((data[0] & 0x1f000000) >> 24 == opcode_3d->opcode) { unsigned int len = 1, i; - instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name); - if (opcodes_3d[opcode].max_len > 1) { + instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name); + if (opcode_3d->max_len > 1) { len = (data[0] & 0xff) + 2; - if (len < opcodes_3d[opcode].min_len || - len > opcodes_3d[opcode].max_len) + if (len < opcode_3d->min_len || + len > opcode_3d->max_len) { - fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name); + fprintf(out, "Bad count in %s\n", opcode_3d->name); } } for (i = 1; i < len; i++) { if (i >= count) - BUFFER_FAIL(count, len, opcodes_3d[opcode].name); + BUFFER_FAIL(count, len, opcode_3d->name); instr_out(data, hw_offset, i, "dword %d\n", i); } return len; } } - instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_i830 opcode = 0x%x\n", opcode); (*failures)++; return 1; } @@ -1736,8 +2007,12 @@ 
decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failure * \param hw_offset hardware address for the buffer */ int -intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid) +intel_decode(const uint32_t *data, int count, + uint32_t hw_offset, + uint32_t devid, + uint32_t ignore_end_of_batchbuffer) { + int ret; int index = 0; int failures = 0; @@ -1746,8 +2021,23 @@ intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid while (index < count) { switch ((data[index] & 0xe0000000) >> 29) { case 0x0: - index += decode_mi(data + index, count - index, + ret = decode_mi(data + index, count - index, hw_offset + index * 4, &failures); + + /* If MI_BATCHBUFFER_END happened, then dump the rest of the + * output in case we some day want it in debugging, but don't + * decode it since it'll just confuse in the common case. + */ + if (ret == -1) { + if (ignore_end_of_batchbuffer) { + index++; + } else { + for (index = index + 1; index < count; index++) { + instr_out(data, hw_offset, index, "\n"); + } + } + } else + index += ret; break; case 0x2: index += decode_2d(data + index, count - index, @@ -1756,13 +2046,16 @@ intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid case 0x3: if (IS_965(devid)) { index += decode_3d_965(data + index, count - index, - hw_offset + index * 4, &failures); + hw_offset + index * 4, + devid, &failures); } else if (IS_9XX(devid)) { index += decode_3d(data + index, count - index, - hw_offset + index * 4, &failures); + hw_offset + index * 4, + devid, &failures); } else { index += decode_3d_i830(data + index, count - index, - hw_offset + index * 4, &failures); + hw_offset + index * 4, + devid, &failures); } break; default: diff --git a/src/gallium/drivers/i965/intel_decode.h b/src/gallium/drivers/i965/intel_decode.h index 7683097b869..7e7c108c0c6 100644 --- a/src/gallium/drivers/i965/intel_decode.h +++ b/src/gallium/drivers/i965/intel_decode.h @@ -25,5 +25,7 @@ * */ -int intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid); +#include "pipe/p_compiler.h" + +int intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, uint32_t ignore_end_of_batchbuffer); void intel_decode_context_reset(void); diff --git a/src/gallium/drivers/i965/intel_structs.h b/src/gallium/drivers/i965/intel_structs.h index 522e3bd92c2..ec6eec8910f 100644 --- a/src/gallium/drivers/i965/intel_structs.h +++ b/src/gallium/drivers/i965/intel_structs.h @@ -1,6 +1,8 @@ #ifndef INTEL_STRUCTS_H #define INTEL_STRUCTS_H +#include "brw_types.h" + struct br0 { GLuint length:8; GLuint pad0:3; diff --git a/src/gallium/drivers/identity/SConscript b/src/gallium/drivers/identity/SConscript index b364e0acc84..d24d1ec7c61 100644 --- a/src/gallium/drivers/identity/SConscript +++ b/src/gallium/drivers/identity/SConscript @@ -10,4 +10,6 @@ identity = env.ConvenienceLibrary( 'id_screen.c', ]) +env.Alias('identity', identity) + Export('identity') diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index de83c249057..3efbd6a246d 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -577,17 +577,13 @@ identity_set_index_buffer(struct pipe_context *_pipe, static void identity_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *_dst, - struct pipe_subresource subdst, + unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, struct pipe_resource *_src, - struct 
pipe_subresource subsrc, - unsigned srcx, - unsigned srcy, - unsigned srcz, - unsigned width, - unsigned height) + unsigned src_level, + const struct pipe_box *src_box) { struct identity_context *id_pipe = identity_context(_pipe); struct identity_resource *id_resource_dst = identity_resource(_dst); @@ -598,17 +594,13 @@ identity_resource_copy_region(struct pipe_context *_pipe, pipe->resource_copy_region(pipe, dst, - subdst, + dst_level, dstx, dsty, dstz, src, - subsrc, - srcx, - srcy, - srcz, - width, - height); + src_level, + src_box); } static void @@ -690,8 +682,8 @@ identity_flush(struct pipe_context *_pipe, static unsigned int identity_is_resource_referenced(struct pipe_context *_pipe, struct pipe_resource *_resource, - unsigned face, - unsigned level) + unsigned level, + int layer) { struct identity_context *id_pipe = identity_context(_pipe); struct identity_resource *id_resource = identity_resource(_resource); @@ -700,8 +692,8 @@ identity_is_resource_referenced(struct pipe_context *_pipe, return pipe->is_resource_referenced(pipe, resource, - face, - level); + level, + layer); } static struct pipe_sampler_view * @@ -732,10 +724,38 @@ identity_context_sampler_view_destroy(struct pipe_context *_pipe, identity_sampler_view(_view)); } +static struct pipe_surface * +identity_context_create_surface(struct pipe_context *_pipe, + struct pipe_resource *_resource, + const struct pipe_surface *templ) +{ + struct identity_context *id_context = identity_context(_pipe); + struct identity_resource *id_resource = identity_resource(_resource); + struct pipe_context *pipe = id_context->pipe; + struct pipe_resource *resource = id_resource->resource; + struct pipe_surface *result; + + result = pipe->create_surface(pipe, + resource, + templ); + + if (result) + return identity_surface_create(id_context, id_resource, result); + return NULL; +} + +static void +identity_context_surface_destroy(struct pipe_context *_pipe, + struct pipe_surface *_surf) +{ + identity_surface_destroy(identity_context(_pipe), + identity_surface(_surf)); +} + static struct pipe_transfer * identity_context_get_transfer(struct pipe_context *_context, struct pipe_resource *_resource, - struct pipe_subresource sr, + unsigned level, unsigned usage, const struct pipe_box *box) { @@ -747,7 +767,7 @@ identity_context_get_transfer(struct pipe_context *_context, result = context->get_transfer(context, resource, - sr, + level, usage, box); @@ -812,12 +832,12 @@ identity_context_transfer_unmap(struct pipe_context *_context, static void identity_context_transfer_inline_write(struct pipe_context *_context, struct pipe_resource *_resource, - struct pipe_subresource sr, + unsigned level, unsigned usage, const struct pipe_box *box, const void *data, unsigned stride, - unsigned slice_stride) + unsigned layer_stride) { struct identity_context *id_context = identity_context(_context); struct identity_resource *id_resource = identity_resource(_resource); @@ -826,12 +846,12 @@ identity_context_transfer_inline_write(struct pipe_context *_context, context->transfer_inline_write(context, resource, - sr, + level, usage, box, data, stride, - slice_stride); + layer_stride); } @@ -899,6 +919,8 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.clear_depth_stencil = identity_clear_depth_stencil; id_pipe->base.flush = identity_flush; id_pipe->base.is_resource_referenced = identity_is_resource_referenced; + id_pipe->base.create_surface = identity_context_create_surface; + id_pipe->base.surface_destroy = 
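The identity wrapper above follows the interface change in the pipe_context resource_copy_region hook: the per-face subresource and the loose srcx/srcy/srcz/width/height arguments give way to a mip level plus a pipe_box. A caller-side sketch of the new form, assuming the updated gallium headers are on the include path and pipe_box's usual x/y/z/width/height/depth fields:

#include "pipe/p_context.h"
#include "pipe/p_state.h"

/* Copy a w x h rectangle from level 0 of src to level 0 of dst. */
void copy_level0_rect(struct pipe_context *pipe,
                      struct pipe_resource *dst,
                      struct pipe_resource *src,
                      unsigned w, unsigned h)
{
   struct pipe_box src_box;

   src_box.x = src_box.y = src_box.z = 0;
   src_box.width  = w;
   src_box.height = h;
   src_box.depth  = 1;                 /* one layer, plain 2D copy */

   pipe->resource_copy_region(pipe,
                              dst, 0,              /* dst_level */
                              0, 0, 0,             /* dstx, dsty, dstz */
                              src, 0,              /* src_level */
                              &src_box);
}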
identity_context_surface_destroy; id_pipe->base.create_sampler_view = identity_context_create_sampler_view; id_pipe->base.sampler_view_destroy = identity_context_sampler_view_destroy; id_pipe->base.get_transfer = identity_context_get_transfer; diff --git a/src/gallium/drivers/identity/id_objects.c b/src/gallium/drivers/identity/id_objects.c index 593928f399c..63454410525 100644 --- a/src/gallium/drivers/identity/id_objects.c +++ b/src/gallium/drivers/identity/id_objects.c @@ -71,7 +71,8 @@ identity_resource_destroy(struct identity_resource *id_resource) struct pipe_surface * -identity_surface_create(struct identity_resource *id_resource, +identity_surface_create(struct identity_context *id_context, + struct identity_resource *id_resource, struct pipe_surface *surface) { struct identity_surface *id_surface; @@ -100,10 +101,12 @@ error: } void -identity_surface_destroy(struct identity_surface *id_surface) +identity_surface_destroy(struct identity_context *id_context, + struct identity_surface *id_surface) { pipe_resource_reference(&id_surface->base.texture, NULL); - pipe_surface_reference(&id_surface->surface, NULL); + id_context->pipe->surface_destroy(id_context->pipe, + id_surface->surface); FREE(id_surface); } diff --git a/src/gallium/drivers/identity/id_objects.h b/src/gallium/drivers/identity/id_objects.h index e8deabf4fc7..181f2d6623e 100644 --- a/src/gallium/drivers/identity/id_objects.h +++ b/src/gallium/drivers/identity/id_objects.h @@ -147,11 +147,13 @@ void identity_resource_destroy(struct identity_resource *id_resource); struct pipe_surface * -identity_surface_create(struct identity_resource *id_resource, +identity_surface_create(struct identity_context *id_context, + struct identity_resource *id_resource, struct pipe_surface *surface); void -identity_surface_destroy(struct identity_surface *id_surface); +identity_surface_destroy(struct identity_context *id_context, + struct identity_surface *id_surface); struct pipe_sampler_view * identity_sampler_view_create(struct identity_context *id_context, diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c index 5fb464b4148..644481bb748 100644 --- a/src/gallium/drivers/identity/id_screen.c +++ b/src/gallium/drivers/identity/id_screen.c @@ -189,39 +189,6 @@ identity_screen_resource_destroy(struct pipe_screen *screen, identity_resource_destroy(identity_resource(_resource)); } -static struct pipe_surface * -identity_screen_get_tex_surface(struct pipe_screen *_screen, - struct pipe_resource *_resource, - unsigned face, - unsigned level, - unsigned zslice, - unsigned usage) -{ - struct identity_screen *id_screen = identity_screen(_screen); - struct identity_resource *id_resource = identity_resource(_resource); - struct pipe_screen *screen = id_screen->screen; - struct pipe_resource *resource = id_resource->resource; - struct pipe_surface *result; - - result = screen->get_tex_surface(screen, - resource, - face, - level, - zslice, - usage); - - if (result) - return identity_surface_create(id_resource, result); - return NULL; -} - -static void -identity_screen_tex_surface_destroy(struct pipe_surface *_surface) -{ - identity_surface_destroy(identity_surface(_surface)); -} - - static struct pipe_resource * identity_screen_user_buffer_create(struct pipe_screen *_screen, @@ -247,16 +214,18 @@ identity_screen_user_buffer_create(struct pipe_screen *_screen, static void identity_screen_flush_frontbuffer(struct pipe_screen *_screen, - struct pipe_surface *_surface, + struct pipe_resource *_resource, + unsigned 
level, unsigned layer, void *context_private) { struct identity_screen *id_screen = identity_screen(_screen); - struct identity_surface *id_surface = identity_surface(_surface); + struct identity_resource *id_resource = identity_resource(_resource); struct pipe_screen *screen = id_screen->screen; - struct pipe_surface *surface = id_surface->surface; + struct pipe_resource *resource = id_resource->resource; screen->flush_frontbuffer(screen, - surface, + resource, + level, layer, context_private); } @@ -323,8 +292,6 @@ identity_screen_create(struct pipe_screen *screen) id_screen->base.resource_from_handle = identity_screen_resource_from_handle; id_screen->base.resource_get_handle = identity_screen_resource_get_handle; id_screen->base.resource_destroy = identity_screen_resource_destroy; - id_screen->base.get_tex_surface = identity_screen_get_tex_surface; - id_screen->base.tex_surface_destroy = identity_screen_tex_surface_destroy; id_screen->base.user_buffer_create = identity_screen_user_buffer_create; id_screen->base.flush_frontbuffer = identity_screen_flush_frontbuffer; id_screen->base.fence_reference = identity_screen_fence_reference; diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 669e42e3003..4068bed393c 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -3,8 +3,6 @@ include $(TOP)/configs/current LIBNAME = llvmpipe -DEFINES += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS - C_SOURCES = \ lp_bld_alpha.c \ lp_bld_blend_aos.c \ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c index e50643790c8..518969c3202 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c @@ -43,7 +43,7 @@ void -lp_build_alpha_test(LLVMBuilderRef builder, +lp_build_alpha_test(struct gallivm_state *gallivm, unsigned func, struct lp_type type, struct lp_build_mask_context *mask, @@ -54,7 +54,7 @@ lp_build_alpha_test(LLVMBuilderRef builder, struct lp_build_context bld; LLVMValueRef test; - lp_build_context_init(&bld, builder, type); + lp_build_context_init(&bld, gallivm, type); test = lp_build_cmp(&bld, func, alpha, ref); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h index 27ca8aad4d4..06206a24d83 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h @@ -34,16 +34,18 @@ #ifndef LP_BLD_ALPHA_H #define LP_BLD_ALPHA_H +#include "pipe/p_compiler.h" #include "gallivm/lp_bld.h" struct pipe_alpha_state; +struct gallivm_state; struct lp_type; struct lp_build_mask_context; void -lp_build_alpha_test(LLVMBuilderRef builder, +lp_build_alpha_test(struct gallivm_state *gallivm, unsigned func, struct lp_type type, struct lp_build_mask_context *mask, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h index 5cecec3d7f9..f82ae30bb7d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h @@ -30,6 +30,7 @@ #include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_init.h" #include "pipe/p_format.h" @@ -61,7 +62,7 @@ lp_build_blend_func(struct lp_build_context *bld, LLVMValueRef -lp_build_blend_aos(LLVMBuilderRef builder, +lp_build_blend_aos(struct gallivm_state *gallivm, const struct pipe_blend_state *blend, struct lp_type type, unsigned rt, @@ -72,7 +73,7 @@ lp_build_blend_aos(LLVMBuilderRef builder, void 
-lp_build_blend_soa(LLVMBuilderRef builder, +lp_build_blend_soa(struct gallivm_state *gallivm, const struct pipe_blend_state *blend, struct lp_type type, unsigned rt, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index d1c9b88f9bb..c342346a36e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -301,7 +301,7 @@ lp_build_blend_func(struct lp_build_context *bld, LLVMValueRef -lp_build_blend_aos(LLVMBuilderRef builder, +lp_build_blend_aos(struct gallivm_state *gallivm, const struct pipe_blend_state *blend, struct lp_type type, unsigned rt, @@ -322,7 +322,7 @@ lp_build_blend_aos(LLVMBuilderRef builder, /* Setup build context */ memset(&bld, 0, sizeof bld); - lp_build_context_init(&bld.base, builder, type); + lp_build_context_init(&bld.base, gallivm, type); bld.src = src; bld.dst = dst; bld.const_ = const_; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c index 30d261e979f..4d5bc9642d9 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c @@ -73,6 +73,7 @@ #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_init.h" #include "lp_bld_blend.h" @@ -211,7 +212,7 @@ lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor) * \param res the result/output */ void -lp_build_blend_soa(LLVMBuilderRef builder, +lp_build_blend_soa(struct gallivm_state *gallivm, const struct pipe_blend_state *blend, struct lp_type type, unsigned rt, @@ -220,6 +221,7 @@ lp_build_blend_soa(LLVMBuilderRef builder, LLVMValueRef con[4], LLVMValueRef res[4]) { + LLVMBuilderRef builder = gallivm->builder; struct lp_build_blend_soa_context bld; unsigned i, j, k; @@ -227,7 +229,7 @@ lp_build_blend_soa(LLVMBuilderRef builder, /* Setup build context */ memset(&bld, 0, sizeof bld); - lp_build_context_init(&bld.base, builder, type); + lp_build_context_init(&bld.base, gallivm, type); for (i = 0; i < 4; ++i) { bld.src[i] = src[i]; bld.dst[i] = dst[i]; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 7eb76d4fb31..1bf741194c5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -97,6 +97,7 @@ lp_build_stencil_test_single(struct lp_build_context *bld, LLVMValueRef stencilRef, LLVMValueRef stencilVals) { + LLVMBuilderRef builder = bld->gallivm->builder; const unsigned stencilMax = 255; /* XXX fix */ struct lp_type type = bld->type; LLVMValueRef res; @@ -107,10 +108,10 @@ lp_build_stencil_test_single(struct lp_build_context *bld, if (stencil->valuemask != stencilMax) { /* compute stencilRef = stencilRef & valuemask */ - LLVMValueRef valuemask = lp_build_const_int_vec(type, stencil->valuemask); - stencilRef = LLVMBuildAnd(bld->builder, stencilRef, valuemask, ""); + LLVMValueRef valuemask = lp_build_const_int_vec(bld->gallivm, type, stencil->valuemask); + stencilRef = LLVMBuildAnd(builder, stencilRef, valuemask, ""); /* compute stencilVals = stencilVals & valuemask */ - stencilVals = LLVMBuildAnd(bld->builder, stencilVals, valuemask, ""); + stencilVals = LLVMBuildAnd(builder, stencilVals, valuemask, ""); } res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals); @@ -167,9 +168,10 @@ lp_build_stencil_op_single(struct lp_build_context *bld, LLVMValueRef stencilVals) { + LLVMBuilderRef builder = bld->gallivm->builder; 
struct lp_type type = bld->type; LLVMValueRef res; - LLVMValueRef max = lp_build_const_int_vec(type, 0xff); + LLVMValueRef max = lp_build_const_int_vec(bld->gallivm, type, 0xff); unsigned stencil_op; assert(type.sign); @@ -210,15 +212,15 @@ lp_build_stencil_op_single(struct lp_build_context *bld, break; case PIPE_STENCIL_OP_INCR_WRAP: res = lp_build_add(bld, stencilVals, bld->one); - res = LLVMBuildAnd(bld->builder, res, max, ""); + res = LLVMBuildAnd(builder, res, max, ""); break; case PIPE_STENCIL_OP_DECR_WRAP: res = lp_build_sub(bld, stencilVals, bld->one); - res = LLVMBuildAnd(bld->builder, res, max, ""); + res = LLVMBuildAnd(builder, res, max, ""); break; case PIPE_STENCIL_OP_INVERT: - res = LLVMBuildNot(bld->builder, stencilVals, ""); - res = LLVMBuildAnd(bld->builder, res, max, ""); + res = LLVMBuildNot(builder, stencilVals, ""); + res = LLVMBuildAnd(builder, res, max, ""); break; default: assert(0 && "bad stencil op mode"); @@ -242,6 +244,7 @@ lp_build_stencil_op(struct lp_build_context *bld, LLVMValueRef front_facing) { + LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef res; assert(stencil[0].enabled); @@ -262,10 +265,11 @@ lp_build_stencil_op(struct lp_build_context *bld, if (stencil->writemask != 0xff) { /* mask &= stencil->writemask */ - LLVMValueRef writemask = lp_build_const_int_vec(bld->type, stencil->writemask); - mask = LLVMBuildAnd(bld->builder, mask, writemask, ""); + LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type, + stencil->writemask); + mask = LLVMBuildAnd(builder, mask, writemask, ""); /* res = (res & mask) | (stencilVals & ~mask) */ - res = lp_build_select_bitwise(bld, writemask, res, stencilVals); + res = lp_build_select_bitwise(bld, mask, res, stencilVals); } else { /* res = mask ? res : stencilVals */ @@ -411,25 +415,27 @@ get_s_shift_and_mask(const struct util_format_description *format_desc, * \param counter is a pointer of the uint32 counter. 
*/ void -lp_build_occlusion_count(LLVMBuilderRef builder, +lp_build_occlusion_count(struct gallivm_state *gallivm, struct lp_type type, LLVMValueRef maskvalue, LLVMValueRef counter) { - LLVMValueRef countmask = lp_build_const_int_vec(type, 1); + LLVMBuilderRef builder = gallivm->builder; + LLVMContextRef context = gallivm->context; + LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1); LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv"); - LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8Type(), 16); + LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8TypeInContext(context), 16); LLVMValueRef counti = LLVMBuildBitCast(builder, countv, i8v16, "counti"); LLVMValueRef maskarray[4] = { - LLVMConstInt(LLVMInt32Type(), 0, 0), - LLVMConstInt(LLVMInt32Type(), 4, 0), - LLVMConstInt(LLVMInt32Type(), 8, 0), - LLVMConstInt(LLVMInt32Type(), 12, 0), + lp_build_const_int32(gallivm, 0), + lp_build_const_int32(gallivm, 4), + lp_build_const_int32(gallivm, 8), + lp_build_const_int32(gallivm, 12) }; LLVMValueRef shufflemask = LLVMConstVector(maskarray, 4); LLVMValueRef shufflev = LLVMBuildShuffleVector(builder, counti, LLVMGetUndef(i8v16), shufflemask, "shufflev"); - LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32Type(), "shuffle"); - LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32Type(), shuffle); + LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32TypeInContext(context), "shuffle"); + LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32TypeInContext(context), shuffle); LLVMValueRef orig = LLVMBuildLoad(builder, counter, "orig"); LLVMValueRef incr = LLVMBuildAdd(builder, orig, count, "incr"); LLVMBuildStore(builder, incr, counter); @@ -452,7 +458,7 @@ lp_build_occlusion_count(LLVMBuilderRef builder, * \param facing contains boolean value indicating front/back facing polygon */ void -lp_build_depth_stencil_test(LLVMBuilderRef builder, +lp_build_depth_stencil_test(struct gallivm_state *gallivm, const struct pipe_depth_state *depth, const struct pipe_stencil_state stencil[2], struct lp_type z_src_type, @@ -465,6 +471,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef *zs_value, boolean do_branch) { + LLVMBuilderRef builder = gallivm->builder; struct lp_type z_type; struct lp_build_context z_bld; struct lp_build_context s_bld; @@ -537,11 +544,11 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, /* Setup build context for Z vals */ - lp_build_context_init(&z_bld, builder, z_type); + lp_build_context_init(&z_bld, gallivm, z_type); /* Setup build context for stencil vals */ s_type = lp_type_int_vec(z_type.width); - lp_build_context_init(&s_bld, builder, s_type); + lp_build_context_init(&s_bld, gallivm, s_type); /* Load current z/stencil value from z/stencil buffer */ zs_dst_ptr = LLVMBuildBitCast(builder, @@ -559,14 +566,14 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) { if (z_mask != 0xffffffff) { - z_bitmask = lp_build_const_int_vec(z_type, z_mask); + z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask); } /* * Align the framebuffer Z 's LSB to the right. 
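lp_build_occlusion_count above lowers the coverage mask to a population count: AND every 32-bit lane with 1, shuffle byte 0 of each lane (vector bytes 0, 4, 8, 12) into one packed word, hand that to llvm.ctpop.i32, and add the result to the counter in memory. A scalar model of the same computation, using the GCC/Clang popcount builtin in place of the intrinsic:

#include <stdint.h>

/* Each lane of 'mask' is all-ones (fragment alive) or zero. */
unsigned count_live_fragments(const uint32_t mask[4])
{
   uint32_t packed = 0;
   int i;

   for (i = 0; i < 4; i++)
      packed |= (mask[i] & 1u) << (8 * i);  /* byte 0 of each 32-bit lane */

   return __builtin_popcount(packed);       /* stands in for llvm.ctpop.i32 */
}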
*/ if (z_shift) { - LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); + LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst"); } else if (z_bitmask) { /* TODO: Instead of loading a mask from memory and ANDing, it's @@ -580,7 +587,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { if (s_shift) { - LLVMValueRef shift = lp_build_const_int_vec(s_type, s_shift); + LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift); stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, ""); stencil_shift = shift; /* used below */ } @@ -589,7 +596,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } if (s_mask != 0xffffffff) { - LLVMValueRef mask = lp_build_const_int_vec(s_type, s_mask); + LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask); stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); } @@ -600,12 +607,13 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, if (stencil[0].enabled) { if (face) { - LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef zero = lp_build_const_int32(gallivm, 0); /* front_facing = face != 0 ? ~0 : 0 */ front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, ""); front_facing = LLVMBuildSExt(builder, front_facing, - LLVMIntType(s_bld.type.length*s_bld.type.width), + LLVMIntTypeInContext(gallivm->context, + s_bld.type.length*s_bld.type.width), ""); front_facing = LLVMBuildBitCast(builder, front_facing, s_bld.int_vec_type, ""); @@ -642,7 +650,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, */ if (!z_type.floating) { - z_src = lp_build_clamped_float_to_unsigned_norm(builder, + z_src = lp_build_clamped_float_to_unsigned_norm(gallivm, z_src_type, z_width, z_src); @@ -657,7 +665,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, assert(z_src_type.norm); assert(!z_type.floating); if (z_src_type.width > z_width) { - LLVMValueRef shift = lp_build_const_int_vec(z_src_type, + LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type, z_src_type.width - z_width); z_src = LLVMBuildLShr(builder, z_src, shift, ""); } @@ -710,7 +718,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, z_fail_mask, front_facing); /* apply Z-pass operator */ - z_pass_mask = LLVMBuildAnd(z_bld.builder, orig_mask, z_pass, ""); + z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, ""); stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, z_pass_mask, front_facing); @@ -720,7 +728,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, /* No depth test: apply Z-pass operator to stencil buffer values which * passed the stencil test. 
*/ - s_pass_mask = LLVMBuildAnd(s_bld.builder, orig_mask, s_pass_mask, ""); + s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, ""); stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, s_pass_mask, front_facing); @@ -728,11 +736,11 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, /* Put Z and ztencil bits in the right place */ if (z_dst && z_shift) { - LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); + LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); z_dst = LLVMBuildShl(builder, z_dst, shift, ""); } if (stencil_vals && stencil_shift) - stencil_vals = LLVMBuildShl(s_bld.builder, stencil_vals, + stencil_vals = LLVMBuildShl(builder, stencil_vals, stencil_shift, ""); /* Finally, merge/store the z/stencil values */ @@ -740,7 +748,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, (stencil[0].enabled && stencil[0].writemask)) { if (z_dst && stencil_vals) - zs_dst = LLVMBuildOr(z_bld.builder, z_dst, stencil_vals, ""); + zs_dst = LLVMBuildOr(builder, z_dst, stencil_vals, ""); else if (z_dst) zs_dst = z_dst; else @@ -775,7 +783,7 @@ lp_build_depth_write(LLVMBuilderRef builder, void -lp_build_deferred_depth_write(LLVMBuilderRef builder, +lp_build_deferred_depth_write(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, @@ -785,11 +793,12 @@ lp_build_deferred_depth_write(LLVMBuilderRef builder, struct lp_type z_type; struct lp_build_context z_bld; LLVMValueRef z_dst; + LLVMBuilderRef builder = gallivm->builder; /* XXX: pointlessly redo type logic: */ z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); - lp_build_context_init(&z_bld, builder, z_type); + lp_build_context_init(&z_bld, gallivm, z_type); zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, LLVMPointerType(z_bld.vec_type, 0), ""); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index a54ef3a711e..e01fc46ec16 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -36,10 +36,14 @@ #define LP_BLD_DEPTH_H +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + #include "gallivm/lp_bld.h" struct pipe_depth_state; +struct gallivm_state; struct util_format_description; struct lp_type; struct lp_build_mask_context; @@ -51,7 +55,7 @@ lp_depth_type(const struct util_format_description *format_desc, void -lp_build_depth_stencil_test(LLVMBuilderRef builder, +lp_build_depth_stencil_test(struct gallivm_state *gallivm, const struct pipe_depth_state *depth, const struct pipe_stencil_state stencil[2], struct lp_type type, @@ -71,7 +75,7 @@ lp_build_depth_write(LLVMBuilderRef builder, LLVMValueRef zs_value); void -lp_build_deferred_depth_write(LLVMBuilderRef builder, +lp_build_deferred_depth_write(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, @@ -79,7 +83,7 @@ lp_build_deferred_depth_write(LLVMBuilderRef builder, LLVMValueRef zs_value); void -lp_build_occlusion_count(LLVMBuilderRef builder, +lp_build_occlusion_count(struct gallivm_state *gallivm, struct lp_type type, LLVMValueRef maskvalue, LLVMValueRef counter); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index c9da8900d0c..45ddf547bf0 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ 
b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -127,13 +127,14 @@ coeffs_init(struct lp_build_interp_soa_context *bld, LLVMValueRef dady_ptr) { struct lp_build_context *coeff_bld = &bld->coeff_bld; - LLVMBuilderRef builder = coeff_bld->builder; + struct gallivm_state *gallivm = coeff_bld->gallivm; + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef zero = LLVMConstNull(coeff_bld->elem_type); LLVMValueRef one = LLVMConstReal(coeff_bld->elem_type, 1.0); - LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0); - LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0); - LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + LLVMValueRef i0 = lp_build_const_int32(gallivm, 0); + LLVMValueRef i1 = lp_build_const_int32(gallivm, 1); + LLVMValueRef i2 = lp_build_const_int32(gallivm, 2); + LLVMValueRef i3 = lp_build_const_int32(gallivm, 3); unsigned attrib; unsigned chan; @@ -144,7 +145,8 @@ coeffs_init(struct lp_build_interp_soa_context *bld, const unsigned interp = bld->interp[attrib]; for (chan = 0; chan < NUM_CHANNELS; ++chan) { if (mask & (1 << chan)) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0); + LLVMValueRef index = lp_build_const_int32(gallivm, + attrib * NUM_CHANNELS + chan); LLVMValueRef a0 = zero; LLVMValueRef dadx = zero; LLVMValueRef dady = zero; @@ -231,7 +233,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, * a = {a, a, a, a} */ - a = lp_build_broadcast(builder, coeff_bld->vec_type, a); + a = lp_build_broadcast(gallivm, coeff_bld->vec_type, a); /* * Compute the attrib values on the upper-left corner of each quad. @@ -273,12 +275,14 @@ coeffs_init(struct lp_build_interp_soa_context *bld, */ static void attribs_update(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, int quad_index, int start, int end) { + LLVMBuilderRef builder = gallivm->builder; struct lp_build_context *coeff_bld = &bld->coeff_bld; - LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index); + LLVMValueRef shuffle = lp_build_const_int_vec(gallivm, coeff_bld->type, quad_index); LLVMValueRef oow = NULL; unsigned attrib; unsigned chan; @@ -308,7 +312,7 @@ attribs_update(struct lp_build_interp_soa_context *bld, * Broadcast the attribute value for this quad into all elements */ - a = LLVMBuildShuffleVector(coeff_bld->builder, + a = LLVMBuildShuffleVector(builder, a, coeff_bld->undef, shuffle, ""); /* @@ -380,10 +384,11 @@ pos_init(struct lp_build_interp_soa_context *bld, LLVMValueRef x0, LLVMValueRef y0) { + LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder; struct lp_build_context *coeff_bld = &bld->coeff_bld; - bld->x = LLVMBuildSIToFP(coeff_bld->builder, x0, coeff_bld->elem_type, ""); - bld->y = LLVMBuildSIToFP(coeff_bld->builder, y0, coeff_bld->elem_type, ""); + bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, ""); + bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, ""); } @@ -392,6 +397,7 @@ pos_init(struct lp_build_interp_soa_context *bld, */ void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, unsigned num_inputs, const struct lp_shader_input *inputs, LLVMBuilderRef builder, @@ -417,7 +423,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, /* XXX: we don't support interpolating into any other types */ assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0); - lp_build_context_init(&bld->coeff_bld, builder, coeff_type); + lp_build_context_init(&bld->coeff_bld, 
gallivm, coeff_type); /* For convenience */ bld->pos = bld->attribs[0]; @@ -453,19 +459,21 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, */ void lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, int quad_index) { assert(quad_index < 4); - attribs_update(bld, quad_index, 1, bld->num_attribs); + attribs_update(bld, gallivm, quad_index, 1, bld->num_attribs); } void lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, int quad_index) { assert(quad_index < 4); - attribs_update(bld, quad_index, 0, 1); + attribs_update(bld, gallivm, quad_index, 0, 1); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index a7ebdd1bfa2..b58b2dc1155 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -102,6 +102,7 @@ struct lp_build_interp_soa_context void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, unsigned num_inputs, const struct lp_shader_input *inputs, LLVMBuilderRef builder, @@ -114,11 +115,13 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, void lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld, - int quad_index); + struct gallivm_state *gallivm, + int quad_index); void lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld, - int quad_index); + struct gallivm_state *gallivm, + int quad_index); #endif /* LP_BLD_INTERP_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 763432ed712..644201ddf7c 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -50,6 +50,46 @@ DEBUG_GET_ONCE_BOOL_OPTION(lp_no_rast, "LP_NO_RAST", FALSE) +/** shared by all contexts */ +unsigned llvmpipe_variant_count; + + +/** + * This function is called by the gallivm "garbage collector" when + * the LLVM global data structures are freed. We must free all LLVM-related + * data. Specifically, all JIT'd shader variants. + */ +static void +garbage_collect_callback(void *cb_data) +{ + struct llvmpipe_context *lp = (struct llvmpipe_context *) cb_data; + struct lp_fs_variant_list_item *li; + + /* Free all the context's shader variants */ + li = first_elem(&lp->fs_variants_list); + while (!at_end(&lp->fs_variants_list, li)) { + struct lp_fs_variant_list_item *next = next_elem(li); + llvmpipe_remove_shader_variant(lp, li->base); + li = next; + } + + /* Free all the context's primitive setup variants */ + lp_delete_setup_variants(lp); + + /* release references to setup variants, shaders */ + lp_setup_set_setup_variant(lp->setup, NULL); + lp_setup_set_fs_variant(lp->setup, NULL); + lp_setup_reset(lp->setup); + + /* This type will be recreated upon demand */ + lp->jit_context_ptr_type = NULL; + + /* mark all state as dirty to ensure new shaders are jit'd, etc. 
*/ + lp->dirty = ~0; +} + + + static void llvmpipe_destroy( struct pipe_context *pipe ) { struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); @@ -57,6 +97,9 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) lp_print_counters(); + gallivm_remove_garbage_collector_callback(garbage_collect_callback, + llvmpipe); + /* This will also destroy llvmpipe->setup: */ if (llvmpipe->draw) @@ -82,7 +125,11 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) } } - lp_delete_setup_variants(llvmpipe); + for (i = 0; i < llvmpipe->num_vertex_buffers; i++) { + pipe_resource_reference(&llvmpipe->vertex_buffer[i].buffer, NULL); + } + + gallivm_destroy(llvmpipe->gallivm); align_free( llvmpipe ); } @@ -110,8 +157,10 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) memset(llvmpipe, 0, sizeof *llvmpipe); make_empty_list(&llvmpipe->fs_variants_list); + make_empty_list(&llvmpipe->setup_variants_list); + llvmpipe->pipe.winsys = screen->winsys; llvmpipe->pipe.screen = screen; llvmpipe->pipe.priv = priv; @@ -136,10 +185,12 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) llvmpipe_init_context_resource_funcs( &llvmpipe->pipe ); llvmpipe_init_surface_functions(llvmpipe); + llvmpipe->gallivm = gallivm_create(); + /* * Create drawing context and plug our rendering stage into it. */ - llvmpipe->draw = draw_create(&llvmpipe->pipe); + llvmpipe->draw = draw_create_gallivm(&llvmpipe->pipe, llvmpipe->gallivm); if (!llvmpipe->draw) goto fail; @@ -173,6 +224,9 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) lp_reset_counters(); + gallivm_register_garbage_collector_callback(garbage_collect_callback, + llvmpipe); + return &llvmpipe->pipe; fail: diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index a35e09e8b47..503f09d810c 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -126,14 +126,27 @@ struct llvmpipe_context { unsigned tex_timestamp; boolean no_rast; + /** List of all fragment shader variants */ struct lp_fs_variant_list_item fs_variants_list; unsigned nr_fs_variants; + /** JIT code generation */ + struct gallivm_state *gallivm; + LLVMTypeRef jit_context_ptr_type; + struct lp_setup_variant_list_item setup_variants_list; unsigned nr_setup_variants; }; +/** + * Fragment and setup variant count, used to trigger garbage collection. + * This is global since all variants in all contexts will be free when + * we do garbage collection. 
+ */ +extern unsigned llvmpipe_variant_count; + + struct pipe_context * llvmpipe_create_context( struct pipe_screen *screen, void *priv ); diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index e2c723b7a87..85e3cdec82c 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -56,6 +56,13 @@ llvmpipe_flush( struct pipe_context *pipe, /* ask the setup module to flush */ lp_setup_flush(llvmpipe->setup, flags, fence, reason); + + if (llvmpipe_variant_count > 1000) { + /* time to do a garbage collection */ + gallivm_garbage_collect(llvmpipe->gallivm); + llvmpipe_variant_count = 0; + } + /* Enable to dump BMPs of the color/depth buffers each frame */ if (0) { if (flags & PIPE_FLUSH_FRAME) { @@ -101,8 +108,8 @@ llvmpipe_finish( struct pipe_context *pipe, boolean llvmpipe_flush_resource(struct pipe_context *pipe, struct pipe_resource *resource, - unsigned face, unsigned level, + int layer, unsigned flush_flags, boolean read_only, boolean cpu_access, @@ -111,7 +118,7 @@ llvmpipe_flush_resource(struct pipe_context *pipe, { unsigned referenced; - referenced = pipe->is_resource_referenced(pipe, resource, face, level); + referenced = pipe->is_resource_referenced(pipe, resource, level, layer); if ((referenced & PIPE_REFERENCED_FOR_WRITE) || ((referenced & PIPE_REFERENCED_FOR_READ) && !read_only)) { diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h index 3626ce4a86c..579d24c68ad 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.h +++ b/src/gallium/drivers/llvmpipe/lp_flush.h @@ -47,8 +47,8 @@ llvmpipe_finish( struct pipe_context *pipe, boolean llvmpipe_flush_resource(struct pipe_context *pipe, struct pipe_resource *resource, - unsigned face, unsigned level, + int layer, unsigned flush_flags, boolean read_only, boolean cpu_access, diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index c540f9b3628..482a902dd23 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -33,82 +33,83 @@ */ -#include <llvm-c/Transforms/Scalar.h> - #include "util/u_memory.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_debug.h" -#include "lp_screen.h" -#include "gallivm/lp_bld_intr.h" +#include "lp_context.h" #include "lp_jit.h" static void -lp_jit_init_globals(struct llvmpipe_screen *screen) +lp_jit_create_types(struct llvmpipe_context *lp) { + struct gallivm_state *gallivm = lp->gallivm; + LLVMContextRef lc = gallivm->context; LLVMTypeRef texture_type; /* struct lp_jit_texture */ { LLVMTypeRef elem_types[LP_JIT_TEXTURE_NUM_FIELDS]; - elem_types[LP_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); - elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); - elem_types[LP_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); - elem_types[LP_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_WIDTH] = + elem_types[LP_JIT_TEXTURE_HEIGHT] = + elem_types[LP_JIT_TEXTURE_DEPTH] = + elem_types[LP_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32TypeInContext(lc); elem_types[LP_JIT_TEXTURE_ROW_STRIDE] = - LLVMArrayType(LLVMInt32Type(), LP_MAX_TEXTURE_LEVELS); elem_types[LP_JIT_TEXTURE_IMG_STRIDE] = - LLVMArrayType(LLVMInt32Type(), LP_MAX_TEXTURE_LEVELS); + LLVMArrayType(LLVMInt32TypeInContext(lc), LP_MAX_TEXTURE_LEVELS); elem_types[LP_JIT_TEXTURE_DATA] = - LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), + LLVMArrayType(LLVMPointerType(LLVMInt8TypeInContext(lc), 0), LP_MAX_TEXTURE_LEVELS); - elem_types[LP_JIT_TEXTURE_MIN_LOD] = 
LLVMFloatType(); - elem_types[LP_JIT_TEXTURE_MAX_LOD] = LLVMFloatType(); - elem_types[LP_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType(); + elem_types[LP_JIT_TEXTURE_MIN_LOD] = + elem_types[LP_JIT_TEXTURE_MAX_LOD] = + elem_types[LP_JIT_TEXTURE_LOD_BIAS] = LLVMFloatTypeInContext(lc); elem_types[LP_JIT_TEXTURE_BORDER_COLOR] = - LLVMArrayType(LLVMFloatType(), 4); + LLVMArrayType(LLVMFloatTypeInContext(lc), 4); + + texture_type = LLVMStructTypeInContext(lc, elem_types, + Elements(elem_types), 0); - texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); + LLVMInvalidateStructLayout(gallivm->target, texture_type); LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width, - screen->target, texture_type, + gallivm->target, texture_type, LP_JIT_TEXTURE_WIDTH); LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height, - screen->target, texture_type, + gallivm->target, texture_type, LP_JIT_TEXTURE_HEIGHT); LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, depth, - screen->target, texture_type, + gallivm->target, texture_type, LP_JIT_TEXTURE_DEPTH); LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, last_level, - screen->target, texture_type, + gallivm->target, texture_type, LP_JIT_TEXTURE_LAST_LEVEL); LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, row_stride, - screen->target, texture_type, + gallivm->target, texture_type, LP_JIT_TEXTURE_ROW_STRIDE); LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, img_stride, - screen->target, texture_type, + gallivm->target, texture_type, LP_JIT_TEXTURE_IMG_STRIDE); LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data, - screen->target, texture_type, + gallivm->target, texture_type, LP_JIT_TEXTURE_DATA); LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, min_lod, - screen->target, texture_type, + gallivm->target, texture_type, LP_JIT_TEXTURE_MIN_LOD); LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, max_lod, - screen->target, texture_type, + gallivm->target, texture_type, LP_JIT_TEXTURE_MAX_LOD); LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, lod_bias, - screen->target, texture_type, + gallivm->target, texture_type, LP_JIT_TEXTURE_LOD_BIAS); LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, border_color, - screen->target, texture_type, + gallivm->target, texture_type, LP_JIT_TEXTURE_BORDER_COLOR); LP_CHECK_STRUCT_SIZE(struct lp_jit_texture, - screen->target, texture_type); + gallivm->target, texture_type); - LLVMAddTypeName(screen->module, "texture", texture_type); + LLVMAddTypeName(gallivm->module, "texture", texture_type); } /* struct lp_jit_context */ @@ -116,44 +117,47 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) LLVMTypeRef elem_types[LP_JIT_CTX_COUNT]; LLVMTypeRef context_type; - elem_types[LP_JIT_CTX_CONSTANTS] = LLVMPointerType(LLVMFloatType(), 0); - elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatType(); - elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] = LLVMInt32Type(); - elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32Type(); - elem_types[LP_JIT_CTX_BLEND_COLOR] = LLVMPointerType(LLVMInt8Type(), 0); + elem_types[LP_JIT_CTX_CONSTANTS] = LLVMPointerType(LLVMFloatTypeInContext(lc), 0); + elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatTypeInContext(lc); + elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] = + elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32TypeInContext(lc); + elem_types[LP_JIT_CTX_BLEND_COLOR] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0); elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); - context_type = LLVMStructType(elem_types, Elements(elem_types), 0); + context_type = LLVMStructTypeInContext(lc, elem_types, + Elements(elem_types), 0); + + 
LLVMInvalidateStructLayout(gallivm->target, context_type); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants, - screen->target, context_type, + gallivm->target, context_type, LP_JIT_CTX_CONSTANTS); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value, - screen->target, context_type, + gallivm->target, context_type, LP_JIT_CTX_ALPHA_REF); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_front, - screen->target, context_type, + gallivm->target, context_type, LP_JIT_CTX_STENCIL_REF_FRONT); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_back, - screen->target, context_type, + gallivm->target, context_type, LP_JIT_CTX_STENCIL_REF_BACK); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, - screen->target, context_type, + gallivm->target, context_type, LP_JIT_CTX_BLEND_COLOR); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures, - screen->target, context_type, + gallivm->target, context_type, LP_JIT_CTX_TEXTURES); LP_CHECK_STRUCT_SIZE(struct lp_jit_context, - screen->target, context_type); + gallivm->target, context_type); - LLVMAddTypeName(screen->module, "context", context_type); + LLVMAddTypeName(gallivm->module, "context", context_type); - screen->context_ptr_type = LLVMPointerType(context_type, 0); + lp->jit_context_ptr_type = LLVMPointerType(context_type, 0); } if (gallivm_debug & GALLIVM_DEBUG_IR) { - LLVMDumpModule(screen->module); + LLVMDumpModule(gallivm->module); } } @@ -161,8 +165,7 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) void lp_jit_screen_cleanup(struct llvmpipe_screen *screen) { - if(screen->pass) - LLVMDisposePassManager(screen->pass); + /* nothing */ } @@ -170,30 +173,14 @@ void lp_jit_screen_init(struct llvmpipe_screen *screen) { lp_build_init(); +} - screen->module = lp_build_module; - screen->provider = lp_build_provider; - screen->engine = lp_build_engine; - screen->target = lp_build_target; - - screen->pass = LLVMCreateFunctionPassManager(screen->provider); - LLVMAddTargetData(screen->target, screen->pass); - - if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { - /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, - * but there are more on SVN. */ - /* TODO: Add more passes */ - LLVMAddCFGSimplificationPass(screen->pass); - LLVMAddPromoteMemoryToRegisterPass(screen->pass); - LLVMAddConstantPropagationPass(screen->pass); - LLVMAddInstructionCombiningPass(screen->pass); - LLVMAddGVNPass(screen->pass); - } else { - /* We need at least this pass to prevent the backends to fail in - * unexpected ways. 
- */ - LLVMAddPromoteMemoryToRegisterPass(screen->pass); - } - lp_jit_init_globals(screen); +LLVMTypeRef +lp_jit_get_context_type(struct llvmpipe_context *lp) +{ + if (!lp->jit_context_ptr_type) + lp_jit_create_types(lp); + + return lp->jit_context_ptr_type; } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 114f21f2d16..a6763dce17a 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -120,23 +120,23 @@ enum { }; -#define lp_jit_context_constants(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_CONSTANTS, "constants") +#define lp_jit_context_constants(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_CONSTANTS, "constants") -#define lp_jit_context_alpha_ref_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_ALPHA_REF, "alpha_ref_value") +#define lp_jit_context_alpha_ref_value(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_ALPHA_REF, "alpha_ref_value") -#define lp_jit_context_stencil_ref_front_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_STENCIL_REF_FRONT, "stencil_ref_front") +#define lp_jit_context_stencil_ref_front_value(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_STENCIL_REF_FRONT, "stencil_ref_front") -#define lp_jit_context_stencil_ref_back_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_STENCIL_REF_BACK, "stencil_ref_back") +#define lp_jit_context_stencil_ref_back_value(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_STENCIL_REF_BACK, "stencil_ref_back") -#define lp_jit_context_blend_color(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_BLEND_COLOR, "blend_color") +#define lp_jit_context_blend_color(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_BLEND_COLOR, "blend_color") -#define lp_jit_context_textures(_builder, _ptr) \ - lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CTX_TEXTURES, "textures") +#define lp_jit_context_textures(_gallivm, _ptr) \ + lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_CTX_TEXTURES, "textures") @@ -162,4 +162,8 @@ void lp_jit_screen_init(struct llvmpipe_screen *screen); +LLVMTypeRef +lp_jit_get_context_type(struct llvmpipe_context *lp); + + #endif /* LP_JIT_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h index b23a100b873..455adf7d6f8 100644 --- a/src/gallium/drivers/llvmpipe/lp_perf.h +++ b/src/gallium/drivers/llvmpipe/lp_perf.h @@ -33,6 +33,7 @@ #ifndef LP_PERF_H #define LP_PERF_H +#include "pipe/p_compiler.h" /** * Various counters diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index decf3bd4499..dafadc1ea9b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -47,6 +47,7 @@ #ifdef DEBUG int jit_line = 0; const struct lp_rast_state *jit_state = NULL; +const struct lp_rasterizer_task *jit_task = NULL; #endif @@ -119,8 +120,8 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task, * and update the tile's layout info. */ (void) llvmpipe_get_texture_tile(lpt, - zsbuf->face + zsbuf->zslice, - zsbuf->level, + zsbuf->u.tex.first_layer, + zsbuf->u.tex.level, usage, task->x, task->y); @@ -288,7 +289,6 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, - /** * Convert the color tile from tiled to linear layout. 
* This is generally only done when we're flushing the scene just prior to @@ -306,15 +306,15 @@ lp_rast_store_linear_color( struct lp_rasterizer_task *task ) for (buf = 0; buf < scene->fb.nr_cbufs; buf++) { struct pipe_surface *cbuf = scene->fb.cbufs[buf]; - const unsigned face_slice = cbuf->face + cbuf->zslice; - const unsigned level = cbuf->level; + const unsigned layer = cbuf->u.tex.first_layer; + const unsigned level = cbuf->u.tex.level; struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture); if (!task->color_tiles[buf]) continue; llvmpipe_unswizzle_cbuf_tile(lpt, - face_slice, + layer, level, task->x, task->y, task->color_tiles[buf]); @@ -362,7 +362,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, depth = lp_rast_get_depth_block_pointer(task, tile_x + x, tile_y + y); /* run shader on 4x4 block */ - BEGIN_JIT_CALL(state); + BEGIN_JIT_CALL(state, task); variant->jit_function[RAST_WHOLE]( &state->jit_context, tile_x + x, tile_y + y, inputs->frontfacing, @@ -443,7 +443,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, assert(lp_check_alignment(state->jit_context.blend_color, 16)); /* run shader on 4x4 block */ - BEGIN_JIT_CALL(state); + BEGIN_JIT_CALL(state, task); variant->jit_function[RAST_EDGE_TEST](&state->jit_context, x, y, inputs->frontfacing, diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index b30408f097b..cd686bc82c1 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -45,13 +45,16 @@ */ #ifdef DEBUG +struct lp_rasterizer_task; extern int jit_line; extern const struct lp_rast_state *jit_state; +extern const struct lp_rasterizer_task *jit_task; -#define BEGIN_JIT_CALL(state) \ +#define BEGIN_JIT_CALL(state, task) \ do { \ jit_line = __LINE__; \ jit_state = state; \ + jit_task = task; \ } while (0) #define END_JIT_CALL() \ @@ -62,7 +65,7 @@ extern const struct lp_rast_state *jit_state; #else -#define BEGIN_JIT_CALL(X) +#define BEGIN_JIT_CALL(X, Y) #define END_JIT_CALL() #endif @@ -191,8 +194,8 @@ lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task, if (usage != LP_TEX_USAGE_WRITE_ALL) { llvmpipe_swizzle_cbuf_tile(lpt, - cbuf->face + cbuf->zslice, - cbuf->level, + cbuf->u.tex.first_layer, + cbuf->u.tex.level, task->x, task->y, task->color_tiles[buf]); } @@ -258,7 +261,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, depth = lp_rast_get_depth_block_pointer(task, x, y); /* run shader on 4x4 block */ - BEGIN_JIT_CALL(state); + BEGIN_JIT_CALL(state, task); variant->jit_function[RAST_WHOLE]( &state->jit_context, x, y, inputs->frontfacing, diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index a4fdf7cff36..5d0f5f8b7b5 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -74,6 +74,7 @@ lp_scene_create( struct pipe_context *pipe ) void lp_scene_destroy(struct lp_scene *scene) { + lp_fence_reference(&scene->fence, NULL); pipe_mutex_destroy(scene->mutex); assert(scene->data.head->next == NULL); FREE(scene->data.head); @@ -136,30 +137,30 @@ lp_scene_begin_rasterization(struct lp_scene *scene) int i; //LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); - + for (i = 0; i < scene->fb.nr_cbufs; i++) { struct pipe_surface *cbuf = scene->fb.cbufs[i]; + assert(cbuf->u.tex.first_layer == cbuf->u.tex.last_layer); scene->cbufs[i].stride = llvmpipe_resource_stride(cbuf->texture, - cbuf->level); + cbuf->u.tex.level); scene->cbufs[i].map = 
llvmpipe_resource_map(cbuf->texture, - cbuf->face, - cbuf->level, - cbuf->zslice, + cbuf->u.tex.level, + cbuf->u.tex.first_layer, LP_TEX_USAGE_READ_WRITE, LP_TEX_LAYOUT_LINEAR); } if (fb->zsbuf) { struct pipe_surface *zsbuf = scene->fb.zsbuf; - scene->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->level); + assert(zsbuf->u.tex.first_layer == zsbuf->u.tex.last_layer); + scene->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->u.tex.level); scene->zsbuf.blocksize = util_format_get_blocksize(zsbuf->texture->format); scene->zsbuf.map = llvmpipe_resource_map(zsbuf->texture, - zsbuf->face, - zsbuf->level, - zsbuf->zslice, + zsbuf->u.tex.level, + zsbuf->u.tex.first_layer, LP_TEX_USAGE_READ_WRITE, LP_TEX_LAYOUT_NONE); } @@ -181,9 +182,8 @@ lp_scene_end_rasterization(struct lp_scene *scene ) if (scene->cbufs[i].map) { struct pipe_surface *cbuf = scene->fb.cbufs[i]; llvmpipe_resource_unmap(cbuf->texture, - cbuf->face, - cbuf->level, - cbuf->zslice); + cbuf->u.tex.level, + cbuf->u.tex.first_layer); scene->cbufs[i].map = NULL; } } @@ -192,9 +192,8 @@ lp_scene_end_rasterization(struct lp_scene *scene ) if (scene->zsbuf.map) { struct pipe_surface *zsbuf = scene->fb.zsbuf; llvmpipe_resource_unmap(zsbuf->texture, - zsbuf->face, - zsbuf->level, - zsbuf->zslice); + zsbuf->u.tex.level, + zsbuf->u.tex.first_layer); scene->zsbuf.map = NULL; } diff --git a/src/gallium/drivers/llvmpipe/lp_scene_queue.h b/src/gallium/drivers/llvmpipe/lp_scene_queue.h index fd7c65a2c8b..dd9ab593b4a 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene_queue.h +++ b/src/gallium/drivers/llvmpipe/lp_scene_queue.h @@ -29,6 +29,8 @@ #ifndef LP_SCENE_QUEUE #define LP_SCENE_QUEUE +#include "pipe/p_compiler.h" + struct lp_scene_queue; struct lp_scene; diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 43904343c3a..b6919a5c6d3 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -289,12 +289,13 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, static void llvmpipe_flush_frontbuffer(struct pipe_screen *_screen, - struct pipe_surface *surface, + struct pipe_resource *resource, + unsigned level, unsigned layer, void *context_private) { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); struct sw_winsys *winsys = screen->winsys; - struct llvmpipe_resource *texture = llvmpipe_resource(surface->texture); + struct llvmpipe_resource *texture = llvmpipe_resource(resource); assert(texture->dt); if (texture->dt) diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h index 731526dfabe..7f69a11a6e3 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.h +++ b/src/gallium/drivers/llvmpipe/lp_screen.h @@ -34,12 +34,10 @@ #ifndef LP_SCREEN_H #define LP_SCREEN_H -#include "gallivm/lp_bld.h" -#include <llvm-c/ExecutionEngine.h> - -#include "os/os_thread.h" #include "pipe/p_screen.h" #include "pipe/p_defines.h" +#include "os/os_thread.h" +#include "gallivm/lp_bld.h" struct sw_winsys; @@ -51,14 +49,6 @@ struct llvmpipe_screen struct sw_winsys *winsys; - LLVMModuleRef module; - LLVMExecutionEngineRef engine; - LLVMModuleProviderRef provider; - LLVMTargetDataRef target; - LLVMPassManagerRef pass; - - LLVMTypeRef context_ptr_type; - unsigned num_threads; /* Increments whenever textures are modified. Contexts can track this. 
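The lp_context.c, lp_flush.c and lp_jit.c changes above move llvmpipe from per-screen LLVM globals to a per-context gallivm_state and tie shader-variant lifetime to gallivm's "garbage collector": the context registers a callback that frees every JIT'd variant and marks all state dirty, and llvmpipe_flush() asks gallivm to collect once the shared llvmpipe_variant_count counter passes a threshold. A minimal sketch of how these pieces fit together, with a hypothetical free_all_fs_and_setup_variants() helper standing in for the real per-list cleanup:

/*
 * Illustrative sketch only -- free_all_fs_and_setup_variants() is a
 * placeholder for the variant-list walks done by the callback above.
 */
static void
gc_callback(void *cb_data)
{
   struct llvmpipe_context *lp = (struct llvmpipe_context *) cb_data;

   free_all_fs_and_setup_variants(lp);  /* drop all JIT'd shader code */
   lp->jit_context_ptr_type = NULL;     /* LLVM types are rebuilt on demand */
   lp->dirty = ~0;                      /* force new variants to be generated */
}

/* at context creation */
lp->gallivm = gallivm_create();
gallivm_register_garbage_collector_callback(gc_callback, lp);

/* in llvmpipe_flush(), after the setup module has flushed */
if (llvmpipe_variant_count > 1000) {
   gallivm_garbage_collect(lp->gallivm);   /* runs gc_callback, frees LLVM data */
   llvmpipe_variant_count = 0;
}

/* at context destruction */
gallivm_remove_garbage_collector_callback(gc_callback, lp);
gallivm_destroy(lp->gallivm);

When JIT types are needed again, lp_jit_get_context_type() lazily recreates lp->jit_context_ptr_type via lp_jit_create_types(), and the dirty flags ensure variants are simply re-JIT'd on the next draw that needs them.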
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 6118434d3d3..5d83a1e3579 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -114,7 +114,7 @@ first_point( struct lp_setup_context *setup, setup->point( setup, v0 ); } -static void lp_setup_reset( struct lp_setup_context *setup ) +void lp_setup_reset( struct lp_setup_context *setup ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -263,7 +263,6 @@ execute_clears( struct lp_setup_context *setup ) const char *states[] = { "FLUSHED", - "EMPTY ", "CLEARED", "ACTIVE " }; @@ -913,6 +912,12 @@ lp_setup_update_state( struct lp_setup_context *setup, llvmpipe_update_derived(lp); } + if (lp->setup->dirty) { + llvmpipe_update_setup(lp); + } + + assert(setup->setup.variant); + /* Will probably need to move this somewhere else, just need * to know about vertex shader point size attribute. */ @@ -928,7 +933,7 @@ lp_setup_update_state( struct lp_setup_context *setup, setup->setup.variant->key.size) == 0); } - if (update_scene) { + if (update_scene && setup->state != SETUP_ACTIVE) { if (!set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ )) return FALSE; } @@ -991,6 +996,8 @@ lp_setup_destroy( struct lp_setup_context *setup ) lp_scene_destroy(scene); } + lp_fence_reference(&setup->last_fence, NULL); + FREE( setup ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index ebb18f81344..0d6e161a218 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -45,6 +45,9 @@ struct lp_jit_context; struct llvmpipe_query; struct pipe_fence_handle; struct lp_setup_variant; +struct lp_setup_context; + +void lp_setup_reset( struct lp_setup_context *setup ); struct lp_setup_context * lp_setup_create( struct pipe_context *pipe, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index 9c1f0fe7939..384242f81d7 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -141,6 +141,8 @@ lp_setup_draw_elements(struct vbuf_render *vbr, const ushort *indices, uint nr) const boolean flatshade_first = setup->flatshade_first; unsigned i; + assert(setup->setup.variant); + if (!lp_setup_update_state(setup, TRUE)) return; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index cd67d488616..1b9119eda00 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -90,7 +90,6 @@ #include "lp_context.h" #include "lp_debug.h" #include "lp_perf.h" -#include "lp_screen.h" #include "lp_setup.h" #include "lp_state.h" #include "lp_tex_sample.h" @@ -102,10 +101,10 @@ #include <llvm-c/BitWriter.h> +/** Fragment shader number (for debugging) */ static unsigned fs_no = 0; - /** * Expand the relevent bits of mask_input to a 4-dword mask for the * four pixels in a 2x2 quad. 
This will set the four elements of the @@ -115,13 +114,14 @@ static unsigned fs_no = 0; * \param mask_input bitwise mask for the whole 4x4 stamp */ static LLVMValueRef -generate_quad_mask(LLVMBuilderRef builder, +generate_quad_mask(struct gallivm_state *gallivm, struct lp_type fs_type, unsigned quad, LLVMValueRef mask_input) /* int32 */ { + LLVMBuilderRef builder = gallivm->builder; struct lp_type mask_type; - LLVMTypeRef i32t = LLVMInt32Type(); + LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef bits[4]; LLVMValueRef mask; int shift; @@ -136,7 +136,6 @@ generate_quad_mask(LLVMBuilderRef builder, /* * mask_input >>= (quad * 4) */ - switch (quad) { case 0: shift = 0; @@ -163,8 +162,9 @@ generate_quad_mask(LLVMBuilderRef builder, /* * mask = { mask_input & (1 << i), for i in [0,3] } */ - - mask = lp_build_broadcast(builder, lp_build_vec_type(mask_type), mask_input); + mask = lp_build_broadcast(gallivm, + lp_build_vec_type(gallivm, mask_type), + mask_input); bits[0] = LLVMConstInt(i32t, 1 << 0, 0); bits[1] = LLVMConstInt(i32t, 1 << 1, 0); @@ -176,11 +176,10 @@ generate_quad_mask(LLVMBuilderRef builder, /* * mask = mask != 0 ? ~0 : 0 */ - - mask = lp_build_compare(builder, + mask = lp_build_compare(gallivm, mask_type, PIPE_FUNC_NOTEQUAL, mask, - lp_build_const_int_vec(mask_type, 0)); + lp_build_const_int_vec(gallivm, mask_type, 0)); return mask; } @@ -213,7 +212,8 @@ find_output_by_semantic( const struct tgsi_shader_info *info, * \param partial_mask if 1, do mask_input testing */ static void -generate_fs(struct lp_fragment_shader *shader, +generate_fs(struct gallivm_state *gallivm, + struct lp_fragment_shader *shader, const struct lp_fragment_shader_variant_key *key, LLVMBuilderRef builder, struct lp_type type, @@ -278,42 +278,42 @@ generate_fs(struct lp_fragment_shader *shader, assert(i < 4); - stencil_refs[0] = lp_jit_context_stencil_ref_front_value(builder, context_ptr); - stencil_refs[1] = lp_jit_context_stencil_ref_back_value(builder, context_ptr); + stencil_refs[0] = lp_jit_context_stencil_ref_front_value(gallivm, context_ptr); + stencil_refs[1] = lp_jit_context_stencil_ref_back_value(gallivm, context_ptr); - vec_type = lp_build_vec_type(type); + vec_type = lp_build_vec_type(gallivm, type); - consts_ptr = lp_jit_context_constants(builder, context_ptr); + consts_ptr = lp_jit_context_constants(gallivm, context_ptr); memset(outputs, 0, sizeof outputs); /* Declare the color and z variables */ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { - color[cbuf][chan] = lp_build_alloca(builder, vec_type, "color"); + color[cbuf][chan] = lp_build_alloca(gallivm, vec_type, "color"); } } /* do triangle edge testing */ if (partial_mask) { - *pmask = generate_quad_mask(builder, type, + *pmask = generate_quad_mask(gallivm, type, i, mask_input); } else { - *pmask = lp_build_const_int_vec(type, ~0); + *pmask = lp_build_const_int_vec(gallivm, type, ~0); } /* 'mask' will control execution based on quad's pixel alive/killed state */ - lp_build_mask_begin(&mask, builder, type, *pmask); + lp_build_mask_begin(&mask, gallivm, type, *pmask); if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader) lp_build_mask_check(&mask); - lp_build_interp_soa_update_pos(interp, i); + lp_build_interp_soa_update_pos(interp, gallivm, i); z = interp->pos[2]; if (depth_mode & EARLY_DEPTH_TEST) { - lp_build_depth_stencil_test(builder, + lp_build_depth_stencil_test(gallivm, &key->depth, key->stencil, type, @@ -330,15 +330,14 @@ generate_fs(struct lp_fragment_shader 
*shader, } } - lp_build_interp_soa_update_inputs(interp, i); + lp_build_interp_soa_update_inputs(interp, gallivm, i); /* Build the actual shader */ - lp_build_tgsi_soa(builder, tokens, type, &mask, + lp_build_tgsi_soa(gallivm, tokens, type, &mask, consts_ptr, NULL, /* sys values array */ interp->pos, interp->inputs, outputs, sampler, &shader->info.base); - /* Alpha test */ if (key->alpha.enabled) { int color0 = find_output_by_semantic(&shader->info.base, @@ -349,10 +348,10 @@ generate_fs(struct lp_fragment_shader *shader, LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha"); LLVMValueRef alpha_ref_value; - alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr); - alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value); + alpha_ref_value = lp_jit_context_alpha_ref_value(gallivm, context_ptr); + alpha_ref_value = lp_build_broadcast(gallivm, vec_type, alpha_ref_value); - lp_build_alpha_test(builder, key->alpha.func, type, + lp_build_alpha_test(gallivm, key->alpha.func, type, &mask, alpha, alpha_ref_value, (depth_mode & LATE_DEPTH_TEST) != 0); } @@ -368,7 +367,7 @@ generate_fs(struct lp_fragment_shader *shader, z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z"); } - lp_build_depth_stencil_test(builder, + lp_build_depth_stencil_test(gallivm, &key->depth, key->stencil, type, @@ -391,7 +390,7 @@ generate_fs(struct lp_fragment_shader *shader, * depth value, update from zs_value with the new mask value and * write that out. */ - lp_build_deferred_depth_write(builder, + lp_build_deferred_depth_write(gallivm, type, zs_format_desc, &mask, @@ -421,7 +420,7 @@ generate_fs(struct lp_fragment_shader *shader, } if (counter) - lp_build_occlusion_count(builder, type, + lp_build_occlusion_count(gallivm, type, lp_build_mask_value(&mask), counter); *pmask = lp_build_mask_end(&mask); @@ -438,7 +437,8 @@ generate_fs(struct lp_fragment_shader *shader, * \param dst_ptr the destination color buffer pointer */ static void -generate_blend(const struct pipe_blend_state *blend, +generate_blend(struct gallivm_state *gallivm, + const struct pipe_blend_state *blend, unsigned rt, LLVMBuilderRef builder, struct lp_type type, @@ -457,21 +457,21 @@ generate_blend(const struct pipe_blend_state *blend, LLVMValueRef res[4]; unsigned chan; - lp_build_context_init(&bld, builder, type); + lp_build_context_init(&bld, gallivm, type); - lp_build_mask_begin(&mask_ctx, builder, type, mask); + lp_build_mask_begin(&mask_ctx, gallivm, type, mask); if (do_branch) lp_build_mask_check(&mask_ctx); - vec_type = lp_build_vec_type(type); + vec_type = lp_build_vec_type(gallivm, type); - const_ptr = lp_jit_context_blend_color(builder, context_ptr); + const_ptr = lp_jit_context_blend_color(gallivm, context_ptr); const_ptr = LLVMBuildBitCast(builder, const_ptr, LLVMPointerType(vec_type, 0), ""); /* load constant blend color and colors from the dest color buffer */ for(chan = 0; chan < 4; ++chan) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); + LLVMValueRef index = lp_build_const_int32(gallivm, chan); con[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), ""); dst[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), ""); @@ -481,12 +481,12 @@ generate_blend(const struct pipe_blend_state *blend, } /* do blend */ - lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res); + lp_build_blend_soa(gallivm, blend, type, rt, src, dst, con, res); /* store results to color buffer */ for(chan = 0; chan < 4; ++chan) { 
if(blend->rt[rt].colormask & (1 << chan)) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); + LLVMValueRef index = lp_build_const_int32(gallivm, chan); lp_build_name(res[chan], "res.%c", "rgba"[chan]); res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, "")); @@ -504,11 +504,12 @@ generate_blend(const struct pipe_blend_state *blend, * 2x2 pixels. */ static void -generate_fragment(struct llvmpipe_screen *screen, +generate_fragment(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, struct lp_fragment_shader_variant *variant, unsigned partial_mask) { + struct gallivm_state *gallivm = lp->gallivm; const struct lp_fragment_shader_variant_key *key = &variant->key; struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; char func_name[256]; @@ -519,6 +520,8 @@ generate_fragment(struct llvmpipe_screen *screen, LLVMTypeRef blend_vec_type; LLVMTypeRef arg_types[11]; LLVMTypeRef func_type; + LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context); + LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context); LLVMValueRef context_ptr; LLVMValueRef x; LLVMValueRef y; @@ -580,29 +583,30 @@ generate_fragment(struct llvmpipe_screen *screen, * lp_jit.h's lp_jit_frag_func function pointer type, and vice-versa. */ - fs_elem_type = lp_build_elem_type(fs_type); - fs_int_vec_type = lp_build_int_vec_type(fs_type); + fs_elem_type = lp_build_elem_type(gallivm, fs_type); + fs_int_vec_type = lp_build_int_vec_type(gallivm, fs_type); - blend_vec_type = lp_build_vec_type(blend_type); + blend_vec_type = lp_build_vec_type(gallivm, blend_type); util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s", shader->no, variant->no, partial_mask ? "partial" : "whole"); - arg_types[0] = screen->context_ptr_type; /* context */ - arg_types[1] = LLVMInt32Type(); /* x */ - arg_types[2] = LLVMInt32Type(); /* y */ - arg_types[3] = LLVMInt32Type(); /* facing */ + arg_types[0] = lp_jit_get_context_type(lp); /* context */ + arg_types[1] = int32_type; /* x */ + arg_types[2] = int32_type; /* y */ + arg_types[3] = int32_type; /* facing */ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */ arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ - arg_types[8] = LLVMPointerType(LLVMInt8Type(), 0); /* depth */ - arg_types[9] = LLVMInt32Type(); /* mask_input */ - arg_types[10] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */ + arg_types[8] = LLVMPointerType(int8_type, 0); /* depth */ + arg_types[9] = int32_type; /* mask_input */ + arg_types[10] = LLVMPointerType(int32_type, 0); /* counter */ - func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), + arg_types, Elements(arg_types), 0); - function = LLVMAddFunction(screen->module, func_name, func_type); + function = LLVMAddFunction(gallivm->module, func_name, func_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); variant->function[partial_mask] = function; @@ -644,8 +648,9 @@ generate_fragment(struct llvmpipe_screen *screen, * Function body */ - block = LLVMAppendBasicBlock(function, "entry"); - builder = LLVMCreateBuilder(); + block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry"); + builder = gallivm->builder; + assert(builder); LLVMPositionBuilderAtEnd(builder, block); /* @@ -654,6 
+659,7 @@ generate_fragment(struct llvmpipe_screen *screen, * already included in the shader key. */ lp_build_interp_soa_init(&interp, + gallivm, shader->info.base.num_inputs, inputs, builder, fs_type, @@ -667,7 +673,7 @@ generate_fragment(struct llvmpipe_screen *screen, zs_format_desc = util_format_description(key->zsbuf_format); for(i = 0; i < num_fs; ++i) { - LLVMValueRef depth_offset = LLVMConstInt(LLVMInt32Type(), + LLVMValueRef depth_offset = LLVMConstInt(int32_type, i*fs_type.length*zs_format_desc->block.bits/8, 0); LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS]; @@ -675,7 +681,8 @@ generate_fragment(struct llvmpipe_screen *screen, depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, ""); - generate_fs(shader, key, + generate_fs(gallivm, + shader, key, builder, fs_type, context_ptr, @@ -701,7 +708,7 @@ generate_fragment(struct llvmpipe_screen *screen, */ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { LLVMValueRef color_ptr; - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), cbuf, 0); + LLVMValueRef index = lp_build_const_int32(gallivm, cbuf); LLVMValueRef blend_in_color[NUM_CHANNELS]; unsigned rt; @@ -716,7 +723,7 @@ generate_fragment(struct llvmpipe_screen *screen, LLVMBuildLoad(builder, fs_out_color[cbuf][chan][i], "fs_color_vals"); } - lp_build_conv(builder, fs_type, blend_type, + lp_build_conv(gallivm, fs_type, blend_type, fs_color_vals, num_fs, &blend_in_color[chan], 1); @@ -725,11 +732,11 @@ generate_fragment(struct llvmpipe_screen *screen, } if (partial_mask || !variant->opaque) { - lp_build_conv_mask(builder, fs_type, blend_type, + lp_build_conv_mask(lp->gallivm, fs_type, blend_type, fs_mask, num_fs, &blend_mask, 1); } else { - blend_mask = lp_build_const_int_vec(blend_type, ~0); + blend_mask = lp_build_const_int_vec(lp->gallivm, blend_type, ~0); } color_ptr = LLVMBuildLoad(builder, @@ -750,7 +757,8 @@ generate_fragment(struct llvmpipe_screen *screen, !key->alpha.enabled && !shader->info.base.uses_kill); - generate_blend(&key->blend, + generate_blend(lp->gallivm, + &key->blend, rt, builder, blend_type, @@ -764,9 +772,6 @@ generate_fragment(struct llvmpipe_screen *screen, LLVMBuildRetVoid(builder); - LLVMDisposeBuilder(builder); - - /* Verify the LLVM IR. If invalid, dump and abort */ #ifdef DEBUG if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) { @@ -777,7 +782,7 @@ generate_fragment(struct llvmpipe_screen *screen, #endif /* Apply optimizations to LLVM IR */ - LLVMRunFunctionPassManager(screen->pass, function); + LLVMRunFunctionPassManager(gallivm->passmgr, function); if ((gallivm_debug & GALLIVM_DEBUG_IR) || (LP_DEBUG & DEBUG_FS)) { /* Print the LLVM IR to stderr */ @@ -787,14 +792,14 @@ generate_fragment(struct llvmpipe_screen *screen, /* Dump byte code to a file */ if (0) { - LLVMWriteBitcodeToFile(lp_build_module, "llvmpipe.bc"); + LLVMWriteBitcodeToFile(gallivm->module, "llvmpipe.bc"); } /* * Translate the LLVM IR into machine code. */ { - void *f = LLVMGetPointerToGlobal(screen->engine, function); + void *f = LLVMGetPointerToGlobal(gallivm->engine, function); variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f); @@ -898,8 +903,13 @@ lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant) debug_printf("\n"); } + +/** + * Generate a new fragment shader variant from the shader code and + * other state indicated by the key. 
+ */ static struct lp_fragment_shader_variant * -generate_variant(struct llvmpipe_screen *screen, +generate_variant(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, const struct lp_fragment_shader_variant_key *key) { @@ -945,11 +955,11 @@ generate_variant(struct llvmpipe_screen *screen, lp_debug_fs_variant(variant); } - generate_fragment(screen, shader, variant, RAST_EDGE_TEST); + generate_fragment(lp, shader, variant, RAST_EDGE_TEST); if (variant->opaque) { /* Specialized shader, which doesn't need to read the color buffer. */ - generate_fragment(screen, shader, variant, RAST_WHOLE); + generate_fragment(lp, shader, variant, RAST_WHOLE); } else { variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST]; } @@ -1034,7 +1044,8 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, if (LP_DEBUG & DEBUG_TGSI) { unsigned attrib; - debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader); + debug_printf("llvmpipe: Create fragment shader #%u %p:\n", + shader->no, (void *) shader); tgsi_dump(templ->tokens, 0); debug_printf("usage masks:\n"); for (attrib = 0; attrib < shader->info.base.num_inputs; ++attrib) { @@ -1071,33 +1082,49 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) llvmpipe->dirty |= LP_NEW_FS; } -static void -remove_shader_variant(struct llvmpipe_context *lp, - struct lp_fragment_shader_variant *variant) + +/** + * Remove shader variant from two lists: the shader's variant list + * and the context's variant list. + */ +void +llvmpipe_remove_shader_variant(struct llvmpipe_context *lp, + struct lp_fragment_shader_variant *variant) { - struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); unsigned i; if (gallivm_debug & GALLIVM_DEBUG_IR) { - debug_printf("llvmpipe: del fs #%u var #%u v created #%u v cached #%u v total cached #%u\n", - variant->shader->no, variant->no, variant->shader->variants_created, - variant->shader->variants_cached, lp->nr_fs_variants); + debug_printf("llvmpipe: del fs #%u var #%u v created #%u v cached" + " #%u v total cached #%u\n", + variant->shader->no, + variant->no, + variant->shader->variants_created, + variant->shader->variants_cached, + lp->nr_fs_variants); } + + /* free all the variant's JIT'd functions */ for (i = 0; i < Elements(variant->function); i++) { if (variant->function[i]) { if (variant->jit_function[i]) - LLVMFreeMachineCodeForFunction(screen->engine, + LLVMFreeMachineCodeForFunction(lp->gallivm->engine, variant->function[i]); LLVMDeleteFunction(variant->function[i]); } } + + /* remove from shader's list */ remove_from_list(&variant->list_item_local); variant->shader->variants_cached--; + + /* remove from context's list */ remove_from_list(&variant->list_item_global); lp->nr_fs_variants--; + FREE(variant); } + static void llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { @@ -1106,23 +1133,23 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) struct lp_fs_variant_list_item *li; assert(fs != llvmpipe->fs); - (void) llvmpipe; /* * XXX: we need to flush the context until we have some sort of reference * counting in fragment shaders as they may still be binned * Flushing alone might not sufficient we need to wait on it too. 
*/ - llvmpipe_finish(pipe, __FUNCTION__); + /* Delete all the variants */ li = first_elem(&shader->variants); while(!at_end(&shader->variants, li)) { struct lp_fs_variant_list_item *next = next_elem(li); - remove_shader_variant(llvmpipe, li->base); + llvmpipe_remove_shader_variant(llvmpipe, li->base); li = next; } + /* Delete draw module's data */ draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data); assert(shader->variants_cached == 0); @@ -1278,14 +1305,15 @@ make_variant_key(struct llvmpipe_context *lp, } } + + /** - * Update fragment state. This is called just prior to drawing + * Update fragment shader state. This is called just prior to drawing * something when some fragment-related state has changed. */ void llvmpipe_update_fs(struct llvmpipe_context *lp) { - struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); struct lp_fragment_shader *shader = lp->fs; struct lp_fragment_shader_variant_key key; struct lp_fragment_shader_variant *variant = NULL; @@ -1293,6 +1321,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) make_variant_key(lp, shader, &key); + /* Search the variants for one which matches the key */ li = first_elem(&shader->variants); while(!at_end(&shader->variants, li)) { if(memcmp(&li->base->key, &key, shader->variant_key_size) == 0) { @@ -1303,36 +1332,49 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) } if (variant) { + /* Move this variant to the head of the list to implement LRU + * deletion of shader's when we have too many. + */ move_to_head(&lp->fs_variants_list, &variant->list_item_global); } else { - int64_t t0, t1; - int64_t dt; + /* variant not found, create it now */ + int64_t t0, t1, dt; unsigned i; + + /* First, check if we've exceeded the max number of shader variants. + * If so, free 25% of them (the least recently used ones). + */ if (lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS) { struct pipe_context *pipe = &lp->pipe; /* - * XXX: we need to flush the context until we have some sort of reference - * counting in fragment shaders as they may still be binned + * XXX: we need to flush the context until we have some sort of + * reference counting in fragment shaders as they may still be binned * Flushing alone might not be sufficient we need to wait on it too. */ llvmpipe_finish(pipe, __FUNCTION__); for (i = 0; i < LP_MAX_SHADER_VARIANTS / 4; i++) { - struct lp_fs_variant_list_item *item = last_elem(&lp->fs_variants_list); - remove_shader_variant(lp, item->base); + struct lp_fs_variant_list_item *item; + item = last_elem(&lp->fs_variants_list); + llvmpipe_remove_shader_variant(lp, item->base); } } - t0 = os_time_get(); - - variant = generate_variant(screen, shader, &key); + /* + * Generate the new variant. + */ + t0 = os_time_get(); + variant = generate_variant(lp, shader, &key); t1 = os_time_get(); dt = t1 - t0; LP_COUNT_ADD(llvm_compile_time, dt); LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. 
omit in/out test */ + llvmpipe_variant_count++; + + /* Put the new variant into the list */ if (variant) { insert_at_head(&shader->variants, &variant->list_item_local); insert_at_head(&lp->fs_variants_list, &variant->list_item_global); @@ -1341,6 +1383,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) } } + /* Bind this variant */ lp_setup_set_fs_variant(lp->setup, variant); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index 7d58c4936c7..98410c69359 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -69,12 +69,15 @@ struct lp_fragment_shader_variant_key struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS]; }; + +/** doubly-linked list item */ struct lp_fs_variant_list_item { struct lp_fragment_shader_variant *base; struct lp_fs_variant_list_item *next, *prev; }; + struct lp_fragment_shader_variant { struct lp_fragment_shader_variant_key key; @@ -118,5 +121,9 @@ struct lp_fragment_shader void lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant); +void +llvmpipe_remove_shader_variant(struct llvmpipe_context *lp, + struct lp_fragment_shader_variant *variant); + #endif /* LP_STATE_FS_H_ */ diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c index 47f3d95320f..ad751b9ef42 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -31,6 +31,7 @@ #include "util/u_simple_list.h" #include "os/os_time.h" #include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_intr.h" @@ -83,24 +84,29 @@ struct lp_setup_args LLVMValueRef v2a; }; -static LLVMTypeRef type4f(void) + + +static LLVMTypeRef +type4f(struct gallivm_state *gallivm) { - return LLVMVectorType(LLVMFloatType(), 4); + return LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4); } /* Equivalent of _mm_setr_ps(a,b,c,d) */ -static LLVMValueRef vec4f(LLVMBuilderRef bld, - LLVMValueRef a, LLVMValueRef b, LLVMValueRef c, LLVMValueRef d, - const char *name) +static LLVMValueRef +vec4f(struct gallivm_state *gallivm, + LLVMValueRef a, LLVMValueRef b, LLVMValueRef c, LLVMValueRef d, + const char *name) { - LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0); - LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0); - LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + LLVMBuilderRef bld = gallivm->builder; + LLVMValueRef i0 = lp_build_const_int32(gallivm, 0); + LLVMValueRef i1 = lp_build_const_int32(gallivm, 1); + LLVMValueRef i2 = lp_build_const_int32(gallivm, 2); + LLVMValueRef i3 = lp_build_const_int32(gallivm, 3); - LLVMValueRef res = LLVMGetUndef(type4f()); + LLVMValueRef res = LLVMGetUndef(type4f(gallivm)); res = LLVMBuildInsertElement(bld, res, a, i0, ""); res = LLVMBuildInsertElement(bld, res, b, i1, ""); @@ -112,15 +118,17 @@ static LLVMValueRef vec4f(LLVMBuilderRef bld, /* Equivalent of _mm_set1_ps(a) */ -static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld, - LLVMValueRef a, - const char *name) +static LLVMValueRef +vec4f_from_scalar(struct gallivm_state *gallivm, + LLVMValueRef a, + const char *name) { - LLVMValueRef res = LLVMGetUndef(type4f()); + LLVMBuilderRef bld = gallivm->builder; + LLVMValueRef res = LLVMGetUndef(type4f(gallivm)); int i; for(i = 0; i < 4; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 
0); + LLVMValueRef index = lp_build_const_int32(gallivm, i); res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : ""); } @@ -128,14 +136,15 @@ static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld, } static void -store_coef(LLVMBuilderRef builder, +store_coef(struct gallivm_state *gallivm, struct lp_setup_args *args, unsigned slot, LLVMValueRef a0, LLVMValueRef dadx, LLVMValueRef dady) { - LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), slot, 0); + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef idx = lp_build_const_int32(gallivm, slot); LLVMBuildStore(builder, a0, @@ -153,14 +162,14 @@ store_coef(LLVMBuilderRef builder, static void -emit_constant_coef4( LLVMBuilderRef builder, +emit_constant_coef4(struct gallivm_state *gallivm, struct lp_setup_args *args, unsigned slot, LLVMValueRef vert) { - LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); - LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero"); - store_coef(builder, args, slot, vert, zerovec, zerovec); + LLVMValueRef zero = lp_build_const_float(gallivm, 0.0); + LLVMValueRef zerovec = vec4f_from_scalar(gallivm, zero, "zero"); + store_coef(gallivm, args, slot, vert, zerovec, zerovec); } @@ -170,30 +179,33 @@ emit_constant_coef4( LLVMBuilderRef builder, * \param frontface is the triangle front facing? */ static void -emit_facing_coef( LLVMBuilderRef builder, +emit_facing_coef(struct gallivm_state *gallivm, struct lp_setup_args *args, unsigned slot ) { + LLVMBuilderRef builder = gallivm->builder; + LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); LLVMValueRef a0_0 = args->facing; - LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, LLVMFloatType(), ""); - LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); - LLVMValueRef a0 = vec4f(builder, a0_0f, zero, zero, zero, "facing"); - LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero"); + LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, ""); + LLVMValueRef zero = lp_build_const_float(gallivm, 0.0); + LLVMValueRef a0 = vec4f(gallivm, a0_0f, zero, zero, zero, "facing"); + LLVMValueRef zerovec = vec4f_from_scalar(gallivm, zero, "zero"); - store_coef(builder, args, slot, a0, zerovec, zerovec); + store_coef(gallivm, args, slot, a0, zerovec, zerovec); } static LLVMValueRef -vert_attrib(LLVMBuilderRef b, +vert_attrib(struct gallivm_state *gallivm, LLVMValueRef vert, int attr, int elem, const char *name) { + LLVMBuilderRef b = gallivm->builder; LLVMValueRef idx[2]; - idx[0] = LLVMConstInt(LLVMInt32Type(), attr, 0); - idx[1] = LLVMConstInt(LLVMInt32Type(), elem, 0); + idx[0] = lp_build_const_int32(gallivm, attr); + idx[1] = lp_build_const_int32(gallivm, elem); return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name); } @@ -214,16 +226,17 @@ vert_clamp(LLVMBuilderRef b, } static void -lp_twoside(LLVMBuilderRef b, +lp_twoside(struct gallivm_state *gallivm, struct lp_setup_args *args, const struct lp_setup_variant_key *key, int bcolor_slot) { + LLVMBuilderRef b = gallivm->builder; LLVMValueRef a0_back, a1_back, a2_back; - LLVMValueRef idx2 = LLVMConstInt(LLVMInt32Type(), bcolor_slot, 0); + LLVMValueRef idx2 = lp_build_const_int32(gallivm, bcolor_slot); LLVMValueRef facing = args->facing; - LLVMValueRef front_facing = LLVMBuildICmp(b, LLVMIntEQ, facing, LLVMConstInt(LLVMInt32Type(), 0, 0), ""); /** need i1 for if condition */ + LLVMValueRef front_facing = LLVMBuildICmp(b, LLVMIntEQ, facing, lp_build_const_int32(gallivm, 0), ""); /** need i1 for if condition */ a0_back = LLVMBuildLoad(b, LLVMBuildGEP(b, 
args->v0, &idx2, 1, ""), "v0a_back"); a1_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx2, 1, ""), "v1a_back"); @@ -241,29 +254,31 @@ lp_twoside(LLVMBuilderRef b, } static void -lp_do_offset_tri(LLVMBuilderRef b, +lp_do_offset_tri(struct gallivm_state *gallivm, struct lp_setup_args *args, const struct lp_setup_variant_key *key) { + LLVMBuilderRef b = gallivm->builder; struct lp_build_context bld; LLVMValueRef zoffset, mult; LLVMValueRef z0_new, z1_new, z2_new; LLVMValueRef dzdx0, dzdx, dzdy0, dzdy; LLVMValueRef max, max_value; - LLVMValueRef one = LLVMConstReal(LLVMFloatType(), 1.0); - LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); + LLVMValueRef one = lp_build_const_float(gallivm, 1.0); + LLVMValueRef zero = lp_build_const_float(gallivm, 0.0); + LLVMValueRef two = lp_build_const_int32(gallivm, 2); /* edge vectors: e = v0 - v2, f = v1 - v2 */ - LLVMValueRef v0_x = vert_attrib(b, args->v0, 0, 0, "v0_x"); - LLVMValueRef v1_x = vert_attrib(b, args->v1, 0, 0, "v1_x"); - LLVMValueRef v2_x = vert_attrib(b, args->v2, 0, 0, "v2_x"); - LLVMValueRef v0_y = vert_attrib(b, args->v0, 0, 1, "v0_y"); - LLVMValueRef v1_y = vert_attrib(b, args->v1, 0, 1, "v1_y"); - LLVMValueRef v2_y = vert_attrib(b, args->v2, 0, 1, "v2_y"); - LLVMValueRef v0_z = vert_attrib(b, args->v0, 0, 2, "v0_z"); - LLVMValueRef v1_z = vert_attrib(b, args->v1, 0, 2, "v1_z"); - LLVMValueRef v2_z = vert_attrib(b, args->v2, 0, 2, "v2_z"); + LLVMValueRef v0_x = vert_attrib(gallivm, args->v0, 0, 0, "v0_x"); + LLVMValueRef v1_x = vert_attrib(gallivm, args->v1, 0, 0, "v1_x"); + LLVMValueRef v2_x = vert_attrib(gallivm, args->v2, 0, 0, "v2_x"); + LLVMValueRef v0_y = vert_attrib(gallivm, args->v0, 0, 1, "v0_y"); + LLVMValueRef v1_y = vert_attrib(gallivm, args->v1, 0, 1, "v1_y"); + LLVMValueRef v2_y = vert_attrib(gallivm, args->v2, 0, 1, "v2_y"); + LLVMValueRef v0_z = vert_attrib(gallivm, args->v0, 0, 2, "v0_z"); + LLVMValueRef v1_z = vert_attrib(gallivm, args->v1, 0, 2, "v1_z"); + LLVMValueRef v2_z = vert_attrib(gallivm, args->v2, 0, 2, "v2_z"); /* edge vectors: e = v0 - v2, f = v1 - v2 */ LLVMValueRef dx02 = LLVMBuildFSub(b, v0_x, v2_x, "dx02"); @@ -288,7 +303,7 @@ lp_do_offset_tri(LLVMBuilderRef b, LLVMValueRef res2 = LLVMBuildFSub(b, dz02_dx12, dx02_dz12, "res2"); /* dzdx = fabsf(res1 * inv_det), dydx = fabsf(res2 * inv_det)*/ - lp_build_context_init(&bld, b, lp_type_float(32)); + lp_build_context_init(&bld, gallivm, lp_type_float(32)); dzdx0 = LLVMBuildFMul(b, res1, inv_det, "dzdx"); dzdx = lp_build_abs(&bld, dzdx0); dzdy0 = LLVMBuildFMul(b, res2, inv_det, "dzdy"); @@ -298,8 +313,8 @@ lp_do_offset_tri(LLVMBuilderRef b, max = LLVMBuildFCmp(b, LLVMRealUGT, dzdx, dzdy, ""); max_value = LLVMBuildSelect(b, max, dzdx, dzdy, "max"); - mult = LLVMBuildFMul(b, max_value, LLVMConstReal(LLVMFloatType(), key->scale), ""); - zoffset = LLVMBuildFAdd(b, LLVMConstReal(LLVMFloatType(), key->units), mult, "zoffset"); + mult = LLVMBuildFMul(b, max_value, lp_build_const_float(gallivm, key->scale), ""); + zoffset = LLVMBuildFAdd(b, lp_build_const_float(gallivm, key->units), mult, "zoffset"); /* clamp and do offset */ z0_new = vert_clamp(b, LLVMBuildFAdd(b, v0_z, zoffset, ""), zero, one); @@ -308,18 +323,19 @@ lp_do_offset_tri(LLVMBuilderRef b, /* insert into args->a0.z, a1.z, a2.z: */ - args->v0a = LLVMBuildInsertElement(b, args->v0a, z0_new, LLVMConstInt(LLVMInt32Type(), 2, 0), ""); - args->v1a = LLVMBuildInsertElement(b, args->v1a, z1_new, LLVMConstInt(LLVMInt32Type(), 2, 0), ""); - args->v2a = LLVMBuildInsertElement(b, args->v2a, z2_new, 
LLVMConstInt(LLVMInt32Type(), 2, 0), ""); + args->v0a = LLVMBuildInsertElement(b, args->v0a, z0_new, two, ""); + args->v1a = LLVMBuildInsertElement(b, args->v1a, z1_new, two, ""); + args->v2a = LLVMBuildInsertElement(b, args->v2a, z2_new, two, ""); } static void -load_attribute(LLVMBuilderRef b, +load_attribute(struct gallivm_state *gallivm, struct lp_setup_args *args, const struct lp_setup_variant_key *key, unsigned vert_attr) { - LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0); + LLVMBuilderRef b = gallivm->builder; + LLVMValueRef idx = lp_build_const_int32(gallivm, vert_attr); /* Load the vertex data */ @@ -331,25 +347,26 @@ load_attribute(LLVMBuilderRef b, /* Potentially modify it according to twoside, offset, etc: */ if (vert_attr == 0 && (key->scale != 0.0f || key->units != 0.0f)) { - lp_do_offset_tri(b, args, key); + lp_do_offset_tri(gallivm, args, key); } if (key->twoside) { if (vert_attr == key->color_slot && key->bcolor_slot != ~0) - lp_twoside(b, args, key, key->bcolor_slot); + lp_twoside(gallivm, args, key, key->bcolor_slot); else if (vert_attr == key->spec_slot && key->bspec_slot != ~0) - lp_twoside(b, args, key, key->bspec_slot); + lp_twoside(gallivm, args, key, key->bspec_slot); } } static void -emit_coef4( LLVMBuilderRef b, +emit_coef4( struct gallivm_state *gallivm, struct lp_setup_args *args, unsigned slot, LLVMValueRef a0, LLVMValueRef a1, LLVMValueRef a2) { + LLVMBuilderRef b = gallivm->builder; LLVMValueRef dy20_ooa = args->dy20_ooa; LLVMValueRef dy01_ooa = args->dy01_ooa; LLVMValueRef dx20_ooa = args->dx20_ooa; @@ -381,17 +398,17 @@ emit_coef4( LLVMBuilderRef b, LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0"); LLVMValueRef attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0"); - store_coef(b, args, slot, attr_0, dadx, dady); + store_coef(gallivm, args, slot, attr_0, dadx, dady); } static void -emit_linear_coef( LLVMBuilderRef b, +emit_linear_coef( struct gallivm_state *gallivm, struct lp_setup_args *args, unsigned slot) { /* nothing to do anymore */ - emit_coef4(b, + emit_coef4(gallivm, args, slot, args->v0a, args->v1a, @@ -408,30 +425,32 @@ emit_linear_coef( LLVMBuilderRef b, * divide the interpolated value by the interpolated W at that fragment. 
*/ static void -emit_perspective_coef( LLVMBuilderRef b, +emit_perspective_coef( struct gallivm_state *gallivm, struct lp_setup_args *args, unsigned slot) { + LLVMBuilderRef b = gallivm->builder; + /* premultiply by 1/w (v[0][3] is always 1/w): */ - LLVMValueRef v0_oow = vec4f_from_scalar(b, vert_attrib(b, args->v0, 0, 3, ""), "v0_oow"); - LLVMValueRef v1_oow = vec4f_from_scalar(b, vert_attrib(b, args->v1, 0, 3, ""), "v1_oow"); - LLVMValueRef v2_oow = vec4f_from_scalar(b, vert_attrib(b, args->v2, 0, 3, ""), "v2_oow"); + LLVMValueRef v0_oow = vec4f_from_scalar(gallivm, vert_attrib(gallivm, args->v0, 0, 3, ""), "v0_oow"); + LLVMValueRef v1_oow = vec4f_from_scalar(gallivm, vert_attrib(gallivm, args->v1, 0, 3, ""), "v1_oow"); + LLVMValueRef v2_oow = vec4f_from_scalar(gallivm, vert_attrib(gallivm, args->v2, 0, 3, ""), "v2_oow"); LLVMValueRef v0_oow_v0a = LLVMBuildFMul(b, args->v0a, v0_oow, "v0_oow_v0a"); LLVMValueRef v1_oow_v1a = LLVMBuildFMul(b, args->v1a, v1_oow, "v1_oow_v1a"); LLVMValueRef v2_oow_v2a = LLVMBuildFMul(b, args->v2a, v2_oow, "v2_oow_v2a"); - emit_coef4(b, args, slot, v0_oow_v0a, v1_oow_v1a, v2_oow_v2a); + emit_coef4(gallivm, args, slot, v0_oow_v0a, v1_oow_v1a, v2_oow_v2a); } static void -emit_position_coef( LLVMBuilderRef builder, +emit_position_coef( struct gallivm_state *gallivm, struct lp_setup_args *args, int slot ) { - emit_linear_coef(builder, args, slot); + emit_linear_coef(gallivm, args, slot); } @@ -441,7 +460,7 @@ emit_position_coef( LLVMBuilderRef builder, * Compute the inputs-> dadx, dady, a0 values. */ static void -emit_tri_coef( LLVMBuilderRef builder, +emit_tri_coef( struct gallivm_state *gallivm, const struct lp_setup_variant_key *key, struct lp_setup_args *args ) { @@ -449,8 +468,8 @@ emit_tri_coef( LLVMBuilderRef builder, /* The internal position input is in slot zero: */ - load_attribute(builder, args, key, 0); - emit_position_coef(builder, args, 0); + load_attribute(gallivm, args, key, 0); + emit_position_coef(gallivm, args, 0); /* setup interpolation for all the remaining attributes: */ @@ -459,24 +478,24 @@ emit_tri_coef( LLVMBuilderRef builder, if (key->inputs[slot].interp == LP_INTERP_CONSTANT || key->inputs[slot].interp == LP_INTERP_LINEAR || key->inputs[slot].interp == LP_INTERP_PERSPECTIVE) - load_attribute(builder, args, key, key->inputs[slot].src_index); + load_attribute(gallivm, args, key, key->inputs[slot].src_index); switch (key->inputs[slot].interp) { case LP_INTERP_CONSTANT: if (key->flatshade_first) { - emit_constant_coef4(builder, args, slot+1, args->v0a); + emit_constant_coef4(gallivm, args, slot+1, args->v0a); } else { - emit_constant_coef4(builder, args, slot+1, args->v2a); + emit_constant_coef4(gallivm, args, slot+1, args->v2a); } break; case LP_INTERP_LINEAR: - emit_linear_coef(builder, args, slot+1); + emit_linear_coef(gallivm, args, slot+1); break; case LP_INTERP_PERSPECTIVE: - emit_perspective_coef(builder, args, slot+1); + emit_perspective_coef(gallivm, args, slot+1); break; case LP_INTERP_POSITION: @@ -487,7 +506,7 @@ emit_tri_coef( LLVMBuilderRef builder, break; case LP_INTERP_FACING: - emit_facing_coef(builder, args, slot+1); + emit_facing_coef(gallivm, args, slot+1); break; default: @@ -500,7 +519,7 @@ emit_tri_coef( LLVMBuilderRef builder, /* XXX: This is generic code, share with fs/vs codegen: */ static lp_jit_setup_triangle -finalize_function(struct llvmpipe_screen *screen, +finalize_function(struct gallivm_state *gallivm, LLVMBuilderRef builder, LLVMValueRef function) { @@ -516,7 +535,7 @@ finalize_function(struct 
llvmpipe_screen *screen, #endif /* Apply optimizations to LLVM IR */ - LLVMRunFunctionPassManager(screen->pass, function); + LLVMRunFunctionPassManager(gallivm->passmgr, function); if (gallivm_debug & GALLIVM_DEBUG_IR) { @@ -528,7 +547,7 @@ finalize_function(struct llvmpipe_screen *screen, /* * Translate the LLVM IR into machine code. */ - f = LLVMGetPointerToGlobal(screen->engine, function); + f = LLVMGetPointerToGlobal(gallivm->engine, function); if (gallivm_debug & GALLIVM_DEBUG_ASM) { @@ -543,11 +562,12 @@ finalize_function(struct llvmpipe_screen *screen, /* XXX: Generic code: */ static void -lp_emit_emms(LLVMBuilderRef builder) +lp_emit_emms(struct gallivm_state *gallivm) { #ifdef PIPE_ARCH_X86 /* Avoid corrupting the FPU stack on 32bit OSes. */ - lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); + lp_build_intrinsic(gallivm->builder, "llvm.x86.mmx.emms", + LLVMVoidTypeInContext(gallivm->context), NULL, 0); #endif } @@ -568,21 +588,23 @@ set_noalias(LLVMBuilderRef builder, } static void -init_args(LLVMBuilderRef b, +init_args(struct gallivm_state *gallivm, struct lp_setup_args *args, const struct lp_setup_variant *variant) { - LLVMValueRef v0_x = vert_attrib(b, args->v0, 0, 0, "v0_x"); - LLVMValueRef v0_y = vert_attrib(b, args->v0, 0, 1, "v0_y"); + LLVMBuilderRef b = gallivm->builder; - LLVMValueRef v1_x = vert_attrib(b, args->v1, 0, 0, "v1_x"); - LLVMValueRef v1_y = vert_attrib(b, args->v1, 0, 1, "v1_y"); + LLVMValueRef v0_x = vert_attrib(gallivm, args->v0, 0, 0, "v0_x"); + LLVMValueRef v0_y = vert_attrib(gallivm, args->v0, 0, 1, "v0_y"); - LLVMValueRef v2_x = vert_attrib(b, args->v2, 0, 0, "v2_x"); - LLVMValueRef v2_y = vert_attrib(b, args->v2, 0, 1, "v2_y"); + LLVMValueRef v1_x = vert_attrib(gallivm, args->v1, 0, 0, "v1_x"); + LLVMValueRef v1_y = vert_attrib(gallivm, args->v1, 0, 1, "v1_y"); - LLVMValueRef pixel_center = LLVMConstReal(LLVMFloatType(), - variant->key.pixel_center_half ? 0.5 : 0); + LLVMValueRef v2_x = vert_attrib(gallivm, args->v2, 0, 0, "v2_x"); + LLVMValueRef v2_y = vert_attrib(gallivm, args->v2, 0, 1, "v2_y"); + + LLVMValueRef pixel_center = lp_build_const_float(gallivm, + variant->key.pixel_center_half ? 
0.5 : 0); LLVMValueRef x0_center = LLVMBuildFSub(b, v0_x, pixel_center, "x0_center" ); LLVMValueRef y0_center = LLVMBuildFSub(b, v0_y, pixel_center, "y0_center" ); @@ -592,7 +614,7 @@ init_args(LLVMBuilderRef b, LLVMValueRef dx20 = LLVMBuildFSub(b, v2_x, v0_x, "dx20"); LLVMValueRef dy20 = LLVMBuildFSub(b, v2_y, v0_y, "dy20"); - LLVMValueRef one = LLVMConstReal(LLVMFloatType(), 1.0); + LLVMValueRef one = lp_build_const_float(gallivm, 1.0); LLVMValueRef e = LLVMBuildFMul(b, dx01, dy20, "e"); LLVMValueRef f = LLVMBuildFMul(b, dx20, dy01, "f"); LLVMValueRef ooa = LLVMBuildFDiv(b, one, LLVMBuildFSub(b, e, f, ""), "ooa"); @@ -602,14 +624,14 @@ init_args(LLVMBuilderRef b, LLVMValueRef dx20_ooa = LLVMBuildFMul(b, dx20, ooa, "dx20_ooa"); LLVMValueRef dx01_ooa = LLVMBuildFMul(b, dx01, ooa, "dx01_ooa"); - args->dy20_ooa = vec4f_from_scalar(b, dy20_ooa, "dy20_ooa_4f"); - args->dy01_ooa = vec4f_from_scalar(b, dy01_ooa, "dy01_ooa_4f"); + args->dy20_ooa = vec4f_from_scalar(gallivm, dy20_ooa, "dy20_ooa_4f"); + args->dy01_ooa = vec4f_from_scalar(gallivm, dy01_ooa, "dy01_ooa_4f"); - args->dx20_ooa = vec4f_from_scalar(b, dx20_ooa, "dx20_ooa_4f"); - args->dx01_ooa = vec4f_from_scalar(b, dx01_ooa, "dx01_ooa_4f"); + args->dx20_ooa = vec4f_from_scalar(gallivm, dx20_ooa, "dx20_ooa_4f"); + args->dx01_ooa = vec4f_from_scalar(gallivm, dx01_ooa, "dx01_ooa_4f"); - args->x0_center = vec4f_from_scalar(b, x0_center, "x0_center_4f"); - args->y0_center = vec4f_from_scalar(b, y0_center, "y0_center_4f"); + args->x0_center = vec4f_from_scalar(gallivm, x0_center, "x0_center_4f"); + args->y0_center = vec4f_from_scalar(gallivm, y0_center, "y0_center_4f"); } /** @@ -617,7 +639,7 @@ init_args(LLVMBuilderRef b, * */ static struct lp_setup_variant * -generate_setup_variant(struct llvmpipe_screen *screen, +generate_setup_variant(struct gallivm_state *gallivm, struct lp_setup_variant_key *key, struct llvmpipe_context *lp) { @@ -628,7 +650,7 @@ generate_setup_variant(struct llvmpipe_screen *screen, LLVMTypeRef func_type; LLVMTypeRef arg_types[7]; LLVMBasicBlockRef block; - LLVMBuilderRef builder; + LLVMBuilderRef builder = gallivm->builder; int64_t t0, t1; if (0) @@ -653,19 +675,20 @@ generate_setup_variant(struct llvmpipe_screen *screen, * the vertices. 
*/ - vec4f_type = LLVMVectorType(LLVMFloatType(), 4); + vec4f_type = LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4); arg_types[0] = LLVMPointerType(vec4f_type, 0); /* v0 */ arg_types[1] = LLVMPointerType(vec4f_type, 0); /* v1 */ arg_types[2] = LLVMPointerType(vec4f_type, 0); /* v2 */ - arg_types[3] = LLVMInt32Type(); /* facing */ + arg_types[3] = LLVMInt32TypeInContext(gallivm->context); /* facing */ arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */ arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */ arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */ - func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), + arg_types, Elements(arg_types), 0); - variant->function = LLVMAddFunction(screen->module, func_name, func_type); + variant->function = LLVMAddFunction(gallivm->module, func_name, func_type); if (!variant->function) goto fail; @@ -690,19 +713,18 @@ generate_setup_variant(struct llvmpipe_screen *screen, /* * Function body */ - block = LLVMAppendBasicBlock(variant->function, "entry"); - builder = LLVMCreateBuilder(); + block = LLVMAppendBasicBlockInContext(gallivm->context, + variant->function, "entry"); LLVMPositionBuilderAtEnd(builder, block); set_noalias(builder, variant->function, arg_types, Elements(arg_types)); - init_args(builder, &args, variant); - emit_tri_coef(builder, &variant->key, &args); + init_args(gallivm, &args, variant); + emit_tri_coef(gallivm, &variant->key, &args); - lp_emit_emms(builder); + lp_emit_emms(gallivm); LLVMBuildRetVoid(builder); - LLVMDisposeBuilder(builder); - variant->jit_function = finalize_function(screen, builder, + variant->jit_function = finalize_function(gallivm, builder, variant->function); if (!variant->jit_function) goto fail; @@ -722,7 +744,7 @@ fail: if (variant) { if (variant->function) { if (variant->jit_function) - LLVMFreeMachineCodeForFunction(screen->engine, + LLVMFreeMachineCodeForFunction(gallivm->engine, variant->function); LLVMDeleteFunction(variant->function); } @@ -773,8 +795,6 @@ static void remove_setup_variant(struct llvmpipe_context *lp, struct lp_setup_variant *variant) { - struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); - if (gallivm_debug & GALLIVM_DEBUG_IR) { debug_printf("llvmpipe: del setup_variant #%u total %u\n", variant->no, lp->nr_setup_variants); @@ -782,7 +802,7 @@ remove_setup_variant(struct llvmpipe_context *lp, if (variant->function) { if (variant->jit_function) - LLVMFreeMachineCodeForFunction(screen->engine, + LLVMFreeMachineCodeForFunction(lp->gallivm->engine, variant->function); LLVMDeleteFunction(variant->function); } @@ -825,8 +845,6 @@ cull_setup_variants(struct llvmpipe_context *lp) void llvmpipe_update_setup(struct llvmpipe_context *lp) { - struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); - struct lp_setup_variant_key *key = &lp->setup_variant.key; struct lp_setup_variant *variant = NULL; struct lp_setup_variant_list_item *li; @@ -849,9 +867,11 @@ llvmpipe_update_setup(struct llvmpipe_context *lp) cull_setup_variants(lp); } - variant = generate_setup_variant(screen, key, lp); + variant = generate_setup_variant(lp->gallivm, key, lp); insert_at_head(&lp->setup_variants_list, &variant->list_item_global); lp->nr_setup_variants++; + + llvmpipe_variant_count++; } lp_setup_set_setup_variant(lp->setup, diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c index 
fb29423dd35..fffdeb6ccde 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -33,6 +33,7 @@ #include "lp_state.h" #include "draw/draw_context.h" +#include "util/u_inlines.h" static void * @@ -80,8 +81,9 @@ llvmpipe_set_vertex_buffers(struct pipe_context *pipe, assert(count <= PIPE_MAX_ATTRIBS); - memcpy(llvmpipe->vertex_buffer, buffers, count * sizeof(buffers[0])); - llvmpipe->num_vertex_buffers = count; + util_copy_vertex_buffers(llvmpipe->vertex_buffer, + &llvmpipe->num_vertex_buffers, + buffers, count); llvmpipe->dirty |= LP_NEW_VERTEX; diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index 164242eda67..e7e46a628a1 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -52,19 +52,23 @@ adjust_to_tile_bounds(unsigned x, unsigned y, unsigned width, unsigned height, static void lp_resource_copy(struct pipe_context *pipe, - struct pipe_resource *dst, struct pipe_subresource subdst, + struct pipe_resource *dst, unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height) + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) { - /* XXX what about the dstz/srcz parameters - zslice wasn't used... */ + /* XXX this used to ignore srcz/dstz + * assume it works the same for cube and 3d + */ struct llvmpipe_resource *src_tex = llvmpipe_resource(src); struct llvmpipe_resource *dst_tex = llvmpipe_resource(dst); const enum pipe_format format = src_tex->base.format; + unsigned width = src_box->width; + unsigned height = src_box->height; + assert(src_box->depth == 1); llvmpipe_flush_resource(pipe, - dst, subdst.face, subdst.level, + dst, dst_level, dstz, 0, /* flush_flags */ FALSE, /* read_only */ TRUE, /* cpu_access */ @@ -72,7 +76,7 @@ lp_resource_copy(struct pipe_context *pipe, "blit dest"); llvmpipe_flush_resource(pipe, - src, subsrc.face, subsrc.level, + src, src_level, src_box->z, 0, /* flush_flags */ TRUE, /* read_only */ TRUE, /* cpu_access */ @@ -80,9 +84,10 @@ lp_resource_copy(struct pipe_context *pipe, "blit src"); /* - printf("surface copy from %u to %u: %u,%u to %u,%u %u x %u\n", - src_tex->id, dst_tex->id, - srcx, srcy, dstx, dsty, width, height); + printf("surface copy from %u lvl %u to %u lvl %u: %u,%u,%u to %u,%u,%u %u x %u x %u\n", + src_tex->id, src_level, dst_tex->id, dst_level, + src_box->x, src_box->y, src_box->z, dstx, dsty, dstz, + src_box->width, src_box->height, src_box->depth); */ /* set src tiles to linear layout */ @@ -90,12 +95,13 @@ lp_resource_copy(struct pipe_context *pipe, unsigned tx, ty, tw, th; unsigned x, y; - adjust_to_tile_bounds(srcx, srcy, width, height, &tx, &ty, &tw, &th); + adjust_to_tile_bounds(src_box->x, src_box->y, width, height, + &tx, &ty, &tw, &th); for (y = 0; y < th; y += TILE_SIZE) { for (x = 0; x < tw; x += TILE_SIZE) { (void) llvmpipe_get_texture_tile_linear(src_tex, - subsrc.face, subsrc.level, + src_box->z, src_level, LP_TEX_USAGE_READ, tx + x, ty + y); } @@ -130,7 +136,7 @@ lp_resource_copy(struct pipe_context *pipe, usage = LP_TEX_USAGE_READ_WRITE; (void) llvmpipe_get_texture_tile_linear(dst_tex, - subdst.face, subdst.level, + dstz, dst_level, usage, tx + x, ty + y); } @@ -140,22 +146,22 @@ lp_resource_copy(struct pipe_context *pipe, /* copy */ { const ubyte *src_linear_ptr - = 
llvmpipe_get_texture_image_address(src_tex, subsrc.face, - subsrc.level, + = llvmpipe_get_texture_image_address(src_tex, src_box->z, + src_level, LP_TEX_LAYOUT_LINEAR); ubyte *dst_linear_ptr - = llvmpipe_get_texture_image_address(dst_tex, subdst.face, - subdst.level, + = llvmpipe_get_texture_image_address(dst_tex, dstz, + dst_level, LP_TEX_LAYOUT_LINEAR); if (dst_linear_ptr && src_linear_ptr) { util_copy_rect(dst_linear_ptr, format, - llvmpipe_resource_stride(&dst_tex->base, subdst.level), + llvmpipe_resource_stride(&dst_tex->base, dst_level), dstx, dsty, width, height, src_linear_ptr, - llvmpipe_resource_stride(&src_tex->base, subsrc.level), - srcx, srcy); + llvmpipe_resource_stride(&src_tex->base, src_level), + src_box->x, src_box->y); } } } diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h index 90422e42588..c64f3e149fd 100644 --- a/src/gallium/drivers/llvmpipe/lp_test.h +++ b/src/gallium/drivers/llvmpipe/lp_test.h @@ -64,13 +64,14 @@ write_tsv_header(FILE *fp); boolean -test_some(unsigned verbose, FILE *fp, unsigned long n); +test_some(struct gallivm_state *gallivm,unsigned verbose, FILE *fp, + unsigned long n); boolean -test_single(unsigned verbose, FILE *fp); +test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp); boolean -test_all(unsigned verbose, FILE *fp); +test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp); #if defined(PIPE_CC_MSVC) diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 8b6b5e1298f..b3ca134131d 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -163,11 +163,13 @@ dump_blend_type(FILE *fp, static LLVMValueRef -add_blend_test(LLVMModuleRef module, +add_blend_test(struct gallivm_state *gallivm, const struct pipe_blend_state *blend, enum vector_mode mode, struct lp_type type) { + LLVMModuleRef module = gallivm->module; + LLVMContextRef context = gallivm->context; LLVMTypeRef vec_type; LLVMTypeRef args[4]; LLVMValueRef func; @@ -179,18 +181,18 @@ add_blend_test(LLVMModuleRef module, LLVMBuilderRef builder; const unsigned rt = 0; - vec_type = lp_build_vec_type(type); + vec_type = lp_build_vec_type(gallivm, type); args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0); - func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0)); + func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidTypeInContext(context), args, 4, 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); src_ptr = LLVMGetParam(func, 0); dst_ptr = LLVMGetParam(func, 1); const_ptr = LLVMGetParam(func, 2); res_ptr = LLVMGetParam(func, 3); - block = LLVMAppendBasicBlock(func, "entry"); - builder = LLVMCreateBuilder(); + block = LLVMAppendBasicBlockInContext(context, func, "entry"); + builder = gallivm->builder; LLVMPositionBuilderAtEnd(builder, block); if (mode == AoS) { @@ -203,7 +205,7 @@ add_blend_test(LLVMModuleRef module, dst = LLVMBuildLoad(builder, dst_ptr, "dst"); con = LLVMBuildLoad(builder, const_ptr, "const"); - res = lp_build_blend_aos(builder, blend, type, rt, src, dst, con, 3); + res = lp_build_blend_aos(gallivm, blend, type, rt, src, dst, con, 3); lp_build_name(res, "res"); @@ -218,7 +220,7 @@ add_blend_test(LLVMModuleRef module, unsigned i; for(i = 0; i < 4; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0); src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, 
src_ptr, &index, 1, ""), ""); dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), ""); con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), ""); @@ -227,10 +229,10 @@ add_blend_test(LLVMModuleRef module, lp_build_name(dst[i], "dst.%c", "rgba"[i]); } - lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res); + lp_build_blend_soa(gallivm, blend, type, rt, src, dst, con, res); for(i = 0; i < 4; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0); lp_build_name(res[i], "res.%c", "rgba"[i]); LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, "")); } @@ -238,7 +240,6 @@ add_blend_test(LLVMModuleRef module, LLVMBuildRetVoid(builder);; - LLVMDisposeBuilder(builder); return func; } @@ -465,16 +466,16 @@ compute_blend_ref(const struct pipe_blend_state *blend, PIPE_ALIGN_STACK static boolean -test_one(unsigned verbose, +test_one(struct gallivm_state *gallivm, + unsigned verbose, FILE *fp, const struct pipe_blend_state *blend, enum vector_mode mode, struct lp_type type) { - LLVMModuleRef module = NULL; + LLVMModuleRef module = gallivm->module; LLVMValueRef func = NULL; - LLVMExecutionEngineRef engine = lp_build_engine; - LLVMPassManagerRef pass = NULL; + LLVMExecutionEngineRef engine = gallivm->engine; char *error = NULL; blend_test_ptr_t blend_test_ptr; boolean success; @@ -487,9 +488,7 @@ test_one(unsigned verbose, if(verbose >= 1) dump_blend_type(stdout, blend, mode, type); - module = LLVMModuleCreateWithName("test"); - - func = add_blend_test(module, blend, mode, type); + func = add_blend_test(gallivm, blend, mode, type); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { LLVMDumpModule(module); @@ -497,24 +496,6 @@ test_one(unsigned verbose, } LLVMDisposeMessage(error); -#if 0 - pass = LLVMCreatePassManager(); - LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); - /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, - * but there are more on SVN. 
*/ - LLVMAddConstantPropagationPass(pass); - LLVMAddInstructionCombiningPass(pass); - LLVMAddPromoteMemoryToRegisterPass(pass); - LLVMAddGVNPass(pass); - LLVMAddCFGSimplificationPass(pass); - LLVMRunPassManager(pass, module); -#else - (void)pass; -#endif - - if(verbose >= 2) - LLVMDumpModule(module); - code = LLVMGetPointerToGlobal(engine, func); blend_test_ptr = voidptr_to_blend_test_ptr_t(code); @@ -715,9 +696,6 @@ test_one(unsigned verbose, LLVMFreeMachineCodeForFunction(engine, func); - if(pass) - LLVMDisposePassManager(pass); - return success; } @@ -773,7 +751,7 @@ const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]); boolean -test_all(unsigned verbose, FILE *fp) +test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) { const unsigned *rgb_func; const unsigned *rgb_src_factor; @@ -809,7 +787,7 @@ test_all(unsigned verbose, FILE *fp) blend.rt[0].alpha_dst_factor = *alpha_dst_factor; blend.rt[0].colormask = PIPE_MASK_RGBA; - if(!test_one(verbose, fp, &blend, mode, *type)) + if(!test_one(gallivm, verbose, fp, &blend, mode, *type)) success = FALSE; } @@ -826,7 +804,8 @@ test_all(unsigned verbose, FILE *fp) boolean -test_some(unsigned verbose, FILE *fp, unsigned long n) +test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, + unsigned long n) { const unsigned *rgb_func; const unsigned *rgb_src_factor; @@ -868,7 +847,7 @@ test_some(unsigned verbose, FILE *fp, unsigned long n) blend.rt[0].alpha_dst_factor = *alpha_dst_factor; blend.rt[0].colormask = PIPE_MASK_RGBA; - if(!test_one(verbose, fp, &blend, mode, *type)) + if(!test_one(gallivm, verbose, fp, &blend, mode, *type)) success = FALSE; } @@ -877,7 +856,7 @@ test_some(unsigned verbose, FILE *fp, unsigned long n) boolean -test_single(unsigned verbose, FILE *fp) +test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) { printf("no test_single()"); return TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index 3ba42bf11a6..f4a2f360c75 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -97,64 +97,65 @@ dump_conv_types(FILE *fp, static LLVMValueRef -add_conv_test(LLVMModuleRef module, +add_conv_test(struct gallivm_state *gallivm, struct lp_type src_type, unsigned num_srcs, struct lp_type dst_type, unsigned num_dsts) { + LLVMModuleRef module = gallivm->module; + LLVMContextRef context = gallivm->context; + LLVMBuilderRef builder = gallivm->builder; LLVMTypeRef args[2]; LLVMValueRef func; LLVMValueRef src_ptr; LLVMValueRef dst_ptr; LLVMBasicBlockRef block; - LLVMBuilderRef builder; LLVMValueRef src[LP_MAX_VECTOR_LENGTH]; LLVMValueRef dst[LP_MAX_VECTOR_LENGTH]; unsigned i; - args[0] = LLVMPointerType(lp_build_vec_type(src_type), 0); - args[1] = LLVMPointerType(lp_build_vec_type(dst_type), 0); + args[0] = LLVMPointerType(lp_build_vec_type(gallivm, src_type), 0); + args[1] = LLVMPointerType(lp_build_vec_type(gallivm, dst_type), 0); - func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 2, 0)); + func = LLVMAddFunction(module, "test", + LLVMFunctionType(LLVMVoidTypeInContext(context), + args, 2, 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); src_ptr = LLVMGetParam(func, 0); dst_ptr = LLVMGetParam(func, 1); - block = LLVMAppendBasicBlock(func, "entry"); - builder = LLVMCreateBuilder(); + block = LLVMAppendBasicBlockInContext(context, func, "entry"); LLVMPositionBuilderAtEnd(builder, block); for(i = 0; i < num_srcs; ++i) { - LLVMValueRef index = 
LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0); LLVMValueRef ptr = LLVMBuildGEP(builder, src_ptr, &index, 1, ""); src[i] = LLVMBuildLoad(builder, ptr, ""); } - lp_build_conv(builder, src_type, dst_type, src, num_srcs, dst, num_dsts); + lp_build_conv(gallivm, src_type, dst_type, src, num_srcs, dst, num_dsts); for(i = 0; i < num_dsts; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0); LLVMValueRef ptr = LLVMBuildGEP(builder, dst_ptr, &index, 1, ""); LLVMBuildStore(builder, dst[i], ptr); } LLVMBuildRetVoid(builder);; - LLVMDisposeBuilder(builder); return func; } PIPE_ALIGN_STACK static boolean -test_one(unsigned verbose, +test_one(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, struct lp_type src_type, struct lp_type dst_type) { - LLVMModuleRef module = NULL; + LLVMModuleRef module = gallivm->module; + LLVMExecutionEngineRef engine = gallivm->engine; LLVMValueRef func = NULL; - LLVMExecutionEngineRef engine = lp_build_engine; - LLVMPassManagerRef pass = NULL; char *error = NULL; conv_test_ptr_t conv_test_ptr; boolean success; @@ -193,9 +194,7 @@ test_one(unsigned verbose, eps = MAX2(lp_const_eps(src_type), lp_const_eps(dst_type)); - module = LLVMModuleCreateWithName("test"); - - func = add_conv_test(module, src_type, num_srcs, dst_type, num_dsts); + func = add_conv_test(gallivm, src_type, num_srcs, dst_type, num_dsts); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { LLVMDumpModule(module); @@ -203,21 +202,6 @@ test_one(unsigned verbose, } LLVMDisposeMessage(error); -#if 0 - pass = LLVMCreatePassManager(); - LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); - /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, - * but there are more on SVN. 
*/ - LLVMAddConstantPropagationPass(pass); - LLVMAddInstructionCombiningPass(pass); - LLVMAddPromoteMemoryToRegisterPass(pass); - LLVMAddGVNPass(pass); - LLVMAddCFGSimplificationPass(pass); - LLVMRunPassManager(pass, module); -#else - (void)pass; -#endif - if(verbose >= 2) LLVMDumpModule(module); @@ -342,9 +326,6 @@ test_one(unsigned verbose, LLVMFreeMachineCodeForFunction(engine, func); - if(pass) - LLVMDisposePassManager(pass); - return success; } @@ -390,7 +371,7 @@ const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]); boolean -test_all(unsigned verbose, FILE *fp) +test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) { const struct lp_type *src_type; const struct lp_type *dst_type; @@ -405,7 +386,7 @@ test_all(unsigned verbose, FILE *fp) if(src_type->norm != dst_type->norm) continue; - if(!test_one(verbose, fp, *src_type, *dst_type)) + if(!test_one(gallivm, verbose, fp, *src_type, *dst_type)) success = FALSE; } @@ -416,7 +397,8 @@ test_all(unsigned verbose, FILE *fp) boolean -test_some(unsigned verbose, FILE *fp, unsigned long n) +test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, + unsigned long n) { const struct lp_type *src_type; const struct lp_type *dst_type; @@ -430,7 +412,7 @@ test_some(unsigned verbose, FILE *fp, unsigned long n) dst_type = &conv_types[rand() % num_types]; } while (src_type == dst_type || src_type->norm != dst_type->norm); - if(!test_one(verbose, fp, *src_type, *dst_type)) + if(!test_one(gallivm, verbose, fp, *src_type, *dst_type)) success = FALSE; } @@ -439,7 +421,7 @@ test_some(unsigned verbose, FILE *fp, unsigned long n) boolean -test_single(unsigned verbose, FILE *fp) +test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) { /* float, fixed, sign, norm, width, len */ struct lp_type f32x4_type = @@ -449,7 +431,7 @@ test_single(unsigned verbose, FILE *fp) boolean success; - success = test_one(verbose, fp, f32x4_type, ub8x4_type); + success = test_one(gallivm, verbose, fp, f32x4_type, ub8x4_type); return success; } diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 2855d7cea4f..4152ca6cf63 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -30,13 +30,6 @@ #include <stdio.h> #include <float.h> -#include "gallivm/lp_bld.h" -#include "gallivm/lp_bld_debug.h" -#include "gallivm/lp_bld_init.h" -#include <llvm-c/Analysis.h> -#include <llvm-c/Target.h> -#include <llvm-c/Transforms/Scalar.h> - #include "util/u_memory.h" #include "util/u_pointer.h" #include "util/u_string.h" @@ -44,7 +37,11 @@ #include "util/u_format_tests.h" #include "util/u_format_s3tc.h" +#include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_format.h" +#include "gallivm/lp_bld_init.h" + #include "lp_test.h" @@ -78,56 +75,57 @@ typedef void static LLVMValueRef -add_fetch_rgba_test(unsigned verbose, +add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose, const struct util_format_description *desc, struct lp_type type) { char name[256]; + LLVMContextRef context = gallivm->context; + LLVMModuleRef module = gallivm->module; + LLVMBuilderRef builder = gallivm->builder; + LLVMPassManagerRef passmgr = gallivm->passmgr; LLVMTypeRef args[4]; LLVMValueRef func; LLVMValueRef packed_ptr; - LLVMValueRef offset = LLVMConstNull(LLVMInt32Type()); + LLVMValueRef offset = LLVMConstNull(LLVMInt32TypeInContext(context)); LLVMValueRef rgba_ptr; LLVMValueRef i; LLVMValueRef j; 
LLVMBasicBlockRef block; - LLVMBuilderRef builder; LLVMValueRef rgba; util_snprintf(name, sizeof name, "fetch_%s_%s", desc->short_name, type.floating ? "float" : "unorm8"); - args[0] = LLVMPointerType(lp_build_vec_type(type), 0); - args[1] = LLVMPointerType(LLVMInt8Type(), 0); - args[3] = args[2] = LLVMInt32Type(); + args[0] = LLVMPointerType(lp_build_vec_type(gallivm, type), 0); + args[1] = LLVMPointerType(LLVMInt8TypeInContext(context), 0); + args[3] = args[2] = LLVMInt32TypeInContext(context); - func = LLVMAddFunction(lp_build_module, name, - LLVMFunctionType(LLVMVoidType(), args, Elements(args), 0)); + func = LLVMAddFunction(module, name, + LLVMFunctionType(LLVMVoidTypeInContext(context), + args, Elements(args), 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); rgba_ptr = LLVMGetParam(func, 0); packed_ptr = LLVMGetParam(func, 1); i = LLVMGetParam(func, 2); j = LLVMGetParam(func, 3); - block = LLVMAppendBasicBlock(func, "entry"); - builder = LLVMCreateBuilder(); + block = LLVMAppendBasicBlockInContext(context, func, "entry"); LLVMPositionBuilderAtEnd(builder, block); - rgba = lp_build_fetch_rgba_aos(builder, desc, type, + rgba = lp_build_fetch_rgba_aos(gallivm, desc, type, packed_ptr, offset, i, j); LLVMBuildStore(builder, rgba, rgba_ptr); LLVMBuildRetVoid(builder); - LLVMDisposeBuilder(builder); - if (LLVMVerifyFunction(func, LLVMPrintMessageAction)) { LLVMDumpValue(func); abort(); } - LLVMRunFunctionPassManager(lp_build_pass, func); + LLVMRunFunctionPassManager(passmgr, func); if (verbose >= 1) { LLVMDumpValue(func); @@ -139,10 +137,11 @@ add_fetch_rgba_test(unsigned verbose, PIPE_ALIGN_STACK static boolean -test_format_float(unsigned verbose, FILE *fp, +test_format_float(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, const struct util_format_description *desc) { LLVMValueRef fetch = NULL; + LLVMExecutionEngineRef engine = gallivm->engine; fetch_ptr_t fetch_ptr; PIPE_ALIGN_VAR(16) float unpacked[4]; boolean first = TRUE; @@ -150,9 +149,9 @@ test_format_float(unsigned verbose, FILE *fp, unsigned i, j, k, l; void *f; - fetch = add_fetch_rgba_test(verbose, desc, lp_float32_vec4_type()); + fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_float32_vec4_type()); - f = LLVMGetPointerToGlobal(lp_build_engine, fetch); + f = LLVMGetPointerToGlobal(engine, fetch); fetch_ptr = (fetch_ptr_t) pointer_to_func(f); if (verbose >= 2) { @@ -208,7 +207,7 @@ test_format_float(unsigned verbose, FILE *fp, } } - LLVMFreeMachineCodeForFunction(lp_build_engine, fetch); + LLVMFreeMachineCodeForFunction(engine, fetch); LLVMDeleteFunction(fetch); if(fp) @@ -220,7 +219,8 @@ test_format_float(unsigned verbose, FILE *fp, PIPE_ALIGN_STACK static boolean -test_format_unorm8(unsigned verbose, FILE *fp, +test_format_unorm8(struct gallivm_state *gallivm, + unsigned verbose, FILE *fp, const struct util_format_description *desc) { LLVMValueRef fetch = NULL; @@ -231,9 +231,9 @@ test_format_unorm8(unsigned verbose, FILE *fp, unsigned i, j, k, l; void *f; - fetch = add_fetch_rgba_test(verbose, desc, lp_unorm8_vec4_type()); + fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_unorm8_vec4_type()); - f = LLVMGetPointerToGlobal(lp_build_engine, fetch); + f = LLVMGetPointerToGlobal(gallivm->engine, fetch); fetch_ptr = (fetch_ptr_t) pointer_to_func(f); if (verbose >= 2) { @@ -290,7 +290,7 @@ test_format_unorm8(unsigned verbose, FILE *fp, if (!success) LLVMDumpValue(fetch); - LLVMFreeMachineCodeForFunction(lp_build_engine, fetch); + LLVMFreeMachineCodeForFunction(gallivm->engine, fetch); 
LLVMDeleteFunction(fetch); if(fp) @@ -303,16 +303,17 @@ test_format_unorm8(unsigned verbose, FILE *fp, static boolean -test_one(unsigned verbose, FILE *fp, +test_one(struct gallivm_state *gallivm, + unsigned verbose, FILE *fp, const struct util_format_description *format_desc) { boolean success = TRUE; - if (!test_format_float(verbose, fp, format_desc)) { + if (!test_format_float(gallivm, verbose, fp, format_desc)) { success = FALSE; } - if (!test_format_unorm8(verbose, fp, format_desc)) { + if (!test_format_unorm8(gallivm, verbose, fp, format_desc)) { success = FALSE; } @@ -321,7 +322,7 @@ test_one(unsigned verbose, FILE *fp, boolean -test_all(unsigned verbose, FILE *fp) +test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) { enum pipe_format format; boolean success = TRUE; @@ -349,7 +350,7 @@ test_all(unsigned verbose, FILE *fp) continue; } - if (!test_one(verbose, fp, format_desc)) { + if (!test_one(gallivm, verbose, fp, format_desc)) { success = FALSE; } } @@ -359,14 +360,15 @@ test_all(unsigned verbose, FILE *fp) boolean -test_some(unsigned verbose, FILE *fp, unsigned long n) +test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, + unsigned long n) { - return test_all(verbose, fp); + return test_all(gallivm, verbose, fp); } boolean -test_single(unsigned verbose, FILE *fp) +test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) { printf("no test_single()"); return TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 7a0d06ae2c8..149ee6f1256 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -380,6 +380,7 @@ int main(int argc, char **argv) unsigned i; boolean success; boolean single = FALSE; + struct gallivm_state *gallivm; for(i = 1; i < argc; ++i) { if(strcmp(argv[i], "-v") == 0) @@ -394,21 +395,23 @@ int main(int argc, char **argv) lp_build_init(); + gallivm = gallivm_create(); + util_cpu_detect(); if(fp) { /* Warm up the caches */ - test_some(0, NULL, 100); + test_some(gallivm, 0, NULL, 100); write_tsv_header(fp); } if (single) - success = test_single(verbose, fp); + success = test_single(gallivm, verbose, fp); else if (n) - success = test_some(verbose, fp, n); + success = test_some(gallivm, verbose, fp, n); else - success = test_all(verbose, fp); + success = test_all(gallivm, verbose, fp); if(fp) fclose(fp); diff --git a/src/gallium/drivers/llvmpipe/lp_test_printf.c b/src/gallium/drivers/llvmpipe/lp_test_printf.c index 4653f30e39d..620cdb57c13 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_printf.c +++ b/src/gallium/drivers/llvmpipe/lp_test_printf.c @@ -35,11 +35,6 @@ #include "gallivm/lp_bld_assert.h" #include "gallivm/lp_bld_printf.h" -#include <llvm-c/Analysis.h> -#include <llvm-c/ExecutionEngine.h> -#include <llvm-c/Target.h> -#include <llvm-c/Transforms/Scalar.h> - #include "lp_test.h" @@ -63,48 +58,45 @@ typedef void (*test_printf_t)(int i); static LLVMValueRef -add_printf_test(LLVMModuleRef module) +add_printf_test(struct gallivm_state *gallivm) { - LLVMTypeRef args[1] = { LLVMIntType(32) }; - LLVMValueRef func = LLVMAddFunction(module, "test_printf", LLVMFunctionType(LLVMVoidType(), args, 1, 0)); - LLVMBuilderRef builder = LLVMCreateBuilder(); - LLVMBasicBlockRef block = LLVMAppendBasicBlock(func, "entry"); + LLVMModuleRef module = gallivm->module; + LLVMTypeRef args[1] = { LLVMIntTypeInContext(gallivm->context, 32) }; + LLVMValueRef func = LLVMAddFunction(module, "test_printf", 
LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), args, 1, 0)); + LLVMBuilderRef builder = gallivm->builder; + LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(gallivm->context, func, "entry"); LLVMSetFunctionCallConv(func, LLVMCCallConv); LLVMPositionBuilderAtEnd(builder, block); - lp_build_printf(builder, "hello, world\n"); - lp_build_printf(builder, "print 5 6: %d %d\n", LLVMConstInt(LLVMInt32Type(), 5, 0), - LLVMConstInt(LLVMInt32Type(), 6, 0)); + lp_build_printf(gallivm, "hello, world\n"); + lp_build_printf(gallivm, "print 5 6: %d %d\n", LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), 5, 0), + LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), 6, 0)); /* Also test lp_build_assert(). This should not fail. */ - lp_build_assert(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), "assert(1)"); + lp_build_assert(gallivm, LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), 1, 0), "assert(1)"); LLVMBuildRetVoid(builder); - LLVMDisposeBuilder(builder); + return func; } PIPE_ALIGN_STACK static boolean -test_printf(unsigned verbose, FILE *fp, const struct printf_test_case *testcase) +test_printf(struct gallivm_state *gallivm, + unsigned verbose, FILE *fp, + const struct printf_test_case *testcase) { - LLVMModuleRef module = NULL; - LLVMValueRef test = NULL; - LLVMExecutionEngineRef engine = NULL; - LLVMModuleProviderRef provider = NULL; - LLVMPassManagerRef pass = NULL; + LLVMExecutionEngineRef engine = gallivm->engine; + LLVMModuleRef module = gallivm->module; + LLVMValueRef test; char *error = NULL; - test_printf_t test_printf; - float unpacked[4]; - unsigned packed; + test_printf_t test_printf_func; boolean success = TRUE; void *code; - module = LLVMModuleCreateWithName("test"); - - test = add_printf_test(module); + test = add_printf_test(gallivm); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { LLVMDumpModule(module); @@ -112,74 +104,40 @@ test_printf(unsigned verbose, FILE *fp, const struct printf_test_case *testcase) } LLVMDisposeMessage(error); - provider = LLVMCreateModuleProviderForExistingModule(module); -#if 0 - if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { - fprintf(stderr, "%s\n", error); - LLVMDisposeMessage(error); - abort(); - } -#else - (void) provider; - engine = lp_build_engine; -#endif - -#if 0 - pass = LLVMCreatePassManager(); - LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); - /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, - * but there are more on SVN. 
*/ - LLVMAddConstantPropagationPass(pass); - LLVMAddInstructionCombiningPass(pass); - LLVMAddPromoteMemoryToRegisterPass(pass); - LLVMAddGVNPass(pass); - LLVMAddCFGSimplificationPass(pass); - LLVMRunPassManager(pass, module); -#else - (void)pass; -#endif - code = LLVMGetPointerToGlobal(engine, test); - test_printf = (test_printf_t)pointer_to_func(code); - - memset(unpacked, 0, sizeof unpacked); - packed = 0; - + test_printf_func = (test_printf_t) pointer_to_func(code); // LLVMDumpModule(module); - test_printf(0); + test_printf_func(0); LLVMFreeMachineCodeForFunction(engine, test); - LLVMDisposeExecutionEngine(engine); - if(pass) - LLVMDisposePassManager(pass); - return success; } boolean -test_all(unsigned verbose, FILE *fp) +test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) { boolean success = TRUE; - test_printf(verbose, fp, NULL); + test_printf(gallivm, verbose, fp, NULL); return success; } boolean -test_some(unsigned verbose, FILE *fp, unsigned long n) +test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, + unsigned long n) { - return test_all(verbose, fp); + return test_all(gallivm, verbose, fp); } boolean -test_single(unsigned verbose, FILE *fp) +test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) { printf("no test_single()"); return TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c index 816518e5081..4edee4af123 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_round.c +++ b/src/gallium/drivers/llvmpipe/lp_test_round.c @@ -34,11 +34,6 @@ #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_arit.h" -#include <llvm-c/Analysis.h> -#include <llvm-c/ExecutionEngine.h> -#include <llvm-c/Target.h> -#include <llvm-c/Transforms/Scalar.h> - #include "lp_test.h" @@ -64,18 +59,21 @@ typedef LLVMValueRef (*lp_func_t)(struct lp_build_context *, LLVMValueRef); static LLVMValueRef -add_test(LLVMModuleRef module, const char *name, lp_func_t lp_func) +add_test(struct gallivm_state *gallivm, const char *name, lp_func_t lp_func) { - LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatType(), 4); + LLVMModuleRef module = gallivm->module; + LLVMContextRef context = gallivm->context; + LLVMBuilderRef builder = gallivm->builder; + + LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatTypeInContext(context), 4); LLVMTypeRef args[1] = { v4sf }; LLVMValueRef func = LLVMAddFunction(module, name, LLVMFunctionType(v4sf, args, 1, 0)); LLVMValueRef arg1 = LLVMGetParam(func, 0); - LLVMBuilderRef builder = LLVMCreateBuilder(); - LLVMBasicBlockRef block = LLVMAppendBasicBlock(func, "entry"); + LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(context, func, "entry"); LLVMValueRef ret; struct lp_build_context bld; - lp_build_context_init(&bld, builder, lp_float32_vec4_type()); + lp_build_context_init(&bld, gallivm, lp_float32_vec4_type()); LLVMSetFunctionCallConv(func, LLVMCCallConv); @@ -84,7 +82,7 @@ add_test(LLVMModuleRef module, const char *name, lp_func_t lp_func) ret = lp_func(&bld, arg1); LLVMBuildRet(builder, ret); - LLVMDisposeBuilder(builder); + return func; } @@ -117,12 +115,11 @@ compare(v4sf x, v4sf y) PIPE_ALIGN_STACK static boolean -test_round(unsigned verbose, FILE *fp) +test_round(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) { - LLVMModuleRef module = NULL; + LLVMModuleRef module = gallivm->module; LLVMValueRef test_round = NULL, test_trunc, test_floor, test_ceil; - LLVMExecutionEngineRef engine = lp_build_engine; - LLVMPassManagerRef pass = NULL; + LLVMExecutionEngineRef engine = 
gallivm->engine; char *error = NULL; test_round_t round_func, trunc_func, floor_func, ceil_func; float unpacked[4]; @@ -130,12 +127,10 @@ test_round(unsigned verbose, FILE *fp) boolean success = TRUE; int i; - module = LLVMModuleCreateWithName("test"); - - test_round = add_test(module, "round", lp_build_round); - test_trunc = add_test(module, "trunc", lp_build_trunc); - test_floor = add_test(module, "floor", lp_build_floor); - test_ceil = add_test(module, "ceil", lp_build_ceil); + test_round = add_test(gallivm, "round", lp_build_round); + test_trunc = add_test(gallivm, "trunc", lp_build_trunc); + test_floor = add_test(gallivm, "floor", lp_build_floor); + test_ceil = add_test(gallivm, "ceil", lp_build_ceil); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { printf("LLVMVerifyModule: %s\n", error); @@ -144,21 +139,6 @@ test_round(unsigned verbose, FILE *fp) } LLVMDisposeMessage(error); -#if 0 - pass = LLVMCreatePassManager(); - LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); - /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, - * but there are more on SVN. */ - LLVMAddConstantPropagationPass(pass); - LLVMAddInstructionCombiningPass(pass); - LLVMAddPromoteMemoryToRegisterPass(pass); - LLVMAddGVNPass(pass); - LLVMAddCFGSimplificationPass(pass); - LLVMRunPassManager(pass, module); -#else - (void)pass; -#endif - round_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_round)); trunc_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_trunc)); floor_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_floor)); @@ -229,17 +209,13 @@ test_round(unsigned verbose, FILE *fp) LLVMFreeMachineCodeForFunction(engine, test_floor); LLVMFreeMachineCodeForFunction(engine, test_ceil); - LLVMDisposeExecutionEngine(engine); - if(pass) - LLVMDisposePassManager(pass); - return success; } #else /* !PIPE_ARCH_SSE */ static boolean -test_round(unsigned verbose, FILE *fp) +test_round(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) { return TRUE; } @@ -248,20 +224,21 @@ test_round(unsigned verbose, FILE *fp) boolean -test_all(unsigned verbose, FILE *fp) +test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) { - return test_round(verbose, fp); + return test_round(gallivm, verbose, fp); } boolean -test_some(unsigned verbose, FILE *fp, unsigned long n) +test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, + unsigned long n) { - return test_all(verbose, fp); + return test_all(gallivm, verbose, fp); } boolean -test_single(unsigned verbose, FILE *fp) +test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) { printf("no test_single()"); return TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c index 79939b1a393..066d633d443 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_sincos.c +++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c @@ -29,15 +29,11 @@ #include <stdlib.h> #include <stdio.h> +#include "util/u_pointer.h" + #include "gallivm/lp_bld.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_arit.h" -#include "util/u_pointer.h" - -#include <llvm-c/Analysis.h> -#include <llvm-c/ExecutionEngine.h> -#include <llvm-c/Target.h> -#include <llvm-c/Transforms/Scalar.h> #include "lp_test.h" @@ -61,25 +57,25 @@ write_tsv_header(FILE *fp) typedef __m128 (*test_sincos_t)(__m128); static LLVMValueRef -add_sincos_test(LLVMModuleRef module, boolean sin) +add_sincos_test(struct gallivm_state 
*gallivm, LLVMModuleRef module, + LLVMContextRef context, boolean sin) { - LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatType(), 4); + LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatTypeInContext(context), 4); LLVMTypeRef args[1] = { v4sf }; LLVMValueRef func = LLVMAddFunction(module, "sincos", LLVMFunctionType(v4sf, args, 1, 0)); LLVMValueRef arg1 = LLVMGetParam(func, 0); - LLVMBuilderRef builder = LLVMCreateBuilder(); - LLVMBasicBlockRef block = LLVMAppendBasicBlock(func, "entry"); + LLVMBuilderRef builder = gallivm->builder; + LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(context, func, "entry"); LLVMValueRef ret; struct lp_build_context bld; - lp_build_context_init(&bld, builder, lp_float32_vec4_type()); + lp_build_context_init(&bld, gallivm, lp_float32_vec4_type()); LLVMSetFunctionCallConv(func, LLVMCCallConv); LLVMPositionBuilderAtEnd(builder, block); ret = sin ? lp_build_sin(&bld, arg1) : lp_build_cos(&bld, arg1); LLVMBuildRet(builder, ret); - LLVMDisposeBuilder(builder); return func; } @@ -95,22 +91,20 @@ printv(char* string, v4sf value) PIPE_ALIGN_STACK static boolean -test_sincos(unsigned verbose, FILE *fp) +test_sincos(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) { - LLVMModuleRef module = NULL; + LLVMModuleRef module = gallivm->module; LLVMValueRef test_sin = NULL, test_cos = NULL; - LLVMExecutionEngineRef engine = lp_build_engine; - LLVMPassManagerRef pass = NULL; + LLVMExecutionEngineRef engine = gallivm->engine; + LLVMContextRef context = gallivm->context; char *error = NULL; test_sincos_t sin_func; test_sincos_t cos_func; float unpacked[4]; boolean success = TRUE; - module = LLVMModuleCreateWithName("test"); - - test_sin = add_sincos_test(module, TRUE); - test_cos = add_sincos_test(module, FALSE); + test_sin = add_sincos_test(gallivm, module, context, TRUE); + test_cos = add_sincos_test(gallivm, module, context,FALSE); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { printf("LLVMVerifyModule: %s\n", error); @@ -119,21 +113,6 @@ test_sincos(unsigned verbose, FILE *fp) } LLVMDisposeMessage(error); -#if 0 - pass = LLVMCreatePassManager(); - LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); - /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, - * but there are more on SVN. 
*/ - LLVMAddConstantPropagationPass(pass); - LLVMAddInstructionCombiningPass(pass); - LLVMAddPromoteMemoryToRegisterPass(pass); - LLVMAddGVNPass(pass); - LLVMAddCFGSimplificationPass(pass); - LLVMRunPassManager(pass, module); -#else - (void)pass; -#endif - sin_func = (test_sincos_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_sin)); cos_func = (test_sincos_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_cos)); @@ -152,16 +131,13 @@ test_sincos(unsigned verbose, FILE *fp) LLVMFreeMachineCodeForFunction(engine, test_sin); LLVMFreeMachineCodeForFunction(engine, test_cos); - if(pass) - LLVMDisposePassManager(pass); - return success; } #else /* !PIPE_ARCH_SSE */ static boolean -test_sincos(unsigned verbose, FILE *fp) +test_sincos(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) { return TRUE; } @@ -170,24 +146,25 @@ test_sincos(unsigned verbose, FILE *fp) boolean -test_all(unsigned verbose, FILE *fp) +test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) { boolean success = TRUE; - test_sincos(verbose, fp); + test_sincos(gallivm, verbose, fp); return success; } boolean -test_some(unsigned verbose, FILE *fp, unsigned long n) +test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, + unsigned long n) { - return test_all(verbose, fp); + return test_all(gallivm, verbose, fp); } boolean -test_single(unsigned verbose, FILE *fp) +test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) { printf("no test_single()"); return TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c index f417fc8a9ea..ed4282937f8 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c @@ -43,6 +43,7 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_sample.h" #include "gallivm/lp_bld_tgsi.h" @@ -89,7 +90,7 @@ struct lp_llvm_sampler_soa */ static LLVMValueRef lp_llvm_texture_member(const struct lp_sampler_dynamic_state *base, - LLVMBuilderRef builder, + struct gallivm_state *gallivm, unsigned unit, unsigned member_index, const char *member_name, @@ -97,6 +98,7 @@ lp_llvm_texture_member(const struct lp_sampler_dynamic_state *base, { struct llvmpipe_sampler_dynamic_state *state = (struct llvmpipe_sampler_dynamic_state *)base; + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef indices[4]; LLVMValueRef ptr; LLVMValueRef res; @@ -104,13 +106,13 @@ lp_llvm_texture_member(const struct lp_sampler_dynamic_state *base, assert(unit < PIPE_MAX_SAMPLERS); /* context[0] */ - indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[0] = lp_build_const_int32(gallivm, 0); /* context[0].textures */ - indices[1] = LLVMConstInt(LLVMInt32Type(), LP_JIT_CTX_TEXTURES, 0); + indices[1] = lp_build_const_int32(gallivm, LP_JIT_CTX_TEXTURES); /* context[0].textures[unit] */ - indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0); + indices[2] = lp_build_const_int32(gallivm, unit); /* context[0].textures[unit].member */ - indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0); + indices[3] = lp_build_const_int32(gallivm, member_index); ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), ""); @@ -137,10 +139,10 @@ lp_llvm_texture_member(const struct lp_sampler_dynamic_state *base, #define LP_LLVM_TEXTURE_MEMBER(_name, _index, _emit_load) \ static LLVMValueRef \ lp_llvm_texture_##_name( const struct 
lp_sampler_dynamic_state *base, \ - LLVMBuilderRef builder, \ + struct gallivm_state *gallivm, \ unsigned unit) \ { \ - return lp_llvm_texture_member(base, builder, unit, _index, #_name, _emit_load ); \ + return lp_llvm_texture_member(base, gallivm, unit, _index, #_name, _emit_load ); \ } @@ -170,7 +172,7 @@ lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) */ static void lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base, - LLVMBuilderRef builder, + struct gallivm_state *gallivm, struct lp_type type, unsigned unit, unsigned num_coords, @@ -186,11 +188,11 @@ lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base, assert(unit < PIPE_MAX_SAMPLERS); if (LP_PERF & PERF_NO_TEX) { - lp_build_sample_nop(type, texel); + lp_build_sample_nop(gallivm, type, texel); return; } - lp_build_sample_soa(builder, + lp_build_sample_soa(gallivm, &sampler->dynamic_state.static_state[unit], &sampler->dynamic_state.base, type, diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index a4b9f2590af..9753da5e57e 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -49,6 +49,7 @@ #include "lp_tile_image.h" #include "lp_texture.h" #include "lp_setup.h" +#include "lp_state.h" #include "state_tracker/sw_winsys.h" @@ -242,6 +243,7 @@ llvmpipe_resource_create(struct pipe_screen *_screen, /* other data (vertex buffer, const buffer, etc) */ const enum pipe_format format = templat->format; const uint w = templat->width0 / util_format_get_blockheight(format); + /* XXX buffers should only have one dimension, those values should be 1 */ const uint h = templat->height0 / util_format_get_blockwidth(format); const uint d = templat->depth0; const uint bpp = util_format_get_blocksize(format); @@ -329,17 +331,16 @@ llvmpipe_resource_destroy(struct pipe_screen *pscreen, */ void * llvmpipe_resource_map(struct pipe_resource *resource, - unsigned face, - unsigned level, - unsigned zslice, + unsigned level, + unsigned layer, enum lp_texture_usage tex_usage, enum lp_texture_layout layout) { struct llvmpipe_resource *lpr = llvmpipe_resource(resource); uint8_t *map; - assert(face < 6); assert(level < LP_MAX_TEXTURE_LEVELS); + assert(layer < (u_minify(resource->depth0, level) + resource->array_size - 1)); assert(tex_usage == LP_TEX_USAGE_READ || tex_usage == LP_TEX_USAGE_READ_WRITE || @@ -363,9 +364,8 @@ llvmpipe_resource_map(struct pipe_resource *resource, dt_usage = PIPE_TRANSFER_READ_WRITE; } - assert(face == 0); assert(level == 0); - assert(zslice == 0); + assert(layer == 0); /* FIXME: keep map count? 
*/ map = winsys->displaytarget_map(winsys, lpr->dt, dt_usage); @@ -381,15 +381,8 @@ llvmpipe_resource_map(struct pipe_resource *resource, return map2; } else if (resource_is_texture(resource)) { - /* regular texture */ - if (resource->target != PIPE_TEXTURE_CUBE) { - assert(face == 0); - } - if (resource->target != PIPE_TEXTURE_3D) { - assert(zslice == 0); - } - map = llvmpipe_get_texture_image(lpr, face + zslice, level, + map = llvmpipe_get_texture_image(lpr, layer, level, tex_usage, layout); return map; } @@ -404,9 +397,8 @@ llvmpipe_resource_map(struct pipe_resource *resource, */ void llvmpipe_resource_unmap(struct pipe_resource *resource, - unsigned face, unsigned level, - unsigned zslice) + unsigned layer) { struct llvmpipe_resource *lpr = llvmpipe_resource(resource); @@ -415,12 +407,11 @@ llvmpipe_resource_unmap(struct pipe_resource *resource, struct llvmpipe_screen *lp_screen = llvmpipe_screen(resource->screen); struct sw_winsys *winsys = lp_screen->winsys; - assert(face == 0); assert(level == 0); - assert(zslice == 0); + assert(layer == 0); /* make sure linear image is up to date */ - (void) llvmpipe_get_texture_image(lpr, face + zslice, level, + (void) llvmpipe_get_texture_image(lpr, layer, level, LP_TEX_USAGE_READ, LP_TEX_LAYOUT_LINEAR); @@ -520,34 +511,35 @@ llvmpipe_resource_get_handle(struct pipe_screen *screen, static struct pipe_surface * -llvmpipe_get_tex_surface(struct pipe_screen *screen, - struct pipe_resource *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned usage) +llvmpipe_create_surface(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *surf_tmpl) { struct pipe_surface *ps; - assert(level <= pt->last_level); + assert(surf_tmpl->u.tex.level <= pt->last_level); ps = CALLOC_STRUCT(pipe_surface); if (ps) { pipe_reference_init(&ps->reference, 1); pipe_resource_reference(&ps->texture, pt); - ps->format = pt->format; - ps->width = u_minify(pt->width0, level); - ps->height = u_minify(pt->height0, level); - ps->usage = usage; - - ps->face = face; - ps->level = level; - ps->zslice = zslice; + ps->context = pipe; + ps->format = surf_tmpl->format; + ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level); + ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level); + ps->usage = surf_tmpl->usage; + + ps->u.tex.level = surf_tmpl->u.tex.level; + ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer; } return ps; } static void -llvmpipe_tex_surface_destroy(struct pipe_surface *surf) +llvmpipe_surface_destroy(struct pipe_context *pipe, + struct pipe_surface *surf) { /* Effectively do the texture_update work here - if texture images * needed post-processing to put them into hardware layout, this is @@ -561,16 +553,17 @@ llvmpipe_tex_surface_destroy(struct pipe_surface *surf) static struct pipe_transfer * llvmpipe_get_transfer(struct pipe_context *pipe, - struct pipe_resource *resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box) + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box) { + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); struct llvmpipe_resource *lprex = llvmpipe_resource(resource); struct llvmpipe_transfer *lpr; assert(resource); - assert(sr.level <= resource->last_level); + assert(level <= resource->last_level); /* * Transfers, like other pipe operations, must happen in order, so flush the @@ -580,7 +573,8 @@ llvmpipe_get_transfer(struct pipe_context *pipe, boolean read_only = 
!(usage & PIPE_TRANSFER_WRITE); boolean do_not_block = !!(usage & PIPE_TRANSFER_DONTBLOCK); if (!llvmpipe_flush_resource(pipe, resource, - sr.face, sr.level, + level, + box->depth > 1 ? -1 : box->z, 0, /* flush_flags */ read_only, TRUE, /* cpu_access */ @@ -594,14 +588,17 @@ llvmpipe_get_transfer(struct pipe_context *pipe, } } + if (resource == llvmpipe->constants[PIPE_SHADER_FRAGMENT][0]) + llvmpipe->dirty |= LP_NEW_CONSTANTS; + lpr = CALLOC_STRUCT(llvmpipe_transfer); if (lpr) { struct pipe_transfer *pt = &lpr->base; pipe_resource_reference(&pt->resource, resource); pt->box = *box; - pt->sr = sr; - pt->stride = lprex->row_stride[sr.level]; - pt->slice_stride = lprex->img_stride[sr.level]; + pt->level = level; + pt->stride = lprex->row_stride[level]; + pt->layer_stride = lprex->img_stride[level]; pt->usage = usage; return pt; @@ -635,8 +632,7 @@ llvmpipe_transfer_map( struct pipe_context *pipe, enum lp_texture_usage tex_usage; const char *mode; - assert(transfer->sr.face < 6); - assert(transfer->sr.level < LP_MAX_TEXTURE_LEVELS); + assert(transfer->level < LP_MAX_TEXTURE_LEVELS); /* printf("tex_transfer_map(%d, %d %d x %d of %d x %d, usage %d )\n", @@ -666,9 +662,8 @@ llvmpipe_transfer_map( struct pipe_context *pipe, format = lpr->base.format; map = llvmpipe_resource_map(transfer->resource, - transfer->sr.face, - transfer->sr.level, - transfer->box.z, + transfer->level, + transfer->box.z, tex_usage, LP_TEX_LAYOUT_LINEAR); @@ -680,7 +675,7 @@ llvmpipe_transfer_map( struct pipe_context *pipe, */ screen->timestamp++; } - + map += transfer->box.y / util_format_get_blockheight(format) * transfer->stride + transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); @@ -696,21 +691,20 @@ llvmpipe_transfer_unmap(struct pipe_context *pipe, assert(transfer->resource); llvmpipe_resource_unmap(transfer->resource, - transfer->sr.face, - transfer->sr.level, - transfer->box.z); + transfer->level, + transfer->box.z); } static unsigned int llvmpipe_is_resource_referenced( struct pipe_context *pipe, - struct pipe_resource *presource, - unsigned face, unsigned level) + struct pipe_resource *presource, + unsigned level, int layer) { struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); if (presource->target == PIPE_BUFFER) return PIPE_UNREFERENCED; - + return lp_setup_is_resource_referenced(llvmpipe->setup, presource); } @@ -740,6 +734,7 @@ llvmpipe_user_buffer_create(struct pipe_screen *screen, buffer->base.width0 = bytes; buffer->base.height0 = 1; buffer->base.depth0 = 1; + buffer->base.array_size = 1; buffer->userBuffer = TRUE; buffer->data = ptr; @@ -1396,8 +1391,6 @@ llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen) screen->resource_get_handle = llvmpipe_resource_get_handle; screen->user_buffer_create = llvmpipe_user_buffer_create; - screen->get_tex_surface = llvmpipe_get_tex_surface; - screen->tex_surface_destroy = llvmpipe_tex_surface_destroy; } @@ -1412,4 +1405,7 @@ llvmpipe_init_context_resource_funcs(struct pipe_context *pipe) pipe->transfer_flush_region = u_default_transfer_flush_region; pipe->transfer_inline_write = u_default_transfer_inline_write; + + pipe->create_surface = llvmpipe_create_surface; + pipe->surface_destroy = llvmpipe_surface_destroy; } diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index 4e4a65dcb40..b789c0f4090 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -172,17 +172,15 @@ llvmpipe_resource_stride(struct 
pipe_resource *resource, void * llvmpipe_resource_map(struct pipe_resource *resource, - unsigned face_slice, - unsigned level, - unsigned zslice, + unsigned level, + unsigned layer, enum lp_texture_usage tex_usage, enum lp_texture_layout layout); void llvmpipe_resource_unmap(struct pipe_resource *resource, - unsigned face_slice, unsigned level, - unsigned zslice); + unsigned layer); void * diff --git a/src/gallium/drivers/noop/noop_pipe.c b/src/gallium/drivers/noop/noop_pipe.c index fb5cdb46093..3680f4622da 100644 --- a/src/gallium/drivers/noop/noop_pipe.c +++ b/src/gallium/drivers/noop/noop_pipe.c @@ -30,10 +30,16 @@ #include <util/u_inlines.h> #include <util/u_format.h> #include "noop_public.h" -#include "state_tracker/sw_winsys.h" + +DEBUG_GET_ONCE_BOOL_OPTION(noop, "GALLIUM_NOOP", FALSE) void noop_init_state_functions(struct pipe_context *ctx); +struct noop_pipe_screen { + struct pipe_screen pscreen; + struct pipe_screen *oscreen; +}; + /* * query */ @@ -83,7 +89,7 @@ struct noop_resource { static unsigned noop_is_resource_referenced(struct pipe_context *pipe, struct pipe_resource *resource, - unsigned face, unsigned level) + unsigned level, int layer) { return PIPE_UNREFERENCED; } @@ -108,52 +114,29 @@ static struct pipe_resource *noop_resource_create(struct pipe_screen *screen, FREE(nresource); return NULL; } -#if 0 - if (nresource->base.bind & (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) { - struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys; - unsigned stride; - - nresource->dt = winsys->displaytarget_create(winsys, nresource->base.bind, - nresource->base.format, - nresource->base.width0, - nresource->base.height0, - 16, &stride); - } -#endif return &nresource->base; } -static struct pipe_resource *noop_resource_from_handle(struct pipe_screen * screen, +static struct pipe_resource *noop_resource_from_handle(struct pipe_screen *screen, const struct pipe_resource *templ, - struct winsys_handle *whandle) + struct winsys_handle *handle) { - struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys; - struct noop_resource *nresource; - struct sw_displaytarget *dt; - unsigned stride; + struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)screen; + struct pipe_screen *oscreen = noop_screen->oscreen; + struct pipe_resource *result; + struct pipe_resource *noop_resource; - dt = winsys->displaytarget_from_handle(winsys, templ, whandle, &stride); - if (dt == NULL) { - return NULL; - } - nresource = (struct noop_resource *)noop_resource_create(screen, templ); - nresource->dt = dt; - return &nresource->base; + result = oscreen->resource_from_handle(oscreen, templ, handle); + noop_resource = noop_resource_create(screen, result); + pipe_resource_reference(&result, NULL); + return noop_resource; } static boolean noop_resource_get_handle(struct pipe_screen *screen, struct pipe_resource *resource, struct winsys_handle *handle) { - struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys; - struct noop_resource *nresource = (struct noop_resource *)resource; - - if (nresource->dt == NULL) - return FALSE; - - return winsys->displaytarget_get_handle(winsys, nresource->dt, handle); + return FALSE; } static void noop_resource_destroy(struct pipe_screen *screen, @@ -161,11 +144,6 @@ static void noop_resource_destroy(struct pipe_screen *screen, { struct noop_resource *nresource = (struct noop_resource *)resource; - if (nresource->dt) { - /* display target */ - struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys; - 
winsys->displaytarget_destroy(winsys, nresource->dt); - } free(nresource->data); FREE(resource); } @@ -193,7 +171,7 @@ static struct pipe_resource *noop_user_buffer_create(struct pipe_screen *screen, */ static struct pipe_transfer *noop_get_transfer(struct pipe_context *context, struct pipe_resource *resource, - struct pipe_subresource sr, + unsigned level, enum pipe_transfer_usage usage, const struct pipe_box *box) { @@ -203,11 +181,11 @@ static struct pipe_transfer *noop_get_transfer(struct pipe_context *context, if (transfer == NULL) return NULL; pipe_resource_reference(&transfer->resource, resource); - transfer->sr = sr; + transfer->level = level; transfer->usage = usage; transfer->box = *box; transfer->stride = 1; - transfer->slice_stride = 1; + transfer->layer_stride = 1; return transfer; } @@ -239,12 +217,12 @@ static void noop_transfer_destroy(struct pipe_context *pipe, static void noop_transfer_inline_write(struct pipe_context *pipe, struct pipe_resource *resource, - struct pipe_subresource sr, + unsigned level, unsigned usage, const struct pipe_box *box, const void *data, unsigned stride, - unsigned slice_stride) + unsigned layer_stride) { } @@ -277,12 +255,11 @@ static void noop_clear_depth_stencil(struct pipe_context *ctx, static void noop_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, - struct pipe_subresource subdst, + unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, struct pipe_resource *src, - struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height) + unsigned src_level, + const struct pipe_box *src_box) { } @@ -332,46 +309,14 @@ static struct pipe_context *noop_create_context(struct pipe_screen *screen, void return ctx; } -/* - * texture - */ -static struct pipe_surface *noop_get_tex_surface(struct pipe_screen *screen, - struct pipe_resource *texture, - unsigned face, unsigned level, - unsigned zslice, unsigned flags) -{ - struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); - - if (surface == NULL) - return NULL; - pipe_reference_init(&surface->reference, 1); - pipe_resource_reference(&surface->texture, texture); - surface->format = texture->format; - surface->width = texture->width0; - surface->height = texture->height0; - surface->offset = 0; - surface->usage = flags; - surface->zslice = zslice; - surface->texture = texture; - surface->face = face; - surface->level = level; - - return surface; -} - -static void noop_tex_surface_destroy(struct pipe_surface *surface) -{ - pipe_resource_reference(&surface->texture, NULL); - FREE(surface); -} - /* * pipe_screen */ static void noop_flush_frontbuffer(struct pipe_screen *_screen, - struct pipe_surface *surface, - void *context_private) + struct pipe_resource *resource, + unsigned level, unsigned layer, + void *context_private) { } @@ -516,19 +461,30 @@ static boolean noop_is_format_supported(struct pipe_screen* screen, static void noop_destroy_screen(struct pipe_screen *screen) { + struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)screen; + struct pipe_screen *oscreen = noop_screen->oscreen; + + oscreen->destroy(oscreen); FREE(screen); } -struct pipe_screen *noop_screen_create(struct sw_winsys *winsys) +struct pipe_screen *noop_screen_create(struct pipe_screen *oscreen) { + struct noop_pipe_screen *noop_screen; struct pipe_screen *screen; - screen = CALLOC_STRUCT(pipe_screen); - if (screen == NULL) { + if (!debug_get_option_noop()) { + return oscreen; + } + + noop_screen = CALLOC_STRUCT(noop_pipe_screen); + 
if (noop_screen == NULL) { return NULL; } + noop_screen->oscreen = oscreen; + screen = &noop_screen->pscreen; - screen->winsys = (struct pipe_winsys*)winsys; + screen->winsys = oscreen->winsys; screen->destroy = noop_destroy_screen; screen->get_name = noop_get_name; screen->get_vendor = noop_get_vendor; @@ -537,8 +493,6 @@ struct pipe_screen *noop_screen_create(struct sw_winsys *winsys) screen->get_paramf = noop_get_paramf; screen->is_format_supported = noop_is_format_supported; screen->context_create = noop_create_context; - screen->get_tex_surface = noop_get_tex_surface; - screen->tex_surface_destroy = noop_tex_surface_destroy; screen->resource_create = noop_resource_create; screen->resource_from_handle = noop_resource_from_handle; screen->resource_get_handle = noop_resource_get_handle; diff --git a/src/gallium/drivers/noop/noop_public.h b/src/gallium/drivers/noop/noop_public.h index 8ce82bec698..180ea597fab 100644 --- a/src/gallium/drivers/noop/noop_public.h +++ b/src/gallium/drivers/noop/noop_public.h @@ -23,8 +23,7 @@ #ifndef NOOP_PUBLIC_H #define NOOP_PUBLIC_H -struct sw_winsys; - -struct pipe_screen *noop_screen_create(struct sw_winsys *winsys); +struct pipe_screen; +struct pipe_screen *noop_screen_create(struct pipe_screen *screen); #endif diff --git a/src/gallium/drivers/noop/noop_state.c b/src/gallium/drivers/noop/noop_state.c index 048ed42a9b6..ad324774c03 100644 --- a/src/gallium/drivers/noop/noop_state.c +++ b/src/gallium/drivers/noop/noop_state.c @@ -101,6 +101,28 @@ static struct pipe_sampler_view *noop_create_sampler_view(struct pipe_context *c return sampler_view; } +static struct pipe_surface *noop_create_surface(struct pipe_context *ctx, + struct pipe_resource *texture, + const struct pipe_surface *surf_tmpl) +{ + struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); + + if (surface == NULL) + return NULL; + pipe_reference_init(&surface->reference, 1); + pipe_resource_reference(&surface->texture, texture); + surface->context = ctx; + surface->format = surf_tmpl->format; + surface->width = texture->width0; + surface->height = texture->height0; + surface->usage = surf_tmpl->usage; + surface->texture = texture; + surface->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + surface->u.tex.last_layer = surf_tmpl->u.tex.last_layer; + surface->u.tex.level = surf_tmpl->u.tex.level; + + return surface; +} static void noop_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, struct pipe_sampler_view **views) { @@ -163,6 +185,14 @@ static void noop_sampler_view_destroy(struct pipe_context *ctx, FREE(state); } + +static void noop_surface_destroy(struct pipe_context *ctx, + struct pipe_surface *surface) +{ + pipe_resource_reference(&surface->texture, NULL); + FREE(surface); +} + static void noop_bind_state(struct pipe_context *ctx, void *state) { } @@ -213,6 +243,8 @@ static void *noop_create_shader_state(struct pipe_context *ctx, return nstate; } +void noop_init_state_functions(struct pipe_context *ctx); + void noop_init_state_functions(struct pipe_context *ctx) { ctx->create_blend_state = noop_create_blend_state; @@ -221,6 +253,7 @@ void noop_init_state_functions(struct pipe_context *ctx) ctx->create_rasterizer_state = noop_create_rs_state; ctx->create_sampler_state = noop_create_sampler_state; ctx->create_sampler_view = noop_create_sampler_view; + ctx->create_surface = noop_create_surface; ctx->create_vertex_elements_state = noop_create_vertex_elements; ctx->create_vs_state = noop_create_shader_state; ctx->bind_blend_state = noop_bind_state; @@ -252,5 +285,6 @@ void 
noop_init_state_functions(struct pipe_context *ctx) ctx->set_vertex_sampler_views = noop_set_vs_sampler_view; ctx->set_viewport_state = noop_set_viewport_state; ctx->sampler_view_destroy = noop_sampler_view_destroy; + ctx->surface_destroy = noop_surface_destroy; ctx->draw_vbo = noop_draw_vbo; } diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index 8c290273fb4..1f4e5171c01 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -66,7 +66,7 @@ void nouveau_screen_fini(struct nouveau_screen *); - +#ifndef NOUVEAU_NVC0 static INLINE unsigned RING_3D(unsigned mthd, unsigned size) { @@ -78,5 +78,6 @@ RING_3D_NI(unsigned mthd, unsigned size) { return 0x40000000 | (7 << 13) | (size << 18) | mthd; } +#endif #endif diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index ab480cabd09..8dfb84a596f 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -10,7 +10,9 @@ #include "nouveau/nouveau_grobj.h" #include "nouveau/nouveau_notifier.h" #include "nouveau/nouveau_resource.h" -#include "nouveau/nouveau_pushbuf.h" +#ifndef NOUVEAU_NVC0 +#include "nouveau/nv04_pushbuf.h" +#endif #ifndef NV04_PFIFO_MAX_PACKET_LEN #define NV04_PFIFO_MAX_PACKET_LEN 2047 @@ -41,4 +43,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *); extern struct pipe_screen * nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *); +extern struct pipe_screen * +nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *); + #endif diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h index cb7653c3fe2..a5b0d0478c8 100644 --- a/src/gallium/drivers/nouveau/nv_object.xml.h +++ b/src/gallium/drivers/nouveau/nv_object.xml.h @@ -8,12 +8,10 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- nv30-40_3d.xml ( 31709 bytes, from 2010-09-05 07:53:14) -- copyright.xml ( 6503 bytes, from 2010-04-10 23:15:50) -- nv_3ddefs.xml ( 15193 bytes, from 2010-09-05 07:50:15) -- nv_defs.xml ( 4437 bytes, from 2010-08-05 19:38:53) -- nv_object.xml ( 10424 bytes, from 2010-08-05 19:38:53) -- nvchipsets.xml ( 2824 bytes, from 2010-08-05 19:38:53) +- nv_object.xml ( 11547 bytes, from 2010-10-24 15:29:34) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) +- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) Copyright (C) 2006-2010 by the following authors: - Artur Huillet <[email protected]> (ahuillet) @@ -37,7 +35,7 @@ Copyright (C) 2006-2010 by the following authors: - Mark Carey <[email protected]> (careym) - Matthieu Castet <[email protected]> (mat-c) - nvidiaman <[email protected]> (nvidiaman) -- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Patrice Mandin <[email protected]> (pmandin, pmdata) - Pekka Paalanen <[email protected]> (pq, ppaalanen) - Peter Popov <[email protected]> (ironpeter) - Richard Hughes <[email protected]> (hughsient) @@ -180,6 +178,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NV50_COMPUTE 0x000050c0 #define NVA3_COMPUTE 0x000085c0 #define NVC0_COMPUTE 0x000090c0 +#define NV84_CRYPT 0x000074c1 #define NV01_SUBCHAN__SIZE 0x00002000 #define NV01_SUBCHAN 0x00000000 @@ -194,9 +193,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV84_SUBCHAN_QUERY_GET 0x0000001c -#define NV84_SUBCHAN_UNK20 0x00000020 +#define NV84_SUBCHAN_QUERY_INTR 0x00000020 -#define NV84_SUBCHAN_UNK24 0x00000024 +#define NV84_SUBCHAN_WRCACHE_FLUSH 0x00000024 #define NV10_SUBCHAN_REF_CNT 0x00000050 @@ -209,7 +208,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV11_SUBCHAN_SEMAPHORE_RELEASE 0x0000006c -#define NV50_SUBCHAN_UNK80 0x00000080 +#define NV40_SUBCHAN_YIELD 0x00000080 #define NV01_GRAPH 0x00000000 @@ -227,5 +226,43 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV40_GRAPH_PM_TRIGGER 0x00000140 +#define NVC0_SUBCHAN__SIZE 0x00008000 +#define NVC0_SUBCHAN 0x00000000 + +#define NVC0_SUBCHAN_OBJECT 0x00000000 + + +#define NVC0_SUBCHAN_QUERY_ADDRESS_HIGH 0x00000010 + +#define NVC0_SUBCHAN_QUERY_ADDRESS_LOW 0x00000014 + +#define NVC0_SUBCHAN_QUERY_SEQUENCE 0x00000018 + +#define NVC0_SUBCHAN_QUERY_GET 0x0000001c + +#define NVC0_SUBCHAN_REF_CNT 0x00000050 + +#define NVC0_GRAPH 0x00000000 + +#define NVC0_GRAPH_NOP 0x00000100 + +#define NVC0_GRAPH_NOTIFY_ADDRESS_HIGH 0x00000104 + +#define NVC0_GRAPH_NOTIFY_ADDRESS_LOW 0x00000108 + +#define NVC0_GRAPH_NOTIFY 0x0000010c +#define NVC0_GRAPH_NOTIFY_WRITE 0x00000000 +#define NVC0_GRAPH_NOTIFY_WRITE_AND_AWAKEN 0x00000001 + +#define NVC0_GRAPH_SERIALIZE 0x00000110 + +#define NVC0_GRAPH_MACRO_UPLOAD_POS 0x00000114 + +#define NVC0_GRAPH_MACRO_UPLOAD_DATA 0x00000118 + +#define NVC0_GRAPH_MACRO_ID 0x0000011c + +#define NVC0_GRAPH_MACRO_POS 0x00000120 + #endif /* NV_OBJECT_XML */ diff --git a/src/gallium/drivers/nv50/nv50_buffer.c b/src/gallium/drivers/nv50/nv50_buffer.c index dacfee9799c..45356f9f637 100644 --- a/src/gallium/drivers/nv50/nv50_buffer.c +++ b/src/gallium/drivers/nv50/nv50_buffer.c @@ -136,6 +136,7 @@ nv50_user_buffer_create(struct pipe_screen *pscreen, buffer->base.width0 = bytes; buffer->base.height0 = 1; buffer->base.depth0 = 1; + buffer->base.array_size = 1; buffer->bo = nouveau_screen_bo_user(pscreen, ptr, bytes); if (!buffer->bo) diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 0874cb5e4ea..4f976161760 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -49,6 +49,10 @@ nv50_destroy(struct pipe_context *pipe) struct nv50_context *nv50 = nv50_context(pipe); int i; + for (i = 0; i < nv50->vtxbuf_nr; i++) { + pipe_resource_reference(&nv50->vtxbuf[i].buffer, NULL); + } + for (i = 0; i < 64; i++) { if (!nv50->state.hw[i]) continue; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index bf6a577188b..b2b0b72fe26 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -108,6 +108,7 @@ get_tile_depth(uint32_t tile_mode) struct nv50_surface { struct pipe_surface base; + unsigned offset; }; static INLINE struct nv50_surface * diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index dd0e8fd41b1..309b6503ca5 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -276,46 +276,53 @@ nv50_miptree_from_handle(struct pipe_screen *pscreen, */ struct pipe_surface * 
-nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned flags) +nv50_miptree_surface_new(struct pipe_context *pipe, struct pipe_resource *pt, + const struct pipe_surface *surf_tmpl) { + unsigned level = surf_tmpl->u.tex.level; struct nv50_miptree *mt = nv50_miptree(pt); struct nv50_miptree_level *lvl = &mt->level[level]; - struct pipe_surface *ps; - unsigned img = 0; + struct nv50_surface *ns; + unsigned img = 0, zslice = 0; + assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); + + /* XXX can't unify these here? */ if (pt->target == PIPE_TEXTURE_CUBE) - img = face; + img = surf_tmpl->u.tex.first_layer; + else if (pt->target == PIPE_TEXTURE_3D) + zslice = surf_tmpl->u.tex.first_layer; - ps = CALLOC_STRUCT(pipe_surface); - if (!ps) + ns = CALLOC_STRUCT(nv50_surface); + if (!ns) return NULL; - pipe_resource_reference(&ps->texture, pt); - ps->format = pt->format; - ps->width = u_minify(pt->width0, level); - ps->height = u_minify(pt->height0, level); - ps->usage = flags; - pipe_reference_init(&ps->reference, 1); - ps->face = face; - ps->level = level; - ps->zslice = zslice; - ps->offset = lvl->image_offset[img]; + pipe_resource_reference(&ns->base.texture, pt); + ns->base.context = pipe; + ns->base.format = pt->format; + ns->base.width = u_minify(pt->width0, level); + ns->base.height = u_minify(pt->height0, level); + ns->base.usage = surf_tmpl->usage; + pipe_reference_init(&ns->base.reference, 1); + ns->base.u.tex.level = level; + ns->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer; + ns->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; + ns->offset = lvl->image_offset[img]; if (pt->target == PIPE_TEXTURE_3D) { - unsigned nb_h = util_format_get_nblocksy(pt->format, ps->height); - ps->offset += get_zslice_offset(lvl->tile_mode, zslice, + unsigned nb_h = util_format_get_nblocksy(pt->format, ns->base.height); + ns->offset += get_zslice_offset(lvl->tile_mode, zslice, lvl->pitch, nb_h); } - return ps; + return &ns->base; } void -nv50_miptree_surface_del(struct pipe_surface *ps) +nv50_miptree_surface_del(struct pipe_context *pipe, + struct pipe_surface *ps) { struct nv50_surface *s = nv50_surface(ps); - pipe_resource_reference(&ps->texture, NULL); + pipe_resource_reference(&s->base.texture, NULL); FREE(s); } diff --git a/src/gallium/drivers/nv50/nv50_resource.c b/src/gallium/drivers/nv50/nv50_resource.c index cfdb60418b5..6c0a9696355 100644 --- a/src/gallium/drivers/nv50/nv50_resource.c +++ b/src/gallium/drivers/nv50/nv50_resource.c @@ -15,7 +15,7 @@ static unsigned int nv50_resource_is_referenced(struct pipe_context *pipe, struct pipe_resource *resource, - unsigned face, unsigned level) + unsigned level, int layer) { return nouveau_reference_flags(nv50_resource(resource)->bo); } @@ -51,6 +51,9 @@ nv50_init_resource_functions(struct pipe_context *pcontext) pcontext->transfer_destroy = u_transfer_destroy_vtbl; pcontext->transfer_inline_write = u_transfer_inline_write_vtbl; pcontext->is_resource_referenced = nv50_resource_is_referenced; + + pcontext->create_surface = nv50_miptree_surface_new; + pcontext->surface_destroy = nv50_miptree_surface_del; } void @@ -61,7 +64,4 @@ nv50_screen_init_resource_functions(struct pipe_screen *pscreen) pscreen->resource_get_handle = u_resource_get_handle_vtbl; pscreen->resource_destroy = u_resource_destroy_vtbl; pscreen->user_buffer_create = nv50_user_buffer_create; - - pscreen->get_tex_surface = nv50_miptree_surface_new; - pscreen->tex_surface_destroy = 
nv50_miptree_surface_del; } diff --git a/src/gallium/drivers/nv50/nv50_resource.h b/src/gallium/drivers/nv50/nv50_resource.h index f435a5892e5..4b2a75e11ad 100644 --- a/src/gallium/drivers/nv50/nv50_resource.h +++ b/src/gallium/drivers/nv50/nv50_resource.h @@ -87,12 +87,11 @@ nv50_user_buffer_create(struct pipe_screen *screen, struct pipe_surface * -nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned flags); +nv50_miptree_surface_new(struct pipe_context *pipe, struct pipe_resource *pt, + const struct pipe_surface *surf_tmpl); void -nv50_miptree_surface_del(struct pipe_surface *ps); +nv50_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps); #endif diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 49522b74d5b..edc3d54d012 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -127,6 +127,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 0; case PIPE_CAP_DEPTH_CLAMP: return 1; + case PIPE_CAP_SHADER_STENCIL_EXPORT: + return 0; + case PIPE_CAP_PRIMITIVE_RESTART: + return 0; default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0; diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c index 306aa81d985..1c1b66deb3c 100644 --- a/src/gallium/drivers/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nv50/nv50_shader_state.c @@ -71,7 +71,7 @@ nv50_transfer_constbuf(struct nv50_context *nv50, map += nr; } - pipe_buffer_unmap(pipe, buf, transfer); + pipe_buffer_unmap(pipe, transfer); } static void diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index f42fa2d4d2b..b4eda0f617d 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -721,17 +721,16 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, struct nv50_context *nv50 = nv50_context(pipe); if (shader == PIPE_SHADER_VERTEX) { - nv50->constbuf[PIPE_SHADER_VERTEX] = buf; nv50->dirty |= NV50_NEW_VERTPROG_CB; } else if (shader == PIPE_SHADER_FRAGMENT) { - nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf; nv50->dirty |= NV50_NEW_FRAGPROG_CB; - } else - if (shader == PIPE_SHADER_GEOMETRY) { - nv50->constbuf[PIPE_SHADER_GEOMETRY] = buf; + } else { + assert(shader == PIPE_SHADER_GEOMETRY); nv50->dirty |= NV50_NEW_GEOMPROG_CB; } + + pipe_resource_reference(&nv50->constbuf[shader], buf); } static void @@ -780,8 +779,9 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count, { struct nv50_context *nv50 = nv50_context(pipe); - memcpy(nv50->vtxbuf, vb, sizeof(*vb) * count); - nv50->vtxbuf_nr = count; + util_copy_vertex_buffers(nv50->vtxbuf, + &nv50->vtxbuf_nr, + vb, count); nv50->dirty |= NV50_NEW_ARRAYS; } diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 16c2dab9af6..ae02143e352 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -63,13 +63,13 @@ validate_fb(struct nv50_context *nv50) so_data (so, fb->cbufs[i]->height); so_method(so, tesla, NV50TCL_RT_ADDRESS_HIGH(i), 5); - so_reloc (so, bo, fb->cbufs[i]->offset, NOUVEAU_BO_VRAM | + so_reloc (so, bo, ((struct nv50_surface *)fb->cbufs[i])->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH | NOUVEAU_BO_RDWR, 0, 0); - so_reloc (so, bo, fb->cbufs[i]->offset, NOUVEAU_BO_VRAM | + 
so_reloc (so, bo, ((struct nv50_surface *)fb->cbufs[i])->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0); so_data (so, nv50_format_table[fb->cbufs[i]->format].rt); so_data (so, nv50_miptree(pt)-> - level[fb->cbufs[i]->level].tile_mode << 4); + level[fb->cbufs[i]->u.tex.level].tile_mode << 4); so_data(so, 0x00000000); so_method(so, tesla, NV50TCL_RT_ARRAY_MODE, 1); @@ -92,13 +92,13 @@ validate_fb(struct nv50_context *nv50) assert(nv50_format_table[fb->zsbuf->format].rt); so_method(so, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5); - so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM | + so_reloc (so, bo, ((struct nv50_surface *)(fb->zsbuf))->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH | NOUVEAU_BO_RDWR, 0, 0); - so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM | + so_reloc (so, bo, ((struct nv50_surface *)(fb->zsbuf))->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0); so_data (so, nv50_format_table[fb->zsbuf->format].rt); so_data (so, nv50_miptree(pt)-> - level[fb->zsbuf->level].tile_mode << 4); + level[fb->zsbuf->u.tex.level].tile_mode << 4); so_data (so, 0x00000000); so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1); diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index f70c138fe1a..a99df76cee3 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -22,7 +22,7 @@ #define __NOUVEAU_PUSH_H__ #include <stdint.h> -#include "nouveau/nouveau_pushbuf.h" +#include "nouveau/nv04_pushbuf.h" #include "nv50_context.h" #include "nv50_resource.h" #include "pipe/p_defines.h" @@ -97,23 +97,23 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) OUT_RING (chan, format); OUT_RING (chan, 1); BEGIN_RING(chan, eng2d, mthd + 0x14, 5); - OUT_RING (chan, mt->level[ps->level].pitch); + OUT_RING (chan, mt->level[ps->u.tex.level].pitch); OUT_RING (chan, ps->width); OUT_RING (chan, ps->height); - OUT_RELOCh(chan, bo, ps->offset, flags); - OUT_RELOCl(chan, bo, ps->offset, flags); + OUT_RELOCh(chan, bo, ((struct nv50_surface *)ps)->offset, flags); + OUT_RELOCl(chan, bo, ((struct nv50_surface *)ps)->offset, flags); } else { BEGIN_RING(chan, eng2d, mthd, 5); OUT_RING (chan, format); OUT_RING (chan, 0); - OUT_RING (chan, mt->level[ps->level].tile_mode << 4); + OUT_RING (chan, mt->level[ps->u.tex.level].tile_mode << 4); OUT_RING (chan, 1); OUT_RING (chan, 0); BEGIN_RING(chan, eng2d, mthd + 0x18, 4); OUT_RING (chan, ps->width); OUT_RING (chan, ps->height); - OUT_RELOCh(chan, bo, ps->offset, flags); - OUT_RELOCl(chan, bo, ps->offset, flags); + OUT_RELOCh(chan, bo, ((struct nv50_surface *)ps)->offset, flags); + OUT_RELOCl(chan, bo, ((struct nv50_surface *)ps)->offset, flags); } #if 0 @@ -173,30 +173,41 @@ nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst, static void nv50_surface_copy(struct pipe_context *pipe, - struct pipe_resource *dest, struct pipe_subresource subdst, + struct pipe_resource *dest, unsigned dst_level, unsigned destx, unsigned desty, unsigned destz, - struct pipe_resource *src, struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height) + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) { struct nv50_context *nv50 = nv50_context(pipe); struct nv50_screen *screen = nv50->screen; - struct pipe_surface *ps_dst, *ps_src; + struct pipe_surface *ps_dst, *ps_src, surf_tmpl; + assert((src->format == dest->format) || (nv50_2d_format_faithful(src->format) && 
nv50_2d_format_faithful(dest->format))); - - ps_src = nv50_miptree_surface_new(pipe->screen, src, subsrc.face, - subsrc.level, srcz, 0 /* bind flags */); - ps_dst = nv50_miptree_surface_new(pipe->screen, dest, subdst.face, - subdst.level, destz, 0 /* bindflags */); - - nv50_surface_do_copy(screen, ps_dst, destx, desty, ps_src, srcx, - srcy, width, height); - - nv50_miptree_surface_del(ps_src); - nv50_miptree_surface_del(ps_dst); + assert(src_box->depth == 1); + + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + surf_tmpl.format = src->format; + surf_tmpl.usage = 0; /* no bind flag - not a surface */ + surf_tmpl.u.tex.level = src_level; + surf_tmpl.u.tex.first_layer = src_box->z; + surf_tmpl.u.tex.last_layer = src_box->z; + /* XXX really need surfaces here? */ + ps_src = nv50_miptree_surface_new(pipe, src, &surf_tmpl); + surf_tmpl.format = dest->format; + surf_tmpl.usage = 0; /* no bind flag - not a surface */ + surf_tmpl.u.tex.level = dst_level; + surf_tmpl.u.tex.first_layer = destz; + surf_tmpl.u.tex.last_layer = destz; + ps_dst = nv50_miptree_surface_new(pipe, dest, &surf_tmpl); + + nv50_surface_do_copy(screen, ps_dst, destx, desty, ps_src, src_box->x, + src_box->y, src_box->width, src_box->height); + + nv50_miptree_surface_del(pipe, ps_src); + nv50_miptree_surface_del(pipe, ps_dst); } static void @@ -225,10 +236,10 @@ nv50_clear_render_target(struct pipe_context *pipe, BEGIN_RING(chan, tesla, NV50TCL_RT_CONTROL, 1); OUT_RING (chan, 1); BEGIN_RING(chan, tesla, NV50TCL_RT_ADDRESS_HIGH(0), 5); - OUT_RELOCh(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCh(chan, bo, ((struct nv50_surface *)dst)->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, bo, ((struct nv50_surface *)dst)->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); OUT_RING (chan, nv50_format_table[dst->format].rt); - OUT_RING (chan, mt->level[dst->level].tile_mode << 4); + OUT_RING (chan, mt->level[dst->u.tex.level].tile_mode << 4); OUT_RING (chan, 0); BEGIN_RING(chan, tesla, NV50TCL_RT_HORIZ(0), 2); OUT_RING (chan, dst->width); @@ -281,10 +292,10 @@ nv50_clear_depth_stencil(struct pipe_context *pipe, return; BEGIN_RING(chan, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5); - OUT_RELOCh(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCh(chan, bo, ((struct nv50_surface *)dst)->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, bo, ((struct nv50_surface *)dst)->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); OUT_RING (chan, nv50_format_table[dst->format].rt); - OUT_RING (chan, mt->level[dst->level].tile_mode << 4); + OUT_RING (chan, mt->level[dst->u.tex.level].tile_mode << 4); OUT_RING (chan, 0); BEGIN_RING(chan, tesla, NV50TCL_ZETA_ENABLE, 1); OUT_RING (chan, 1); diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index 658324ec5be..9243f9edced 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -106,7 +106,7 @@ nv50_tex_construct(struct nv50_sampler_view *view) tic[6] = 0x03000000; - tic[7] = (view->pipe.last_level << 4) | view->pipe.first_level; + tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level; return TRUE; } diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index 0cc2f4a837f..bf5af4ddc65 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -126,20 +126,23 @@ 
nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct pipe_transfer * nv50_miptree_transfer_new(struct pipe_context *pcontext, struct pipe_resource *pt, - struct pipe_subresource sr, + unsigned level, unsigned usage, const struct pipe_box *box) { struct pipe_screen *pscreen = pcontext->screen; struct nouveau_device *dev = nouveau_screen(pscreen)->device; struct nv50_miptree *mt = nv50_miptree(pt); - struct nv50_miptree_level *lvl = &mt->level[sr.level]; + struct nv50_miptree_level *lvl = &mt->level[level]; struct nv50_transfer *tx; - unsigned nx, ny, image = 0; + unsigned nx, ny, image = 0, boxz = 0; int ret; + /* XXX can't unify these here? */ if (pt->target == PIPE_TEXTURE_CUBE) - image = sr.face; + image = box->z; + else if (pt->target == PIPE_TEXTURE_3D) + boxz = box->z; tx = CALLOC_STRUCT(nv50_transfer); if (!tx) @@ -151,21 +154,21 @@ nv50_miptree_transfer_new(struct pipe_context *pcontext, pipe_resource_reference(&tx->base.resource, pt); - tx->base.sr = sr; + tx->base.level = level; tx->base.usage = usage; tx->base.box = *box; - tx->nblocksx = util_format_get_nblocksx(pt->format, u_minify(pt->width0, sr.level)); - tx->nblocksy = util_format_get_nblocksy(pt->format, u_minify(pt->height0, sr.level)); + tx->nblocksx = util_format_get_nblocksx(pt->format, u_minify(pt->width0, level)); + tx->nblocksy = util_format_get_nblocksy(pt->format, u_minify(pt->height0, level)); tx->base.stride = tx->nblocksx * util_format_get_blocksize(pt->format); tx->base.usage = usage; tx->level_pitch = lvl->pitch; - tx->level_width = u_minify(mt->base.base.width0, sr.level); - tx->level_height = u_minify(mt->base.base.height0, sr.level); - tx->level_depth = u_minify(mt->base.base.depth0, sr.level); + tx->level_width = u_minify(mt->base.base.width0, level); + tx->level_height = u_minify(mt->base.base.height0, level); + tx->level_depth = u_minify(mt->base.base.depth0, level); tx->level_offset = lvl->image_offset[image]; tx->level_tiling = lvl->tile_mode; - tx->level_z = box->z; + tx->level_z = boxz; tx->level_x = util_format_get_nblocksx(pt->format, box->x); tx->level_y = util_format_get_nblocksy(pt->format, box->y); ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, @@ -181,7 +184,7 @@ nv50_miptree_transfer_new(struct pipe_context *pcontext, nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset, tx->level_pitch, tx->level_tiling, - box->x, box->y, box->z, + box->x, box->y, boxz, tx->nblocksx, tx->nblocksy, tx->level_depth, tx->bo, 0, diff --git a/src/gallium/drivers/nv50/nv50_transfer.h b/src/gallium/drivers/nv50/nv50_transfer.h index 663503547cb..6699bf546ea 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.h +++ b/src/gallium/drivers/nv50/nv50_transfer.h @@ -8,7 +8,7 @@ struct pipe_transfer * nv50_miptree_transfer_new(struct pipe_context *pcontext, struct pipe_resource *pt, - struct pipe_subresource sr, + unsigned level, unsigned usage, const struct pipe_box *box); void diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index d41a59d05db..53f319acf46 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -284,7 +284,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe, nzi = TRUE; } - pipe_buffer_unmap(pipe, indexBuffer, transfer); + pipe_buffer_unmap(pipe, transfer); } static void diff --git a/src/gallium/drivers/nvc0/Makefile b/src/gallium/drivers/nvc0/Makefile new file mode 100644 index 00000000000..da8f9a2ab4d --- /dev/null +++ b/src/gallium/drivers/nvc0/Makefile @@ -0,0 +1,34 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = nvc0 + +C_SOURCES = \ + nvc0_buffer.c \ + nvc0_context.c \ + nvc0_draw.c \ + nvc0_formats.c \ + nvc0_miptree.c \ + nvc0_resource.c \ + nvc0_screen.c \ + nvc0_state.c \ + nvc0_state_validate.c \ + nvc0_surface.c \ + nvc0_tex.c \ + nvc0_transfer.c \ + nvc0_vbo.c \ + nvc0_program.c \ + nvc0_shader_state.c \ + nvc0_pc.c \ + nvc0_pc_print.c \ + nvc0_pc_emit.c \ + nvc0_tgsi_to_nc.c \ + nvc0_pc_optimize.c \ + nvc0_pc_regalloc.c \ + nvc0_push.c \ + nvc0_push2.c \ + nvc0_fence.c \ + nvc0_mm.c \ + nvc0_query.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/nvc0/SConscript b/src/gallium/drivers/nvc0/SConscript new file mode 100644 index 00000000000..c49e0ddbc52 --- /dev/null +++ b/src/gallium/drivers/nvc0/SConscript @@ -0,0 +1,36 @@ +Import('*') + +env = env.Clone() + +nvc0 = env.ConvenienceLibrary( + target = 'nvc0', + source = [ + 'nvc0_buffer.c', + 'nvc0_context.c', + 'nvc0_draw.c', + 'nvc0_formats.c', + 'nvc0_miptree.c', + 'nvc0_resource.c', + 'nvc0_screen.c', + 'nvc0_state.c', + 'nvc0_state_validate.c', + 'nvc0_surface.c', + 'nvc0_tex.c', + 'nvc0_transfer.c', + 'nvc0_vbo.c', + 'nvc0_program.c', + 'nvc0_shader_state.c', + 'nvc0_pc.c', + 'nvc0_pc_print.c', + 'nvc0_pc_emit.c', + 'nvc0_tgsi_to_nc.c', + 'nvc0_pc_optimize.c', + 'nvc0_pc_regalloc.c', + 'nvc0_push.c', + 'nvc0_push2.c', + 'nvc0_fence.c', + 'nvc0_mm.c', + 'nvc0_query.c' + ]) + +Export('nvc0') diff --git a/src/gallium/drivers/nvc0/nv50_defs.xml.h b/src/gallium/drivers/nvc0/nv50_defs.xml.h new file mode 100644 index 00000000000..1bf2f802b56 --- /dev/null +++ b/src/gallium/drivers/nvc0/nv50_defs.xml.h @@ -0,0 +1,142 @@ +#ifndef NV50_DEFS_XML +#define NV50_DEFS_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro, curro_, currojerez) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#define NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT 0x000000c0 +#define NV50_SURFACE_FORMAT_R32G32B32A32_SINT 0x000000c1 +#define NV50_SURFACE_FORMAT_R32G32B32A32_UINT 0x000000c2 +#define NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT 0x000000c3 +#define NV50_SURFACE_FORMAT_R16G16B16A16_UNORM 0x000000c6 +#define NV50_SURFACE_FORMAT_R16G16B16A16_SNORM 0x000000c7 +#define NV50_SURFACE_FORMAT_R16G16B16A16_SINT 0x000000c8 +#define NV50_SURFACE_FORMAT_R16G16B16A16_UINT 0x000000c9 +#define NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT 0x000000ca +#define NV50_SURFACE_FORMAT_R32G32_FLOAT 0x000000cb +#define NV50_SURFACE_FORMAT_R32G32_SINT 0x000000cc +#define NV50_SURFACE_FORMAT_R32G32_UINT 0x000000cd +#define NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT 0x000000ce +#define NV50_SURFACE_FORMAT_A8R8G8B8_UNORM 0x000000cf +#define NV50_SURFACE_FORMAT_A8R8G8B8_SRGB 0x000000d0 +#define NV50_SURFACE_FORMAT_A2B10G10R10_UNORM 0x000000d1 +#define NV50_SURFACE_FORMAT_A2B10G10R10_UINT 0x000000d2 +#define NV50_SURFACE_FORMAT_A8B8G8R8_UNORM 0x000000d5 +#define NV50_SURFACE_FORMAT_A8B8G8R8_SRGB 0x000000d6 +#define NV50_SURFACE_FORMAT_A8B8G8R8_SNORM 0x000000d7 +#define NV50_SURFACE_FORMAT_A8B8G8R8_SINT 0x000000d8 +#define NV50_SURFACE_FORMAT_A8B8G8R8_UINT 0x000000d9 +#define NV50_SURFACE_FORMAT_R16G16_UNORM 0x000000da +#define NV50_SURFACE_FORMAT_R16G16_SNORM 0x000000db +#define NV50_SURFACE_FORMAT_R16G16_SINT 0x000000dc +#define NV50_SURFACE_FORMAT_R16G16_UINT 0x000000dd +#define NV50_SURFACE_FORMAT_R16G16_FLOAT 0x000000de +#define NV50_SURFACE_FORMAT_A2R10G10B10_UNORM 0x000000df +#define NV50_SURFACE_FORMAT_B10G11R11_FLOAT 0x000000e0 +#define NV50_SURFACE_FORMAT_R32_FLOAT 0x000000e5 +#define NV50_SURFACE_FORMAT_X8R8G8B8_UNORM 0x000000e6 +#define NV50_SURFACE_FORMAT_X8R8G8B8_SRGB 0x000000e7 +#define NV50_SURFACE_FORMAT_R5G6B5_UNORM 0x000000e8 +#define NV50_SURFACE_FORMAT_A1R5G5B5_UNORM 0x000000e9 +#define NV50_SURFACE_FORMAT_R8G8_UNORM 0x000000ea +#define NV50_SURFACE_FORMAT_R8G8_SNORM 0x000000eb +#define NV50_SURFACE_FORMAT_R8G8_SINT 0x000000ec +#define NV50_SURFACE_FORMAT_R8G8_UINT 0x000000ed +#define NV50_SURFACE_FORMAT_R16_UNORM 0x000000ee +#define NV50_SURFACE_FORMAT_R16_SNORM 0x000000ef +#define NV50_SURFACE_FORMAT_R16_SINT 0x000000f0 +#define NV50_SURFACE_FORMAT_R16_UINT 0x000000f1 +#define NV50_SURFACE_FORMAT_R16_FLOAT 0x000000f2 +#define NV50_SURFACE_FORMAT_R8_UNORM 0x000000f3 +#define NV50_SURFACE_FORMAT_R8_SNORM 0x000000f4 +#define NV50_SURFACE_FORMAT_R8_SINT 0x000000f5 +#define NV50_SURFACE_FORMAT_R8_UINT 0x000000f6 +#define NV50_SURFACE_FORMAT_A8_UNORM 0x000000f7 +#define NV50_SURFACE_FORMAT_X1R5G5B5_UNORM 0x000000f8 +#define NV50_SURFACE_FORMAT_X8B8G8R8_UNORM 0x000000f9 +#define NV50_SURFACE_FORMAT_X8B8G8R8_SRGB 0x000000fa +#define NV50_ZETA_FORMAT_Z32_FLOAT 0x0000000a +#define NV50_ZETA_FORMAT_Z16_UNORM 0x00000013 +#define NV50_ZETA_FORMAT_Z24S8_UNORM 0x00000014 +#define NV50_ZETA_FORMAT_X8Z24_UNORM 0x00000015 +#define NV50_ZETA_FORMAT_S8Z24_UNORM 0x00000016 +#define NV50_ZETA_FORMAT_UNK18 0x00000018 +#define NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM 0x00000019 +#define NV50_ZETA_FORMAT_UNK1D 0x0000001d +#define NV50_ZETA_FORMAT_UNK1E 0x0000001e +#define NV50_ZETA_FORMAT_UNK1F 0x0000001f +#define NV50_QUERY__SIZE 0x00000010 +#define NV50_QUERY_COUNTER 0x00000000 + +#define NV50_QUERY_RES 0x00000004 + +#define NV50_QUERY_TIME 0x00000008 + + +#endif /* NV50_DEFS_XML */ diff --git a/src/gallium/drivers/nvc0/nv50_texture.xml.h b/src/gallium/drivers/nvc0/nv50_texture.xml.h new file mode 100644 index 
00000000000..9f83206516f --- /dev/null +++ b/src/gallium/drivers/nvc0/nv50_texture.xml.h @@ -0,0 +1,259 @@ +#ifndef NV50_TEXTURE_XML +#define NV50_TEXTURE_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nv50_texture.xml ( 6871 bytes, from 2010-10-03 13:18:37) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro, curro_, currojerez) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#define NV50_TIC_MAP_ZERO 0x00000000 +#define NV50_TIC_MAP_C0 0x00000002 +#define NV50_TIC_MAP_C1 0x00000003 +#define NV50_TIC_MAP_C2 0x00000004 +#define NV50_TIC_MAP_C3 0x00000005 +#define NV50_TIC_MAP_ONE 0x00000007 +#define NV50_TIC_TYPE_SNORM 0x00000001 +#define NV50_TIC_TYPE_UNORM 0x00000002 +#define NV50_TIC_TYPE_SINT 0x00000003 +#define NV50_TIC_TYPE_UINT 0x00000004 +#define NV50_TIC_TYPE_SSCALED 0x00000005 +#define NV50_TIC_TYPE_USCALED 0x00000006 +#define NV50_TIC_TYPE_FLOAT 0x00000007 +#define NV50_TSC_WRAP_REPEAT 0x00000000 +#define NV50_TSC_WRAP_MIRROR_REPEAT 0x00000001 +#define NV50_TSC_WRAP_CLAMP_TO_EDGE 0x00000002 +#define NV50_TSC_WRAP_CLAMP_TO_BORDER 0x00000003 +#define NV50_TSC_WRAP_CLAMP 0x00000004 +#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_EDGE 0x00000005 +#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_BORDER 0x00000006 +#define NV50_TSC_WRAP_MIRROR_CLAMP 0x00000007 +#define NV50_TIC__SIZE 0x00000020 +#define NV50_TIC_0 0x00000000 +#define NV50_TIC_0_MAPA__MASK 0x38000000 +#define NV50_TIC_0_MAPA__SHIFT 27 +#define NV50_TIC_0_MAPB__MASK 0x07000000 +#define NV50_TIC_0_MAPB__SHIFT 24 +#define NV50_TIC_0_MAPG__MASK 0x00e00000 +#define NV50_TIC_0_MAPG__SHIFT 21 +#define NV50_TIC_0_MAPR__MASK 0x001c0000 +#define NV50_TIC_0_MAPR__SHIFT 18 +#define NV50_TIC_0_TYPE3__MASK 0x00038000 +#define NV50_TIC_0_TYPE3__SHIFT 15 +#define NV50_TIC_0_TYPE2__MASK 0x00007000 +#define NV50_TIC_0_TYPE2__SHIFT 12 +#define NV50_TIC_0_TYPE1__MASK 0x00000e00 +#define NV50_TIC_0_TYPE1__SHIFT 9 +#define NV50_TIC_0_TYPE0__MASK 0x000001c0 +#define NV50_TIC_0_TYPE0__SHIFT 6 +#define NV50_TIC_0_SWIZZLE__MASK 0x3ffc0000 +#define NV50_TIC_0_FMT__MASK 0x0000003f +#define NV50_TIC_0_FMT__SHIFT 0 +#define NV50_TIC_0_FMT_32_32_32_32 0x00000001 +#define NV50_TIC_0_FMT_16_16_16_16 0x00000003 +#define NV50_TIC_0_FMT_32_32 0x00000004 +#define NV50_TIC_0_FMT_32_8 0x00000005 +#define NV50_TIC_0_FMT_8_8_8_8 0x00000008 +#define NV50_TIC_0_FMT_2_10_10_10 0x00000009 +#define NV50_TIC_0_FMT_16_16 0x0000000c +#define NV50_TIC_0_FMT_8_24 0x0000000d +#define NV50_TIC_0_FMT_24_8 0x0000000e +#define NV50_TIC_0_FMT_32 0x0000000f +#define NV50_TIC_0_FMT_4_4_4_4 0x00000012 +#define NV50_TIC_0_FMT_5_5_5_1 0x00000013 +#define NV50_TIC_0_FMT_1_5_5_5 0x00000014 +#define NV50_TIC_0_FMT_5_6_5 0x00000015 +#define NV50_TIC_0_FMT_6_5_5 0x00000016 +#define NV50_TIC_0_FMT_8_8 0x00000018 +#define NV50_TIC_0_FMT_16 0x0000001b +#define NV50_TIC_0_FMT_8 0x0000001d +#define NV50_TIC_0_FMT_4_4 0x0000001e +#define NV50_TIC_0_FMT_UNK1F 0x0000001f +#define NV50_TIC_0_FMT_E5_9_9_9 0x00000020 +#define NV50_TIC_0_FMT_10_11_11 0x00000021 +#define NV50_TIC_0_FMT_C1_C2_C1_C0 0x00000022 +#define NV50_TIC_0_FMT_C2_C1_C0_C1 0x00000023 +#define NV50_TIC_0_FMT_DXT1 0x00000024 +#define NV50_TIC_0_FMT_DXT3 0x00000025 +#define NV50_TIC_0_FMT_DXT5 0x00000026 +#define NV50_TIC_0_FMT_RGTC1 0x00000027 +#define NV50_TIC_0_FMT_RGTC2 0x00000028 +#define NV50_TIC_0_FMT_24_8_ZETA 0x00000029 +#define NV50_TIC_0_FMT_8_24_ZETA 0x0000002a +#define NV50_TIC_0_FMT_UNK2C_ZETA 0x0000002c +#define NV50_TIC_0_FMT_UNK2D_ZETA 0x0000002d +#define NV50_TIC_0_FMT_UNK2E_ZETA 0x0000002e +#define NV50_TIC_0_FMT_32_ZETA 0x0000002f +#define NV50_TIC_0_FMT_32_8_ZETA 0x00000030 +#define NV50_TIC_0_FMT_16_ZETA 0x0000003a + +#define NV50_TIC_1 0x00000004 +#define NV50_TIC_1_OFFSET_LOW__MASK 0xffffffff +#define NV50_TIC_1_OFFSET_LOW__SHIFT 0 + +#define NV50_TIC_2 0x00000008 +#define NV50_TIC_2_OFFSET_HIGH__MASK 0x000000ff +#define NV50_TIC_2_OFFSET_HIGH__SHIFT 0 +#define NV50_TIC_2_COLORSPACE_SRGB 
0x00000400 +#define NV50_TIC_2_TARGET__MASK 0x0003c000 +#define NV50_TIC_2_TARGET__SHIFT 14 +#define NV50_TIC_2_TARGET_1D 0x00000000 +#define NV50_TIC_2_TARGET_2D 0x00004000 +#define NV50_TIC_2_TARGET_3D 0x00008000 +#define NV50_TIC_2_TARGET_CUBE 0x0000c000 +#define NV50_TIC_2_TARGET_1D_ARRAY 0x00010000 +#define NV50_TIC_2_TARGET_2D_ARRAY 0x00014000 +#define NV50_TIC_2_TARGET_BUFFER 0x00018000 +#define NV50_TIC_2_TARGET_RECT 0x0001c000 +#define NV50_TIC_2_TARGET_CUBE_ARRAY 0x00020000 +#define NV50_TIC_2_TILE_MODE_LINEAR 0x00040000 +#define NV50_TIC_2_TILE_MODE_Y__MASK 0x01c00000 +#define NV50_TIC_2_TILE_MODE_Y__SHIFT 22 +#define NV50_TIC_2_TILE_MODE_Z__MASK 0x0e000000 +#define NV50_TIC_2_TILE_MODE_Z__SHIFT 25 +#define NV50_TIC_2_2D_UNK0258__MASK 0x30000000 +#define NV50_TIC_2_2D_UNK0258__SHIFT 28 +#define NV50_TIC_2_NORMALIZED_COORDS 0x80000000 + +#define NV50_TIC_3 0x0000000c +#define NV50_TIC_3_PITCH__MASK 0xffffffff +#define NV50_TIC_3_PITCH__SHIFT 0 + +#define NV50_TIC_4 0x00000010 +#define NV50_TIC_4_WIDTH__MASK 0xffffffff +#define NV50_TIC_4_WIDTH__SHIFT 0 + +#define NV50_TIC_5 0x00000014 +#define NV50_TIC_5_LAST_LEVEL__MASK 0xf0000000 +#define NV50_TIC_5_LAST_LEVEL__SHIFT 28 +#define NV50_TIC_5_DEPTH__MASK 0x0fff0000 +#define NV50_TIC_5_DEPTH__SHIFT 16 +#define NV50_TIC_5_HEIGHT__MASK 0x0000ffff +#define NV50_TIC_5_HEIGHT__SHIFT 0 + +#define NV50_TIC_7 0x0000001c +#define NV50_TIC_7_BASE_LEVEL__MASK 0x0000000f +#define NV50_TIC_7_BASE_LEVEL__SHIFT 0 +#define NV50_TIC_7_MAX_LEVEL__MASK 0x000000f0 +#define NV50_TIC_7_MAX_LEVEL__SHIFT 4 + +#define NV50_TSC__SIZE 0x00000020 +#define NV50_TSC_0 0x00000000 +#define NV50_TSC_0_WRAPS__MASK 0x00000007 +#define NV50_TSC_0_WRAPS__SHIFT 0 +#define NV50_TSC_0_WRAPT__MASK 0x00000038 +#define NV50_TSC_0_WRAPT__SHIFT 3 +#define NV50_TSC_0_WRAPR__MASK 0x000001c0 +#define NV50_TSC_0_WRAPR__SHIFT 6 +#define NV50_TSC_0_SHADOW_COMPARE_ENABLE 0x00000200 +#define NV50_TSC_0_SHADOW_COMPARE_FUNC__MASK 0x00001c00 +#define NV50_TSC_0_SHADOW_COMPARE_FUNC__SHIFT 10 +#define NV50_TSC_0_ANISOTROPY_MASK__MASK 0x00700000 +#define NV50_TSC_0_ANISOTROPY_MASK__SHIFT 20 + +#define NV50_TSC_1 0x00000004 +#define NV50_TSC_1_UNKN_ANISO_15 0x10000000 +#define NV50_TSC_1_UNKN_ANISO_35 0x18000000 +#define NV50_TSC_1_MAGF__MASK 0x00000003 +#define NV50_TSC_1_MAGF__SHIFT 0 +#define NV50_TSC_1_MAGF_NEAREST 0x00000001 +#define NV50_TSC_1_MAGF_LINEAR 0x00000002 +#define NV50_TSC_1_MINF__MASK 0x00000030 +#define NV50_TSC_1_MINF__SHIFT 4 +#define NV50_TSC_1_MINF_NEAREST 0x00000010 +#define NV50_TSC_1_MINF_LINEAR 0x00000020 +#define NV50_TSC_1_MIPF__MASK 0x000000c0 +#define NV50_TSC_1_MIPF__SHIFT 6 +#define NV50_TSC_1_MIPF_NONE 0x00000040 +#define NV50_TSC_1_MIPF_NEAREST 0x00000080 +#define NV50_TSC_1_MIPF_LINEAR 0x000000c0 +#define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000 +#define NV50_TSC_1_LOD_BIAS__SHIFT 12 + +#define NV50_TSC_2 0x00000008 +#define NV50_TSC_2_MIN_LOD__MASK 0x00000f00 +#define NV50_TSC_2_MIN_LOD__SHIFT 8 +#define NV50_TSC_2_MAX_LOD__MASK 0x00f00000 +#define NV50_TSC_2_MAX_LOD__SHIFT 20 + +#define NV50_TSC_4 0x00000010 +#define NV50_TSC_4_BORDER_COLOR_RED__MASK 0xffffffff +#define NV50_TSC_4_BORDER_COLOR_RED__SHIFT 0 + +#define NV50_TSC_5 0x00000014 +#define NV50_TSC_5_BORDER_COLOR_GREEN__MASK 0xffffffff +#define NV50_TSC_5_BORDER_COLOR_GREEN__SHIFT 0 + +#define NV50_TSC_6 0x00000018 +#define NV50_TSC_6_BORDER_COLOR_BLUE__MASK 0xffffffff +#define NV50_TSC_6_BORDER_COLOR_BLUE__SHIFT 0 + +#define NV50_TSC_7 0x0000001c +#define NV50_TSC_7_BORDER_COLOR_ALPHA__MASK 
0xffffffff +#define NV50_TSC_7_BORDER_COLOR_ALPHA__SHIFT 0 + + +#endif /* NV50_TEXTURE_XML */ diff --git a/src/gallium/drivers/nvc0/nvc0_2d.xml.h b/src/gallium/drivers/nvc0/nvc0_2d.xml.h new file mode 100644 index 00000000000..aebcd510e8e --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_2d.xml.h @@ -0,0 +1,380 @@ +#ifndef NVC0_2D_XML +#define NVC0_2D_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_2d.xml ( 9454 bytes, from 2010-10-16 16:03:11) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) +- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24) +- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) +- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro, curro_, currojerez) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + +#define NVC0_2D_DST_FORMAT 0x00000200 + +#define NVC0_2D_DST_LINEAR 0x00000204 + +#define NVC0_2D_DST_TILE_MODE 0x00000208 + +#define NVC0_2D_DST_DEPTH 0x0000020c + +#define NVC0_2D_DST_LAYER 0x00000210 + +#define NVC0_2D_DST_PITCH 0x00000214 + +#define NVC0_2D_DST_WIDTH 0x00000218 + +#define NVC0_2D_DST_HEIGHT 0x0000021c + +#define NVC0_2D_DST_ADDRESS_HIGH 0x00000220 + +#define NVC0_2D_DST_ADDRESS_LOW 0x00000224 + +#define NVC0_2D_UNK228 0x00000228 + +#define NVC0_2D_SRC_FORMAT 0x00000230 + +#define NVC0_2D_SRC_LINEAR 0x00000234 + +#define NVC0_2D_SRC_TILE_MODE 0x00000238 + +#define NVC0_2D_SRC_DEPTH 0x0000023c + +#define NVC0_2D_SRC_LAYER 0x00000240 + +#define NVC0_2D_SRC_PITCH 0x00000244 +#define NVC0_2D_SRC_PITCH__MAX 0x00040000 + +#define NVC0_2D_SRC_WIDTH 0x00000248 +#define NVC0_2D_SRC_WIDTH__MAX 0x00010000 + +#define NVC0_2D_SRC_HEIGHT 0x0000024c +#define NVC0_2D_SRC_HEIGHT__MAX 0x00010000 + +#define NVC0_2D_SRC_ADDRESS_HIGH 0x00000250 + +#define NVC0_2D_SRC_ADDRESS_LOW 0x00000254 + +#define NVC0_2D_UNK258 0x00000258 + +#define NVC0_2D_UNK260 0x00000260 + +#define NVC0_2D_COND_ADDRESS_HIGH 0x00000264 + +#define NVC0_2D_COND_ADDRESS_LOW 0x00000268 + +#define NVC0_2D_COND_MODE 0x0000026c +#define NVC0_2D_COND_MODE_NEVER 0x00000000 +#define NVC0_2D_COND_MODE_ALWAYS 0x00000001 +#define NVC0_2D_COND_MODE_RES_NON_ZERO 0x00000002 +#define NVC0_2D_COND_MODE_EQUAL 0x00000003 +#define NVC0_2D_COND_MODE_NOT_EQUAL 0x00000004 + +#define NVC0_2D_CLIP_X 0x00000280 + +#define NVC0_2D_CLIP_Y 0x00000284 + +#define NVC0_2D_CLIP_W 0x00000288 + +#define NVC0_2D_CLIP_H 0x0000028c + +#define NVC0_2D_CLIP_ENABLE 0x00000290 + +#define NVC0_2D_COLOR_KEY_FORMAT 0x00000294 +#define NVC0_2D_COLOR_KEY_FORMAT_16BPP 0x00000000 +#define NVC0_2D_COLOR_KEY_FORMAT_15BPP 0x00000001 +#define NVC0_2D_COLOR_KEY_FORMAT_24BPP 0x00000002 +#define NVC0_2D_COLOR_KEY_FORMAT_30BPP 0x00000003 +#define NVC0_2D_COLOR_KEY_FORMAT_8BPP 0x00000004 +#define NVC0_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005 +#define NVC0_2D_COLOR_KEY_FORMAT_32BPP 0x00000006 + +#define NVC0_2D_COLOR_KEY 0x00000298 + +#define NVC0_2D_COLOR_KEY_ENABLE 0x0000029c + +#define NVC0_2D_ROP 0x000002a0 + +#define NVC0_2D_BETA1 0x000002a4 + +#define NVC0_2D_BETA4 0x000002a8 + +#define NVC0_2D_OPERATION 0x000002ac +#define NVC0_2D_OPERATION_SRCCOPY_AND 0x00000000 +#define NVC0_2D_OPERATION_ROP_AND 0x00000001 +#define NVC0_2D_OPERATION_BLEND_AND 0x00000002 +#define NVC0_2D_OPERATION_SRCCOPY 0x00000003 +#define NVC0_2D_OPERATION_UNK4 0x00000004 +#define NVC0_2D_OPERATION_SRCCOPY_PREMULT 0x00000005 +#define NVC0_2D_OPERATION_BLEND_PREMULT 0x00000006 + +#define NVC0_2D_UNK2B0 0x000002b0 +#define NVC0_2D_UNK2B0_UNK0__MASK 0x0000003f +#define NVC0_2D_UNK2B0_UNK0__SHIFT 0 +#define NVC0_2D_UNK2B0_UNK1__MASK 0x00003f00 +#define NVC0_2D_UNK2B0_UNK1__SHIFT 8 + +#define NVC0_2D_PATTERN_SELECT 0x000002b4 +#define NVC0_2D_PATTERN_SELECT_MONO_8X8 0x00000000 +#define NVC0_2D_PATTERN_SELECT_MONO_64X1 0x00000001 +#define NVC0_2D_PATTERN_SELECT_MONO_1X64 0x00000002 +#define NVC0_2D_PATTERN_SELECT_COLOR 0x00000003 + +#define NVC0_2D_PATTERN_COLOR_FORMAT 0x000002e8 +#define NVC0_2D_PATTERN_COLOR_FORMAT_16BPP 0x00000000 +#define NVC0_2D_PATTERN_COLOR_FORMAT_15BPP 0x00000001 +#define 
NVC0_2D_PATTERN_COLOR_FORMAT_32BPP 0x00000002 +#define NVC0_2D_PATTERN_COLOR_FORMAT_8BPP 0x00000003 +#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK4 0x00000004 +#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK5 0x00000005 + +#define NVC0_2D_PATTERN_MONO_FORMAT 0x000002ec +#define NVC0_2D_PATTERN_MONO_FORMAT_CGA6 0x00000000 +#define NVC0_2D_PATTERN_MONO_FORMAT_LE 0x00000001 + +#define NVC0_2D_PATTERN_COLOR(i0) (0x000002f0 + 0x4*(i0)) +#define NVC0_2D_PATTERN_COLOR__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_COLOR__LEN 0x00000002 + +#define NVC0_2D_PATTERN_BITMAP(i0) (0x000002f8 + 0x4*(i0)) +#define NVC0_2D_PATTERN_BITMAP__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_BITMAP__LEN 0x00000002 + +#define NVC0_2D_PATTERN_X8R8G8B8(i0) (0x00000300 + 0x4*(i0)) +#define NVC0_2D_PATTERN_X8R8G8B8__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_X8R8G8B8__LEN 0x00000040 +#define NVC0_2D_PATTERN_X8R8G8B8_B__MASK 0x000000ff +#define NVC0_2D_PATTERN_X8R8G8B8_B__SHIFT 0 +#define NVC0_2D_PATTERN_X8R8G8B8_G__MASK 0x0000ff00 +#define NVC0_2D_PATTERN_X8R8G8B8_G__SHIFT 8 +#define NVC0_2D_PATTERN_X8R8G8B8_R__MASK 0x00ff0000 +#define NVC0_2D_PATTERN_X8R8G8B8_R__SHIFT 16 + +#define NVC0_2D_PATTERN_R5G6B5(i0) (0x00000400 + 0x4*(i0)) +#define NVC0_2D_PATTERN_R5G6B5__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_R5G6B5__LEN 0x00000020 +#define NVC0_2D_PATTERN_R5G6B5_B0__MASK 0x0000001f +#define NVC0_2D_PATTERN_R5G6B5_B0__SHIFT 0 +#define NVC0_2D_PATTERN_R5G6B5_G0__MASK 0x000007e0 +#define NVC0_2D_PATTERN_R5G6B5_G0__SHIFT 5 +#define NVC0_2D_PATTERN_R5G6B5_R0__MASK 0x0000f800 +#define NVC0_2D_PATTERN_R5G6B5_R0__SHIFT 11 +#define NVC0_2D_PATTERN_R5G6B5_B1__MASK 0x001f0000 +#define NVC0_2D_PATTERN_R5G6B5_B1__SHIFT 16 +#define NVC0_2D_PATTERN_R5G6B5_G1__MASK 0x07e00000 +#define NVC0_2D_PATTERN_R5G6B5_G1__SHIFT 21 +#define NVC0_2D_PATTERN_R5G6B5_R1__MASK 0xf8000000 +#define NVC0_2D_PATTERN_R5G6B5_R1__SHIFT 27 + +#define NVC0_2D_PATTERN_X1R5G5B5(i0) (0x00000480 + 0x4*(i0)) +#define NVC0_2D_PATTERN_X1R5G5B5__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_X1R5G5B5__LEN 0x00000020 +#define NVC0_2D_PATTERN_X1R5G5B5_B0__MASK 0x0000001f +#define NVC0_2D_PATTERN_X1R5G5B5_B0__SHIFT 0 +#define NVC0_2D_PATTERN_X1R5G5B5_G0__MASK 0x000003e0 +#define NVC0_2D_PATTERN_X1R5G5B5_G0__SHIFT 5 +#define NVC0_2D_PATTERN_X1R5G5B5_R0__MASK 0x00007c00 +#define NVC0_2D_PATTERN_X1R5G5B5_R0__SHIFT 10 +#define NVC0_2D_PATTERN_X1R5G5B5_B1__MASK 0x001f0000 +#define NVC0_2D_PATTERN_X1R5G5B5_B1__SHIFT 16 +#define NVC0_2D_PATTERN_X1R5G5B5_G1__MASK 0x03e00000 +#define NVC0_2D_PATTERN_X1R5G5B5_G1__SHIFT 21 +#define NVC0_2D_PATTERN_X1R5G5B5_R1__MASK 0x7c000000 +#define NVC0_2D_PATTERN_X1R5G5B5_R1__SHIFT 26 + +#define NVC0_2D_PATTERN_Y8(i0) (0x00000500 + 0x4*(i0)) +#define NVC0_2D_PATTERN_Y8__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_Y8__LEN 0x00000010 +#define NVC0_2D_PATTERN_Y8_Y0__MASK 0x000000ff +#define NVC0_2D_PATTERN_Y8_Y0__SHIFT 0 +#define NVC0_2D_PATTERN_Y8_Y1__MASK 0x0000ff00 +#define NVC0_2D_PATTERN_Y8_Y1__SHIFT 8 +#define NVC0_2D_PATTERN_Y8_Y2__MASK 0x00ff0000 +#define NVC0_2D_PATTERN_Y8_Y2__SHIFT 16 +#define NVC0_2D_PATTERN_Y8_Y3__MASK 0xff000000 +#define NVC0_2D_PATTERN_Y8_Y3__SHIFT 24 + +#define NVC0_2D_DRAW_SHAPE 0x00000580 +#define NVC0_2D_DRAW_SHAPE_POINTS 0x00000000 +#define NVC0_2D_DRAW_SHAPE_LINES 0x00000001 +#define NVC0_2D_DRAW_SHAPE_LINE_STRIP 0x00000002 +#define NVC0_2D_DRAW_SHAPE_TRIANGLES 0x00000003 +#define NVC0_2D_DRAW_SHAPE_RECTANGLES 0x00000004 + +#define NVC0_2D_DRAW_COLOR_FORMAT 0x00000584 + +#define NVC0_2D_DRAW_COLOR 0x00000588 + +#define NVC0_2D_UNK58C 
0x0000058c +#define NVC0_2D_UNK58C_0 0x00000001 +#define NVC0_2D_UNK58C_1 0x00000010 +#define NVC0_2D_UNK58C_2 0x00000100 +#define NVC0_2D_UNK58C_3 0x00001000 + +#define NVC0_2D_DRAW_POINT16 0x000005e0 +#define NVC0_2D_DRAW_POINT16_X__MASK 0x0000ffff +#define NVC0_2D_DRAW_POINT16_X__SHIFT 0 +#define NVC0_2D_DRAW_POINT16_Y__MASK 0xffff0000 +#define NVC0_2D_DRAW_POINT16_Y__SHIFT 16 + +#define NVC0_2D_DRAW_POINT32_X(i0) (0x00000600 + 0x8*(i0)) +#define NVC0_2D_DRAW_POINT32_X__ESIZE 0x00000008 +#define NVC0_2D_DRAW_POINT32_X__LEN 0x00000040 + +#define NVC0_2D_DRAW_POINT32_Y(i0) (0x00000604 + 0x8*(i0)) +#define NVC0_2D_DRAW_POINT32_Y__ESIZE 0x00000008 +#define NVC0_2D_DRAW_POINT32_Y__LEN 0x00000040 + +#define NVC0_2D_SIFC_BITMAP_ENABLE 0x00000800 + +#define NVC0_2D_SIFC_FORMAT 0x00000804 + +#define NVC0_2D_SIFC_BITMAP_FORMAT 0x00000808 +#define NVC0_2D_SIFC_BITMAP_FORMAT_I1 0x00000000 +#define NVC0_2D_SIFC_BITMAP_FORMAT_I4 0x00000001 +#define NVC0_2D_SIFC_BITMAP_FORMAT_I8 0x00000002 + +#define NVC0_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c + +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810 +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000 +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001 +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002 + +#define NVC0_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814 + +#define NVC0_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818 + +#define NVC0_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c + +#define NVC0_2D_SIFC_WIDTH 0x00000838 + +#define NVC0_2D_SIFC_HEIGHT 0x0000083c + +#define NVC0_2D_SIFC_DX_DU_FRACT 0x00000840 + +#define NVC0_2D_SIFC_DX_DU_INT 0x00000844 + +#define NVC0_2D_SIFC_DY_DV_FRACT 0x00000848 + +#define NVC0_2D_SIFC_DY_DV_INT 0x0000084c + +#define NVC0_2D_SIFC_DST_X_FRACT 0x00000850 + +#define NVC0_2D_SIFC_DST_X_INT 0x00000854 + +#define NVC0_2D_SIFC_DST_Y_FRACT 0x00000858 + +#define NVC0_2D_SIFC_DST_Y_INT 0x0000085c + +#define NVC0_2D_SIFC_DATA 0x00000860 + +#define NVC0_2D_UNK0870 0x00000870 + +#define NVC0_2D_UNK0880 0x00000880 + +#define NVC0_2D_UNK0884 0x00000884 + +#define NVC0_2D_UNK0888 0x00000888 + +#define NVC0_2D_BLIT_CONTROL 0x0000088c +#define NVC0_2D_BLIT_CONTROL_ORIGIN__MASK 0x00000001 +#define NVC0_2D_BLIT_CONTROL_ORIGIN__SHIFT 0 +#define NVC0_2D_BLIT_CONTROL_ORIGIN_CENTER 0x00000000 +#define NVC0_2D_BLIT_CONTROL_ORIGIN_CORNER 0x00000001 +#define NVC0_2D_BLIT_CONTROL_FILTER__MASK 0x00000010 +#define NVC0_2D_BLIT_CONTROL_FILTER__SHIFT 4 +#define NVC0_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE 0x00000000 +#define NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR 0x00000010 + +#define NVC0_2D_BLIT_DST_X 0x000008b0 + +#define NVC0_2D_BLIT_DST_Y 0x000008b4 + +#define NVC0_2D_BLIT_DST_W 0x000008b8 + +#define NVC0_2D_BLIT_DST_H 0x000008bc + +#define NVC0_2D_BLIT_DU_DX_FRACT 0x000008c0 + +#define NVC0_2D_BLIT_DU_DX_INT 0x000008c4 + +#define NVC0_2D_BLIT_DV_DY_FRACT 0x000008c8 + +#define NVC0_2D_BLIT_DV_DY_INT 0x000008cc + +#define NVC0_2D_BLIT_SRC_X_FRACT 0x000008d0 + +#define NVC0_2D_BLIT_SRC_X_INT 0x000008d4 + +#define NVC0_2D_BLIT_SRC_Y_FRACT 0x000008d8 + +#define NVC0_2D_BLIT_SRC_Y_INT 0x000008dc + + +#endif /* NVC0_2D_XML */ diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h new file mode 100644 index 00000000000..31302949d5e --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -0,0 +1,1182 @@ +#ifndef NVC0_3D_XML +#define NVC0_3D_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_3d.xml ( 30827 bytes, from 2011-01-13 18:23:07) +- copyright.xml ( 6452 bytes, from 2010-11-25 23:28:20) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) +- nv_3ddefs.xml ( 16394 bytes, from 2010-12-17 15:10:40) +- nv_object.xml ( 11898 bytes, from 2010-12-23 14:14:20) +- nvchipsets.xml ( 3074 bytes, from 2010-11-07 00:36:28) +- nv50_defs.xml ( 4487 bytes, from 2010-12-10 00:37:17) + +Copyright (C) 2006-2011 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + + +#define NVC0_3D_NOTIFY_ADDRESS_HIGH 0x00000104 +#define NVC0_3D_NOTIFY_ADDRESS_LOW 0x00000108 +#define NVC0_3D_NOTIFY 0x0000010c + +#define NVC0_3D_SERIALIZE 0x00000110 + +#define NVC0_3D_EARLY_FRAGMENT_TESTS 0x00000210 + +#define NVC0_3D_TESS_MODE 0x00000320 +#define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f +#define NVC0_3D_TESS_MODE_PRIM__SHIFT 0 +#define NVC0_3D_TESS_MODE_PRIM_ISOLINES 0x00000000 +#define NVC0_3D_TESS_MODE_PRIM_TRIANGLES 0x00000001 +#define NVC0_3D_TESS_MODE_PRIM_QUADS 0x00000002 +#define NVC0_3D_TESS_MODE_SPACING__MASK 0x000000f0 +#define NVC0_3D_TESS_MODE_SPACING__SHIFT 4 +#define NVC0_3D_TESS_MODE_SPACING_EQUAL 0x00000000 +#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD 0x00000010 +#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN 0x00000020 +#define NVC0_3D_TESS_MODE_CW 0x00000100 +#define NVC0_3D_TESS_MODE_CONNECTED 0x00000200 + +#define NVC0_3D_TESS_LEVEL_OUTER(i0) (0x00000324 + 0x4*(i0)) +#define NVC0_3D_TESS_LEVEL_OUTER__ESIZE 0x00000004 +#define NVC0_3D_TESS_LEVEL_OUTER__LEN 0x00000004 + +#define NVC0_3D_TESS_LEVEL_INNER(i0) (0x00000334 + 0x4*(i0)) +#define NVC0_3D_TESS_LEVEL_INNER__ESIZE 0x00000004 +#define NVC0_3D_TESS_LEVEL_INNER__LEN 0x00000002 + +#define NVC0_3D_RASTERIZE_ENABLE 0x0000037c + +#define NVC0_3D_TFB(i0) (0x00000380 + 0x20*(i0)) +#define NVC0_3D_TFB__ESIZE 0x00000020 +#define NVC0_3D_TFB__LEN 0x00000004 + +#define NVC0_3D_TFB_BUFFER_ENABLE(i0) (0x00000380 + 0x20*(i0)) + +#define NVC0_3D_TFB_ADDRESS_HIGH(i0) (0x00000384 + 0x20*(i0)) + +#define NVC0_3D_TFB_ADDRESS_LOW(i0) (0x00000388 + 0x20*(i0)) + +#define NVC0_3D_TFB_BUFFER_SIZE(i0) (0x0000038c + 0x20*(i0)) + +#define NVC0_3D_TFB_PRIMITIVE_ID(i0) (0x00000390 + 0x20*(i0)) + +#define NVC0_3D_TFB_UNK0700(i0) (0x00000700 + 0x10*(i0)) + +#define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0)) + +#define NVC0_3D_TFB_BUFFER_STRIDE(i0) (0x00000708 + 0x10*(i0)) + +#define NVC0_3D_TFB_ENABLE 0x00000744 + +#define NVC0_3D_LOCAL_BASE 0x0000077c + +#define NVC0_3D_LOCAL_ADDRESS_HIGH 0x00000790 + +#define NVC0_3D_LOCAL_ADDRESS_LOW 0x00000794 + +#define NVC0_3D_LOCAL_SIZE_HIGH 0x00000798 + +#define NVC0_3D_LOCAL_SIZE_LOW 0x0000079c + +#define NVC0_3D_RT(i0) (0x00000800 + 0x20*(i0)) +#define NVC0_3D_RT__ESIZE 0x00000020 +#define NVC0_3D_RT__LEN 0x00000008 + +#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x20*(i0)) + +#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x20*(i0)) + +#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x20*(i0)) + +#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x20*(i0)) + +#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x20*(i0)) + +#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x20*(i0)) +#define NVC0_3D_RT_TILE_MODE_UNK0 0x00000001 +#define NVC0_3D_RT_TILE_MODE_Y__MASK 0x00000070 +#define NVC0_3D_RT_TILE_MODE_Y__SHIFT 4 +#define NVC0_3D_RT_TILE_MODE_Z__MASK 0x00000700 +#define NVC0_3D_RT_TILE_MODE_Z__SHIFT 8 + +#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x20*(i0)) +#define NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff +#define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0 +#define NVC0_3D_RT_ARRAY_MODE_VOLUME 0x00010000 + +#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x20*(i0)) + +#define NVC0_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_SCALE_X__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_SCALE_Y(i0) (0x00000a04 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_SCALE_Y__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_SCALE_Y__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_SCALE_Z(i0) (0x00000a08 + 0x20*(i0)) 
+#define NVC0_3D_VIEWPORT_SCALE_Z__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_SCALE_Z__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_TRANSLATE_X(i0) (0x00000a0c + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_TRANSLATE_X__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_TRANSLATE_X__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_TRANSLATE_Y(i0) (0x00000a10 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_TRANSLATE_Y__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_TRANSLATE_Y__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_TRANSLATE_Z(i0) (0x00000a14 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_TRANSLATE_Z__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_TRANSLATE_Z__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_HORIZ(i0) (0x00000c00 + 0x10*(i0)) +#define NVC0_3D_VIEWPORT_HORIZ__ESIZE 0x00000010 +#define NVC0_3D_VIEWPORT_HORIZ__LEN 0x00000010 +#define NVC0_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff +#define NVC0_3D_VIEWPORT_HORIZ_X__SHIFT 0 +#define NVC0_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000 +#define NVC0_3D_VIEWPORT_HORIZ_W__SHIFT 16 + +#define NVC0_3D_VIEWPORT_VERT(i0) (0x00000c04 + 0x10*(i0)) +#define NVC0_3D_VIEWPORT_VERT__ESIZE 0x00000010 +#define NVC0_3D_VIEWPORT_VERT__LEN 0x00000010 +#define NVC0_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff +#define NVC0_3D_VIEWPORT_VERT_Y__SHIFT 0 +#define NVC0_3D_VIEWPORT_VERT_H__MASK 0xffff0000 +#define NVC0_3D_VIEWPORT_VERT_H__SHIFT 16 + +#define NVC0_3D_DEPTH_RANGE_NEAR(i0) (0x00000c08 + 0x10*(i0)) +#define NVC0_3D_DEPTH_RANGE_NEAR__ESIZE 0x00000010 +#define NVC0_3D_DEPTH_RANGE_NEAR__LEN 0x00000010 + +#define NVC0_3D_DEPTH_RANGE_FAR(i0) (0x00000c0c + 0x10*(i0)) +#define NVC0_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010 +#define NVC0_3D_DEPTH_RANGE_FAR__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_CLIP_HORIZ(i0) (0x00000d00 + 0x8*(i0)) +#define NVC0_3D_VIEWPORT_CLIP_HORIZ__ESIZE 0x00000008 +#define NVC0_3D_VIEWPORT_CLIP_HORIZ__LEN 0x00000008 +#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__MASK 0x0000ffff +#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__SHIFT 0 +#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__MASK 0xffff0000 +#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__SHIFT 16 + +#define NVC0_3D_VIEWPORT_CLIP_VERT(i0) (0x00000d04 + 0x8*(i0)) +#define NVC0_3D_VIEWPORT_CLIP_VERT__ESIZE 0x00000008 +#define NVC0_3D_VIEWPORT_CLIP_VERT__LEN 0x00000008 +#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__MASK 0x0000ffff +#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__SHIFT 0 +#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__MASK 0xffff0000 +#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__SHIFT 16 + +#define NVC0_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0)) +#define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008 +#define NVC0_3D_CLIPID_REGION_HORIZ__LEN 0x00000004 +#define NVC0_3D_CLIPID_REGION_HORIZ_X__MASK 0x0000ffff +#define NVC0_3D_CLIPID_REGION_HORIZ_X__SHIFT 0 +#define NVC0_3D_CLIPID_REGION_HORIZ_W__MASK 0xffff0000 +#define NVC0_3D_CLIPID_REGION_HORIZ_W__SHIFT 16 + +#define NVC0_3D_CLIPID_REGION_VERT(i0) (0x00000d44 + 0x8*(i0)) +#define NVC0_3D_CLIPID_REGION_VERT__ESIZE 0x00000008 +#define NVC0_3D_CLIPID_REGION_VERT__LEN 0x00000004 +#define NVC0_3D_CLIPID_REGION_VERT_Y__MASK 0x0000ffff +#define NVC0_3D_CLIPID_REGION_VERT_Y__SHIFT 0 +#define NVC0_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000 +#define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT 16 + +#define NVC0_3D_COUNTER_ENABLE 0x00000d68 +#define NVC0_3D_COUNTER_ENABLE_UNK00 0x00000001 +#define NVC0_3D_COUNTER_ENABLE_UNK01 0x00000002 +#define NVC0_3D_COUNTER_ENABLE_UNK02 0x00000004 +#define NVC0_3D_COUNTER_ENABLE_UNK03 0x00000008 +#define NVC0_3D_COUNTER_ENABLE_UNK04 0x00000010 +#define NVC0_3D_COUNTER_ENABLE_EMITTED_PRIMITIVES 0x00000020 
+#define NVC0_3D_COUNTER_ENABLE_UNK06 0x00000040 +#define NVC0_3D_COUNTER_ENABLE_UNK07 0x00000080 +#define NVC0_3D_COUNTER_ENABLE_UNK08 0x00000100 +#define NVC0_3D_COUNTER_ENABLE_UNK09 0x00000200 +#define NVC0_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000400 +#define NVC0_3D_COUNTER_ENABLE_UNK0B 0x00000800 +#define NVC0_3D_COUNTER_ENABLE_UNK0C 0x00001000 +#define NVC0_3D_COUNTER_ENABLE_UNK0D 0x00002000 +#define NVC0_3D_COUNTER_ENABLE_UNK0E 0x00004000 +#define NVC0_3D_COUNTER_ENABLE_UNK0F 0x00008000 + +#define NVC0_3D_VERTEX_BUFFER_FIRST 0x00000d74 + +#define NVC0_3D_VERTEX_BUFFER_COUNT 0x00000d78 + +#define NVC0_3D_CLEAR_COLOR(i0) (0x00000d80 + 0x4*(i0)) +#define NVC0_3D_CLEAR_COLOR__ESIZE 0x00000004 +#define NVC0_3D_CLEAR_COLOR__LEN 0x00000004 + +#define NVC0_3D_CLEAR_DEPTH 0x00000d90 + +#define NVC0_3D_CLEAR_STENCIL 0x00000da0 + +#define NVC0_3D_POLYGON_SMOOTH_ENABLE 0x00000db4 + +#define NVC0_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0 + +#define NVC0_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4 + +#define NVC0_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8 + +#define NVC0_3D_PATCH_VERTICES 0x00000dcc + +#define NVC0_3D_WINDOW_OFFSET_X 0x00000df8 + +#define NVC0_3D_WINDOW_OFFSET_Y 0x00000dfc + +#define NVC0_3D_SCISSOR_ENABLE(i0) (0x00000e00 + 0x10*(i0)) +#define NVC0_3D_SCISSOR_ENABLE__ESIZE 0x00000010 +#define NVC0_3D_SCISSOR_ENABLE__LEN 0x00000010 + +#define NVC0_3D_SCISSOR_HORIZ(i0) (0x00000e04 + 0x10*(i0)) +#define NVC0_3D_SCISSOR_HORIZ__ESIZE 0x00000010 +#define NVC0_3D_SCISSOR_HORIZ__LEN 0x00000010 +#define NVC0_3D_SCISSOR_HORIZ_MIN__MASK 0x0000ffff +#define NVC0_3D_SCISSOR_HORIZ_MIN__SHIFT 0 +#define NVC0_3D_SCISSOR_HORIZ_MAX__MASK 0xffff0000 +#define NVC0_3D_SCISSOR_HORIZ_MAX__SHIFT 16 + +#define NVC0_3D_SCISSOR_VERT(i0) (0x00000e08 + 0x10*(i0)) +#define NVC0_3D_SCISSOR_VERT__ESIZE 0x00000010 +#define NVC0_3D_SCISSOR_VERT__LEN 0x00000010 +#define NVC0_3D_SCISSOR_VERT_MIN__MASK 0x0000ffff +#define NVC0_3D_SCISSOR_VERT_MIN__SHIFT 0 +#define NVC0_3D_SCISSOR_VERT_MAX__MASK 0xffff0000 +#define NVC0_3D_SCISSOR_VERT_MAX__SHIFT 16 + +#define NVC0_3D_STENCIL_BACK_FUNC_REF 0x00000f54 + +#define NVC0_3D_STENCIL_BACK_MASK 0x00000f58 + +#define NVC0_3D_STENCIL_BACK_FUNC_MASK 0x00000f5c + +#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_HIGH 0x00000f84 + +#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_LOW 0x00000f88 + +#define NVC0_3D_DEPTH_BOUNDS(i0) (0x00000f9c + 0x4*(i0)) +#define NVC0_3D_DEPTH_BOUNDS__ESIZE 0x00000004 +#define NVC0_3D_DEPTH_BOUNDS__LEN 0x00000002 + +#define NVC0_3D_MSAA_MASK(i0) (0x00000fbc + 0x4*(i0)) +#define NVC0_3D_MSAA_MASK__ESIZE 0x00000004 +#define NVC0_3D_MSAA_MASK__LEN 0x00000004 + +#define NVC0_3D_CLIPID_ADDRESS_HIGH 0x00000fcc + +#define NVC0_3D_CLIPID_ADDRESS_LOW 0x00000fd0 + +#define NVC0_3D_ZETA_ADDRESS_HIGH 0x00000fe0 + +#define NVC0_3D_ZETA_ADDRESS_LOW 0x00000fe4 + +#define NVC0_3D_ZETA_FORMAT 0x00000fe8 + +#define NVC0_3D_ZETA_TILE_MODE 0x00000fec + +#define NVC0_3D_ZETA_LAYER_STRIDE 0x00000ff0 + +#define NVC0_3D_SCREEN_SCISSOR_HORIZ 0x00000ff4 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__MASK 0xffff0000 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT 16 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__MASK 0x0000ffff +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT 0 + +#define NVC0_3D_SCREEN_SCISSOR_VERT 0x00000ff8 +#define NVC0_3D_SCREEN_SCISSOR_VERT_H__MASK 0xffff0000 +#define NVC0_3D_SCREEN_SCISSOR_VERT_H__SHIFT 16 +#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff +#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0 + +#define NVC0_3D_VERTEX_ID 0x00001118 + +#define NVC0_3D_VTX_ATTR_DEFINE 
0x0000114c +#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__MASK 0x000000ff +#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT 0 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MASK 0x00000700 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT 8 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MIN 0x00000001 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MAX 0x00000004 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__MASK 0x00007000 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT 12 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_8 0x00001000 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_16 0x00002000 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_32 0x00004000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__MASK 0x00070000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT 16 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SNORM 0x00010000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UNORM 0x00020000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SINT 0x00030000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UINT 0x00040000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_USCALED 0x00050000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED 0x00060000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT 0x00070000 + +#define NVC0_3D_VTX_ATTR_DATA(i0) (0x00001150 + 0x4*(i0)) +#define NVC0_3D_VTX_ATTR_DATA__ESIZE 0x00000004 +#define NVC0_3D_VTX_ATTR_DATA__LEN 0x00000004 + +#define NVC0_3D_VERTEX_ATTRIB_FORMAT(i0) (0x00001160 + 0x4*(i0)) +#define NVC0_3D_VERTEX_ATTRIB_FORMAT__ESIZE 0x00000004 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT__LEN 0x00000020 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK 0x0000003f +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT 0 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST 0x00000040 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__MASK 0x001fff80 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT 7 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__MASK 0x07e00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__SHIFT 21 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32_32 0x00200000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32 0x00400000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16_16 0x00600000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32 0x00800000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16 0x00a00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8_8 0x01400000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16 0x01e00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 0x02400000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8 0x02600000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8 0x03000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16 0x03600000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8 0x03a00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_2_10_10_10 0x06000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__MASK 0x78000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__SHIFT 27 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SNORM 0x08000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UNORM 0x10000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SINT 0x18000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT 0x20000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_USCALED 0x28000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SSCALED 0x30000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT 0x38000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BGRA 0x80000000 + +#define NVC0_3D_RT_CONTROL 0x0000121c +#define NVC0_3D_RT_CONTROL_COUNT__MASK 0x0000000f +#define NVC0_3D_RT_CONTROL_COUNT__SHIFT 0 +#define NVC0_3D_RT_CONTROL_MAP0__MASK 0x00000070 +#define NVC0_3D_RT_CONTROL_MAP0__SHIFT 4 +#define NVC0_3D_RT_CONTROL_MAP1__MASK 0x00000380 +#define NVC0_3D_RT_CONTROL_MAP1__SHIFT 7 +#define NVC0_3D_RT_CONTROL_MAP2__MASK 
0x00001c00 +#define NVC0_3D_RT_CONTROL_MAP2__SHIFT 10 +#define NVC0_3D_RT_CONTROL_MAP3__MASK 0x0000e000 +#define NVC0_3D_RT_CONTROL_MAP3__SHIFT 13 +#define NVC0_3D_RT_CONTROL_MAP4__MASK 0x00070000 +#define NVC0_3D_RT_CONTROL_MAP4__SHIFT 16 +#define NVC0_3D_RT_CONTROL_MAP5__MASK 0x00380000 +#define NVC0_3D_RT_CONTROL_MAP5__SHIFT 19 +#define NVC0_3D_RT_CONTROL_MAP6__MASK 0x01c00000 +#define NVC0_3D_RT_CONTROL_MAP6__SHIFT 22 +#define NVC0_3D_RT_CONTROL_MAP7__MASK 0x0e000000 +#define NVC0_3D_RT_CONTROL_MAP7__SHIFT 25 + +#define NVC0_3D_ZETA_HORIZ 0x00001228 + +#define NVC0_3D_ZETA_VERT 0x0000122c + +#define NVC0_3D_ZETA_ARRAY_MODE 0x00001230 +#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__MASK 0x0000ffff +#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT 0 +#define NVC0_3D_ZETA_ARRAY_MODE_UNK 0x00010000 + +#define NVC0_3D_LINKED_TSC 0x00001234 + +#define NVC0_3D_DRAW_TFB_BYTES 0x0000123c + +#define NVC0_3D_FP_RESULT_COUNT 0x00001298 + +#define NVC0_3D_DEPTH_TEST_ENABLE 0x000012cc + +#define NVC0_3D_D3D_FILL_MODE 0x000012d0 +#define NVC0_3D_D3D_FILL_MODE_POINT 0x00000001 +#define NVC0_3D_D3D_FILL_MODE_WIREFRAME 0x00000002 +#define NVC0_3D_D3D_FILL_MODE_SOLID 0x00000003 + +#define NVC0_3D_SHADE_MODEL 0x000012d4 +#define NVC0_3D_SHADE_MODEL_FLAT 0x00001d00 +#define NVC0_3D_SHADE_MODEL_SMOOTH 0x00001d01 + +#define NVC0_3D_BLEND_INDEPENDENT 0x000012e4 + +#define NVC0_3D_DEPTH_WRITE_ENABLE 0x000012e8 + +#define NVC0_3D_ALPHA_TEST_ENABLE 0x000012ec + +#define NVC0_3D_VB_ELEMENT_U8_SETUP 0x00001300 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK 0xc0000000 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT 30 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK 0x3fffffff +#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT 0 + +#define NVC0_3D_VB_ELEMENT_U8 0x00001304 +#define NVC0_3D_VB_ELEMENT_U8_I0__MASK 0x000000ff +#define NVC0_3D_VB_ELEMENT_U8_I0__SHIFT 0 +#define NVC0_3D_VB_ELEMENT_U8_I1__MASK 0x0000ff00 +#define NVC0_3D_VB_ELEMENT_U8_I1__SHIFT 8 +#define NVC0_3D_VB_ELEMENT_U8_I2__MASK 0x00ff0000 +#define NVC0_3D_VB_ELEMENT_U8_I2__SHIFT 16 +#define NVC0_3D_VB_ELEMENT_U8_I3__MASK 0xff000000 +#define NVC0_3D_VB_ELEMENT_U8_I3__SHIFT 24 + +#define NVC0_3D_D3D_CULL_MODE 0x00001308 +#define NVC0_3D_D3D_CULL_MODE_NONE 0x00000001 +#define NVC0_3D_D3D_CULL_MODE_FRONT 0x00000002 +#define NVC0_3D_D3D_CULL_MODE_BACK 0x00000003 + +#define NVC0_3D_DEPTH_TEST_FUNC 0x0000130c +#define NVC0_3D_DEPTH_TEST_FUNC_NEVER 0x00000200 +#define NVC0_3D_DEPTH_TEST_FUNC_LESS 0x00000201 +#define NVC0_3D_DEPTH_TEST_FUNC_EQUAL 0x00000202 +#define NVC0_3D_DEPTH_TEST_FUNC_LEQUAL 0x00000203 +#define NVC0_3D_DEPTH_TEST_FUNC_GREATER 0x00000204 +#define NVC0_3D_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205 +#define NVC0_3D_DEPTH_TEST_FUNC_GEQUAL 0x00000206 +#define NVC0_3D_DEPTH_TEST_FUNC_ALWAYS 0x00000207 + +#define NVC0_3D_ALPHA_TEST_REF 0x00001310 + +#define NVC0_3D_ALPHA_TEST_FUNC 0x00001314 +#define NVC0_3D_ALPHA_TEST_FUNC_NEVER 0x00000200 +#define NVC0_3D_ALPHA_TEST_FUNC_LESS 0x00000201 +#define NVC0_3D_ALPHA_TEST_FUNC_EQUAL 0x00000202 +#define NVC0_3D_ALPHA_TEST_FUNC_LEQUAL 0x00000203 +#define NVC0_3D_ALPHA_TEST_FUNC_GREATER 0x00000204 +#define NVC0_3D_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205 +#define NVC0_3D_ALPHA_TEST_FUNC_GEQUAL 0x00000206 +#define NVC0_3D_ALPHA_TEST_FUNC_ALWAYS 0x00000207 + +#define NVC0_3D_DRAW_TFB_STRIDE 0x00001318 +#define NVC0_3D_DRAW_TFB_STRIDE__MIN 0x00000001 +#define NVC0_3D_DRAW_TFB_STRIDE__MAX 0x00000fff + +#define NVC0_3D_BLEND_COLOR(i0) (0x0000131c + 0x4*(i0)) +#define NVC0_3D_BLEND_COLOR__ESIZE 0x00000004 +#define 
NVC0_3D_BLEND_COLOR__LEN 0x00000004 + +#define NVC0_3D_TSC_FLUSH 0x00001330 +#define NVC0_3D_TSC_FLUSH_SPECIFIC 0x00000001 +#define NVC0_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVC0_3D_TSC_FLUSH_ENTRY__SHIFT 4 + +#define NVC0_3D_TIC_FLUSH 0x00001334 +#define NVC0_3D_TIC_FLUSH_SPECIFIC 0x00000001 +#define NVC0_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVC0_3D_TIC_FLUSH_ENTRY__SHIFT 4 + +#define NVC0_3D_TEX_CACHE_CTL 0x00001338 +#define NVC0_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030 +#define NVC0_3D_TEX_CACHE_CTL_UNK1__SHIFT 4 + +#define NVC0_3D_BLEND_EQUATION_RGB 0x00001340 +#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NVC0_3D_BLEND_EQUATION_RGB_MIN 0x00008007 +#define NVC0_3D_BLEND_EQUATION_RGB_MAX 0x00008008 +#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVC0_3D_BLEND_FUNC_SRC_RGB 0x00001344 + +#define NVC0_3D_BLEND_FUNC_DST_RGB 0x00001348 + +#define NVC0_3D_BLEND_EQUATION_ALPHA 0x0000134c +#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 +#define NVC0_3D_BLEND_EQUATION_ALPHA_MIN 0x00008007 +#define NVC0_3D_BLEND_EQUATION_ALPHA_MAX 0x00008008 +#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a +#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVC0_3D_BLEND_FUNC_SRC_ALPHA 0x00001350 + +#define NVC0_3D_BLEND_FUNC_DST_ALPHA 0x00001358 + +#define NVC0_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0)) +#define NVC0_3D_BLEND_ENABLE__ESIZE 0x00000004 +#define NVC0_3D_BLEND_ENABLE__LEN 0x00000008 + +#define NVC0_3D_STENCIL_FRONT_ENABLE 0x00001380 + +#define NVC0_3D_STENCIL_FRONT_OP_FAIL 0x00001384 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL 0x00001388 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS 0x0000138c +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC 0x00001390 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define 
NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207 + +#define NVC0_3D_STENCIL_FRONT_FUNC_REF 0x00001394 + +#define NVC0_3D_STENCIL_FRONT_MASK 0x00001398 + +#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x0000139c + +#define NVC0_3D_DRAW_TFB_BASE 0x000013a4 + +#define NVC0_3D_FRAG_COLOR_CLAMP_EN 0x000013a8 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_0 0x00000001 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_1 0x00000010 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_2 0x00000100 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_3 0x00001000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_4 0x00010000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_5 0x00100000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_6 0x01000000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_7 0x10000000 + +#define NVC0_3D_SCREEN_Y_CONTROL 0x000013ac +#define NVC0_3D_SCREEN_Y_CONTROL_Y_NEGATE 0x00000001 +#define NVC0_3D_SCREEN_Y_CONTROL_TRIANGLE_RAST_FLIP 0x00000010 + +#define NVC0_3D_LINE_WIDTH 0x000013b0 + +#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT 0x00001420 +#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MIN 0x00000001 +#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MAX 0x00000400 + +#define NVC0_3D_VERTEX_ARRAY_FLUSH 0x0000142c + +#define NVC0_3D_VB_ELEMENT_BASE 0x00001434 + +#define NVC0_3D_VB_INSTANCE_BASE 0x00001438 + +#define NVC0_3D_CODE_CB_FLUSH 0x00001440 + +#define NVC0_3D_CLIPID_HEIGHT 0x00001504 +#define NVC0_3D_CLIPID_HEIGHT__MAX 0x00002000 + +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE 0x00001510 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_0 0x00000001 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_1 0x00000002 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_2 0x00000004 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_3 0x00000008 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_4 0x00000010 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_5 0x00000020 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_6 0x00000040 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_7 0x00000080 + +#define NVC0_3D_SAMPLECNT_ENABLE 0x00001514 + +#define NVC0_3D_POINT_SIZE 0x00001518 + +#define NVC0_3D_POINT_SPRITE_ENABLE 0x00001520 + +#define NVC0_3D_COUNTER_RESET 0x00001530 +#define NVC0_3D_COUNTER_RESET_SAMPLECNT 0x00000001 +#define NVC0_3D_COUNTER_RESET_UNK02 0x00000002 +#define NVC0_3D_COUNTER_RESET_UNK03 0x00000003 +#define NVC0_3D_COUNTER_RESET_UNK04 0x00000004 +#define NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES 0x00000010 +#define NVC0_3D_COUNTER_RESET_UNK11 0x00000011 +#define NVC0_3D_COUNTER_RESET_UNK12 0x00000012 +#define NVC0_3D_COUNTER_RESET_UNK13 0x00000013 +#define NVC0_3D_COUNTER_RESET_UNK15 0x00000015 +#define NVC0_3D_COUNTER_RESET_UNK16 0x00000016 +#define NVC0_3D_COUNTER_RESET_UNK17 0x00000017 +#define NVC0_3D_COUNTER_RESET_UNK18 0x00000018 +#define NVC0_3D_COUNTER_RESET_UNK1A 0x0000001a +#define NVC0_3D_COUNTER_RESET_UNK1B 0x0000001b +#define NVC0_3D_COUNTER_RESET_UNK1C 0x0000001c +#define NVC0_3D_COUNTER_RESET_UNK1D 0x0000001d +#define NVC0_3D_COUNTER_RESET_UNK1E 0x0000001e +#define NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x0000001f + +#define NVC0_3D_MULTISAMPLE_ENABLE 0x00001534 + +#define NVC0_3D_ZETA_ENABLE 0x00001538 + +#define NVC0_3D_MULTISAMPLE_CTRL 0x0000153c +#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE 0x00000001 +#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE 0x00000010 + +#define NVC0_3D_COND_ADDRESS_HIGH 0x00001550 + +#define NVC0_3D_COND_ADDRESS_LOW 0x00001554 + +#define NVC0_3D_COND_MODE 0x00001558 +#define NVC0_3D_COND_MODE_NEVER 0x00000000 +#define NVC0_3D_COND_MODE_ALWAYS 0x00000001 +#define NVC0_3D_COND_MODE_RES_NON_ZERO 
0x00000002 +#define NVC0_3D_COND_MODE_EQUAL 0x00000003 +#define NVC0_3D_COND_MODE_NOT_EQUAL 0x00000004 + +#define NVC0_3D_TSC_ADDRESS_HIGH 0x0000155c + +#define NVC0_3D_TSC_ADDRESS_LOW 0x00001560 +#define NVC0_3D_TSC_ADDRESS_LOW__ALIGN 0x00000020 + +#define NVC0_3D_TSC_LIMIT 0x00001564 +#define NVC0_3D_TSC_LIMIT__MAX 0x00001fff + +#define NVC0_3D_POLYGON_OFFSET_FACTOR 0x0000156c + +#define NVC0_3D_LINE_SMOOTH_ENABLE 0x00001570 + +#define NVC0_3D_TIC_ADDRESS_HIGH 0x00001574 + +#define NVC0_3D_TIC_ADDRESS_LOW 0x00001578 + +#define NVC0_3D_TIC_LIMIT 0x0000157c + +#define NVC0_3D_STENCIL_TWO_SIDE_ENABLE 0x00001594 + +#define NVC0_3D_STENCIL_BACK_OP_FAIL 0x00001598 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_ZERO 0x00000000 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a +#define NVC0_3D_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR 0x00001e02 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR 0x00001e03 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL 0x0000159c +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_BACK_OP_ZPASS 0x000015a0 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC 0x000015a4 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207 + +#define NVC0_3D_CSAA_ENABLE 0x000015b4 + +#define NVC0_3D_FRAMEBUFFER_SRGB 0x000015b8 + +#define NVC0_3D_POLYGON_OFFSET_UNITS 0x000015bc + +#define NVC0_3D_GP_BUILTIN_RESULT_EN 0x000015cc +#define NVC0_3D_GP_BUILTIN_RESULT_EN_LAYER 0x00010000 + +#define NVC0_3D_MULTISAMPLE_MODE 0x000015d0 +#define NVC0_3D_MULTISAMPLE_MODE_1X 0x00000000 +#define NVC0_3D_MULTISAMPLE_MODE_2XMS 0x00000001 +#define NVC0_3D_MULTISAMPLE_MODE_4XMS 0x00000002 +#define NVC0_3D_MULTISAMPLE_MODE_8XMS 0x00000003 +#define NVC0_3D_MULTISAMPLE_MODE_4XMS_4XCS 0x00000008 +#define NVC0_3D_MULTISAMPLE_MODE_4XMS_12XCS 0x00000009 +#define NVC0_3D_MULTISAMPLE_MODE_8XMS_8XCS 0x0000000a + +#define NVC0_3D_VERTEX_BEGIN_D3D 0x000015d4 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK 0x0fffffff +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT 0 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS 0x00000001 +#define 
NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES 0x00000002 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP 0x00000003 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES 0x00000004 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP 0x00000005 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY 0x0000000a +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d +#define NVC0_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT 0x10000000 + +#define NVC0_3D_VERTEX_END_D3D 0x000015d8 +#define NVC0_3D_VERTEX_END_D3D_UNK0 0x00000001 +#define NVC0_3D_VERTEX_END_D3D_UNK1 0x00000002 + +#define NVC0_3D_EDGEFLAG_ENABLE 0x000015e4 + +#define NVC0_3D_VB_ELEMENT_U32 0x000015e8 + +#define NVC0_3D_VB_ELEMENT_U16_SETUP 0x000015ec +#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK 0xc0000000 +#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT 30 +#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK 0x3fffffff +#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT 0 + +#define NVC0_3D_VB_ELEMENT_U16 0x000015f0 +#define NVC0_3D_VB_ELEMENT_U16_I0__MASK 0x0000ffff +#define NVC0_3D_VB_ELEMENT_U16_I0__SHIFT 0 +#define NVC0_3D_VB_ELEMENT_U16_I1__MASK 0xffff0000 +#define NVC0_3D_VB_ELEMENT_U16_I1__SHIFT 16 + +#define NVC0_3D_VERTEX_BASE_HIGH 0x000015f4 + +#define NVC0_3D_VERTEX_BASE_LOW 0x000015f8 + +#define NVC0_3D_POINT_COORD_REPLACE 0x00001604 +#define NVC0_3D_POINT_COORD_REPLACE_BITS__MASK 0x00001fff +#define NVC0_3D_POINT_COORD_REPLACE_BITS__SHIFT 0 + +#define NVC0_3D_CODE_ADDRESS_HIGH 0x00001608 + +#define NVC0_3D_CODE_ADDRESS_LOW 0x0000160c + +#define NVC0_3D_VERTEX_END_GL 0x00001614 +#define NVC0_3D_VERTEX_END_GL_UNK0 0x00000001 +#define NVC0_3D_VERTEX_END_GL_UNK1 0x00000002 + +#define NVC0_3D_VERTEX_BEGIN_GL 0x00001618 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK 0x0fffffff +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT 0 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS 0x00000000 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES 0x00000001 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP 0x00000002 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP 0x00000003 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES 0x00000004 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP 0x00000005 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN 0x00000006 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS 0x00000007 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP 0x00000008 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON 0x00000009 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY 0x0000000a +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_PATCHES 0x0000000e +#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x10000000 + +#define NVC0_3D_VERTEX_DATA 0x00001640 + +#define NVC0_3D_PRIM_RESTART_ENABLE 0x00001644 + +#define NVC0_3D_PRIM_RESTART_INDEX 0x00001648 + +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN 0x0000164c +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID 0x00000001 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID 0x00000010 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID 0x00000100 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_UNK12 0x00001000 + +#define NVC0_3D_POINT_SMOOTH_ENABLE 0x00001658 + 
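(Not part of the patch, just an illustration of how these generated headers are consumed.) The rules-ng-ng output above follows two conventions: indexed registers are macros taking an index, e.g. NVC0_3D_VIEWPORT_HORIZ(i0) = 0x00000c00 + 0x10*(i0), and every packed bitfield FOO has a matching FOO__MASK / FOO__SHIFT pair. A minimal C sketch of how a driver would typically pack and unpack such a field; the helper names and the include line are assumptions for the example, not code from this series.

#include <stdint.h>
#include "nvc0_3d.xml.h"  /* the header added by this patch */

/* Hypothetical helper: pack an x/width pair into a VIEWPORT_HORIZ word
 * using the __SHIFT/__MASK pair defined above. */
static inline uint32_t
pack_viewport_horiz(uint32_t x, uint32_t w)
{
   return ((x << NVC0_3D_VIEWPORT_HORIZ_X__SHIFT) & NVC0_3D_VIEWPORT_HORIZ_X__MASK) |
          ((w << NVC0_3D_VIEWPORT_HORIZ_W__SHIFT) & NVC0_3D_VIEWPORT_HORIZ_W__MASK);
}

/* Hypothetical helper: extract the width field back out of such a word. */
static inline uint32_t
unpack_viewport_horiz_w(uint32_t word)
{
   return (word & NVC0_3D_VIEWPORT_HORIZ_W__MASK) >> NVC0_3D_VIEWPORT_HORIZ_W__SHIFT;
}

/* Example: viewport 0 at x = 0, 1920 pixels wide.
 * pack_viewport_horiz(0, 1920) == 0x07800000, which would be written to
 * method NVC0_3D_VIEWPORT_HORIZ(0) == 0x00000c00. */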
+#define NVC0_3D_POINT_RASTER_RULES 0x0000165c +#define NVC0_3D_POINT_RASTER_RULES_OGL 0x00000000 +#define NVC0_3D_POINT_RASTER_RULES_D3D 0x00000001 + +#define NVC0_3D_POINT_SPRITE_CTRL 0x00001660 + +#define NVC0_3D_TEX_MISC 0x00001664 +#define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004 + +#define NVC0_3D_LINE_STIPPLE_ENABLE 0x0000166c + +#define NVC0_3D_LINE_STIPPLE_PATTERN 0x00001680 + +#define NVC0_3D_PROVOKING_VERTEX_LAST 0x00001684 + +#define NVC0_3D_VERTEX_TWO_SIDE_ENABLE 0x00001688 + +#define NVC0_3D_POLYGON_STIPPLE_ENABLE 0x0000168c + +#define NVC0_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001700 + 0x4*(i0)) +#define NVC0_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004 +#define NVC0_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020 + +#define NVC0_3D_STRMOUT_UNK1780(i0) (0x00001780 + 0x4*(i0)) +#define NVC0_3D_STRMOUT_UNK1780__ESIZE 0x00000004 +#define NVC0_3D_STRMOUT_UNK1780__LEN 0x00000004 + +#define NVC0_3D_UNK17BC_ADDRESS_HIGH 0x000017bc + +#define NVC0_3D_UNK17BC_ADDRESS_LOW 0x000017c0 + +#define NVC0_3D_UNK17BC_LIMIT 0x000017c4 + +#define NVC0_3D_INDEX_ARRAY_START_HIGH 0x000017c8 + +#define NVC0_3D_INDEX_ARRAY_START_LOW 0x000017cc + +#define NVC0_3D_INDEX_ARRAY_LIMIT_HIGH 0x000017d0 + +#define NVC0_3D_INDEX_ARRAY_LIMIT_LOW 0x000017d4 + +#define NVC0_3D_INDEX_LOG2_SIZE 0x000017d8 + +#define NVC0_3D_INDEX_BATCH_FIRST 0x000017dc + +#define NVC0_3D_INDEX_BATCH_COUNT 0x000017e0 + +#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(i0) (0x00001880 + 0x4*(i0)) +#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE 0x00000004 +#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__LEN 0x00000020 + +#define NVC0_3D_VP_POINT_SIZE_EN 0x00001910 + +#define NVC0_3D_CULL_FACE_ENABLE 0x00001918 + +#define NVC0_3D_FRONT_FACE 0x0000191c +#define NVC0_3D_FRONT_FACE_CW 0x00000900 +#define NVC0_3D_FRONT_FACE_CCW 0x00000901 + +#define NVC0_3D_CULL_FACE 0x00001920 +#define NVC0_3D_CULL_FACE_FRONT 0x00000404 +#define NVC0_3D_CULL_FACE_BACK 0x00000405 +#define NVC0_3D_CULL_FACE_FRONT_AND_BACK 0x00000408 + +#define NVC0_3D_VIEWPORT_TRANSFORM_EN 0x0000192c + +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1 0x00000002 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2 0x00000004 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK3 0x00000008 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK4 0x00000010 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12 0x00001000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK13 0x00002000 + +#define NVC0_3D_VIEWPORT_CLIP_RECTS_EN 0x0000194c + +#define NVC0_3D_VIEWPORT_CLIP_MODE 0x00001950 +#define NVC0_3D_VIEWPORT_CLIP_MODE_INSIDE_ANY 0x00000000 +#define NVC0_3D_VIEWPORT_CLIP_MODE_OUTSIDE_ALL 0x00000001 +#define NVC0_3D_VIEWPORT_CLIP_MODE_NEVER 0x00000002 + +#define NVC0_3D_FP_ZORDER_CTRL 0x0000196c +#define NVC0_3D_FP_ZORDER_CTRL_0 0x00000001 +#define NVC0_3D_FP_ZORDER_CTRL_1 0x00000010 + +#define NVC0_3D_CLIPID_ENABLE 0x0000197c + +#define NVC0_3D_CLIPID_WIDTH 0x00001980 +#define NVC0_3D_CLIPID_WIDTH__MAX 0x00002000 +#define NVC0_3D_CLIPID_WIDTH__ALIGN 0x00000040 + +#define NVC0_3D_CLIPID_ID 0x00001984 + +#define NVC0_3D_FP_CONTROL 0x000019a8 +#define NVC0_3D_FP_CONTROL_MULTIPLE_RESULTS 0x00000001 +#define NVC0_3D_FP_CONTROL_EXPORTS_Z 0x00000100 +#define NVC0_3D_FP_CONTROL_USES_KIL 0x00100000 + +#define NVC0_3D_DEPTH_BOUNDS_EN 0x000019bc + +#define NVC0_3D_LOGIC_OP_ENABLE 
0x000019c4 + +#define NVC0_3D_LOGIC_OP 0x000019c8 +#define NVC0_3D_LOGIC_OP_CLEAR 0x00001500 +#define NVC0_3D_LOGIC_OP_AND 0x00001501 +#define NVC0_3D_LOGIC_OP_AND_REVERSE 0x00001502 +#define NVC0_3D_LOGIC_OP_COPY 0x00001503 +#define NVC0_3D_LOGIC_OP_AND_INVERTED 0x00001504 +#define NVC0_3D_LOGIC_OP_NOOP 0x00001505 +#define NVC0_3D_LOGIC_OP_XOR 0x00001506 +#define NVC0_3D_LOGIC_OP_OR 0x00001507 +#define NVC0_3D_LOGIC_OP_NOR 0x00001508 +#define NVC0_3D_LOGIC_OP_EQUIV 0x00001509 +#define NVC0_3D_LOGIC_OP_INVERT 0x0000150a +#define NVC0_3D_LOGIC_OP_OR_REVERSE 0x0000150b +#define NVC0_3D_LOGIC_OP_COPY_INVERTED 0x0000150c +#define NVC0_3D_LOGIC_OP_OR_INVERTED 0x0000150d +#define NVC0_3D_LOGIC_OP_NAND 0x0000150e +#define NVC0_3D_LOGIC_OP_SET 0x0000150f + +#define NVC0_3D_CLEAR_BUFFERS 0x000019d0 +#define NVC0_3D_CLEAR_BUFFERS_Z 0x00000001 +#define NVC0_3D_CLEAR_BUFFERS_S 0x00000002 +#define NVC0_3D_CLEAR_BUFFERS_R 0x00000004 +#define NVC0_3D_CLEAR_BUFFERS_G 0x00000008 +#define NVC0_3D_CLEAR_BUFFERS_B 0x00000010 +#define NVC0_3D_CLEAR_BUFFERS_A 0x00000020 +#define NVC0_3D_CLEAR_BUFFERS_RT__MASK 0x000003c0 +#define NVC0_3D_CLEAR_BUFFERS_RT__SHIFT 6 +#define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00 +#define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT 10 + +#define NVC0_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0)) +#define NVC0_3D_COLOR_MASK__ESIZE 0x00000004 +#define NVC0_3D_COLOR_MASK__LEN 0x00000008 +#define NVC0_3D_COLOR_MASK_R 0x0000000f +#define NVC0_3D_COLOR_MASK_G 0x000000f0 +#define NVC0_3D_COLOR_MASK_B 0x00000f00 +#define NVC0_3D_COLOR_MASK_A 0x0000f000 + +#define NVC0_3D_QUERY_ADDRESS_HIGH 0x00001b00 + +#define NVC0_3D_QUERY_ADDRESS_LOW 0x00001b04 + +#define NVC0_3D_QUERY_SEQUENCE 0x00001b08 + +#define NVC0_3D_QUERY_GET 0x00001b0c +#define NVC0_3D_QUERY_GET_MODE__MASK 0x00000003 +#define NVC0_3D_QUERY_GET_MODE__SHIFT 0 +#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000 +#define NVC0_3D_QUERY_GET_MODE_SYNC 0x00000001 +#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002 +#define NVC0_3D_QUERY_GET_FENCE 0x00000010 +#define NVC0_3D_QUERY_GET_STREAM__MASK 0x000000e0 +#define NVC0_3D_QUERY_GET_STREAM__SHIFT 5 +#define NVC0_3D_QUERY_GET_UNK8 0x00000100 +#define NVC0_3D_QUERY_GET_UNIT__MASK 0x0000f000 +#define NVC0_3D_QUERY_GET_UNIT__SHIFT 12 +#define NVC0_3D_QUERY_GET_SYNC_COND__MASK 0x00010000 +#define NVC0_3D_QUERY_GET_SYNC_COND__SHIFT 16 +#define NVC0_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000 +#define NVC0_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000 +#define NVC0_3D_QUERY_GET_INTR 0x00100000 +#define NVC0_3D_QUERY_GET_UNK21 0x00200000 +#define NVC0_3D_QUERY_GET_SELECT__MASK 0x0f800000 +#define NVC0_3D_QUERY_GET_SELECT__SHIFT 23 +#define NVC0_3D_QUERY_GET_SELECT_ZERO 0x00000000 +#define NVC0_3D_QUERY_GET_SELECT_SAMPLECNT 0x01000000 +#define NVC0_3D_QUERY_GET_SELECT_EMITTED_PRIMS 0x05800000 +#define NVC0_3D_QUERY_GET_SELECT_GENERATED_PRIMS 0x09000000 +#define NVC0_3D_QUERY_GET_SHORT 0x10000000 + +#define NVC0_3D_VERTEX_ARRAY_FETCH(i0) (0x00001c00 + 0x10*(i0)) +#define NVC0_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010 +#define NVC0_3D_VERTEX_ARRAY_FETCH__LEN 0x00000020 +#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK 0x00000fff +#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT 0 +#define NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE 0x00001000 + +#define NVC0_3D_VERTEX_ARRAY_START_HIGH(i0) (0x00001c04 + 0x10*(i0)) +#define NVC0_3D_VERTEX_ARRAY_START_HIGH__ESIZE 0x00000010 +#define NVC0_3D_VERTEX_ARRAY_START_HIGH__LEN 0x00000020 + +#define NVC0_3D_VERTEX_ARRAY_START_LOW(i0) (0x00001c08 + 0x10*(i0)) +#define 
NVC0_3D_VERTEX_ARRAY_START_LOW__ESIZE 0x00000010 +#define NVC0_3D_VERTEX_ARRAY_START_LOW__LEN 0x00000020 + +#define NVC0_3D_VERTEX_ARRAY_DIVISOR(i0) (0x00001c0c + 0x10*(i0)) +#define NVC0_3D_VERTEX_ARRAY_DIVISOR__ESIZE 0x00000010 +#define NVC0_3D_VERTEX_ARRAY_DIVISOR__LEN 0x00000020 + +#define NVC0_3D_IBLEND(i0) (0x00001e00 + 0x20*(i0)) +#define NVC0_3D_IBLEND__ESIZE 0x00000020 +#define NVC0_3D_IBLEND__LEN 0x00000008 + +#define NVC0_3D_IBLEND_EQUATION_RGB(i0) (0x00001e04 + 0x20*(i0)) +#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NVC0_3D_IBLEND_EQUATION_RGB_MIN 0x00008007 +#define NVC0_3D_IBLEND_EQUATION_RGB_MAX 0x00008008 +#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVC0_3D_IBLEND_FUNC_SRC_RGB(i0) (0x00001e08 + 0x20*(i0)) + +#define NVC0_3D_IBLEND_FUNC_DST_RGB(i0) (0x00001e0c + 0x20*(i0)) + +#define NVC0_3D_IBLEND_EQUATION_ALPHA(i0) (0x00001e10 + 0x20*(i0)) +#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_MIN 0x00008007 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_MAX 0x00008008 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a +#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVC0_3D_IBLEND_FUNC_SRC_ALPHA(i0) (0x00001e14 + 0x20*(i0)) + +#define NVC0_3D_IBLEND_FUNC_DST_ALPHA(i0) (0x00001e18 + 0x20*(i0)) + +#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00001f00 + 0x8*(i0)) +#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__ESIZE 0x00000008 +#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__LEN 0x00000020 + +#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00001f04 + 0x8*(i0)) +#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__ESIZE 0x00000008 +#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__LEN 0x00000020 + +#define NVC0_3D_SP(i0) (0x00002000 + 0x40*(i0)) +#define NVC0_3D_SP__ESIZE 0x00000040 +#define NVC0_3D_SP__LEN 0x00000006 + +#define NVC0_3D_SP_SELECT(i0) (0x00002000 + 0x40*(i0)) +#define NVC0_3D_SP_SELECT_ENABLE 0x00000001 +#define NVC0_3D_SP_SELECT_PROGRAM__MASK 0x00000070 +#define NVC0_3D_SP_SELECT_PROGRAM__SHIFT 4 +#define NVC0_3D_SP_SELECT_PROGRAM_VP_A 0x00000000 +#define NVC0_3D_SP_SELECT_PROGRAM_VP_B 0x00000010 +#define NVC0_3D_SP_SELECT_PROGRAM_TCP 0x00000020 +#define NVC0_3D_SP_SELECT_PROGRAM_TEP 0x00000030 +#define NVC0_3D_SP_SELECT_PROGRAM_GP 0x00000040 +#define NVC0_3D_SP_SELECT_PROGRAM_FP 0x00000050 + +#define NVC0_3D_SP_START_ID(i0) (0x00002004 + 0x40*(i0)) + +#define NVC0_3D_SP_GPR_ALLOC(i0) (0x0000200c + 0x40*(i0)) + +#define NVC0_3D_TEX_LIMITS(i0) (0x00002200 + 0x10*(i0)) +#define NVC0_3D_TEX_LIMITS__ESIZE 0x00000010 +#define NVC0_3D_TEX_LIMITS__LEN 0x00000005 + +#define NVC0_3D_FIRMWARE(i0) (0x00002300 + 0x4*(i0)) +#define NVC0_3D_FIRMWARE__ESIZE 0x00000004 +#define NVC0_3D_FIRMWARE__LEN 0x00000020 + +#define NVC0_3D_CB_SIZE 0x00002380 + +#define NVC0_3D_CB_ADDRESS_HIGH 0x00002384 + +#define NVC0_3D_CB_ADDRESS_LOW 0x00002388 + +#define NVC0_3D_CB_POS 0x0000238c + +#define NVC0_3D_CB_DATA(i0) (0x00002390 + 0x4*(i0)) +#define NVC0_3D_CB_DATA__ESIZE 0x00000004 +#define NVC0_3D_CB_DATA__LEN 0x00000010 + +#define NVC0_3D_BIND_TSC(i0) (0x00002400 + 0x20*(i0)) +#define NVC0_3D_BIND_TSC__ESIZE 0x00000020 +#define NVC0_3D_BIND_TSC__LEN 0x00000005 +#define NVC0_3D_BIND_TSC_ACTIVE 0x00000001 +#define NVC0_3D_BIND_TSC_SAMPLER__MASK 0x00000ff0 +#define NVC0_3D_BIND_TSC_SAMPLER__SHIFT 4 +#define NVC0_3D_BIND_TSC_TSC__MASK 0x01fff000 +#define NVC0_3D_BIND_TSC_TSC__SHIFT 12 + +#define NVC0_3D_BIND_TIC(i0) (0x00002404 
+ 0x20*(i0)) +#define NVC0_3D_BIND_TIC__ESIZE 0x00000020 +#define NVC0_3D_BIND_TIC__LEN 0x00000005 +#define NVC0_3D_BIND_TIC_ACTIVE 0x00000001 +#define NVC0_3D_BIND_TIC_TEXTURE__MASK 0x000001fe +#define NVC0_3D_BIND_TIC_TEXTURE__SHIFT 1 +#define NVC0_3D_BIND_TIC_TIC__MASK 0x7ffffe00 +#define NVC0_3D_BIND_TIC_TIC__SHIFT 9 + +#define NVC0_3D_CB_BIND(i0) (0x00002410 + 0x20*(i0)) +#define NVC0_3D_CB_BIND__ESIZE 0x00000020 +#define NVC0_3D_CB_BIND__LEN 0x00000005 +#define NVC0_3D_CB_BIND_VALID 0x00000001 +#define NVC0_3D_CB_BIND_INDEX__MASK 0x000000f0 +#define NVC0_3D_CB_BIND_INDEX__SHIFT 4 + +#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600 + +#define NVC0_3D_TFB_VARYING_LOCS(i0) (0x00002800 + 0x4*(i0)) +#define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004 +#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000080 + +#define NVC0_3D_COLOR_MASK_BROADCAST 0x00003808 + +#define NVC0_3D_VERTEX_ARRAY_SELECT 0x00003820 + +#define NVC0_3D_BLEND_ENABLES 0x00003858 + +#define NVC0_3D_POLYGON_MODE_FRONT 0x00003868 +#define NVC0_3D_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NVC0_3D_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NVC0_3D_POLYGON_MODE_FRONT_FILL 0x00001b02 + +#define NVC0_3D_POLYGON_MODE_BACK 0x00003870 +#define NVC0_3D_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NVC0_3D_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NVC0_3D_POLYGON_MODE_BACK_FILL 0x00001b02 + +#define NVC0_3D_GP_SELECT 0x00003878 + +#define NVC0_3D_TEP_SELECT 0x00003880 + + +#endif /* NVC0_3D_XML */ diff --git a/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h b/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h new file mode 100644 index 00000000000..84b152213a2 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h @@ -0,0 +1,98 @@ +#ifndef NV_3DDEFS_XML +#define NV_3DDEFS_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_3d.xml ( 26312 bytes, from 2010-10-08 10:10:01) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) +- nv_3ddefs.xml ( 16397 bytes, from 2010-10-08 13:30:38) +- nv_object.xml ( 11249 bytes, from 2010-10-07 15:31:28) +- nvchipsets.xml ( 2824 bytes, from 2010-07-07 13:41:20) +- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro, curro_, currojerez) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#define NV50_3D_BLEND_FACTOR_ZERO 0x00004000 +#define NV50_3D_BLEND_FACTOR_ONE 0x00004001 +#define NV50_3D_BLEND_FACTOR_SRC_COLOR 0x00004300 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x00004301 +#define NV50_3D_BLEND_FACTOR_SRC_ALPHA 0x00004302 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x00004303 +#define NV50_3D_BLEND_FACTOR_DST_ALPHA 0x00004304 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x00004305 +#define NV50_3D_BLEND_FACTOR_DST_COLOR 0x00004306 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x00004307 +#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE 0x00004308 +#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR 0x0000c001 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x0000c002 +#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA 0x0000c003 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x0000c004 +#define NV50_3D_BLEND_FACTOR_SRC1_COLOR 0x0000c900 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR 0x0000c901 +#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA 0x0000c902 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA 0x0000c903 + +#endif /* NV_3DDEFS_XML */ diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c new file mode 100644 index 00000000000..ea3e642a448 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_buffer.c @@ -0,0 +1,489 @@ + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_math.h" + +#define NOUVEAU_NVC0 +#include "nouveau/nouveau_screen.h" +#include "nouveau/nouveau_winsys.h" +#undef NOUVEAU_NVC0 + +#include "nvc0_context.h" +#include "nvc0_resource.h" + +struct nvc0_transfer { + struct pipe_transfer base; +}; + +static INLINE struct nvc0_transfer * +nvc0_transfer(struct pipe_transfer *transfer) +{ + return (struct nvc0_transfer *)transfer; +} + +static INLINE boolean +nvc0_buffer_allocate(struct nvc0_screen *screen, struct nvc0_resource *buf, + unsigned domain) +{ + if (domain == NOUVEAU_BO_VRAM) { + buf->mm = nvc0_mm_allocate(screen->mm_VRAM, buf->base.width0, &buf->bo, + &buf->offset); + if (!buf->bo) + return nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_GART); + } else + if (domain == NOUVEAU_BO_GART) { + buf->mm = nvc0_mm_allocate(screen->mm_GART, buf->base.width0, &buf->bo, + &buf->offset); + if (!buf->bo) + return FALSE; + } + if (domain != NOUVEAU_BO_GART) { + if (!buf->data) { + buf->data = MALLOC(buf->base.width0); + if (!buf->data) + return FALSE; + } + } + buf->domain = domain; + return TRUE; +} + +static INLINE void +release_allocation(struct nvc0_mm_allocation **mm, struct nvc0_fence *fence) +{ + if (fence && fence->state != NVC0_FENCE_STATE_SIGNALLED) { + nvc0_fence_sched_release(fence, *mm); + } else { + nvc0_mm_free(*mm); + } + (*mm) = NULL; +} + +static INLINE boolean +nvc0_buffer_reallocate(struct nvc0_screen *screen, struct nvc0_resource *buf, + unsigned domain) +{ + nouveau_bo_ref(NULL, &buf->bo); + + if (buf->mm) + release_allocation(&buf->mm, buf->fence); + + return nvc0_buffer_allocate(screen, buf, domain); +} + +static void +nvc0_buffer_destroy(struct pipe_screen *pscreen, + struct pipe_resource *presource) +{ + struct nvc0_resource *res = nvc0_resource(presource); + + nouveau_bo_ref(NULL, &res->bo); + + if (res->mm) + release_allocation(&res->mm, res->fence); + + if (res->data && !(res->status & NVC0_BUFFER_STATUS_USER_MEMORY)) + FREE(res->data); + + FREE(res); +} + +/* Maybe just migrate to GART right away if we actually need to do this. 
*/ +boolean +nvc0_buffer_download(struct nvc0_context *nvc0, struct nvc0_resource *buf, + unsigned start, unsigned size) +{ + struct nvc0_mm_allocation *mm; + struct nouveau_bo *bounce = NULL; + uint32_t offset; + + assert(buf->domain == NOUVEAU_BO_VRAM); + + mm = nvc0_mm_allocate(nvc0->screen->mm_GART, size, &bounce, &offset); + if (!bounce) + return FALSE; + + nvc0_m2mf_copy_linear(nvc0, bounce, offset, NOUVEAU_BO_GART, + buf->bo, buf->offset + start, NOUVEAU_BO_VRAM, + size); + + if (nouveau_bo_map_range(bounce, offset, size, NOUVEAU_BO_RD)) + return FALSE; + memcpy(buf->data + start, bounce->map, size); + nouveau_bo_unmap(bounce); + + buf->status &= ~NVC0_BUFFER_STATUS_DIRTY; + + nouveau_bo_ref(NULL, &bounce); + if (mm) + nvc0_mm_free(mm); + return TRUE; +} + +static boolean +nvc0_buffer_upload(struct nvc0_context *nvc0, struct nvc0_resource *buf, + unsigned start, unsigned size) +{ + struct nvc0_mm_allocation *mm; + struct nouveau_bo *bounce = NULL; + uint32_t offset; + + if (size <= 192) { + nvc0_m2mf_push_linear(nvc0, buf->bo, buf->domain, buf->offset + start, + size, buf->data + start); + return TRUE; + } + + mm = nvc0_mm_allocate(nvc0->screen->mm_GART, size, &bounce, &offset); + if (!bounce) + return FALSE; + + nouveau_bo_map_range(bounce, offset, size, + NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC); + memcpy(bounce->map, buf->data + start, size); + nouveau_bo_unmap(bounce); + + nvc0_m2mf_copy_linear(nvc0, buf->bo, buf->offset + start, NOUVEAU_BO_VRAM, + bounce, offset, NOUVEAU_BO_GART, size); + + nouveau_bo_ref(NULL, &bounce); + if (mm) + release_allocation(&mm, nvc0->screen->fence.current); + + if (start == 0 && size == buf->base.width0) + buf->status &= ~NVC0_BUFFER_STATUS_DIRTY; + return TRUE; +} + +static struct pipe_transfer * +nvc0_buffer_transfer_get(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box) +{ + struct nvc0_resource *buf = nvc0_resource(resource); + struct nvc0_transfer *xfr = CALLOC_STRUCT(nvc0_transfer); + if (!xfr) + return NULL; + + xfr->base.resource = resource; + xfr->base.box.x = box->x; + xfr->base.box.width = box->width; + xfr->base.usage = usage; + + if (buf->domain == NOUVEAU_BO_VRAM) { + if (usage & PIPE_TRANSFER_READ) { + if (buf->status & NVC0_BUFFER_STATUS_DIRTY) + nvc0_buffer_download(nvc0_context(pipe), buf, 0, buf->base.width0); + } + } + + return &xfr->base; +} + +static void +nvc0_buffer_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + struct nvc0_resource *buf = nvc0_resource(transfer->resource); + struct nvc0_transfer *xfr = nvc0_transfer(transfer); + + if (xfr->base.usage & PIPE_TRANSFER_WRITE) { + /* writing is worse */ + nvc0_buffer_adjust_score(nvc0_context(pipe), buf, -5000); + + if (buf->domain == NOUVEAU_BO_VRAM) { + nvc0_buffer_upload(nvc0_context(pipe), buf, + transfer->box.x, transfer->box.width); + } + + if (buf->domain != 0 && (buf->base.bind & (PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER))) + nvc0_context(pipe)->vbo_dirty = TRUE; + } + + FREE(xfr); +} + +static INLINE boolean +nvc0_buffer_sync(struct nvc0_resource *buf, unsigned rw) +{ + if (rw == PIPE_TRANSFER_READ) { + if (!buf->fence_wr) + return TRUE; + if (!nvc0_fence_wait(buf->fence_wr)) + return FALSE; + } else { + if (!buf->fence) + return TRUE; + if (!nvc0_fence_wait(buf->fence)) + return FALSE; + + nvc0_fence_reference(&buf->fence, NULL); + } + nvc0_fence_reference(&buf->fence_wr, NULL); + + return TRUE; +} + +static INLINE boolean +nvc0_buffer_busy(struct 
nvc0_resource *buf, unsigned rw) +{ + if (rw == PIPE_TRANSFER_READ) + return (buf->fence_wr && !nvc0_fence_signalled(buf->fence_wr)); + else + return (buf->fence && !nvc0_fence_signalled(buf->fence)); +} + +static void * +nvc0_buffer_transfer_map(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + struct nvc0_transfer *xfr = nvc0_transfer(transfer); + struct nvc0_resource *buf = nvc0_resource(transfer->resource); + struct nouveau_bo *bo = buf->bo; + uint8_t *map; + int ret; + uint32_t offset = xfr->base.box.x; + uint32_t flags; + + nvc0_buffer_adjust_score(nvc0_context(pipe), buf, -250); + + if (buf->domain != NOUVEAU_BO_GART) + return buf->data + offset; + + if (buf->mm) + flags = NOUVEAU_BO_NOSYNC | NOUVEAU_BO_RDWR; + else + flags = nouveau_screen_transfer_flags(xfr->base.usage); + + offset += buf->offset; + + ret = nouveau_bo_map_range(buf->bo, offset, xfr->base.box.width, flags); + if (ret) + return NULL; + map = bo->map; + + /* Unmap right now. Since multiple buffers can share a single nouveau_bo, + * not doing so might make future maps fail or trigger "reloc while mapped" + * errors. For now, mappings to userspace are guaranteed to be persistent. + */ + nouveau_bo_unmap(bo); + + if (buf->mm) { + if (xfr->base.usage & PIPE_TRANSFER_DONTBLOCK) { + if (nvc0_buffer_busy(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE)) + return NULL; + } else + if (!(xfr->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { + nvc0_buffer_sync(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE); + } + } + return map; +} + + + +static void +nvc0_buffer_transfer_flush_region(struct pipe_context *pipe, + struct pipe_transfer *transfer, + const struct pipe_box *box) +{ + struct nvc0_resource *res = nvc0_resource(transfer->resource); + struct nouveau_bo *bo = res->bo; + unsigned offset = res->offset + transfer->box.x + box->x; + + /* not using non-snoop system memory yet, no need for cflush */ + if (1) + return; + + /* XXX: maybe need to upload for VRAM buffers here */ + + nouveau_screen_bo_map_flush_range(pipe->screen, bo, offset, box->width); +} + +static void +nvc0_buffer_transfer_unmap(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + /* we've called nouveau_bo_unmap right after map */ +} + +const struct u_resource_vtbl nvc0_buffer_vtbl = +{ + u_default_resource_get_handle, /* get_handle */ + nvc0_buffer_destroy, /* resource_destroy */ + NULL, /* is_resource_referenced */ + nvc0_buffer_transfer_get, /* get_transfer */ + nvc0_buffer_transfer_destroy, /* transfer_destroy */ + nvc0_buffer_transfer_map, /* transfer_map */ + nvc0_buffer_transfer_flush_region, /* transfer_flush_region */ + nvc0_buffer_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ +}; + +struct pipe_resource * +nvc0_buffer_create(struct pipe_screen *pscreen, + const struct pipe_resource *templ) +{ + struct nvc0_screen *screen = nvc0_screen(pscreen); + struct nvc0_resource *buffer; + boolean ret; + + buffer = CALLOC_STRUCT(nvc0_resource); + if (!buffer) + return NULL; + + buffer->base = *templ; + buffer->vtbl = &nvc0_buffer_vtbl; + pipe_reference_init(&buffer->base.reference, 1); + buffer->base.screen = pscreen; + + if (buffer->base.bind & PIPE_BIND_CONSTANT_BUFFER) + ret = nvc0_buffer_allocate(screen, buffer, 0); + else + ret = nvc0_buffer_allocate(screen, buffer, NOUVEAU_BO_GART); + + if (ret == FALSE) + goto fail; + + return &buffer->base; + +fail: + FREE(buffer); + return NULL; +} + + +struct pipe_resource * +nvc0_user_buffer_create(struct pipe_screen *pscreen, + void 
*ptr,
+                        unsigned bytes,
+                        unsigned bind)
+{
+   struct nvc0_resource *buffer;
+
+   buffer = CALLOC_STRUCT(nvc0_resource);
+   if (!buffer)
+      return NULL;
+
+   pipe_reference_init(&buffer->base.reference, 1);
+   buffer->vtbl = &nvc0_buffer_vtbl;
+   buffer->base.screen = pscreen;
+   buffer->base.format = PIPE_FORMAT_R8_UNORM;
+   buffer->base.usage = PIPE_USAGE_IMMUTABLE;
+   buffer->base.bind = bind;
+   buffer->base.width0 = bytes;
+   buffer->base.height0 = 1;
+   buffer->base.depth0 = 1;
+
+   buffer->data = ptr;
+   buffer->status = NVC0_BUFFER_STATUS_USER_MEMORY;
+
+   return &buffer->base;
+}
+
+/* Like download, but for GART buffers. Merge ? */
+static INLINE boolean
+nvc0_buffer_data_fetch(struct nvc0_resource *buf,
+                       struct nouveau_bo *bo, unsigned offset, unsigned size)
+{
+   if (!buf->data) {
+      buf->data = MALLOC(size);
+      if (!buf->data)
+         return FALSE;
+   }
+   if (nouveau_bo_map_range(bo, offset, size, NOUVEAU_BO_RD))
+      return FALSE;
+   memcpy(buf->data, bo->map, size);
+   nouveau_bo_unmap(bo);
+
+   return TRUE;
+}
+
+/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
+boolean
+nvc0_buffer_migrate(struct nvc0_context *nvc0,
+                    struct nvc0_resource *buf, const unsigned new_domain)
+{
+   struct nvc0_screen *screen = nvc0_screen(buf->base.screen);
+   struct nouveau_bo *bo;
+   const unsigned old_domain = buf->domain;
+   unsigned size = buf->base.width0;
+   unsigned offset;
+   int ret;
+
+   assert(new_domain != old_domain);
+
+   if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
+      if (!nvc0_buffer_allocate(screen, buf, new_domain))
+         return FALSE;
+      ret = nouveau_bo_map_range(buf->bo, buf->offset, size, NOUVEAU_BO_WR |
+                                 NOUVEAU_BO_NOSYNC);
+      if (ret)
+         return FALSE;
+      memcpy(buf->bo->map, buf->data, size);
+      nouveau_bo_unmap(buf->bo);
+      FREE(buf->data);
+   } else
+   if (old_domain != 0 && new_domain != 0) {
+      struct nvc0_mm_allocation *mm = buf->mm;
+
+      if (new_domain == NOUVEAU_BO_VRAM) {
+         /* keep a system memory copy of our data in case we hit a fallback */
+         if (!nvc0_buffer_data_fetch(buf, buf->bo, buf->offset, size))
+            return FALSE;
+         debug_printf("migrating %u KiB to VRAM\n", size / 1024);
+      }
+
+      offset = buf->offset;
+      bo = buf->bo;
+      buf->bo = NULL;
+      buf->mm = NULL;
+      nvc0_buffer_allocate(screen, buf, new_domain);
+
+      nvc0_m2mf_copy_linear(nvc0, buf->bo, buf->offset, new_domain,
+                            bo, offset, old_domain, buf->base.width0);
+
+      nouveau_bo_ref(NULL, &bo);
+      if (mm)
+         release_allocation(&mm, screen->fence.current);
+   } else
+   if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
+      if (!nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
+         return FALSE;
+      if (!nvc0_buffer_upload(nvc0, buf, 0, buf->base.width0))
+         return FALSE;
+   } else
+      return FALSE;
+
+   assert(buf->domain == new_domain);
+   return TRUE;
+}
+
+/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
+ * We'd like to only allocate @size bytes here, but then we'd have to rebase
+ * the vertex indices ...
+ */ +boolean +nvc0_user_buffer_upload(struct nvc0_resource *buf, unsigned base, unsigned size) +{ + struct nvc0_screen *screen = nvc0_screen(buf->base.screen); + int ret; + + assert(buf->status & NVC0_BUFFER_STATUS_USER_MEMORY); + + buf->base.width0 = base + size; + if (!nvc0_buffer_reallocate(screen, buf, NOUVEAU_BO_GART)) + return FALSE; + + ret = nouveau_bo_map_range(buf->bo, buf->offset + base, size, + NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC); + if (ret) + return FALSE; + memcpy(buf->bo->map, buf->data + base, size); + nouveau_bo_unmap(buf->bo); + + return TRUE; +} diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c new file mode 100644 index 00000000000..2118abb5d5d --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -0,0 +1,164 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" + +#include "nvc0_context.h" +#include "nvc0_screen.h" +#include "nvc0_resource.h" + +#include "nouveau/nouveau_reloc.h" + +static void +nvc0_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_channel *chan = nvc0->screen->base.channel; + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + BEGIN_RING(chan, RING_3D(SERIALIZE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1); + OUT_RING (chan, 0x00); + } + + if (fence) { + nvc0_screen_fence_new(nvc0->screen, (struct nvc0_fence **)fence, TRUE); + } + + if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) { + FIRE_RING(chan); + + nvc0_screen_fence_next(nvc0->screen); + } +} + +static void +nvc0_destroy(struct pipe_context *pipe) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + draw_destroy(nvc0->draw); + + if (nvc0->screen->cur_ctx == nvc0) + nvc0->screen->cur_ctx = NULL; + + FREE(nvc0); +} + +struct pipe_context * +nvc0_create(struct pipe_screen *pscreen, void *priv) +{ + struct pipe_winsys *pipe_winsys = pscreen->winsys; + struct nvc0_screen *screen = nvc0_screen(pscreen); + struct nvc0_context *nvc0; + + nvc0 = CALLOC_STRUCT(nvc0_context); + if (!nvc0) + return NULL; + nvc0->screen = screen; + + nvc0->pipe.winsys = pipe_winsys; + nvc0->pipe.screen = pscreen; + nvc0->pipe.priv = priv; + + nvc0->pipe.destroy = nvc0_destroy; + + nvc0->pipe.draw_vbo = nvc0_draw_vbo; + nvc0->pipe.clear = nvc0_clear; + + nvc0->pipe.flush = nvc0_flush; + + screen->base.channel->user_private = nvc0; + + nvc0_init_query_functions(nvc0); + nvc0_init_surface_functions(nvc0); + nvc0_init_state_functions(nvc0); + nvc0_init_resource_functions(&nvc0->pipe); + + nvc0->draw = draw_create(&nvc0->pipe); + assert(nvc0->draw); + draw_set_rasterize_stage(nvc0->draw, nvc0_draw_render_stage(nvc0)); + + return &nvc0->pipe; +} + +struct resident { + struct nvc0_resource *res; + uint32_t flags; +}; + +void +nvc0_bufctx_add_resident(struct nvc0_context *nvc0, int ctx, + struct nvc0_resource *resource, uint32_t flags) +{ + struct resident rsd = { resource, flags }; + + if (!resource->bo) + return; + + /* We don't need to reference the resource here, it will be referenced + * in the context/state, and bufctx will be reset when state changes. 
+ */ + util_dynarray_append(&nvc0->residents[ctx], struct resident, rsd); +} + +void +nvc0_bufctx_del_resident(struct nvc0_context *nvc0, int ctx, + struct nvc0_resource *resource) +{ + struct resident *rsd, *top; + unsigned i; + + for (i = 0; i < nvc0->residents[ctx].size / sizeof(struct resident); ++i) { + rsd = util_dynarray_element(&nvc0->residents[ctx], struct resident, i); + + if (rsd->res == resource) { + top = util_dynarray_pop_ptr(&nvc0->residents[ctx], struct resident); + if (rsd != top) + *rsd = *top; + break; + } + } +} + +void +nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0) +{ + struct resident *rsd; + struct util_dynarray *array; + unsigned ctx, i; + + for (ctx = 0; ctx < NVC0_BUFCTX_COUNT; ++ctx) { + array = &nvc0->residents[ctx]; + + for (i = 0; i < array->size / sizeof(struct resident); ++i) { + rsd = util_dynarray_element(array, struct resident, i); + + nvc0_resource_validate(rsd->res, rsd->flags); + } + } + + nvc0_screen_make_buffers_resident(nvc0->screen); +} diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h new file mode 100644 index 00000000000..eeb5beff7a7 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -0,0 +1,227 @@ +#ifndef __NVC0_CONTEXT_H__ +#define __NVC0_CONTEXT_H__ + +#include <stdio.h> +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_inlines.h" +#include "util/u_dynarray.h" + +#include "draw/draw_vertex.h" + +#include "nvc0_winsys.h" +#include "nvc0_stateobj.h" +#include "nvc0_screen.h" +#include "nvc0_program.h" +#include "nvc0_resource.h" + +#include "nvc0_3ddefs.xml.h" +#include "nvc0_3d.xml.h" +#include "nvc0_2d.xml.h" +#include "nvc0_m2mf.xml.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args); + +#ifdef NOUVEAU_DEBUG +# define NOUVEAU_DBG(args...) printf(args); +#else +# define NOUVEAU_DBG(args...) 
+#endif + +#define NVC0_NEW_BLEND (1 << 0) +#define NVC0_NEW_RASTERIZER (1 << 1) +#define NVC0_NEW_ZSA (1 << 2) +#define NVC0_NEW_VERTPROG (1 << 3) +#define NVC0_NEW_TCTLPROG (1 << 4) +#define NVC0_NEW_TEVLPROG (1 << 5) +#define NVC0_NEW_GMTYPROG (1 << 6) +#define NVC0_NEW_FRAGPROG (1 << 7) +#define NVC0_NEW_BLEND_COLOUR (1 << 8) +#define NVC0_NEW_STENCIL_REF (1 << 9) +#define NVC0_NEW_CLIP (1 << 10) +#define NVC0_NEW_SAMPLE_MASK (1 << 11) +#define NVC0_NEW_FRAMEBUFFER (1 << 12) +#define NVC0_NEW_STIPPLE (1 << 13) +#define NVC0_NEW_SCISSOR (1 << 14) +#define NVC0_NEW_VIEWPORT (1 << 15) +#define NVC0_NEW_ARRAYS (1 << 16) +#define NVC0_NEW_VERTEX (1 << 17) +#define NVC0_NEW_CONSTBUF (1 << 18) +#define NVC0_NEW_TEXTURES (1 << 19) +#define NVC0_NEW_SAMPLERS (1 << 20) + +#define NVC0_BUFCTX_CONSTANT 0 +#define NVC0_BUFCTX_FRAME 1 +#define NVC0_BUFCTX_VERTEX 2 +#define NVC0_BUFCTX_TEXTURES 3 +#define NVC0_BUFCTX_COUNT 4 + +struct nvc0_context { + struct pipe_context pipe; + + struct nvc0_screen *screen; + + struct util_dynarray residents[NVC0_BUFCTX_COUNT]; + + uint32_t dirty; + + struct { + uint32_t instance_bits; + uint32_t instance_base; + int32_t index_bias; + boolean prim_restart; + uint8_t num_vtxbufs; + uint8_t num_vtxelts; + uint8_t num_textures[5]; + uint8_t num_samplers[5]; + uint16_t scissor; + uint32_t uniform_buffer_bound[5]; + } state; + + struct nvc0_blend_stateobj *blend; + struct nvc0_rasterizer_stateobj *rast; + struct nvc0_zsa_stateobj *zsa; + struct nvc0_vertex_stateobj *vertex; + + struct nvc0_program *vertprog; + struct nvc0_program *tctlprog; + struct nvc0_program *tevlprog; + struct nvc0_program *gmtyprog; + struct nvc0_program *fragprog; + + struct pipe_resource *constbuf[5][16]; + uint16_t constbuf_dirty[5]; + + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned num_vtxbufs; + struct pipe_index_buffer idxbuf; + uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */ + uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ + unsigned vbo_min_index; /* from pipe_draw_info, for vertex upload */ + unsigned vbo_max_index; + + struct pipe_sampler_view *textures[5][PIPE_MAX_SAMPLERS]; + unsigned num_textures[5]; + struct nvc0_tsc_entry *samplers[5][PIPE_MAX_SAMPLERS]; + unsigned num_samplers[5]; + + struct pipe_framebuffer_state framebuffer; + struct pipe_blend_color blend_colour; + struct pipe_stencil_ref stencil_ref; + struct pipe_poly_stipple stipple; + struct pipe_scissor_state scissor; + struct pipe_viewport_state viewport; + struct pipe_clip_state clip; + + unsigned sample_mask; + + boolean vbo_dirty; + boolean vbo_push_hint; + + struct draw_context *draw; +}; + +static INLINE struct nvc0_context * +nvc0_context(struct pipe_context *pipe) +{ + return (struct nvc0_context *)pipe; +} + +struct nvc0_surface { + struct pipe_surface base; + uint32_t offset; + uint32_t width; + uint16_t height; + uint16_t depth; +}; + +static INLINE struct nvc0_surface * +nvc0_surface(struct pipe_surface *ps) +{ + return (struct nvc0_surface *)ps; +} + +/* nvc0_context.c */ +struct pipe_context *nvc0_create(struct pipe_screen *, void *); + +void nvc0_bufctx_emit_relocs(struct nvc0_context *); +void nvc0_bufctx_add_resident(struct nvc0_context *, int ctx, + struct nvc0_resource *, uint32_t flags); +void nvc0_bufctx_del_resident(struct nvc0_context *, int ctx, + struct nvc0_resource *); +static INLINE void +nvc0_bufctx_reset(struct nvc0_context *nvc0, int ctx) +{ + util_dynarray_resize(&nvc0->residents[ctx], 0); +} + +/* nvc0_draw.c */ +extern 
struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *); + +/* nvc0_program.c */ +boolean nvc0_program_translate(struct nvc0_program *); +void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *); + +/* nvc0_query.c */ +void nvc0_init_query_functions(struct nvc0_context *); + +/* nvc0_shader_state.c */ +void nvc0_vertprog_validate(struct nvc0_context *); +void nvc0_tctlprog_validate(struct nvc0_context *); +void nvc0_tevlprog_validate(struct nvc0_context *); +void nvc0_gmtyprog_validate(struct nvc0_context *); +void nvc0_fragprog_validate(struct nvc0_context *); + +/* nvc0_state.c */ +extern void nvc0_init_state_functions(struct nvc0_context *); + +/* nvc0_state_validate.c */ +extern boolean nvc0_state_validate(struct nvc0_context *); + +/* nvc0_surface.c */ +extern void nvc0_clear(struct pipe_context *, unsigned buffers, + const float *rgba, double depth, unsigned stencil); +extern void nvc0_init_surface_functions(struct nvc0_context *); + +/* nvc0_tex.c */ +void nvc0_validate_textures(struct nvc0_context *); +void nvc0_validate_samplers(struct nvc0_context *); + +struct pipe_sampler_view * +nvc0_create_sampler_view(struct pipe_context *, + struct pipe_resource *, + const struct pipe_sampler_view *); + +/* nvc0_transfer.c */ +void +nvc0_m2mf_push_linear(struct nvc0_context *nvc0, + struct nouveau_bo *dst, unsigned domain, int offset, + unsigned size, void *data); +void +nvc0_m2mf_copy_linear(struct nvc0_context *nvc0, + struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, + struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, + unsigned size); + +/* nvc0_vbo.c */ +void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *); + +void * +nvc0_vertex_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements); +void +nvc0_vertex_state_delete(struct pipe_context *pipe, void *hwcso); + +void nvc0_vertex_arrays_validate(struct nvc0_context *nvc0); + +/* nvc0_push.c */ +void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *); +void nvc0_push_vbo2(struct nvc0_context *, const struct pipe_draw_info *); + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_draw.c b/src/gallium/drivers/nvc0/nvc0_draw.c new file mode 100644 index 00000000000..ac7e9f66a11 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_draw.c @@ -0,0 +1,88 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "draw/draw_pipe.h" + +#include "nvc0_context.h" + +struct nvc0_render_stage { + struct draw_stage stage; + struct nvc0_context *nvc0; +}; + +static INLINE struct nvc0_render_stage * +nvc0_render_stage(struct draw_stage *stage) +{ + return (struct nvc0_render_stage *)stage; +} + +static void +nvc0_render_point(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_line(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_tri(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_flush(struct draw_stage *stage, unsigned flags) +{ +} + +static void +nvc0_render_reset_stipple_counter(struct draw_stage *stage) +{ + NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_destroy(struct draw_stage *stage) +{ + FREE(stage); +} + +struct draw_stage * +nvc0_draw_render_stage(struct nvc0_context *nvc0) +{ + struct nvc0_render_stage *rs = CALLOC_STRUCT(nvc0_render_stage); + + rs->nvc0 = nvc0; + rs->stage.draw = nvc0->draw; + rs->stage.destroy = nvc0_render_destroy; + rs->stage.point = nvc0_render_point; + rs->stage.line = nvc0_render_line; + rs->stage.tri = nvc0_render_tri; + rs->stage.flush = nvc0_render_flush; + rs->stage.reset_stipple_counter = nvc0_render_reset_stipple_counter; + + return &rs->stage; +} diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c new file mode 100644 index 00000000000..9d2c48cf14d --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_fence.c @@ -0,0 +1,203 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "nvc0_fence.h" +#include "nvc0_context.h" +#include "nvc0_screen.h" + +#ifdef PIPE_OS_UNIX +#include <sched.h> +#endif + +boolean +nvc0_screen_fence_new(struct nvc0_screen *screen, struct nvc0_fence **fence, + boolean emit) +{ + *fence = CALLOC_STRUCT(nvc0_fence); + if (!*fence) + return FALSE; + + (*fence)->screen = screen; + (*fence)->ref = 1; + + if (emit) + nvc0_fence_emit(*fence); + + return TRUE; +} + +void +nvc0_fence_emit(struct nvc0_fence *fence) +{ + struct nvc0_screen *screen = fence->screen; + struct nouveau_channel *chan = screen->base.channel; + + fence->sequence = ++screen->fence.sequence; + + assert(fence->state == NVC0_FENCE_STATE_AVAILABLE); + + BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4); + OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); + OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); + OUT_RING (chan, fence->sequence); + OUT_RING (chan, NVC0_3D_QUERY_GET_FENCE | NVC0_3D_QUERY_GET_SHORT | + (0xf << NVC0_3D_QUERY_GET_UNIT__SHIFT)); + + ++fence->ref; + + if (screen->fence.tail) + screen->fence.tail->next = fence; + else + screen->fence.head = fence; + + screen->fence.tail = fence; + + fence->state = NVC0_FENCE_STATE_EMITTED; +} + +static void +nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence); + +void +nvc0_fence_del(struct nvc0_fence *fence) +{ + struct nvc0_fence *it; + struct nvc0_screen *screen = fence->screen; + + if (fence->state == NVC0_FENCE_STATE_EMITTED) { + if (fence == screen->fence.head) { + screen->fence.head = fence->next; + if (!screen->fence.head) + screen->fence.tail = NULL; + } else { + for (it = screen->fence.head; it && it->next != fence; it = it->next); + it->next = fence->next; + if (screen->fence.tail == fence) + screen->fence.tail = it; + } + } + + if (fence->buffers) { + debug_printf("WARNING: deleting fence with buffers " + "still hooked to it !\n"); + nvc0_fence_trigger_release_buffers(fence); + } + + FREE(fence); +} + +static void +nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence) +{ + struct nvc0_mm_allocation *alloc = fence->buffers; + + while (alloc) { + struct nvc0_mm_allocation *next = alloc->next; + nvc0_mm_free(alloc); + alloc = next; + }; + fence->buffers = NULL; +} + +static void +nvc0_screen_fence_update(struct nvc0_screen *screen) +{ + struct nvc0_fence *fence; + struct nvc0_fence *next = NULL; + uint32_t sequence = screen->fence.map[0]; + + if (screen->fence.sequence_ack == sequence) + return; + screen->fence.sequence_ack = sequence; + + for (fence = screen->fence.head; fence; fence = next) { + next = fence->next; + sequence = fence->sequence; + + fence->state = NVC0_FENCE_STATE_SIGNALLED; + + if (fence->buffers) + nvc0_fence_trigger_release_buffers(fence); + + nvc0_fence_reference(&fence, NULL); + + if (sequence == screen->fence.sequence_ack) + break; + } + screen->fence.head = next; + if (!next) + screen->fence.tail = NULL; +} + +#define NVC0_FENCE_MAX_SPINS (1 << 17) + +boolean +nvc0_fence_signalled(struct nvc0_fence *fence) +{ + struct nvc0_screen *screen = fence->screen; + + if (fence->state == NVC0_FENCE_STATE_EMITTED) + nvc0_screen_fence_update(screen); + + return fence->state == NVC0_FENCE_STATE_SIGNALLED; +} + +boolean +nvc0_fence_wait(struct nvc0_fence *fence) +{ + struct nvc0_screen *screen = fence->screen; + int spins = 0; + + if (fence->state == NVC0_FENCE_STATE_AVAILABLE) { + nvc0_fence_emit(fence); + + FIRE_RING(screen->base.channel); + + if (fence == screen->fence.current) + nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); + } + + do { + 
nvc0_screen_fence_update(screen); + + if (fence->state == NVC0_FENCE_STATE_SIGNALLED) + return TRUE; + spins++; +#ifdef PIPE_OS_UNIX + if (!(spins % 8)) /* donate a few cycles */ + sched_yield(); +#endif + } while (spins < NVC0_FENCE_MAX_SPINS); + + if (spins > 9000) + NOUVEAU_ERR("fence %x: been spinning too long\n", fence->sequence); + + return FALSE; +} + +void +nvc0_screen_fence_next(struct nvc0_screen *screen) +{ + nvc0_fence_emit(screen->fence.current); + nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); + nvc0_screen_fence_update(screen); +} diff --git a/src/gallium/drivers/nvc0/nvc0_fence.h b/src/gallium/drivers/nvc0/nvc0_fence.h new file mode 100644 index 00000000000..e63c164bda4 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_fence.h @@ -0,0 +1,48 @@ + +#ifndef __NVC0_FENCE_H__ +#define __NVC0_FENCE_H__ + +#include "util/u_inlines.h" +#include "util/u_double_list.h" + +#define NVC0_FENCE_STATE_AVAILABLE 0 +#define NVC0_FENCE_STATE_EMITTED 1 +#define NVC0_FENCE_STATE_SIGNALLED 2 + +struct nvc0_mm_allocation; + +struct nvc0_fence { + struct nvc0_fence *next; + struct nvc0_screen *screen; + int state; + int ref; + uint32_t sequence; + struct nvc0_mm_allocation *buffers; +}; + +void nvc0_fence_emit(struct nvc0_fence *); +void nvc0_fence_del(struct nvc0_fence *); + +boolean nvc0_fence_wait(struct nvc0_fence *); +boolean nvc0_fence_signalled(struct nvc0_fence *); + +static INLINE void +nvc0_fence_reference(struct nvc0_fence **ref, struct nvc0_fence *fence) +{ + if (*ref) { + if (--(*ref)->ref == 0) + nvc0_fence_del(*ref); + } + if (fence) + ++fence->ref; + + *ref = fence; +} + +static INLINE struct nvc0_fence * +nvc0_fence(struct pipe_fence_handle *fence) +{ + return (struct nvc0_fence *)fence; +} + +#endif // __NVC0_FENCE_H__ diff --git a/src/gallium/drivers/nvc0/nvc0_formats.c b/src/gallium/drivers/nvc0/nvc0_formats.c new file mode 100644 index 00000000000..5d023573818 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_formats.c @@ -0,0 +1,462 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "nvc0_screen.h" +#include "nv50_texture.xml.h" +#include "nvc0_3d.xml.h" +#include "nv50_defs.xml.h" +#include "nv50_texture.xml.h" +#include "pipe/p_defines.h" + +#define A_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \ + (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) | \ + (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \ + (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) | \ + (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \ + (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) | \ + (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \ + (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) | \ + (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \ + NV50_TIC_0_FMT_##sz, \ + NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_##sz | \ + NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_##t0 | \ + (r << 31) + +#define B_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \ + (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) | \ + (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \ + (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) | \ + (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \ + (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) | \ + (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \ + (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) | \ + (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \ + NV50_TIC_0_FMT_##sz, 0 + +#define VERTEX_BUFFER PIPE_BIND_VERTEX_BUFFER +#define SAMPLER_VIEW PIPE_BIND_SAMPLER_VIEW +#define RENDER_TARGET PIPE_BIND_RENDER_TARGET +#define DEPTH_STENCIL PIPE_BIND_DEPTH_STENCIL +#define SCANOUT PIPE_BIND_SCANOUT + +/* for vertex buffers: */ +#define NV50_TIC_0_FMT_8_8_8 NV50_TIC_0_FMT_8_8_8_8 +#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16 +#define NV50_TIC_0_FMT_32_32_32 NV50_TIC_0_FMT_32_32_32_32 + +const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = +{ + /* COMMON FORMATS */ + + [PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50_SURFACE_FORMAT_A8R8G8B8_UNORM, + A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, + + [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50_SURFACE_FORMAT_X8R8G8B8_UNORM, + A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, + + [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50_SURFACE_FORMAT_A8R8G8B8_SRGB, + A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50_SURFACE_FORMAT_X8R8G8B8_SRGB, + A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_B5G6R5_UNORM] = { NV50_SURFACE_FORMAT_R5G6B5_UNORM, + B_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1), + SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, + + [PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50_SURFACE_FORMAT_A1R5G5B5_UNORM, + B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1), + SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, + + [PIPE_FORMAT_B4G4R4A4_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM, + B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1), + SAMPLER_VIEW }, + + [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50_SURFACE_FORMAT_A2B10G10R10_UNORM, + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0), + SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT }, + + [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50_SURFACE_FORMAT_A2R10G10B10_UNORM, + A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 1), + SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER }, + + /* DEPTH/STENCIL FORMATS */ + + [PIPE_FORMAT_Z16_UNORM] = { NV50_ZETA_FORMAT_Z16_UNORM, 
+ B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 16_ZETA, 0), + SAMPLER_VIEW | DEPTH_STENCIL }, + + [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_S8Z24_UNORM, + B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0), + SAMPLER_VIEW | DEPTH_STENCIL }, + + [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_X8Z24_UNORM, + B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0), + SAMPLER_VIEW | DEPTH_STENCIL }, + + [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_S8Z24_UNORM, + B_(C1, C1, C1, ONE, UINT, UNORM, UINT, UINT, 24_8, 0), + SAMPLER_VIEW | DEPTH_STENCIL }, + + [PIPE_FORMAT_Z32_FLOAT] = { NV50_ZETA_FORMAT_Z32_FLOAT, + B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_ZETA, 0), + SAMPLER_VIEW | DEPTH_STENCIL }, + + [PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = { + NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM, + B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_8, 0), + SAMPLER_VIEW | DEPTH_STENCIL }, + + /* LUMINANCE, ALPHA, INTENSITY */ + + [PIPE_FORMAT_L8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM, + A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_L8_SRGB] = { NV50_SURFACE_FORMAT_R8_UNORM, + A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_I8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM, + A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_A8_UNORM] = { NV50_SURFACE_FORMAT_A8_UNORM, + A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), + SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_L8A8_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM, + A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_L8A8_SRGB] = { 0, + A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0), + SAMPLER_VIEW }, + + /* DXT, RGTC */ + + [PIPE_FORMAT_DXT1_RGB] = { 0, + B_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, DXT1, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_DXT1_RGBA] = { 0, + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT1, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_DXT3_RGBA] = { 0, + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT3, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_DXT5_RGBA] = { 0, + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_RGTC1_UNORM] = { 0, + B_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC1, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_RGTC1_SNORM] = { 0, + B_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC1, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_RGTC2_UNORM] = { 0, + B_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC2, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_RGTC2_SNORM] = { 0, + B_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC2, 0), + SAMPLER_VIEW }, + + /* FLOAT 16 */ + + [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT, + A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT, + A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R16G16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16_FLOAT, + A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R16_FLOAT] = { NV50_SURFACE_FORMAT_R16_FLOAT, + A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + /* FLOAT 32 */ + + [PIPE_FORMAT_R32G32B32A32_FLOAT] = { 
NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT, + A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT, + A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R32G32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32_FLOAT, + A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R32_FLOAT] = { NV50_SURFACE_FORMAT_R32_FLOAT, + A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + /* ODD FORMATS */ + + [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50_SURFACE_FORMAT_B10G11R11_FLOAT, + B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 10_11_11, 0), + SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R9G9B9E5_FLOAT] = { 0, + B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, E5_9_9_9, 0), + SAMPLER_VIEW }, + + /* SNORM 32 */ + + [PIPE_FORMAT_R32G32B32A32_SNORM] = { 0, + A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R32G32B32_SNORM] = { 0, + A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R32G32_SNORM] = { 0, + A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R32_SNORM] = { 0, + A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + /* UNORM 32 */ + + [PIPE_FORMAT_R32G32B32A32_UNORM] = { 0, + A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R32G32B32_UNORM] = { 0, + A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R32G32_UNORM] = { 0, + A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R32_UNORM] = { 0, + A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + /* SNORM 16 */ + + [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_SNORM, + A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16_16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R16G16B16_SNORM] = { 0, + A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_R16G16_SNORM, + A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM, + A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + /* UNORM 16 */ + + [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_UNORM, + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16_16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R16G16B16_UNORM] = { 0, + A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_R16G16_UNORM, + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM, + A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 16, 0), + VERTEX_BUFFER | 
SAMPLER_VIEW | RENDER_TARGET }, + + /* SNORM 8 */ + + [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_SNORM, + A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 8_8_8_8, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R8G8B8_SNORM] = { 0, + A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R8G8_SNORM] = { NV50_SURFACE_FORMAT_R8G8_SNORM, + A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8_8, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R8_SNORM] = { NV50_SURFACE_FORMAT_R8_SNORM, + A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + /* UNORM 8 */ + + [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_UNORM, + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50_SURFACE_FORMAT_A8B8G8R8_SRGB, + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), + SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R8G8B8_UNORM] = { NV50_SURFACE_FORMAT_X8B8G8R8_UNORM, + A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R8G8B8_SRGB] = { NV50_SURFACE_FORMAT_X8B8G8R8_SRGB, + A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0), + SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R8G8_UNORM] = { NV50_SURFACE_FORMAT_R8G8_UNORM, + A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8_8, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_R8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM, + A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0), + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, + + /* SSCALED 32 */ + + [PIPE_FORMAT_R32G32B32A32_SSCALED] = { NV50_SURFACE_FORMAT_R32G32B32A32_SINT, + A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R32G32B32_SSCALED] = { 0, + A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R32G32_SSCALED] = { NV50_SURFACE_FORMAT_R32G32_SINT, + A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R32_SSCALED] = { 0, + A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + /* USCALED 32 */ + + [PIPE_FORMAT_R32G32B32A32_USCALED] = { NV50_SURFACE_FORMAT_R32G32B32A32_UINT, + A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 32_32_32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R32G32B32_USCALED] = { 0, + A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 32_32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R32G32_USCALED] = { NV50_SURFACE_FORMAT_R32G32_UINT, + A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32_32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R32_USCALED] = { 0, + A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + /* SSCALED 16 */ + + [PIPE_FORMAT_R16G16B16A16_SSCALED] = { NV50_SURFACE_FORMAT_R16G16B16A16_SINT, + A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R16G16B16_SSCALED] = { 0, + A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R16G16_SSCALED] = { 
NV50_SURFACE_FORMAT_R16G16_SINT, + A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R16_SSCALED] = { NV50_SURFACE_FORMAT_R16_SINT, + A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + /* USCALED 16 */ + + [PIPE_FORMAT_R16G16B16A16_USCALED] = { NV50_SURFACE_FORMAT_R16G16B16A16_UINT, + A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 16_16_16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R16G16B16_USCALED] = { 0, + A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 16_16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R16G16_USCALED] = { NV50_SURFACE_FORMAT_R16G16_UINT, + A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16_16, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R16_USCALED] = { NV50_SURFACE_FORMAT_R16_UINT, + A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + /* SSCALED 8 */ + + [PIPE_FORMAT_R8G8B8A8_SSCALED] = { NV50_SURFACE_FORMAT_A8B8G8R8_SINT, + A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8_8, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R8G8B8_SSCALED] = { 0, + A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R8G8_SSCALED] = { NV50_SURFACE_FORMAT_R8G8_SINT, + A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R8_SSCALED] = { NV50_SURFACE_FORMAT_R8_SINT, + A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + /* USCALED 8 */ + + [PIPE_FORMAT_R8G8B8A8_USCALED] = { NV50_SURFACE_FORMAT_A8B8G8R8_UINT, + A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 8_8_8_8, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R8G8B8_USCALED] = { 0, + A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 8_8_8, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R8G8_USCALED] = { NV50_SURFACE_FORMAT_R8G8_UINT, + A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8_8, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, + + [PIPE_FORMAT_R8_USCALED] = { NV50_SURFACE_FORMAT_R8_UINT, + A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8, 0), + VERTEX_BUFFER | SAMPLER_VIEW }, +}; diff --git a/src/gallium/drivers/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nvc0/nvc0_graph_macros.h new file mode 100644 index 00000000000..8da963a4c5b --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_graph_macros.h @@ -0,0 +1,235 @@ + +#ifndef __NVC0_PGRAPH_MACROS_H__ +#define __NVC0_PGRAPH_MACROS_H__ + +/* extrinsrt r1, r2, src, size, dst: replace bits [dst:dst+size) in r1 + * with bits [src:src+size) in r2 + * + * bra(n)z annul: no delay slot + */ + +/* The comments above the macros describe what they *should* be doing, + * but we use less functionality for now. 
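+ * (For example, the POLYGON_MODE == GL_LINE and SP_SELECT(i) tests are
+ * reduced to single-bit checks; see the NOTE next to the poly_mode_back
+ * listing below.)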
+ */ + +/* + * for (i = 0; i < 8; ++i) + * [NVC0_3D_BLEND_ENABLE(i)] = BIT(i of arg); + * + * [3428] = arg; + * + * if (arg == 0 || [NVC0_3D_MULTISAMPLE_ENABLE] == 0) + * [0d9c] = 0; + * else + * [0d9c] = [342c]; + */ +static const uint32_t nvc0_9097_blend_enables[] = +{ + 0x05360021, /* 0x00: maddr [NVC0_3D_BLEND_ENABLE(0), increment = 4] */ + 0x00404042, /* 0x01: send extrinsrt 0 $r1 0 0x1 0 */ + 0x00424042, /* 0x02: send extrinsrt 0 $r1 0x1 0x1 0 */ + 0x00444042, /* 0x03: send extrinsrt 0 $r1 0x2 0x1 0 */ + 0x00464042, /* 0x04: send extrinsrt 0 $r1 0x3 0x1 0 */ + 0x00484042, /* 0x05: send extrinsrt 0 $r1 0x4 0x1 0 */ + 0x004a4042, /* 0x06: send extrinsrt 0 $r1 0x5 0x1 0 */ + 0x004c40c2, /* 0x07: exit send extrinsrt 0 $r1 0x6 0x1 0 */ + 0x004e4042, /* 0x08: send extrinsrt 0 $r1 0x7 0x1 0 */ +}; + +/* + * uint64 limit = (parm(0) << 32) | parm(1); + * uint64 start = (parm(2) << 32); + * + * if (limit) { + * start |= parm(3); + * --limit; + * } else { + * start |= 1; + * } + * + * [0x1c04 + (arg & 0xf) * 16 + 0] = (start >> 32) & 0xff; + * [0x1c04 + (arg & 0xf) * 16 + 4] = start & 0xffffffff; + * [0x1f00 + (arg & 0xf) * 8 + 0] = (limit >> 32) & 0xff; + * [0x1f00 + (arg & 0xf) * 8 + 4] = limit & 0xffffffff; + */ +static const uint32_t nvc0_9097_vertex_array_select[] = +{ + 0x00000201, /* 0x00: parm $r2 */ + 0x00000301, /* 0x01: parm $r3 */ + 0x00000401, /* 0x02: parm $r4 */ + 0x00000501, /* 0x03: parm $r5 */ + 0x11004612, /* 0x04: mov $r6 extrinsrt 0 $r1 0 4 2 */ + 0x09004712, /* 0x05: mov $r7 extrinsrt 0 $r1 0 4 1 */ + 0x05c07621, /* 0x06: maddr $r6 add $6 0x1701 */ + 0x00002041, /* 0x07: send $r4 */ + 0x00002841, /* 0x08: send $r5 */ + 0x05f03f21, /* 0x09: maddr $r7 add $7 0x17c0 */ + 0x000010c1, /* 0x0a: exit send $r2 */ + 0x00001841, /* 0x0b: send $r3 */ +}; + +static const uint32_t nvc0_9097_color_mask_brdc[] = +{ + 0x05a00021, /* maddr [NVC0_3D_COLOR_MASK(0), increment = 4] */ + 0x00000841, /* send $r1 */ + 0x00000841, /* send $r1 */ + 0x00000841, /* send $r1 */ + 0x00000841, /* send $r1 */ + 0x00000841, /* send $r1 */ + 0x00000841, /* send $r1 */ + 0x000008c1, /* exit send $r1 */ + 0x00000841, /* send $r1 */ +}; + +/* + * [GL_POLYGON_MODE_FRONT] = arg; + * + * if (BIT(31 of [0x3410])) + * [1a24] = 0x7353; + * + * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41) + * [02ec] = 0; + * else + * if ([GL_POLYGON_MODE_BACK] == GL_LINE || arg == GL_LINE) + * [02ec] = BYTE(1 of [0x3410]) << 4; + * else + * [02ec] = BYTE(0 of [0x3410]) << 4; + */ +static const uint32_t nvc0_9097_poly_mode_front[] = +{ + 0x00db0215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_BACK] */ + 0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */ + 0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */ + 0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */ + 0x00004211, /* 0x04: mov $r2 0x1 */ + 0x00180611, /* 0x05: mov $r6 0x60 */ + 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x07: braz $r7 0xa */ + 0x00dac021, /* 0x08: maddr 0x36b */ + 0x00800611, /* 0x09: mov $r6 0x200 */ + 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */ + 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x0c: braz $r7 0xf */ + 0x00000841, /* 0x0d: send $r1 */ + 0x00000611, /* 0x0e: mov $r6 0 */ + 0x002ec0a1, /* 0x0f: exit maddr [02ec] */ + 0x00003041 /* 0x10: send $r6 */ +}; + +/* + * [GL_POLYGON_MODE_BACK] = arg; + * + * if (BIT(31 of [0x3410])) + * [1a24] = 0x7353; + * + * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41) + * [02ec] = 0; + * else + * if ([GL_POLYGON_MODE_FRONT] == GL_LINE || 
arg == GL_LINE) + * [02ec] = BYTE(1 of [0x3410]) << 4; + * else + * [02ec] = BYTE(0 of [0x3410]) << 4; + */ +/* NOTE: 0x3410 = 0x80002006 by default, + * POLYGON_MODE == GL_LINE check replaced by (MODE & 1) + * SP_SELECT(i) == (i << 4) | 1 check replaced by SP_SELECT(i) & 1 + */ +static const uint32_t nvc0_9097_poly_mode_back[] = +{ + 0x00dac215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_FRONT] */ + 0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */ + 0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */ + 0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */ + 0x00004211, /* 0x04: mov $r2 0x1 */ + 0x00180611, /* 0x05: mov $r6 0x60 */ + 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x07: braz $r7 0xa */ + 0x00dac021, /* 0x08: maddr 0x36b */ + 0x00800611, /* 0x09: mov $r6 0x200 */ + 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */ + 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x0c: braz $r7 0xf */ + 0x00000841, /* 0x0d: send $r1 */ + 0x00000611, /* 0x0e: mov $r6 0 */ + 0x002ec0a1, /* 0x0f: exit maddr [02ec] */ + 0x00003041 /* 0x10: send $r6 */ +}; + +/* + * [NVC0_3D_SP_SELECT(4)] = arg + * + * if BIT(31 of [0x3410]) == 0 + * [1a24] = 0x7353; + * + * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || arg == 0x41) + * [02ec] = 0 + * else + * if (any POLYGON MODE == LINE) + * [02ec] = BYTE(1 of [3410]) << 4; + * else + * [02ec] = BYTE(0 of [3410]) << 4; // 02ec valid bits are 0xff1 + */ +static const uint32_t nvc0_9097_gp_select[] = /* 0x0f */ +{ + 0x00dac215, /* 0x00: read $r2 0x36b */ + 0x00db0315, /* 0x01: read $r3 0x36c */ + 0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */ + 0x020c0415, /* 0x03: read $r4 0x830 */ + 0x00004211, /* 0x04: mov $r2 0x1 */ + 0x00180611, /* 0x05: mov $r6 0x60 */ + 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x07: braz $r7 0xa */ + 0x02100021, /* 0x08: maddr 0x840 */ + 0x00800611, /* 0x09: mov $r6 0x200 */ + 0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */ + 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x0c: braz $r7 0xf */ + 0x00000841, /* 0x0d: send $r1 */ + 0x00000611, /* 0x0e: mov $r6 0 */ + 0x002ec0a1, /* 0x0f: exit maddr 0xbb */ + 0x00003041, /* 0x10: send $r6 */ +}; + +/* + * [NVC0_3D_SP_SELECT(3)] = arg + * + * if BIT(31 of [0x3410]) == 0 + * [1a24] = 0x7353; + * + * if (arg == 0x31) { + * if (BIT(2 of [0x3430])) { + * int i = 15; do { --i; } while(i); + * [0x1a2c] = 0; + * } + * } + * + * if ([NVC0_3D_SP_SELECT(4)] == 0x41 || arg == 0x31) + * [02ec] = 0 + * else + * if ([any POLYGON_MODE] == GL_LINE) + * [02ec] = BYTE(1 of [3410]) << 4; + * else + * [02ec] = BYTE(0 of [3410]) << 4; + */ +static const uint32_t nvc0_9097_tep_select[] = /* 0x10 */ +{ + 0x00dac215, /* 0x00: read $r2 0x36b */ + 0x00db0315, /* 0x01: read $r3 0x36c */ + 0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */ + 0x02100415, /* 0x03: read $r4 0x840 */ + 0x00004211, /* 0x04: mov $r2 0x1 */ + 0x00180611, /* 0x05: mov $r6 0x60 */ + 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x07: braz $r7 0xa */ + 0x020c0021, /* 0x08: maddr 0x830 */ + 0x00800611, /* 0x09: mov $r6 0x200 */ + 0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */ + 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x0c: braz $r7 0xf */ + 0x00000841, /* 0x0d: send $r1 */ + 0x00000611, /* 0x0e: mov $r6 0 */ + 0x002ec0a1, /* 0x0f: exit maddr 0xbb */ + 0x00003041, /* 0x10: send $r6 */ +}; + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h b/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h new file mode 100644 index 00000000000..3bf628d425e --- /dev/null +++ 
b/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h @@ -0,0 +1,138 @@ +#ifndef NVC0_M2MF_XML +#define NVC0_M2MF_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_m2mf.xml ( 2227 bytes, from 2010-10-16 16:10:29) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) +- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24) +- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro, curro_, currojerez) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + + +#define NVC0_M2MF_TILING_MODE_IN 0x00000204 + +#define NVC0_M2MF_TILING_PITCH_IN 0x00000208 + +#define NVC0_M2MF_TILING_HEIGHT_IN 0x0000020c + +#define NVC0_M2MF_TILING_DEPTH_IN 0x00000210 + +#define NVC0_M2MF_TILING_POSITION_IN_Z 0x00000214 + +#define NVC0_M2MF_TILING_MODE_OUT 0x00000220 + +#define NVC0_M2MF_TILING_PITCH_OUT 0x00000224 + +#define NVC0_M2MF_TILING_HEIGHT_OUT 0x00000228 + +#define NVC0_M2MF_TILING_DEPTH_OUT 0x0000022c + +#define NVC0_M2MF_TILING_POSITION_OUT_Z 0x00000230 + +#define NVC0_M2MF_OFFSET_OUT_HIGH 0x00000238 + +#define NVC0_M2MF_OFFSET_OUT_LOW 0x0000023c + +#define NVC0_M2MF_EXEC 0x00000300 +#define NVC0_M2MF_EXEC_PUSH 0x00000001 +#define NVC0_M2MF_EXEC_LINEAR_IN 0x00000010 +#define NVC0_M2MF_EXEC_LINEAR_OUT 0x00000100 +#define NVC0_M2MF_EXEC_NOTIFY 0x00002000 +#define NVC0_M2MF_EXEC_INC__MASK 0x00f00000 +#define NVC0_M2MF_EXEC_INC__SHIFT 20 + +#define NVC0_M2MF_DATA 0x00000304 + +#define NVC0_M2MF_OFFSET_IN_HIGH 0x0000030c + +#define NVC0_M2MF_OFFSET_IN_LOW 0x00000310 + +#define NVC0_M2MF_PITCH_IN 0x00000314 + +#define NVC0_M2MF_PITCH_OUT 0x00000318 + +#define NVC0_M2MF_LINE_LENGTH_IN 0x0000031c + +#define NVC0_M2MF_LINE_COUNT 0x00000320 + +#define NVC0_M2MF_NOTIFY_ADDRESS_HIGH 0x0000032c + +#define NVC0_M2MF_NOTIFY_ADDRESS_LOW 0x00000330 + +#define NVC0_M2MF_NOTIFY 0x00000334 + +#define NVC0_M2MF_TILING_POSITION_IN_X 0x00000344 + +#define NVC0_M2MF_TILING_POSITION_IN_Y 0x00000348 + +#define NVC0_M2MF_TILING_POSITION_OUT_X 0x0000034c + +#define NVC0_M2MF_TILING_POSITION_OUT_Y 0x00000350 + + +#endif /* NVC0_M2MF_XML */ diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c new file mode 100644 index 00000000000..7c7e134146e --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_miptree.c @@ -0,0 +1,327 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "nvc0_context.h" +#include "nvc0_resource.h" +#include "nvc0_transfer.h" + +static INLINE uint32_t +get_tile_dims(unsigned nx, unsigned ny, unsigned nz) +{ + uint32_t tile_mode = 0x000; + + if (ny > 64) tile_mode = 0x040; /* height 128 tiles */ + else + if (ny > 32) tile_mode = 0x030; /* height 64 tiles */ + else + if (ny > 16) tile_mode = 0x020; /* height 32 tiles */ + else + if (ny > 8) tile_mode = 0x010; /* height 16 tiles */ + + if (nz == 1) + return tile_mode; + else + if (tile_mode > 0x020) + tile_mode = 0x020; + + if (nz > 16 && tile_mode < 0x020) + return tile_mode | 0x500; /* depth 32 tiles */ + if (nz > 8) return tile_mode | 0x400; /* depth 16 tiles */ + if (nz > 4) return tile_mode | 0x300; /* depth 8 tiles */ + if (nz > 2) return tile_mode | 0x200; /* depth 4 tiles */ + + return tile_mode | 0x100; +} + +static INLINE unsigned +calc_zslice_offset(uint32_t tile_mode, unsigned z, unsigned pitch, unsigned nbh) +{ + unsigned tile_h = NVC0_TILE_HEIGHT(tile_mode); + unsigned tile_d_shift = NVC0_TILE_DIM_SHIFT(tile_mode, 2); + unsigned tile_d = 1 << tile_d_shift; + + /* stride_2d == to next slice within this volume tile */ + /* stride_3d == size (in bytes) of a volume tile */ + unsigned stride_2d = tile_h * NVC0_TILE_PITCH(tile_mode); + unsigned stride_3d = tile_d * align(nbh, tile_h) * pitch; + + return (z & (tile_d - 1)) * stride_2d + (z >> tile_d_shift) * stride_3d; +} + +static void +nvc0_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt) +{ + struct nvc0_miptree *mt = nvc0_miptree(pt); + + nouveau_screen_bo_release(pscreen, mt->base.bo); + + FREE(mt); +} + +static boolean +nvc0_miptree_get_handle(struct pipe_screen *pscreen, + struct pipe_resource *pt, + struct winsys_handle *whandle) +{ + struct nvc0_miptree *mt = nvc0_miptree(pt); + unsigned stride; + + if (!mt || !mt->base.bo) + return FALSE; + + stride = util_format_get_stride(mt->base.base.format, + mt->base.base.width0); + + return nouveau_screen_bo_get_handle(pscreen, + mt->base.bo, + stride, + whandle); +} + +const struct u_resource_vtbl nvc0_miptree_vtbl = +{ + nvc0_miptree_get_handle, /* get_handle */ + nvc0_miptree_destroy, /* resource_destroy */ + NULL, /* is_resource_referenced */ + nvc0_miptree_transfer_new, /* get_transfer */ + nvc0_miptree_transfer_del, /* transfer_destroy */ + nvc0_miptree_transfer_map, /* transfer_map */ + u_default_transfer_flush_region, /* transfer_flush_region */ + nvc0_miptree_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ +}; + +struct pipe_resource * +nvc0_miptree_create(struct pipe_screen *pscreen, + const struct pipe_resource *templ) +{ + struct nouveau_device *dev = nouveau_screen(pscreen)->device; + struct nvc0_miptree *mt = CALLOC_STRUCT(nvc0_miptree); + struct pipe_resource *pt = &mt->base.base; + int ret; + unsigned w, h, d, l, alloc_size; + uint32_t tile_flags; + + if (!mt) + return NULL; + + mt->base.vtbl = &nvc0_miptree_vtbl; + *pt = *templ; + pipe_reference_init(&pt->reference, 1); + pt->screen = pscreen; + + mt->layout_3d = pt->target == PIPE_TEXTURE_3D; + + w = pt->width0; + h = pt->height0; + d = mt->layout_3d ? pt->depth0 : 1; + + switch (pt->format) { + case PIPE_FORMAT_Z16_UNORM: + tile_flags = 0x0700; /* COMPRESSED */ + tile_flags = 0x0200; /* NORMAL ? */ + tile_flags = 0x0100; /* NORMAL ? 
*/ + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + tile_flags = 0x5300; /* MSAA 4, COMPRESSED */ + tile_flags = 0x4600; /* NORMAL */ + break; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + tile_flags = 0x1100; /* NORMAL */ + if (w * h >= 128 * 128 && 0) + tile_flags = 0x1700; /* COMPRESSED, requires magic */ + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + tile_flags = 0xf500; /* COMPRESSED */ + tile_flags = 0xf700; /* MSAA 2 */ + tile_flags = 0xf900; /* MSAA 4 */ + tile_flags = 0xfe00; /* NORMAL */ + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + tile_flags = 0xce00; /* COMPRESSED */ + tile_flags = 0xcf00; /* MSAA 2, COMPRESSED */ + tile_flags = 0xd000; /* MSAA 4, COMPRESSED */ + tile_flags = 0xc300; /* NORMAL */ + break; + case PIPE_FORMAT_R16G16B16A16_UNORM: + tile_flags = 0xe900; /* COMPRESSED */ + break; + default: + tile_flags = 0xe000; /* MSAA 4, COMPRESSED 32 BIT */ + tile_flags = 0xfe00; /* NORMAL 32 BIT */ + if (w * h >= 128 * 128 && 0) + tile_flags = 0xdb00; /* COMPRESSED 32 BIT, requires magic */ + break; + } + + /* For 3D textures, a mipmap is spanned by all the layers, for array + * textures and cube maps, each layer contains its own mipmaps. + */ + for (l = 0; l <= pt->last_level; ++l) { + struct nvc0_miptree_level *lvl = &mt->level[l]; + unsigned nbx = util_format_get_nblocksx(pt->format, w); + unsigned nby = util_format_get_nblocksy(pt->format, h); + unsigned blocksize = util_format_get_blocksize(pt->format); + + lvl->offset = mt->total_size; + lvl->tile_mode = get_tile_dims(nbx, nby, d); + lvl->pitch = align(nbx * blocksize, NVC0_TILE_PITCH(lvl->tile_mode)); + + mt->total_size += lvl->pitch * + align(nby, NVC0_TILE_HEIGHT(lvl->tile_mode)) * + align(d, NVC0_TILE_DEPTH(lvl->tile_mode)); + + w = u_minify(w, 1); + h = u_minify(h, 1); + d = u_minify(d, 1); + } + + if (pt->array_size > 1) { + mt->layer_stride = align(mt->total_size, + NVC0_TILE_SIZE(mt->level[0].tile_mode)); + mt->total_size = mt->layer_stride * pt->array_size; + } + + alloc_size = mt->total_size; + if (tile_flags == 0x1700) + alloc_size *= 3; /* HiZ, XXX: correct size */ + + ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, alloc_size, + mt->level[0].tile_mode, tile_flags, + &mt->base.bo); + if (ret) { + FREE(mt); + return NULL; + } + mt->base.domain = NOUVEAU_BO_VRAM; + + return pt; +} + +struct pipe_resource * +nvc0_miptree_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *templ, + struct winsys_handle *whandle) +{ + struct nvc0_miptree *mt; + unsigned stride; + + /* only supports 2D, non-mipmapped textures for the moment */ + if ((templ->target != PIPE_TEXTURE_2D && + templ->target != PIPE_TEXTURE_RECT) || + templ->last_level != 0 || + templ->depth0 != 1 || + templ->array_size > 1) + return NULL; + + mt = CALLOC_STRUCT(nvc0_miptree); + if (!mt) + return NULL; + + mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride); + if (mt->base.bo == NULL) { + FREE(mt); + return NULL; + } + + mt->base.base = *templ; + mt->base.vtbl = &nvc0_miptree_vtbl; + pipe_reference_init(&mt->base.base.reference, 1); + mt->base.base.screen = pscreen; + mt->level[0].pitch = stride; + mt->level[0].offset = 0; + mt->level[0].tile_mode = mt->base.bo->tile_mode; + + /* no need to adjust bo reference count */ + return &mt->base.base; +} + + +/* Surface functions. 
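+ *
+ * nvc0_miptree_surface_new wraps one mip level (and starting layer or
+ * z-slice) of a miptree in a pipe_surface: the surface offset is the level
+ * offset plus either the z-slice offset (3D textures) or first_layer times
+ * the layer stride (array textures).  nvc0_miptree_surface_del drops the
+ * texture reference and frees the surface.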
+ */ + +struct pipe_surface * +nvc0_miptree_surface_new(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *templ) +{ + struct nvc0_miptree *mt = nvc0_miptree(pt); /* guaranteed */ + struct nvc0_surface *ns; + struct pipe_surface *ps; + struct nvc0_miptree_level *lvl = &mt->level[templ->u.tex.level]; + + ns = CALLOC_STRUCT(nvc0_surface); + if (!ns) + return NULL; + ps = &ns->base; + + pipe_reference_init(&ps->reference, 1); + pipe_resource_reference(&ps->texture, pt); + ps->context = pipe; + ps->format = pt->format; + ps->usage = templ->usage; + ps->u.tex.level = templ->u.tex.level; + ps->u.tex.first_layer = templ->u.tex.first_layer; + ps->u.tex.last_layer = templ->u.tex.last_layer; + + ns->width = u_minify(pt->width0, ps->u.tex.level); + ns->height = u_minify(pt->height0, ps->u.tex.level); + ns->depth = ps->u.tex.last_layer - ps->u.tex.first_layer + 1; + ns->offset = lvl->offset; + + /* comment says there are going to be removed, but they're used by the st */ + ps->width = ns->width; + ps->height = ns->height; + + if (mt->layout_3d) { + unsigned zslice = ps->u.tex.first_layer; + + /* TODO: re-layout the texture to use only depth 1 tiles in this case: */ + if (ns->depth > 1 && (zslice & (NVC0_TILE_DEPTH(lvl->tile_mode) - 1))) + NOUVEAU_ERR("Creating unsupported 3D surface of slices [%u:%u].\n", + zslice, ps->u.tex.last_layer); + + ns->offset += calc_zslice_offset(lvl->tile_mode, zslice, lvl->pitch, + util_format_get_nblocksy(pt->format, + ns->height)); + } else { + ns->offset += mt->layer_stride * ps->u.tex.first_layer; + } + + return ps; +} + +void +nvc0_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps) +{ + struct nvc0_surface *s = nvc0_surface(ps); + + pipe_resource_reference(&ps->texture, NULL); + + FREE(s); +} diff --git a/src/gallium/drivers/nvc0/nvc0_mm.c b/src/gallium/drivers/nvc0/nvc0_mm.c new file mode 100644 index 00000000000..0629dad19c9 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_mm.c @@ -0,0 +1,274 @@ + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_double_list.h" + +#include "nvc0_screen.h" + +#define MM_MIN_ORDER 7 +#define MM_MAX_ORDER 20 + +#define MM_NUM_BUCKETS (MM_MAX_ORDER - MM_MIN_ORDER + 1) + +#define MM_MIN_SIZE (1 << MM_MIN_ORDER) +#define MM_MAX_SIZE (1 << MM_MAX_ORDER) + +struct mm_bucket { + struct list_head free; + struct list_head used; + struct list_head full; + int num_free; +}; + +struct nvc0_mman { + struct nouveau_device *dev; + struct mm_bucket bucket[MM_NUM_BUCKETS]; + uint32_t storage_type; + uint32_t domain; + uint64_t allocated; +}; + +struct mm_slab { + struct list_head head; + struct nouveau_bo *bo; + struct nvc0_mman *cache; + int order; + int count; + int free; + uint32_t bits[0]; +}; + +static int +mm_slab_alloc(struct mm_slab *slab) +{ + int i, n, b; + + if (slab->free == 0) + return -1; + + for (i = 0; i < (slab->count + 31) / 32; ++i) { + b = ffs(slab->bits[i]) - 1; + if (b >= 0) { + n = i * 32 + b; + assert(n < slab->count); + slab->free--; + slab->bits[i] &= ~(1 << b); + return n; + } + } + return -1; +} + +static INLINE void +mm_slab_free(struct mm_slab *slab, int i) +{ + assert(i < slab->count); + slab->bits[i / 32] |= 1 << (i % 32); + slab->free++; + assert(slab->free <= slab->count); +} + +static INLINE int +mm_get_order(uint32_t size) +{ + int s = __builtin_clz(size) ^ 31; + + if (size > (1 << s)) + s += 1; + return s; +} + +static struct mm_bucket * +mm_bucket_by_order(struct nvc0_mman *cache, int order) +{ + if (order > MM_MAX_ORDER) + return 
NULL; + return &cache->bucket[MAX2(order, MM_MIN_ORDER) - MM_MIN_ORDER]; +} + +static struct mm_bucket * +mm_bucket_by_size(struct nvc0_mman *cache, unsigned size) +{ + return mm_bucket_by_order(cache, mm_get_order(size)); +} + +/* size of bo allocation for slab with chunks of (1 << chunk_order) bytes */ +static INLINE uint32_t +mm_default_slab_size(unsigned chunk_order) +{ + assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER); + + static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] = + { + 12, 12, 13, 14, 14, 17, 17, 17, 17, 19, 19, 20, 21, 22 + }; + + return 1 << slab_order[chunk_order - MM_MIN_ORDER]; +} + +static int +mm_slab_new(struct nvc0_mman *cache, int chunk_order) +{ + struct mm_slab *slab; + int words, ret; + const uint32_t size = mm_default_slab_size(chunk_order); + + words = ((size >> chunk_order) + 31) / 32; + assert(words); + + slab = MALLOC(sizeof(struct mm_slab) + words * 4); + if (!slab) + return PIPE_ERROR_OUT_OF_MEMORY; + + memset(&slab->bits[0], ~0, words * 4); + + slab->bo = NULL; + ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size, + 0, cache->storage_type, &slab->bo); + if (ret) { + FREE(slab); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + LIST_INITHEAD(&slab->head); + + slab->cache = cache; + slab->order = chunk_order; + slab->count = slab->free = size >> chunk_order; + + LIST_ADD(&slab->head, &mm_bucket_by_order(cache, chunk_order)->free); + + cache->allocated += size; + + debug_printf("MM: new slab, total memory = %lu KiB\n", + cache->allocated / 1024); + + return PIPE_OK; +} + +/* @return token to identify slab or NULL if we just allocated a new bo */ +struct nvc0_mm_allocation * +nvc0_mm_allocate(struct nvc0_mman *cache, + uint32_t size, struct nouveau_bo **bo, uint32_t *offset) +{ + struct mm_bucket *bucket; + struct mm_slab *slab; + struct nvc0_mm_allocation *alloc; + int ret; + + bucket = mm_bucket_by_size(cache, size); + if (!bucket) { + ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size, + 0, cache->storage_type, bo); + if (ret) + debug_printf("bo_new(%x, %x): %i\n", size, cache->storage_type, ret); + + *offset = 0; + return NULL; + } + + if (!LIST_IS_EMPTY(&bucket->used)) { + slab = LIST_ENTRY(struct mm_slab, bucket->used.next, head); + } else { + if (LIST_IS_EMPTY(&bucket->free)) { + mm_slab_new(cache, MAX2(mm_get_order(size), MM_MIN_ORDER)); + } + slab = LIST_ENTRY(struct mm_slab, bucket->free.next, head); + + LIST_DEL(&slab->head); + LIST_ADD(&slab->head, &bucket->used); + } + + *offset = mm_slab_alloc(slab) << slab->order; + + alloc = MALLOC_STRUCT(nvc0_mm_allocation); + if (!alloc) + return NULL; + + nouveau_bo_ref(slab->bo, bo); + + if (slab->free == 0) { + LIST_DEL(&slab->head); + LIST_ADD(&slab->head, &bucket->full); + } + + alloc->next = NULL; + alloc->offset = *offset; + alloc->priv = (void *)slab; + + return alloc; +} + +void +nvc0_mm_free(struct nvc0_mm_allocation *alloc) +{ + struct mm_slab *slab = (struct mm_slab *)alloc->priv; + struct mm_bucket *bucket = mm_bucket_by_order(slab->cache, slab->order); + + mm_slab_free(slab, alloc->offset >> slab->order); + + if (slab->free == 1) { + LIST_DEL(&slab->head); + + if (slab->count > 1) + LIST_ADDTAIL(&slab->head, &bucket->used); + else + LIST_ADDTAIL(&slab->head, &bucket->free); + } + + FREE(alloc); +} + +struct nvc0_mman * +nvc0_mm_create(struct nouveau_device *dev, uint32_t domain, + uint32_t storage_type) +{ + struct nvc0_mman *cache = MALLOC_STRUCT(nvc0_mman); + int i; + + if (!cache) + return NULL; + + cache->dev = dev; + cache->domain = domain; + 
cache->storage_type = storage_type; + cache->allocated = 0; + + for (i = 0; i < MM_NUM_BUCKETS; ++i) { + LIST_INITHEAD(&cache->bucket[i].free); + LIST_INITHEAD(&cache->bucket[i].used); + LIST_INITHEAD(&cache->bucket[i].full); + } + + return cache; +} + +static INLINE void +nvc0_mm_free_slabs(struct list_head *head) +{ + struct mm_slab *slab, *next; + + LIST_FOR_EACH_ENTRY_SAFE(slab, next, head, head) { + LIST_DEL(&slab->head); + nouveau_bo_ref(NULL, &slab->bo); + FREE(slab); + } +} + +void +nvc0_mm_destroy(struct nvc0_mman *cache) +{ + int i; + + for (i = 0; i < MM_NUM_BUCKETS; ++i) { + if (!LIST_IS_EMPTY(&cache->bucket[i].used) || + !LIST_IS_EMPTY(&cache->bucket[i].full)) + debug_printf("WARNING: destroying GPU memory cache " + "with some buffers still in use\n"); + + nvc0_mm_free_slabs(&cache->bucket[i].free); + nvc0_mm_free_slabs(&cache->bucket[i].used); + nvc0_mm_free_slabs(&cache->bucket[i].full); + } +} + diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c new file mode 100644 index 00000000000..304a1919768 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -0,0 +1,693 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define NOUVEAU_DEBUG 1 + +#include "nvc0_pc.h" +#include "nvc0_program.h" + +boolean +nvc0_insn_can_load(struct nv_instruction *nvi, int s, + struct nv_instruction *ld) +{ + int i; + + if (ld->opcode == NV_OP_MOV && ld->src[0]->value->reg.file == NV_FILE_IMM) { + if (s > 1 || !(nvc0_op_info_table[nvi->opcode].immediate & (1 << s))) + return FALSE; + if (!(nvc0_op_info_table[nvi->opcode].immediate & 4)) + if (ld->src[0]->value->reg.imm.u32 & 0xfff) + return FALSE; + } else + if (!(nvc0_op_info_table[nvi->opcode].memory & (1 << s))) + return FALSE; + + if (ld->indirect >= 0) + return FALSE; + + for (i = 0; i < 3 && nvi->src[i]; ++i) + if (nvi->src[i]->value->reg.file == NV_FILE_IMM) + return FALSE; + + return TRUE; +} + +/* Return whether this instruction can be executed conditionally. 
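+ * It can be predicated only if its opcode supports predication, it does
+ * not already carry a predicate source, and none of its sources is an
+ * immediate value.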
*/ +boolean +nvc0_insn_is_predicateable(struct nv_instruction *nvi) +{ + int s; + + if (!nv_op_predicateable(nvi->opcode)) + return FALSE; + if (nvi->predicate >= 0) + return FALSE; + for (s = 0; s < 4 && nvi->src[s]; ++s) + if (nvi->src[s]->value->reg.file == NV_FILE_IMM) + return FALSE; + return TRUE; +} + +int +nvc0_insn_refcount(struct nv_instruction *nvi) +{ + int rc = 0; + int i; + for (i = 0; i < 5 && nvi->def[i]; ++i) { + if (!nvi->def[i]) + return rc; + rc += nvi->def[i]->refc; + } + return rc; +} + +int +nvc0_pc_replace_value(struct nv_pc *pc, + struct nv_value *old_val, + struct nv_value *new_val) +{ + int i, n, s; + + if (old_val == new_val) + return old_val->refc; + + for (i = 0, n = 0; i < pc->num_refs; ++i) { + if (pc->refs[i]->value == old_val) { + ++n; + for (s = 0; s < 6 && pc->refs[i]->insn->src[s]; ++s) + if (pc->refs[i]->insn->src[s] == pc->refs[i]) + break; + assert(s < 6); + nv_reference(pc, pc->refs[i]->insn, s, new_val); + } + } + return n; +} + +struct nv_value * +nvc0_pc_find_constant(struct nv_ref *ref) +{ + struct nv_value *src; + + if (!ref) + return NULL; + + src = ref->value; + while (src->insn && src->insn->opcode == NV_OP_MOV) { + assert(!src->insn->src[0]->mod); + src = src->insn->src[0]->value; + } + if ((src->reg.file == NV_FILE_IMM) || + (src->insn && + src->insn->opcode == NV_OP_LD && + src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) && + src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15))) + return src; + return NULL; +} + +struct nv_value * +nvc0_pc_find_immediate(struct nv_ref *ref) +{ + struct nv_value *src = nvc0_pc_find_constant(ref); + + return (src && src->reg.file == NV_FILE_IMM) ? src : NULL; +} + +static void +nv_pc_free_refs(struct nv_pc *pc) +{ + int i; + for (i = 0; i < pc->num_refs; i += 64) + FREE(pc->refs[i]); + FREE(pc->refs); +} + +static const char * +edge_name(ubyte type) +{ + switch (type) { + case CFG_EDGE_FORWARD: return "forward"; + case CFG_EDGE_BACK: return "back"; + case CFG_EDGE_LOOP_ENTER: return "loop"; + case CFG_EDGE_LOOP_LEAVE: return "break"; + case CFG_EDGE_FAKE: return "fake"; + default: + return "?"; + } +} + +void +nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, + void *priv) +{ + struct nv_basic_block *bb[64], *bbb[16], *b; + int j, p, pp; + + bb[0] = root; + p = 1; + pp = 0; + + while (p > 0) { + b = bb[--p]; + b->priv = 0; + + for (j = 1; j >= 0; --j) { + if (!b->out[j]) + continue; + + switch (b->out_kind[j]) { + case CFG_EDGE_BACK: + continue; + case CFG_EDGE_FORWARD: + case CFG_EDGE_FAKE: + if (++b->out[j]->priv == b->out[j]->num_in) + bb[p++] = b->out[j]; + break; + case CFG_EDGE_LOOP_ENTER: + bb[p++] = b->out[j]; + break; + case CFG_EDGE_LOOP_LEAVE: + bbb[pp++] = b->out[j]; + break; + default: + assert(0); + break; + } + } + + f(priv, b); + + if (!p) { + p = pp; + for (; pp > 0; --pp) + bb[pp - 1] = bbb[pp - 1]; + } + } +} + +static void +nv_do_print_function(void *priv, struct nv_basic_block *b) +{ + struct nv_instruction *i; + + debug_printf("=== BB %i ", b->id); + if (b->out[0]) + debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id); + if (b->out[1]) + debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id); + debug_printf("===\n"); + + i = b->phi; + if (!i) + i = b->entry; + for (; i; i = i->next) + nvc0_print_instruction(i); +} + +void +nvc0_print_function(struct nv_basic_block *root) +{ + if (root->subroutine) + debug_printf("SUBROUTINE %i\n", root->subroutine); + else + debug_printf("MAIN\n"); + + nvc0_pc_pass_in_order(root, 
nv_do_print_function, root); +} + +void +nvc0_print_program(struct nv_pc *pc) +{ + int i; + for (i = 0; i < pc->num_subroutines + 1; ++i) + if (pc->root[i]) + nvc0_print_function(pc->root[i]); +} + +#if NOUVEAU_DEBUG > 1 +static void +nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b) +{ + int i; + + b->pass_seq = pc->pass_seq; + + fprintf(f, "\t%i [shape=box]\n", b->id); + + for (i = 0; i < 2; ++i) { + if (!b->out[i]) + continue; + switch (b->out_kind[i]) { + case CFG_EDGE_FORWARD: + fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id); + break; + case CFG_EDGE_LOOP_ENTER: + fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id); + break; + case CFG_EDGE_LOOP_LEAVE: + fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id); + break; + case CFG_EDGE_BACK: + fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id); + continue; + case CFG_EDGE_FAKE: + fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id); + break; + default: + assert(0); + break; + } + if (b->out[i]->pass_seq < pc->pass_seq) + nv_do_print_cfgraph(pc, f, b->out[i]); + } +} + +/* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. */ +static void +nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr) +{ + FILE *f; + + f = fopen(filepath, "a"); + if (!f) + return; + + fprintf(f, "digraph G {\n"); + + ++pc->pass_seq; + + nv_do_print_cfgraph(pc, f, pc->root[subr]); + + fprintf(f, "}\n"); + + fclose(f); +} +#endif + +static INLINE void +nvc0_pc_print_binary(struct nv_pc *pc) +{ + unsigned i; + + NOUVEAU_DBG("nvc0_pc_print_binary(%u ops)\n", pc->emit_size / 8); + + for (i = 0; i < pc->emit_size / 4; i += 2) { + debug_printf("0x%08x ", pc->emit[i + 0]); + debug_printf("0x%08x ", pc->emit[i + 1]); + if ((i % 16) == 15) + debug_printf("\n"); + } + debug_printf("\n"); +} + +static int +nvc0_emit_program(struct nv_pc *pc) +{ + uint32_t *code = pc->emit; + int n; + + NOUVEAU_DBG("emitting program: size = %u\n", pc->emit_size); + + pc->emit_pos = 0; + for (n = 0; n < pc->num_blocks; ++n) { + struct nv_instruction *i; + struct nv_basic_block *b = pc->bb_list[n]; + + for (i = b->entry; i; i = i->next) { + nvc0_emit_instruction(pc, i); + pc->emit += 2; + pc->emit_pos += 8; + } + } + assert(pc->emit == &code[pc->emit_size / 4]); + + pc->emit[0] = 0x00001de7; + pc->emit[1] = 0x80000000; + pc->emit_size += 8; + + pc->emit = code; + +#ifdef NOUVEAU_DEBUG + nvc0_pc_print_binary(pc); +#else + debug_printf("not printing binary\n"); +#endif + return 0; +} + +int +nvc0_generate_code(struct nvc0_translation_info *ti) +{ + struct nv_pc *pc; + int ret; + int i; + + pc = CALLOC_STRUCT(nv_pc); + if (!pc) + return 1; + + pc->is_fragprog = ti->prog->type == PIPE_SHADER_FRAGMENT; + + pc->root = CALLOC(ti->num_subrs + 1, sizeof(pc->root[0])); + if (!pc->root) { + FREE(pc); + return 1; + } + pc->num_subroutines = ti->num_subrs; + + ret = nvc0_tgsi_to_nc(pc, ti); + if (ret) + goto out; +#if NOUVEAU_DEBUG > 1 + nvc0_print_program(pc); +#endif + + pc->opt_reload_elim = ti->require_stores ? 
FALSE : TRUE; + + /* optimization */ + ret = nvc0_pc_exec_pass0(pc); + if (ret) + goto out; +#ifdef NOUVEAU_DEBUG + nvc0_print_program(pc); +#endif + + /* register allocation */ + ret = nvc0_pc_exec_pass1(pc); + if (ret) + goto out; +#if NOUVEAU_DEBUG > 1 + nvc0_print_program(pc); + nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0); +#endif + + /* prepare for emission */ + ret = nvc0_pc_exec_pass2(pc); + if (ret) + goto out; + assert(!(pc->emit_size % 8)); + + pc->emit = CALLOC(pc->emit_size / 4 + 2, 4); + if (!pc->emit) { + ret = 3; + goto out; + } + ret = nvc0_emit_program(pc); + if (ret) + goto out; + + ti->prog->code = pc->emit; + ti->prog->code_base = 0; + ti->prog->code_size = pc->emit_size; + ti->prog->parm_size = 0; + + ti->prog->max_gpr = MAX2(4, pc->max_reg[NV_FILE_GPR] + 1); + + ti->prog->relocs = pc->reloc_entries; + ti->prog->num_relocs = pc->num_relocs; + + NOUVEAU_DBG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success"); + +out: + nv_pc_free_refs(pc); + + for (i = 0; i < pc->num_blocks; ++i) + FREE(pc->bb_list[i]); + if (pc->root) + FREE(pc->root); + if (ret) { + /* on success, these will be referenced by struct nvc0_program */ + if (pc->emit) + FREE(pc->emit); + if (pc->immd_buf) + FREE(pc->immd_buf); + if (pc->reloc_entries) + FREE(pc->reloc_entries); + } + FREE(pc); + return ret; +} + +static void +nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i) +{ + if (!b->phi) { + i->prev = NULL; + b->phi = i; + i->next = b->entry; + if (b->entry) { + assert(!b->entry->prev && b->exit); + b->entry->prev = i; + } else { + b->entry = i; + b->exit = i; + } + } else { + assert(b->entry); + if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */ + assert(b->entry == b->exit); + b->entry->next = i; + i->prev = b->entry; + b->entry = i; + b->exit = i; + } else { /* insert before entry */ + assert(b->entry->prev && b->exit); + i->next = b->entry; + i->prev = b->entry->prev; + b->entry->prev = i; + i->prev->next = i; + } + } +} + +void +nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i) +{ + if (i->opcode == NV_OP_PHI) { + nvbb_insert_phi(b, i); + } else { + i->prev = b->exit; + if (b->exit) + b->exit->next = i; + b->exit = i; + if (!b->entry) + b->entry = i; + else + if (i->prev && i->prev->opcode == NV_OP_PHI) + b->entry = i; + } + + i->bb = b; + b->num_instructions++; +} + +void +nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni) +{ + if (!at->next) { + nvc0_insn_append(at->bb, ni); + return; + } + ni->next = at->next; + ni->prev = at; + ni->next->prev = ni; + ni->prev->next = ni; +} + +void +nvc0_insn_insert_before(struct nv_instruction *at, struct nv_instruction *ni) +{ + nvc0_insn_insert_after(at, ni); + nvc0_insns_permute(at, ni); +} + +void +nvc0_insn_delete(struct nv_instruction *nvi) +{ + struct nv_basic_block *b = nvi->bb; + int s; + + /* debug_printf("REM: "); nv_print_instruction(nvi); */ + + for (s = 0; s < 6 && nvi->src[s]; ++s) + nv_reference(NULL, nvi, s, NULL); + + if (nvi->next) + nvi->next->prev = nvi->prev; + else { + assert(nvi == b->exit); + b->exit = nvi->prev; + } + + if (nvi->prev) + nvi->prev->next = nvi->next; + + if (nvi == b->entry) { + /* PHIs don't get hooked to b->entry */ + b->entry = nvi->next; + assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI); + } + + if (nvi == b->phi) { + if (nvi->opcode != NV_OP_PHI) + NOUVEAU_DBG("NOTE: b->phi points to non-PHI instruction\n"); + + assert(!nvi->prev); + if (!nvi->next || nvi->next->opcode != NV_OP_PHI) + b->phi = NULL; + else + b->phi = 
nvi->next; + } +} + +void +nvc0_insns_permute(struct nv_instruction *i1, struct nv_instruction *i2) +{ + struct nv_basic_block *b = i1->bb; + + assert(i1->opcode != NV_OP_PHI && + i2->opcode != NV_OP_PHI); + assert(i1->next == i2); + + if (b->exit == i2) + b->exit = i1; + + if (b->entry == i1) + b->entry = i2; + + i2->prev = i1->prev; + i1->next = i2->next; + i2->next = i1; + i1->prev = i2; + + if (i2->prev) + i2->prev->next = i2; + if (i1->next) + i1->next->prev = i1; +} + +void +nvc0_bblock_attach(struct nv_basic_block *parent, + struct nv_basic_block *b, ubyte edge_kind) +{ + assert(b->num_in < 8); + + if (parent->out[0]) { + assert(!parent->out[1]); + parent->out[1] = b; + parent->out_kind[1] = edge_kind; + } else { + parent->out[0] = b; + parent->out_kind[0] = edge_kind; + } + + b->in[b->num_in] = parent; + b->in_kind[b->num_in++] = edge_kind; +} + +/* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */ + +boolean +nvc0_bblock_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d) +{ + int j; + + if (b == d) + return TRUE; + + for (j = 0; j < b->num_in; ++j) + if ((b->in_kind[j] != CFG_EDGE_BACK) && + !nvc0_bblock_dominated_by(b->in[j], d)) + return FALSE; + + return j ? TRUE : FALSE; +} + +/* check if @bf (future) can be reached from @bp (past), stop at @bt */ +boolean +nvc0_bblock_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp, + struct nv_basic_block *bt) +{ + struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b; + int i, p, n; + + p = 0; + n = 1; + q[0] = bp; + + while (p < n) { + b = q[p++]; + + if (b == bf) + break; + if (b == bt) + continue; + assert(n <= (1024 - 2)); + + for (i = 0; i < 2; ++i) { + if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) { + q[n] = b->out[i]; + q[n++]->priv = 1; + } + } + } + for (--n; n >= 0; --n) + q[n]->priv = 0; + + return (b == bf); +} + +static struct nv_basic_block * +nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df) +{ + struct nv_basic_block *out; + int i; + + if (!nvc0_bblock_dominated_by(df, b)) { + for (i = 0; i < df->num_in; ++i) { + if (df->in_kind[i] == CFG_EDGE_BACK) + continue; + if (nvc0_bblock_dominated_by(df->in[i], b)) + return df; + } + } + for (i = 0; i < 2 && df->out[i]; ++i) { + if (df->out_kind[i] == CFG_EDGE_BACK) + continue; + if ((out = nvbb_find_dom_frontier(b, df->out[i]))) + return out; + } + return NULL; +} + +struct nv_basic_block * +nvc0_bblock_dom_frontier(struct nv_basic_block *b) +{ + struct nv_basic_block *df; + int i; + + for (i = 0; i < 2 && b->out[i]; ++i) + if ((df = nvbb_find_dom_frontier(b, b->out[i]))) + return df; + return NULL; +} diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h new file mode 100644 index 00000000000..969cc68c596 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -0,0 +1,653 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NVC0_COMPILER_H__ +#define __NVC0_COMPILER_H__ + +#include <stdio.h> + +#ifndef NOUVEAU_DBG +#ifdef NOUVEAU_DEBUG +# define NOUVEAU_DBG(args...) debug_printf(args); +#else +# define NOUVEAU_DBG(args...) +#endif +#endif + +#ifndef NOUVEAU_ERR +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args); +#endif + +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_double_list.h" + +/* pseudo opcodes */ +#define NV_OP_UNDEF 0 +#define NV_OP_BIND 1 +#define NV_OP_MERGE 2 +#define NV_OP_PHI 3 +#define NV_OP_SELECT 4 +#define NV_OP_NOP 5 + +/** + * BIND forces source operand i into the same register as destination operand i, + * and the operands will be assigned consecutive registers (needed for TEX) + * SELECT forces its multiple source operands and its destination operand into + * one and the same register. + */ + +/* base opcodes */ +#define NV_OP_LD 6 +#define NV_OP_ST 7 +#define NV_OP_MOV 8 +#define NV_OP_AND 9 +#define NV_OP_OR 10 +#define NV_OP_XOR 11 +#define NV_OP_SHL 12 +#define NV_OP_SHR 13 +#define NV_OP_NOT 14 +#define NV_OP_SET 15 +#define NV_OP_ADD 16 +#define NV_OP_SUB 17 +#define NV_OP_MUL 18 +#define NV_OP_MAD 19 +#define NV_OP_ABS 20 +#define NV_OP_NEG 21 +#define NV_OP_MAX 22 +#define NV_OP_MIN 23 +#define NV_OP_CVT 24 +#define NV_OP_CEIL 25 +#define NV_OP_FLOOR 26 +#define NV_OP_TRUNC 27 +#define NV_OP_SAD 28 + +/* shader opcodes */ +#define NV_OP_VFETCH 29 +#define NV_OP_PFETCH 30 +#define NV_OP_EXPORT 31 +#define NV_OP_LINTERP 32 +#define NV_OP_PINTERP 33 +#define NV_OP_EMIT 34 +#define NV_OP_RESTART 35 +#define NV_OP_TEX 36 +#define NV_OP_TXB 37 +#define NV_OP_TXL 38 +#define NV_OP_TXF 39 +#define NV_OP_TXQ 40 +#define NV_OP_QUADOP 41 +#define NV_OP_DFDX 42 +#define NV_OP_DFDY 43 +#define NV_OP_KIL 44 + +/* control flow opcodes */ +#define NV_OP_BRA 45 +#define NV_OP_CALL 46 +#define NV_OP_RET 47 +#define NV_OP_EXIT 48 +#define NV_OP_BREAK 49 +#define NV_OP_BREAKADDR 50 +#define NV_OP_JOINAT 51 +#define NV_OP_JOIN 52 + +/* typed opcodes */ +#define NV_OP_ADD_F32 NV_OP_ADD +#define NV_OP_ADD_B32 53 +#define NV_OP_MUL_F32 NV_OP_MUL +#define NV_OP_MUL_B32 54 +#define NV_OP_ABS_F32 NV_OP_ABS +#define NV_OP_ABS_S32 55 +#define NV_OP_NEG_F32 NV_OP_NEG +#define NV_OP_NEG_S32 56 +#define NV_OP_MAX_F32 NV_OP_MAX +#define NV_OP_MAX_S32 57 +#define NV_OP_MAX_U32 58 +#define NV_OP_MIN_F32 NV_OP_MIN +#define NV_OP_MIN_S32 59 +#define NV_OP_MIN_U32 60 +#define NV_OP_SET_F32 61 +#define NV_OP_SET_S32 62 +#define NV_OP_SET_U32 63 +#define NV_OP_SAR 64 +#define NV_OP_RCP 65 +#define NV_OP_RSQ 66 +#define NV_OP_LG2 67 +#define NV_OP_SIN 68 +#define NV_OP_COS 69 +#define NV_OP_EX2 70 +#define NV_OP_PRESIN 71 +#define NV_OP_PREEX2 72 +#define NV_OP_SAT 73 + +/* newly added opcodes */ +#define NV_OP_SET_F32_AND 74 +#define NV_OP_SET_F32_OR 75 +#define NV_OP_SET_F32_XOR 76 +#define NV_OP_SELP 77 +#define NV_OP_SLCT 78 +#define NV_OP_SLCT_F32 NV_OP_SLCT +#define NV_OP_SLCT_S32 79 +#define NV_OP_SLCT_U32 80 +#define 
NV_OP_SUB_F32 NV_OP_SUB +#define NV_OP_SUB_S32 81 +#define NV_OP_MAD_F32 NV_OP_MAD +#define NV_OP_FSET_F32 82 +#define NV_OP_TXG 83 + +#define NV_OP_COUNT 84 + +/* nv50 files omitted */ +#define NV_FILE_GPR 0 +#define NV_FILE_COND 1 +#define NV_FILE_PRED 2 +#define NV_FILE_IMM 16 +#define NV_FILE_MEM_S 32 +#define NV_FILE_MEM_V 34 +#define NV_FILE_MEM_A 35 +#define NV_FILE_MEM_L 48 +#define NV_FILE_MEM_G 64 +#define NV_FILE_MEM_C(i) (80 + i) + +#define NV_IS_MEMORY_FILE(f) ((f) >= NV_FILE_MEM_S) + +#define NV_MOD_NEG 1 +#define NV_MOD_ABS 2 +#define NV_MOD_NOT 4 +#define NV_MOD_SAT 8 + +#define NV_TYPE_U8 0x00 +#define NV_TYPE_S8 0x01 +#define NV_TYPE_U16 0x02 +#define NV_TYPE_S16 0x03 +#define NV_TYPE_U32 0x04 +#define NV_TYPE_S32 0x05 +#define NV_TYPE_P32 0x07 +#define NV_TYPE_F32 0x09 +#define NV_TYPE_F64 0x0b +#define NV_TYPE_VEC(x, n) (NV_TYPE_##x | (n << 4)) +#define NV_TYPE_ANY 0xff + +#define NV_TYPE_ISINT(t) ((t) < 7) +#define NV_TYPE_ISSGD(t) ((t) & 1) + +#define NV_CC_FL 0x0 +#define NV_CC_LT 0x1 +#define NV_CC_EQ 0x2 +#define NV_CC_LE 0x3 +#define NV_CC_GT 0x4 +#define NV_CC_NE 0x5 +#define NV_CC_GE 0x6 +#define NV_CC_U 0x8 +#define NV_CC_TR 0xf +#define NV_CC_O 0x10 +#define NV_CC_C 0x11 +#define NV_CC_A 0x12 +#define NV_CC_S 0x13 + +#define NV_PC_MAX_INSTRUCTIONS 2048 +#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4) + +#define NV_PC_MAX_BASIC_BLOCKS 1024 + +struct nv_op_info { + uint base; /* e.g. ADD_S32 -> ADD */ + char name[12]; + uint8_t type; + uint8_t mods; + unsigned flow : 1; + unsigned commutative : 1; + unsigned vector : 1; + unsigned predicate : 1; + unsigned pseudo : 1; + unsigned immediate : 3; + unsigned memory : 3; +}; + +extern struct nv_op_info nvc0_op_info_table[]; + +#define NV_BASEOP(op) (nvc0_op_info_table[op].base) +#define NV_OPTYPE(op) (nvc0_op_info_table[op].type) + +static INLINE uint +nv_op_base(uint opcode) +{ + return nvc0_op_info_table[opcode].base; +} + +static INLINE boolean +nv_is_texture_op(uint opcode) +{ + return (opcode >= NV_OP_TEX && opcode <= NV_OP_TXQ); +} + +static INLINE boolean +nv_is_vector_op(uint opcode) +{ + return nvc0_op_info_table[opcode].vector ? TRUE : FALSE; +} + +static INLINE boolean +nv_op_commutative(uint opcode) +{ + return nvc0_op_info_table[opcode].commutative ? TRUE : FALSE; +} + +static INLINE uint8_t +nv_op_supported_src_mods(uint opcode) +{ + return nvc0_op_info_table[opcode].mods; +} + +static INLINE boolean +nv_op_predicateable(uint opcode) +{ + return nvc0_op_info_table[opcode].predicate ? 
TRUE : FALSE; +} + +static INLINE uint +nv_type_order(ubyte type) +{ + switch (type & 0xf) { + case NV_TYPE_U8: + case NV_TYPE_S8: + return 0; + case NV_TYPE_U16: + case NV_TYPE_S16: + return 1; + case NV_TYPE_U32: + case NV_TYPE_F32: + case NV_TYPE_S32: + case NV_TYPE_P32: + return 2; + case NV_TYPE_F64: + return 3; + } + assert(0); + return 0; +} + +static INLINE uint +nv_type_sizeof(ubyte type) +{ + if (type & 0xf0) + return (1 << nv_type_order(type)) * (type >> 4); + return 1 << nv_type_order(type); +} + +static INLINE uint +nv_type_sizeof_base(ubyte type) +{ + return 1 << nv_type_order(type); +} + +struct nv_reg { + uint32_t address; /* for memory locations */ + int id; /* for registers */ + ubyte file; + ubyte size; + union { + int32_t s32; + int64_t s64; + uint64_t u64; + uint32_t u32; + float f32; + double f64; + } imm; +}; + +struct nv_range { + struct nv_range *next; + int bgn; + int end; +}; + +struct nv_ref; + +struct nv_value { + struct nv_reg reg; + struct nv_instruction *insn; + struct nv_value *join; + struct nv_ref *last_use; + int n; + struct nv_range *livei; + int refc; + struct nv_value *next; + struct nv_value *prev; +}; + +struct nv_ref { + struct nv_value *value; + struct nv_instruction *insn; + struct list_head list; /* connects uses of the same value */ + uint8_t mod; + uint8_t flags; +}; + +struct nv_basic_block; + +struct nv_instruction { + struct nv_instruction *next; + struct nv_instruction *prev; + uint opcode; + uint serial; + + struct nv_value *def[5]; + struct nv_ref *src[6]; + + int8_t predicate; /* index of predicate src */ + int8_t indirect; /* index of pointer src */ + + union { + struct { + uint8_t t; /* TIC binding */ + uint8_t s; /* TSC binding */ + } tex; + struct { + uint8_t d; /* output type */ + uint8_t s; /* input type */ + } cvt; + } ext; + + struct nv_basic_block *bb; + struct nv_basic_block *target; /* target block of control flow insn */ + + unsigned cc : 5; /* condition code */ + unsigned fixed : 1; /* don't optimize away (prematurely) */ + unsigned terminator : 1; + unsigned join : 1; + unsigned set_cond : 4; /* 2nd byte */ + unsigned saturate : 1; + unsigned centroid : 1; + unsigned flat : 1; + unsigned patch : 1; + unsigned lanes : 4; /* 3rd byte */ + unsigned tex_dim : 2; + unsigned tex_array : 1; + unsigned tex_cube : 1; + unsigned tex_shadow : 1; /* 4th byte */ + unsigned tex_live : 1; + unsigned tex_mask : 4; + + uint8_t quadop; +}; + +static INLINE int +nvi_vector_size(struct nv_instruction *nvi) +{ + int i; + assert(nvi); + for (i = 0; i < 5 && nvi->def[i]; ++i); + return i; +} + +#define CFG_EDGE_FORWARD 0 +#define CFG_EDGE_BACK 1 +#define CFG_EDGE_LOOP_ENTER 2 +#define CFG_EDGE_LOOP_LEAVE 4 +#define CFG_EDGE_FAKE 8 + +/* 'WALL' edge means where reachability check doesn't follow */ +/* 'LOOP' edge means just having to do with loops */ +#define IS_LOOP_EDGE(k) ((k) & 7) +#define IS_WALL_EDGE(k) ((k) & 9) + +struct nv_basic_block { + struct nv_instruction *entry; /* first non-phi instruction */ + struct nv_instruction *exit; + struct nv_instruction *phi; /* very first instruction */ + int num_instructions; + + struct nv_basic_block *out[2]; /* no indirect branches -> 2 */ + struct nv_basic_block *in[8]; /* hope that suffices */ + uint num_in; + ubyte out_kind[2]; + ubyte in_kind[8]; + + int id; + int subroutine; + uint priv; /* reset to 0 after you're done */ + uint pass_seq; + + uint32_t emit_pos; /* position, size in emitted code (in bytes) */ + uint32_t emit_size; + + uint32_t live_set[NV_PC_MAX_VALUES / 32]; +}; + +struct 
nvc0_translation_info; + +struct nv_pc { + struct nv_basic_block **root; + struct nv_basic_block *current_block; + struct nv_basic_block *parent_block; + + int loop_nesting_bound; + uint pass_seq; + + struct nv_value values[NV_PC_MAX_VALUES]; + struct nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS]; + struct nv_ref **refs; + struct nv_basic_block *bb_list[NV_PC_MAX_BASIC_BLOCKS]; + int num_values; + int num_instructions; + int num_refs; + int num_blocks; + int num_subroutines; + + int max_reg[4]; + + uint32_t *immd_buf; /* populated on emit */ + unsigned immd_count; + + uint32_t *emit; + uint32_t emit_size; + uint32_t emit_pos; + + void *reloc_entries; + unsigned num_relocs; + + /* optimization enables */ + boolean opt_reload_elim; + boolean is_fragprog; +}; + +void nvc0_insn_append(struct nv_basic_block *, struct nv_instruction *); +void nvc0_insn_insert_before(struct nv_instruction *, struct nv_instruction *); +void nvc0_insn_insert_after(struct nv_instruction *, struct nv_instruction *); + +static INLINE struct nv_instruction * +nv_alloc_instruction(struct nv_pc *pc, uint opcode) +{ + struct nv_instruction *insn; + + insn = &pc->instructions[pc->num_instructions++]; + assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS); + + insn->opcode = opcode; + insn->cc = 0; + insn->indirect = -1; + insn->predicate = -1; + + return insn; +} + +static INLINE struct nv_instruction * +new_instruction(struct nv_pc *pc, uint opcode) +{ + struct nv_instruction *insn = nv_alloc_instruction(pc, opcode); + + nvc0_insn_append(pc->current_block, insn); + return insn; +} + +static INLINE struct nv_instruction * +new_instruction_at(struct nv_pc *pc, struct nv_instruction *at, uint opcode) +{ + struct nv_instruction *insn = nv_alloc_instruction(pc, opcode); + + nvc0_insn_insert_after(at, insn); + return insn; +} + +static INLINE struct nv_value * +new_value(struct nv_pc *pc, ubyte file, ubyte size) +{ + struct nv_value *value = &pc->values[pc->num_values]; + + assert(pc->num_values < NV_PC_MAX_VALUES - 1); + + value->n = pc->num_values++; + value->join = value; + value->reg.id = -1; + value->reg.file = file; + value->reg.size = size; + return value; +} + +static INLINE struct nv_value * +new_value_like(struct nv_pc *pc, struct nv_value *like) +{ + return new_value(pc, like->reg.file, like->reg.size); +} + +static INLINE struct nv_ref * +new_ref(struct nv_pc *pc, struct nv_value *val) +{ + int i; + struct nv_ref *ref; + + if ((pc->num_refs % 64) == 0) { + const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *); + const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *); + + pc->refs = REALLOC(pc->refs, old_size, new_size); + + ref = CALLOC(64, sizeof(struct nv_ref)); + for (i = 0; i < 64; ++i) + pc->refs[pc->num_refs + i] = &ref[i]; + } + + ref = pc->refs[pc->num_refs++]; + ref->value = val; + + LIST_INITHEAD(&ref->list); + + ++val->refc; + return ref; +} + +static INLINE struct nv_basic_block * +new_basic_block(struct nv_pc *pc) +{ + struct nv_basic_block *bb; + + if (pc->num_blocks >= NV_PC_MAX_BASIC_BLOCKS) + return NULL; + + bb = CALLOC_STRUCT(nv_basic_block); + + bb->id = pc->num_blocks; + pc->bb_list[pc->num_blocks++] = bb; + return bb; +} + +static INLINE void +nv_reference(struct nv_pc *pc, + struct nv_instruction *nvi, int c, struct nv_value *s) +{ + struct nv_ref **d = &nvi->src[c]; + assert(c < 6); + + if (*d) { + --(*d)->value->refc; + LIST_DEL(&(*d)->list); + } + + if (s) { + if (!*d) { + *d = new_ref(pc, s); + (*d)->insn = nvi; + } else { + LIST_DEL(&(*d)->list); + 
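/* src[c] already holds a ref: re-point it at the new value instead of allocating a new one */
+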
(*d)->value = s; + ++(s->refc); + } + if (!s->last_use) + s->last_use = *d; + else + LIST_ADDTAIL(&s->last_use->list, &(*d)->list); + + s->last_use = *d; + (*d)->insn = nvi; + } else { + *d = NULL; + } +} + +/* nvc0_emit.c */ +void nvc0_emit_instruction(struct nv_pc *, struct nv_instruction *); + +/* nvc0_print.c */ +const char *nvc0_opcode_name(uint opcode); +void nvc0_print_instruction(struct nv_instruction *); + +/* nvc0_pc.c */ +void nvc0_print_function(struct nv_basic_block *root); +void nvc0_print_program(struct nv_pc *); + +boolean nvc0_insn_can_load(struct nv_instruction *, int s, + struct nv_instruction *); +boolean nvc0_insn_is_predicateable(struct nv_instruction *); + +int nvc0_insn_refcount(struct nv_instruction *); +void nvc0_insn_delete(struct nv_instruction *); +void nvc0_insns_permute(struct nv_instruction *prev, struct nv_instruction *); + +void nvc0_bblock_attach(struct nv_basic_block *parent, + struct nv_basic_block *child, ubyte edge_kind); +boolean nvc0_bblock_dominated_by(struct nv_basic_block *, + struct nv_basic_block *); +boolean nvc0_bblock_reachable_by(struct nv_basic_block *future, + struct nv_basic_block *past, + struct nv_basic_block *final); +struct nv_basic_block *nvc0_bblock_dom_frontier(struct nv_basic_block *); + +int nvc0_pc_replace_value(struct nv_pc *pc, + struct nv_value *old_val, + struct nv_value *new_val); + +struct nv_value *nvc0_pc_find_immediate(struct nv_ref *); +struct nv_value *nvc0_pc_find_constant(struct nv_ref *); + +typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b); + +void nvc0_pc_pass_in_order(struct nv_basic_block *, nv_pc_pass_func, void *); + +int nvc0_pc_exec_pass0(struct nv_pc *pc); +int nvc0_pc_exec_pass1(struct nv_pc *pc); +int nvc0_pc_exec_pass2(struct nv_pc *pc); + +int nvc0_tgsi_to_nc(struct nv_pc *, struct nvc0_translation_info *); + +#endif // NV50_COMPILER_H diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c new file mode 100644 index 00000000000..db8055d91cd --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -0,0 +1,979 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "nvc0_pc.h" +#include "nvc0_program.h" + +#define NVC0_FIXUP_CODE_RELOC 0 +#define NVC0_FIXUP_DATA_RELOC 1 + +struct nvc0_fixup { + uint8_t type; + int8_t shift; + uint32_t mask; + uint32_t data; + uint32_t ofst; +}; + +void +nvc0_relocate_program(struct nvc0_program *prog, + uint32_t code_base, + uint32_t data_base) +{ + struct nvc0_fixup *f = (struct nvc0_fixup *)prog->relocs; + unsigned i; + + for (i = 0; i < prog->num_relocs; ++i) { + uint32_t data; + + switch (f[i].type) { + case NVC0_FIXUP_CODE_RELOC: data = code_base + f[i].data; break; + case NVC0_FIXUP_DATA_RELOC: data = data_base + f[i].data; break; + default: + data = f[i].data; + break; + } + data = (f[i].shift < 0) ? (data >> -f[i].shift) : (data << f[i].shift); + + prog->code[f[i].ofst / 4] &= ~f[i].mask; + prog->code[f[i].ofst / 4] |= data & f[i].mask; + } +} + +static void +create_fixup(struct nv_pc *pc, uint8_t ty, + int w, uint32_t data, uint32_t m, int s) +{ + struct nvc0_fixup *f; + + const unsigned size = sizeof(struct nvc0_fixup); + const unsigned n = pc->num_relocs; + + if (!(n % 8)) + pc->reloc_entries = REALLOC(pc->reloc_entries, n * size, (n + 8) * size); + + f = (struct nvc0_fixup *)pc->reloc_entries; + + f[n].ofst = pc->emit_pos + w * 4; + f[n].type = ty; + f[n].data = data; + f[n].mask = m; + f[n].shift = s; + + ++pc->num_relocs; +} + +static INLINE ubyte +SSIZE(struct nv_instruction *nvi, int s) +{ + return nvi->src[s]->value->reg.size; +} + +static INLINE ubyte +DSIZE(struct nv_instruction *nvi, int d) +{ + return nvi->def[d]->reg.size; +} + +static INLINE struct nv_reg * +SREG(struct nv_ref *ref) +{ + if (!ref) + return NULL; + return &ref->value->join->reg; +} + +static INLINE struct nv_reg * +DREG(struct nv_value *val) +{ + if (!val) + return NULL; + return &val->join->reg; +} + +static INLINE ubyte +SFILE(struct nv_instruction *nvi, int s) +{ + return nvi->src[s]->value->reg.file; +} + +static INLINE ubyte +DFILE(struct nv_instruction *nvi, int d) +{ + return nvi->def[0]->reg.file; +} + +static INLINE void +SID(struct nv_pc *pc, struct nv_ref *ref, int pos) +{ + pc->emit[pos / 32] |= (SREG(ref) ? SREG(ref)->id : 63) << (pos % 32); +} + +static INLINE void +DID(struct nv_pc *pc, struct nv_value *val, int pos) +{ + pc->emit[pos / 32] |= (DREG(val) ? 
DREG(val)->id : 63) << (pos % 32); +} + +static INLINE uint32_t +get_immd_u32(struct nv_ref *ref) /* XXX: dependent on [0]:2 */ +{ + assert(ref->value->reg.file == NV_FILE_IMM); + return ref->value->reg.imm.u32; +} + +static INLINE void +set_immd_u32_l(struct nv_pc *pc, uint32_t u32) +{ + pc->emit[0] |= (u32 & 0x3f) << 26; + pc->emit[1] |= u32 >> 6; +} + +static INLINE void +set_immd_u32(struct nv_pc *pc, uint32_t u32) +{ + if ((pc->emit[0] & 0xf) == 0x2) { + set_immd_u32_l(pc, u32); + } else + if ((pc->emit[0] & 0xf) == 0x3) { + assert(!(pc->emit[1] & 0xc000)); + pc->emit[1] |= 0xc000; + assert(!(u32 & 0xfff00000)); + set_immd_u32_l(pc, u32); + } else { + assert(!(pc->emit[1] & 0xc000)); + pc->emit[1] |= 0xc000; + assert(!(u32 & 0xfff)); + set_immd_u32_l(pc, u32 >> 12); + } +} + +static INLINE void +set_immd(struct nv_pc *pc, struct nv_instruction *i, int s) +{ + set_immd_u32(pc, get_immd_u32(i->src[s])); +} + +static INLINE void +DVS(struct nv_pc *pc, struct nv_instruction *i) +{ + uint s = i->def[0]->reg.size; + int n; + for (n = 1; n < 4 && i->def[n]; ++n) + s += i->def[n]->reg.size; + pc->emit[0] |= ((s / 4) - 1) << 5; +} + +static INLINE void +SVS(struct nv_pc *pc, struct nv_ref *src) +{ + pc->emit[0] |= (SREG(src)->size / 4 - 1) << 5; +} + +static void +set_pred(struct nv_pc *pc, struct nv_instruction *i) +{ + if (i->predicate >= 0) { + SID(pc, i->src[i->predicate], 6); + if (i->cc) + pc->emit[0] |= 0x2000; /* negate */ + } else { + pc->emit[0] |= 0x1c00; + } +} + +static INLINE void +set_address_16(struct nv_pc *pc, struct nv_ref *src) +{ + pc->emit[0] |= (src->value->reg.address & 0x003f) << 26; + pc->emit[1] |= (src->value->reg.address & 0xffc0) >> 6; +} + +static INLINE unsigned +const_space_index(struct nv_instruction *i, int s) +{ + return SFILE(i, s) - NV_FILE_MEM_C(0); +} + +static void +emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op) +{ + pc->emit[0] = 0x00000007; + pc->emit[1] = op << 24; + + if (op == 0x40 || (op >= 0x80 && op <= 0x98)) { + /* bra, exit, ret or kil */ + pc->emit[0] |= 0x1e0; + set_pred(pc, i); + } + + if (i->target) { + int32_t pcrel = i->target->emit_pos - (pc->emit_pos + 8); + + /* we will need relocations only for global functions */ + /* + create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 0, pos, 26, 0xfc000000); + create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 1, pos, -6, 0x0001ffff); + */ + + pc->emit[0] |= (pcrel & 0x3f) << 26; + pc->emit[1] |= (pcrel >> 6) & 0x1ffff; + } +} + +/* doesn't work for vfetch, export, ld, st, mov ... */ +static void +emit_form_0(struct nv_pc *pc, struct nv_instruction *i) +{ + int s; + + set_pred(pc, i); + + DID(pc, i->def[0], 14); + + for (s = 0; s < 3 && i->src[s]; ++s) { + if (SFILE(i, s) >= NV_FILE_MEM_C(0) && + SFILE(i, s) <= NV_FILE_MEM_C(15)) { + assert(!(pc->emit[1] & 0xc000)); + assert(s <= 1); + pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10); + set_address_16(pc, i->src[s]); + } else + if (SFILE(i, s) == NV_FILE_GPR) { + SID(pc, i->src[s], s ? ((s == 2) ? 
49 : 26) : 20); + } else + if (SFILE(i, s) == NV_FILE_IMM) { + assert(!(pc->emit[1] & 0xc000)); + assert(s == 1 || i->opcode == NV_OP_MOV); + set_immd(pc, i, s); + } + } +} + +static void +emit_form_1(struct nv_pc *pc, struct nv_instruction *i) +{ + int s; + + set_pred(pc, i); + + DID(pc, i->def[0], 14); + + for (s = 0; s < 1 && i->src[s]; ++s) { + if (SFILE(i, s) >= NV_FILE_MEM_C(0) && + SFILE(i, s) <= NV_FILE_MEM_C(15)) { + assert(!(pc->emit[1] & 0xc000)); + assert(s <= 1); + pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10); + set_address_16(pc, i->src[s]); + } else + if (SFILE(i, s) == NV_FILE_GPR) { + SID(pc, i->src[s], 26); + } else + if (SFILE(i, s) == NV_FILE_IMM) { + assert(!(pc->emit[1] & 0xc000)); + assert(s == 1 || i->opcode == NV_OP_MOV); + set_immd(pc, i, s); + } + } +} + +static void +emit_neg_abs_1_2(struct nv_pc *pc, struct nv_instruction *i) +{ + if (i->src[0]->mod & NV_MOD_ABS) + pc->emit[0] |= 1 << 7; + if (i->src[0]->mod & NV_MOD_NEG) + pc->emit[0] |= 1 << 9; + if (i->src[1]->mod & NV_MOD_ABS) + pc->emit[0] |= 1 << 6; + if (i->src[1]->mod & NV_MOD_NEG) + pc->emit[0] |= 1 << 8; +} + +static void +emit_add_f32(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000000; + pc->emit[1] = 0x50000000; + + emit_form_0(pc, i); + + emit_neg_abs_1_2(pc, i); + + if (i->saturate) + pc->emit[1] |= 1 << 17; +} + +static void +emit_mul_f32(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000000; + pc->emit[1] = 0x58000000; + + emit_form_0(pc, i); + + if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG) + pc->emit[1] |= 1 << 25; + + if (i->saturate) + pc->emit[0] |= 1 << 5; +} + +static void +emit_mad_f32(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000000; + pc->emit[1] = 0x30000000; + + emit_form_0(pc, i); + + if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG) + pc->emit[0] |= 1 << 9; + + if (i->src[2]->mod & NV_MOD_NEG) + pc->emit[0] |= 1 << 8; + + if (i->saturate) + pc->emit[0] |= 1 << 5; +} + +static void +emit_minmax(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000000; + pc->emit[1] = 0x08000000; + + if (NV_BASEOP(i->opcode) == NV_OP_MAX) + pc->emit[1] |= 0x001e0000; + else + pc->emit[1] |= 0x000e0000; /* predicate ? 
*/ + + emit_form_0(pc, i); + + emit_neg_abs_1_2(pc, i); + + switch (i->opcode) { + case NV_OP_MIN_U32: + case NV_OP_MAX_U32: + pc->emit[0] |= 3; + break; + case NV_OP_MIN_S32: + case NV_OP_MAX_S32: + pc->emit[0] |= 3 | (1 << 5); + break; + case NV_OP_MIN_F32: + case NV_OP_MAX_F32: + default: + break; + } +} + +static void +emit_tex(struct nv_pc *pc, struct nv_instruction *i) +{ + int src1 = i->tex_array + i->tex_dim + i->tex_cube; + + pc->emit[0] = 0x00000086; + pc->emit[1] = 0x80000000; + + switch (i->opcode) { + case NV_OP_TEX: pc->emit[1] = 0x80000000; break; + case NV_OP_TXB: pc->emit[1] = 0x84000000; break; + case NV_OP_TXL: pc->emit[1] = 0x86000000; break; + case NV_OP_TXF: pc->emit[1] = 0x90000000; break; + case NV_OP_TXG: pc->emit[1] = 0xe0000000; break; + default: + assert(0); + break; + } + + if (i->tex_array) + pc->emit[1] |= 0x00080000; /* layer index is u16, first value of SRC0 */ + if (i->tex_shadow) + pc->emit[1] |= 0x01000000; /* shadow is part of SRC1, after bias/lod */ + + set_pred(pc, i); + + DID(pc, i->def[0], 14); + SID(pc, i->src[0], 20); + SID(pc, i->src[src1], 26); /* may be NULL -> $r63 */ + + pc->emit[1] |= i->tex_mask << 14; + pc->emit[1] |= (i->tex_dim - 1) << 20; + if (i->tex_cube) + pc->emit[1] |= 3 << 20; + + assert(i->ext.tex.s < 16); + + pc->emit[1] |= i->ext.tex.t; + pc->emit[1] |= i->ext.tex.s << 8; + + if (i->tex_live) + pc->emit[0] |= 1 << 9; +} + +/* 0: cos, 1: sin, 2: ex2, 3: lg2, 4: rcp, 5: rsqrt */ +static void +emit_flop(struct nv_pc *pc, struct nv_instruction *i, ubyte op) +{ + pc->emit[0] = 0x00000000; + pc->emit[1] = 0xc8000000; + + set_pred(pc, i); + + DID(pc, i->def[0], 14); + SID(pc, i->src[0], 20); + + pc->emit[0] |= op << 26; + + if (op >= 4) { + if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 9; + if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 7; + } else { + assert(!i->src[0]->mod); + } +} + +static void +emit_quadop(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000000; + pc->emit[1] = 0x48000000; + + set_pred(pc, i); + + assert(SFILE(i, 0) == NV_FILE_GPR && SFILE(i, 1) == NV_FILE_GPR); + + DID(pc, i->def[0], 14); + SID(pc, i->src[0], 20); + SID(pc, i->src[0], 26); + + pc->emit[0] |= i->lanes << 6; /* l0, l1, l2, l3, dx, dy */ + pc->emit[1] |= i->quadop; +} + +static void +emit_ddx(struct nv_pc *pc, struct nv_instruction *i) +{ + i->quadop = 0x99; + i->lanes = 4; + emit_quadop(pc, i); +} + +static void +emit_ddy(struct nv_pc *pc, struct nv_instruction *i) +{ + i->quadop = 0xa5; + i->lanes = 5; + emit_quadop(pc, i); +} + +/* preparation op (preex2, presin / convert to fixed point) */ +static void +emit_preop(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000000; + pc->emit[1] = 0x60000000; + + if (i->opcode == NV_OP_PREEX2) + pc->emit[0] |= 0x20; + + emit_form_1(pc, i); + + if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 8; + if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 6; +} + +static void +emit_shift(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000003; + + switch (i->opcode) { + case NV_OP_SAR: + pc->emit[0] |= 0x20; /* fall through */ + case NV_OP_SHR: + pc->emit[1] = 0x58000000; + break; + case NV_OP_SHL: + default: + pc->emit[1] = 0x60000000; + break; + } + + emit_form_0(pc, i); +} + +static void +emit_bitop(struct nv_pc *pc, struct nv_instruction *i) +{ + if (SFILE(i, 1) == NV_FILE_IMM) { + pc->emit[0] = 0x00000002; + pc->emit[1] = 0x38000000; + } else { + pc->emit[0] = 0x00000003; + pc->emit[1] = 0x68000000; + } + + switch (i->opcode) { + case 
NV_OP_OR: + pc->emit[0] |= 0x40; + break; + case NV_OP_XOR: + pc->emit[0] |= 0x80; + break; + case NV_OP_AND: + default: + break; + } + + emit_form_0(pc, i); +} + +static void +emit_set(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000000; + + switch (i->opcode) { + case NV_OP_SET_S32: + pc->emit[0] |= 0x20; /* fall through */ + case NV_OP_SET_U32: + pc->emit[0] |= 0x3; + pc->emit[1] = 0x100e0000; + break; + case NV_OP_SET_F32_AND: + pc->emit[1] = 0x18000000; + break; + case NV_OP_SET_F32_OR: + pc->emit[1] = 0x18200000; + break; + case NV_OP_SET_F32_XOR: + pc->emit[1] = 0x18400000; + break; + case NV_OP_FSET_F32: + pc->emit[0] |= 0x20; /* fall through */ + case NV_OP_SET_F32: + default: + pc->emit[1] = 0x180e0000; + break; + } + + if (DFILE(i, 0) == NV_FILE_PRED) { + pc->emit[0] |= 0x1c000; + pc->emit[1] += 0x08000000; + } + + pc->emit[1] |= i->set_cond << 23; + + emit_form_0(pc, i); + + emit_neg_abs_1_2(pc, i); /* maybe assert that U/S32 don't use mods */ +} + +static void +emit_selp(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000004; + pc->emit[1] = 0x20000000; + + emit_form_0(pc, i); + + if (i->cc || (i->src[2]->mod & NV_MOD_NOT)) + pc->emit[1] |= 1 << 20; +} + +static void +emit_slct(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000000; + + switch (i->opcode) { + case NV_OP_SLCT_S32: + pc->emit[0] |= 0x20; /* fall through */ + case NV_OP_SLCT_U32: + pc->emit[0] |= 0x3; + pc->emit[1] = 0x30000000; + break; + case NV_OP_SLCT_F32: + default: + pc->emit[1] = 0x38000000; + break; + } + + emit_form_0(pc, i); + + pc->emit[1] |= i->set_cond << 23; +} + +static void +emit_cvt(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000004; + pc->emit[1] = 0x10000000; + + if (i->opcode != NV_OP_CVT) + i->ext.cvt.d = i->ext.cvt.s = NV_OPTYPE(i->opcode); + + switch (i->ext.cvt.d) { + case NV_TYPE_F32: + switch (i->ext.cvt.s) { + case NV_TYPE_F32: pc->emit[1] = 0x10000000; break; + case NV_TYPE_S32: pc->emit[0] |= 0x200; + case NV_TYPE_U32: pc->emit[1] = 0x18000000; break; + } + break; + case NV_TYPE_S32: pc->emit[0] |= 0x80; + case NV_TYPE_U32: + switch (i->ext.cvt.s) { + case NV_TYPE_F32: pc->emit[1] = 0x14000000; break; + case NV_TYPE_S32: pc->emit[0] |= 0x200; + case NV_TYPE_U32: pc->emit[1] = 0x1c000000; break; + } + break; + default: + assert(!"cvt: unknown type"); + break; + } + + if (i->opcode == NV_OP_FLOOR) + pc->emit[1] |= 0x00020000; + else + if (i->opcode == NV_OP_CEIL) + pc->emit[1] |= 0x00040000; + else + if (i->opcode == NV_OP_TRUNC) + pc->emit[1] |= 0x00060000; + + if (i->saturate || i->opcode == NV_OP_SAT) + pc->emit[0] |= 0x20; + + if (NV_BASEOP(i->opcode) == NV_OP_ABS || i->src[0]->mod & NV_MOD_ABS) + pc->emit[0] |= 1 << 6; + if (NV_BASEOP(i->opcode) == NV_OP_NEG || i->src[0]->mod & NV_MOD_NEG) + pc->emit[0] |= 1 << 8; + + pc->emit[0] |= util_logbase2(DREG(i->def[0])->size) << 20; + pc->emit[0] |= util_logbase2(SREG(i->src[0])->size) << 23; + + emit_form_1(pc, i); +} + +static void +emit_interp(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000000; + pc->emit[1] = 0xc07e0000; + + DID(pc, i->def[0], 14); + + set_pred(pc, i); + + if (i->indirect) + SID(pc, i->src[i->indirect], 20); + else + SID(pc, NULL, 20); + + if (i->opcode == NV_OP_PINTERP) { + pc->emit[0] |= 0x040; + SID(pc, i->src[1], 26); + } else { + SID(pc, NULL, 26); + } + + pc->emit[1] |= i->src[0]->value->reg.address & 0xffff; + + if (i->centroid) + pc->emit[0] |= 0x100; + else + if (i->flat) + pc->emit[0] |= 0x080; +} + 
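+/* emit_vfetch below loads vertex attributes and emit_export stores shader
+ * outputs (NV_FILE_MEM_V); both take an optional address register from
+ * src[indirect], a NULL ref encoding as $r63 (no indirect addressing).
+ */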
+static void +emit_vfetch(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x03f00006; + pc->emit[1] = 0x06000000 | i->src[0]->value->reg.address; + if (i->patch) + pc->emit[0] |= 0x100; + + set_pred(pc, i); + + DVS(pc, i); + DID(pc, i->def[0], 14); + + SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 26); +} + +static void +emit_export(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000006; + pc->emit[1] = 0x0a000000; + if (i->patch) + pc->emit[0] |= 0x100; + + set_pred(pc, i); + + assert(SFILE(i, 0) == NV_FILE_MEM_V); + assert(SFILE(i, 1) == NV_FILE_GPR); + + SID(pc, i->src[1], 26); /* register source */ + SVS(pc, i->src[0]); + + pc->emit[1] |= i->src[0]->value->reg.address & 0xfff; + + SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20); +} + +static void +emit_mov(struct nv_pc *pc, struct nv_instruction *i) +{ + if (i->opcode == NV_OP_MOV) + i->lanes = 0xf; + + if (SFILE(i, 0) == NV_FILE_IMM) { + pc->emit[0] = 0x000001e2; + pc->emit[1] = 0x18000000; + } else + if (SFILE(i, 0) == NV_FILE_PRED) { + pc->emit[0] = 0x1c000004; + pc->emit[1] = 0x080e0000; + } else { + pc->emit[0] = 0x00000004 | (i->lanes << 5); + pc->emit[1] = 0x28000000; + } + + emit_form_1(pc, i); +} + +static void +emit_ldst_size(struct nv_pc *pc, struct nv_instruction *i) +{ + assert(NV_IS_MEMORY_FILE(SFILE(i, 0))); + + switch (SSIZE(i, 0)) { + case 1: + if (NV_TYPE_ISSGD(i->ext.cvt.s)) + pc->emit[0] |= 0x20; + break; + case 2: + pc->emit[0] |= 0x40; + if (NV_TYPE_ISSGD(i->ext.cvt.s)) + pc->emit[0] |= 0x20; + break; + case 4: pc->emit[0] |= 0x80; break; + case 8: pc->emit[0] |= 0xa0; break; + case 16: pc->emit[0] |= 0xc0; break; + default: + NOUVEAU_ERR("invalid load/store size %u\n", SSIZE(i, 0)); + break; + } +} + +static void +emit_ld_const(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000006; + pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10); + + emit_ldst_size(pc, i); + + set_pred(pc, i); + set_address_16(pc, i->src[0]); + + SID(pc, (i->indirect >= 0) ? 
i->src[i->indirect] : NULL, 20); + DID(pc, i->def[0], 14); +} + +static void +emit_ld(struct nv_pc *pc, struct nv_instruction *i) +{ + if (SFILE(i, 0) >= NV_FILE_MEM_C(0) && + SFILE(i, 0) <= NV_FILE_MEM_C(15)) { + if (SSIZE(i, 0) == 4 && i->indirect < 0) { + i->lanes = 0xf; + emit_mov(pc, i); + } else { + emit_ld_const(pc, i); + } + } else { + NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i, 0)); + abort(); + } +} + +static void +emit_st(struct nv_pc *pc, struct nv_instruction *i) +{ + NOUVEAU_ERR("emit_st: not handled yet\n"); + abort(); +} + +void +nvc0_emit_instruction(struct nv_pc *pc, struct nv_instruction *i) +{ + debug_printf("EMIT: "); nvc0_print_instruction(i); + + switch (i->opcode) { + case NV_OP_VFETCH: + emit_vfetch(pc, i); + break; + case NV_OP_EXPORT: + if (!pc->is_fragprog) + emit_export(pc, i); + break; + case NV_OP_MOV: + emit_mov(pc, i); + break; + case NV_OP_LD: + emit_ld(pc, i); + break; + case NV_OP_ST: + emit_st(pc, i); + break; + case NV_OP_LINTERP: + case NV_OP_PINTERP: + emit_interp(pc, i); + break; + case NV_OP_ADD_F32: + emit_add_f32(pc, i); + break; + case NV_OP_AND: + case NV_OP_OR: + case NV_OP_XOR: + emit_bitop(pc, i); + break; + case NV_OP_CVT: + case NV_OP_ABS_F32: + case NV_OP_ABS_S32: + case NV_OP_NEG_F32: + case NV_OP_NEG_S32: + case NV_OP_SAT: + case NV_OP_CEIL: + case NV_OP_FLOOR: + case NV_OP_TRUNC: + emit_cvt(pc, i); + break; + case NV_OP_DFDX: + emit_ddx(pc, i); + break; + case NV_OP_DFDY: + emit_ddy(pc, i); + break; + case NV_OP_COS: + emit_flop(pc, i, 0); + break; + case NV_OP_SIN: + emit_flop(pc, i, 1); + break; + case NV_OP_EX2: + emit_flop(pc, i, 2); + break; + case NV_OP_LG2: + emit_flop(pc, i, 3); + break; + case NV_OP_RCP: + emit_flop(pc, i, 4); + break; + case NV_OP_RSQ: + emit_flop(pc, i, 5); + break; + case NV_OP_PRESIN: + case NV_OP_PREEX2: + emit_preop(pc, i); + break; + case NV_OP_MAD_F32: + emit_mad_f32(pc, i); + break; + case NV_OP_MAX_F32: + case NV_OP_MAX_S32: + case NV_OP_MAX_U32: + case NV_OP_MIN_F32: + case NV_OP_MIN_S32: + case NV_OP_MIN_U32: + emit_minmax(pc, i); + break; + case NV_OP_MUL_F32: + emit_mul_f32(pc, i); + break; + case NV_OP_SET_F32: + case NV_OP_SET_F32_AND: + case NV_OP_SET_F32_OR: + case NV_OP_SET_F32_XOR: + case NV_OP_SET_S32: + case NV_OP_SET_U32: + case NV_OP_FSET_F32: + emit_set(pc, i); + break; + case NV_OP_SHL: + case NV_OP_SHR: + case NV_OP_SAR: + emit_shift(pc, i); + break; + case NV_OP_TEX: + case NV_OP_TXB: + case NV_OP_TXL: + emit_tex(pc, i); + break; + case NV_OP_BRA: + emit_flow(pc, i, 0x40); + break; + case NV_OP_CALL: + emit_flow(pc, i, 0x50); + break; + case NV_OP_JOINAT: + emit_flow(pc, i, 0x60); + break; + case NV_OP_EXIT: + emit_flow(pc, i, 0x80); + break; + case NV_OP_RET: + emit_flow(pc, i, 0x90); + break; + case NV_OP_KIL: + emit_flow(pc, i, 0x98); + break; + case NV_OP_JOIN: + case NV_OP_NOP: + pc->emit[0] = 0x00003de4; + pc->emit[1] = 0x40000000; + break; + case NV_OP_SELP: + emit_selp(pc, i); + break; + case NV_OP_SLCT_F32: + case NV_OP_SLCT_S32: + case NV_OP_SLCT_U32: + emit_slct(pc, i); + break; + default: + NOUVEAU_ERR("unhandled NV_OP: %d\n", i->opcode); + abort(); + break; + } + + if (i->join) + pc->emit[0] |= 0x10; +} diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c new file mode 100644 index 00000000000..acc72bff14c --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -0,0 +1,1236 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + 
* copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nvc0_pc.h" +#include "nvc0_program.h" + +#define DESCEND_ARBITRARY(j, f) \ +do { \ + b->pass_seq = ctx->pc->pass_seq; \ + \ + for (j = 0; j < 2; ++j) \ + if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \ + f(ctx, b->out[j]); \ +} while (0) + +static INLINE boolean +registers_interfere(struct nv_value *a, struct nv_value *b) +{ + if (a->reg.file != b->reg.file) + return FALSE; + if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file)) + return FALSE; + + assert(a->join->reg.id >= 0 && b->join->reg.id >= 0); + + if (a->join->reg.id < b->join->reg.id) { + return (a->join->reg.id + a->reg.size >= b->join->reg.id); + } else + if (a->join->reg.id > b->join->reg.id) { + return (b->join->reg.id + b->reg.size >= a->join->reg.id); + } + + return FALSE; +} + +static INLINE boolean +values_equal(struct nv_value *a, struct nv_value *b) +{ + if (a->reg.file != b->reg.file || a->reg.size != b->reg.size) + return FALSE; + if (NV_IS_MEMORY_FILE(a->reg.file)) + return a->reg.address == b->reg.address; + else + return a->join->reg.id == b->join->reg.id; +} + +#if 0 +static INLINE boolean +inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b) +{ + int si, di; + + for (di = 0; di < 4 && a->def[di]; ++di) + for (si = 0; si < 5 && b->src[si]; ++si) + if (registers_interfere(a->def[di], b->src[si]->value)) + return FALSE; + + return TRUE; +} + +/* Check whether we can swap the order of the instructions, + * where a & b may be either the earlier or the later one. 
+ */ +static boolean +inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b) +{ + return inst_commutation_check(a, b) && inst_commutation_check(b, a); +} +#endif + +static INLINE boolean +inst_removable(struct nv_instruction *nvi) +{ + if (nvi->opcode == NV_OP_ST) + return FALSE; + return (!(nvi->terminator || + nvi->join || + nvi->target || + nvi->fixed || + nvc0_insn_refcount(nvi))); +} + +static INLINE boolean +inst_is_noop(struct nv_instruction *nvi) +{ + if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND) + return TRUE; + if (nvi->terminator || nvi->join) + return FALSE; + if (nvi->def[0] && nvi->def[0]->join->reg.id < 0) + return TRUE; + if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT) + return FALSE; + if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file) + return FALSE; + + if (nvi->src[0]->value->join->reg.id < 0) { + NOUVEAU_DBG("inst_is_noop: orphaned value detected\n"); + return TRUE; + } + + if (nvi->opcode == NV_OP_SELECT) + if (!values_equal(nvi->def[0], nvi->src[1]->value)) + return FALSE; + return values_equal(nvi->def[0], nvi->src[0]->value); +} + +struct nv_pass { + struct nv_pc *pc; + int n; + void *priv; +}; + +static int +nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b); + +static void +nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) +{ + struct nv_pc *pc = (struct nv_pc *)priv; + struct nv_basic_block *in; + struct nv_instruction *nvi, *next; + int j; + + for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j); + + if (j >= 0) { + in = pc->bb_list[j]; + + /* check for no-op branches (BRA $PC+8) */ + if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) { + in->emit_size -= 8; + pc->emit_size -= 8; + + for (++j; j < pc->num_blocks; ++j) + pc->bb_list[j]->emit_pos -= 8; + + nvc0_insn_delete(in->exit); + } + b->emit_pos = in->emit_pos + in->emit_size; + } + + pc->bb_list[pc->num_blocks++] = b; + + /* visit node */ + + for (nvi = b->entry; nvi; nvi = next) { + next = nvi->next; + if (inst_is_noop(nvi) || + (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) { + nvc0_insn_delete(nvi); + } else + b->emit_size += 8; + } + pc->emit_size += b->emit_size; + +#ifdef NOUVEAU_DEBUG + if (!b->entry) + debug_printf("BB:%i is now empty\n", b->id); + else + debug_printf("BB:%i size = %u\n", b->id, b->emit_size); +#endif +} + +static int +nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root) +{ + struct nv_pass pass; + + pass.pc = pc; + + pc->pass_seq++; + nv_pass_flatten(&pass, root); + + nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc); + + return 0; +} + +int +nvc0_pc_exec_pass2(struct nv_pc *pc) +{ + int i, ret; + + NOUVEAU_DBG("preparing %u blocks for emission\n", pc->num_blocks); + + pc->num_blocks = 0; /* will reorder bb_list */ + + for (i = 0; i < pc->num_subroutines + 1; ++i) + if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i]))) + return ret; + return 0; +} + +static INLINE boolean +is_cspace_load(struct nv_instruction *nvi) +{ + if (!nvi) + return FALSE; + assert(nvi->indirect != 0); + return (nvi->opcode == NV_OP_LD && + nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) && + nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15)); +} + +static INLINE boolean +is_immd32_load(struct nv_instruction *nvi) +{ + if (!nvi) + return FALSE; + return (nvi->opcode == NV_OP_MOV && + nvi->src[0]->value->reg.file == NV_FILE_IMM && + nvi->src[0]->value->reg.size == 4); +} + +static INLINE void +check_swap_src_0_1(struct nv_instruction *nvi) +{ + static const uint8_t 
cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; + + struct nv_ref *src0 = nvi->src[0]; + struct nv_ref *src1 = nvi->src[1]; + + if (!nv_op_commutative(nvi->opcode)) + return; + assert(src0 && src1 && src0->value && src1->value); + + if (is_cspace_load(src0->value->insn)) { + if (!is_cspace_load(src1->value->insn)) { + nvi->src[0] = src1; + nvi->src[1] = src0; + } + } else + if (is_immd32_load(src0->value->insn)) { + if (!is_cspace_load(src1->value->insn) && + !is_immd32_load(src1->value->insn)) { + nvi->src[0] = src1; + nvi->src[1] = src0; + } + } + + if (nvi->src[0] != src0 && nvi->opcode == NV_OP_SET) + nvi->set_cond = cc_swapped[nvi->set_cond]; +} + +static void +nvi_set_indirect_load(struct nv_pc *pc, + struct nv_instruction *nvi, struct nv_value *val) +{ + for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect]; + ++nvi->indirect); + assert(nvi->indirect < 6); + nv_reference(pc, nvi, nvi->indirect, val); +} + +static int +nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b) +{ + struct nv_instruction *nvi, *ld; + int s; + + for (nvi = b->entry; nvi; nvi = nvi->next) { + check_swap_src_0_1(nvi); + + for (s = 0; s < 3 && nvi->src[s]; ++s) { + ld = nvi->src[s]->value->insn; + if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV)) + continue; + if (!nvc0_insn_can_load(nvi, s, ld)) + continue; + + /* fold it ! */ + nv_reference(ctx->pc, nvi, s, ld->src[0]->value); + if (ld->indirect >= 0) + nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value); + + if (!nvc0_insn_refcount(ld)) + nvc0_insn_delete(ld); + } + } + DESCEND_ARBITRARY(s, nvc0_pass_fold_loads); + + return 0; +} + +/* NOTE: Assumes loads have not yet been folded. */ +static int +nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) +{ + struct nv_instruction *nvi, *mi, *next; + int j; + uint8_t mod; + + for (nvi = b->entry; nvi; nvi = next) { + next = nvi->next; + if (nvi->opcode == NV_OP_SUB) { + nvi->src[1]->mod ^= NV_MOD_NEG; + nvi->opcode = NV_OP_ADD; + } + + for (j = 0; j < 3 && nvi->src[j]; ++j) { + mi = nvi->src[j]->value->insn; + if (!mi) + continue; + if (mi->def[0]->refc > 1 || mi->predicate >= 0) + continue; + + if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG; + else + if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS; + else + continue; + assert(!(mod & mi->src[0]->mod & NV_MOD_NEG)); + + mod |= mi->src[0]->mod; + + if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) { + /* abs neg [abs] = abs */ + mod &= ~(NV_MOD_NEG | NV_MOD_ABS); + } else + if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) { + /* neg as opcode and modifier on same insn cannot occur */ + /* neg neg abs = abs, neg neg = identity */ + assert(j == 0); + if (mod & NV_MOD_ABS) + nvi->opcode = NV_OP_ABS; + else + nvi->opcode = NV_OP_MOV; + mod = 0; + } + + if ((nv_op_supported_src_mods(nvi->opcode) & mod) != mod) + continue; + + nv_reference(ctx->pc, nvi, j, mi->src[0]->value); + + nvi->src[j]->mod ^= mod; + } + + if (nvi->opcode == NV_OP_SAT) { + mi = nvi->src[0]->value->insn; + + if (mi->def[0]->refc > 1 || + (mi->opcode != NV_OP_ADD && + mi->opcode != NV_OP_MUL && + mi->opcode != NV_OP_MAD)) + continue; + mi->saturate = 1; + mi->def[0] = nvi->def[0]; + mi->def[0]->insn = mi; + nvc0_insn_delete(nvi); + } + } + DESCEND_ARBITRARY(j, nv_pass_lower_mods); + + return 0; +} + +#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL) + +static void +apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod) +{ + if (mod & NV_MOD_ABS) { + if (type == NV_TYPE_F32) 
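+         /* F32 abs: clear the IEEE-754 sign bit */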
+ *val &= 0x7fffffff; + else + if ((*val) & (1 << 31)) + *val = ~(*val) + 1; + } + if (mod & NV_MOD_NEG) { + if (type == NV_TYPE_F32) + *val ^= 0x80000000; + else + *val = ~(*val) + 1; + } + if (mod & NV_MOD_SAT) { + union { + float f; + uint32_t u; + int32_t i; + } u; + u.u = *val; + if (type == NV_TYPE_F32) { + u.f = CLAMP(u.f, -1.0f, 1.0f); + } else + if (type == NV_TYPE_U16) { + u.u = MIN2(u.u, 0xffff); + } else + if (type == NV_TYPE_S16) { + u.i = CLAMP(u.i, -32768, 32767); + } + *val = u.u; + } + if (mod & NV_MOD_NOT) + *val = ~*val; +} + +static void +constant_expression(struct nv_pc *pc, struct nv_instruction *nvi, + struct nv_value *src0, struct nv_value *src1) +{ + struct nv_value *val; + union { + float f32; + uint32_t u32; + int32_t s32; + } u0, u1, u; + ubyte type; + + if (!nvi->def[0]) + return; + type = NV_OPTYPE(nvi->opcode); + + u.u32 = 0; + u0.u32 = src0->reg.imm.u32; + u1.u32 = src1->reg.imm.u32; + + apply_modifiers(&u0.u32, type, nvi->src[0]->mod); + apply_modifiers(&u1.u32, type, nvi->src[1]->mod); + + switch (nvi->opcode) { + case NV_OP_MAD_F32: + if (nvi->src[2]->value->reg.file != NV_FILE_GPR) + return; + /* fall through */ + case NV_OP_MUL_F32: + u.f32 = u0.f32 * u1.f32; + break; + case NV_OP_MUL_B32: + u.u32 = u0.u32 * u1.u32; + break; + case NV_OP_ADD_F32: + u.f32 = u0.f32 + u1.f32; + break; + case NV_OP_ADD_B32: + u.u32 = u0.u32 + u1.u32; + break; + case NV_OP_SUB_F32: + u.f32 = u0.f32 - u1.f32; + break; + /* + case NV_OP_SUB_B32: + u.u32 = u0.u32 - u1.u32; + break; + */ + default: + return; + } + + val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type)); + val->reg.imm.u32 = u.u32; + + nv_reference(pc, nvi, 1, NULL); + nv_reference(pc, nvi, 0, val); + + if (nvi->opcode == NV_OP_MAD_F32) { + nvi->src[1] = nvi->src[0]; + nvi->src[0] = nvi->src[2]; + nvi->src[2] = NULL; + nvi->opcode = NV_OP_ADD_F32; + + if (val->reg.imm.u32 == 0) { + nvi->src[1] = NULL; + nvi->opcode = NV_OP_MOV; + } + } else { + nvi->opcode = NV_OP_MOV; + } +} + +static void +constant_operand(struct nv_pc *pc, + struct nv_instruction *nvi, struct nv_value *val, int s) +{ + union { + float f32; + uint32_t u32; + int32_t s32; + } u; + int shift; + int t = s ? 0 : 1; + uint op; + ubyte type; + + if (!nvi->def[0]) + return; + type = NV_OPTYPE(nvi->opcode); + + u.u32 = val->reg.imm.u32; + apply_modifiers(&u.u32, type, nvi->src[s]->mod); + + if (u.u32 == 0 && NV_BASEOP(nvi->opcode) == NV_OP_MUL) { + nvi->opcode = NV_OP_MOV; + nv_reference(pc, nvi, t, NULL); + if (s) { + nvi->src[0] = nvi->src[1]; + nvi->src[1] = NULL; + } + return; + } + + switch (nvi->opcode) { + case NV_OP_MUL_F32: + if (u.f32 == 1.0f || u.f32 == -1.0f) { + if (u.f32 == -1.0f) + nvi->src[t]->mod ^= NV_MOD_NEG; + switch (nvi->src[t]->mod) { + case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break; + case NV_MOD_NEG: op = NV_OP_NEG_F32; break; + case NV_MOD_ABS: op = NV_OP_ABS_F32; break; + default: + return; + } + nvi->opcode = op; + nv_reference(pc, nvi, 0, nvi->src[t]->value); + nv_reference(pc, nvi, 1, NULL); + nvi->src[0]->mod = 0; + } else + if (u.f32 == 2.0f || u.f32 == -2.0f) { + if (u.f32 == -2.0f) + nvi->src[t]->mod ^= NV_MOD_NEG; + nvi->opcode = NV_OP_ADD_F32; + nv_reference(pc, nvi, s, nvi->src[t]->value); + nvi->src[s]->mod = nvi->src[t]->mod; + } + case NV_OP_ADD_F32: + if (u.u32 == 0) { + switch (nvi->src[t]->mod) { + case 0: op = nvi->saturate ? 
NV_OP_SAT : NV_OP_MOV; break; + case NV_MOD_NEG: op = NV_OP_NEG_F32; break; + case NV_MOD_ABS: op = NV_OP_ABS_F32; break; + case NV_MOD_NEG | NV_MOD_ABS: + op = NV_OP_CVT; + nvi->ext.cvt.s = nvi->ext.cvt.d = type; + break; + default: + return; + } + nvi->opcode = op; + nv_reference(pc, nvi, 0, nvi->src[t]->value); + nv_reference(pc, nvi, 1, NULL); + if (nvi->opcode != NV_OP_CVT) + nvi->src[0]->mod = 0; + } + case NV_OP_ADD_B32: + if (u.u32 == 0) { + assert(nvi->src[t]->mod == 0); + nvi->opcode = nvi->saturate ? NV_OP_CVT : NV_OP_MOV; + nvi->ext.cvt.s = nvi->ext.cvt.d = type; + nv_reference(pc, nvi, 0, nvi->src[t]->value); + nv_reference(pc, nvi, 1, NULL); + } + break; + case NV_OP_MUL_B32: + /* multiplication by 0 already handled above */ + assert(nvi->src[s]->mod == 0); + shift = ffs(u.s32) - 1; + if (shift == 0) { + nvi->opcode = NV_OP_MOV; + nv_reference(pc, nvi, 0, nvi->src[t]->value); + nv_reference(pc, nvi, 1, NULL); + } else + if (u.s32 > 0 && u.s32 == (1 << shift)) { + nvi->opcode = NV_OP_SHL; + (val = new_value(pc, NV_FILE_IMM, NV_TYPE_U32))->reg.imm.s32 = shift; + nv_reference(pc, nvi, 0, nvi->src[t]->value); + nv_reference(pc, nvi, 1, val); + break; + } + break; + case NV_OP_RCP: + u.f32 = 1.0f / u.f32; + (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32; + nvi->opcode = NV_OP_MOV; + assert(s == 0); + nv_reference(pc, nvi, 0, val); + break; + case NV_OP_RSQ: + u.f32 = 1.0f / sqrtf(u.f32); + (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32; + nvi->opcode = NV_OP_MOV; + assert(s == 0); + nv_reference(pc, nvi, 0, val); + break; + default: + break; + } +} + +static int +nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) +{ + struct nv_instruction *nvi, *next; + int j; + + for (nvi = b->entry; nvi; nvi = next) { + struct nv_value *src0, *src1, *src; + int s; + uint8_t mod[4]; + + next = nvi->next; + + src0 = nvc0_pc_find_immediate(nvi->src[0]); + src1 = nvc0_pc_find_immediate(nvi->src[1]); + + if (src0 && src1) + constant_expression(ctx->pc, nvi, src0, src1); + else { + if (src0) + constant_operand(ctx->pc, nvi, src0, 0); + else + if (src1) + constant_operand(ctx->pc, nvi, src1, 1); + } + + /* check if we can MUL + ADD -> MAD/FMA */ + if (nvi->opcode != NV_OP_ADD) + continue; + + src0 = nvi->src[0]->value; + src1 = nvi->src[1]->value; + + if (SRC_IS_MUL(src0) && src0->refc == 1) + src = src0; + else + if (SRC_IS_MUL(src1) && src1->refc == 1) + src = src1; + else + continue; + + /* could have an immediate from above constant_* */ + if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR) + continue; + s = (src == src0) ? 
0 : 1; + + mod[0] = nvi->src[0]->mod; + mod[1] = nvi->src[1]->mod; + mod[2] = src->insn->src[0]->mod; + mod[3] = src->insn->src[0]->mod; + + if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG) + continue; + + nvi->opcode = NV_OP_MAD; + nv_reference(ctx->pc, nvi, s, NULL); + nvi->src[2] = nvi->src[!s]; + + nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value); + nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value); + nvi->src[0]->mod = mod[2] ^ mod[s]; + nvi->src[1]->mod = mod[3]; + } + DESCEND_ARBITRARY(j, nv_pass_lower_arith); + + return 0; +} + +/* TODO: redundant store elimination */ + +struct mem_record { + struct mem_record *next; + struct nv_instruction *insn; + uint32_t ofst; + uint32_t base; + uint32_t size; +}; + +#define MEM_RECORD_POOL_SIZE 1024 + +struct pass_reld_elim { + struct nv_pc *pc; + + struct mem_record *imm; + struct mem_record *mem_v; + struct mem_record *mem_a; + struct mem_record *mem_c[16]; + struct mem_record *mem_l; + + struct mem_record pool[MEM_RECORD_POOL_SIZE]; + int alloc; +}; + +static void +combine_load(struct mem_record *rec, struct nv_instruction *ld) +{ + struct nv_instruction *fv = rec->insn; + struct nv_value *mem = ld->src[0]->value; + uint32_t size = rec->size + mem->reg.size; + int j; + int d = rec->size / 4; + + assert(rec->size < 16); + if (rec->ofst > mem->reg.address) { + if ((size == 8 && mem->reg.address & 3) || + (size > 8 && mem->reg.address & 7)) + return; + rec->ofst = mem->reg.address; + for (j = 0; j < d; ++j) + fv->def[d + j] = fv->def[j]; + d = 0; + } else + if ((size == 8 && rec->ofst & 3) || + (size > 8 && rec->ofst & 7)) { + return; + } + + for (j = 0; j < mem->reg.size / 4; ++j) { + fv->def[d] = ld->def[j]; + fv->def[d++]->insn = fv; + } + + fv->src[0]->value->reg.size = rec->size = size; + + nvc0_insn_delete(ld); +} + +static void +combine_export(struct mem_record *rec, struct nv_instruction *ex) +{ + +} + +static INLINE void +add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec, + uint32_t base, uint32_t ofst, struct nv_instruction *nvi) +{ + struct mem_record *it = &ctx->pool[ctx->alloc++]; + + it->next = *rec; + *rec = it; + it->base = base; + it->ofst = ofst; + it->insn = nvi; + it->size = nvi->src[0]->value->reg.size; +} + +/* vectorize and reuse loads from memory or of immediates */ +static int +nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b) +{ + struct mem_record **rec, *it; + struct nv_instruction *ld, *next; + struct nv_value *mem; + uint32_t base, ofst; + int s; + + for (ld = b->entry; ld; ld = next) { + next = ld->next; + + if (is_cspace_load(ld)) { + mem = ld->src[0]->value; + rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)]; + } else + if (ld->opcode == NV_OP_VFETCH) { + mem = ld->src[0]->value; + rec = &ctx->mem_a; + } else + if (ld->opcode == NV_OP_EXPORT) { + mem = ld->src[0]->value; + if (mem->reg.file != NV_FILE_MEM_V) + continue; + rec = &ctx->mem_v; + } else { + continue; + } + if (ld->def[0] && ld->def[0]->refc == 0) + continue; + ofst = mem->reg.address; + base = (ld->indirect >= 0) ? 
ld->src[ld->indirect]->value->n : 0; + + for (it = *rec; it; it = it->next) { + if (it->base == base && + ((it->ofst >> 4) == (ofst >> 4)) && + ((it->ofst + it->size == ofst) || + (it->ofst - mem->reg.size == ofst))) { + if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12) + continue; + if (it->ofst < ofst) { + if ((it->ofst & 0xf) == 4) + continue; + } else + if ((ofst & 0xf) == 4) + continue; + break; + } + } + if (it) { + switch (ld->opcode) { + case NV_OP_EXPORT: combine_export(it, ld); break; + default: + combine_load(it, ld); + break; + } + } else + if (ctx->alloc < MEM_RECORD_POOL_SIZE) { + add_mem_record(ctx, rec, base, ofst, ld); + } + } + + DESCEND_ARBITRARY(s, nv_pass_mem_opt); + return 0; +} + +static void +eliminate_store(struct mem_record *rec, struct nv_instruction *st) +{ +} + +/* elimination of redundant stores */ +static int +pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b) +{ + struct mem_record **rec, *it; + struct nv_instruction *st, *next; + struct nv_value *mem; + uint32_t base, ofst, size; + int s; + + for (st = b->entry; st; st = next) { + next = st->next; + + if (st->opcode == NV_OP_ST) { + mem = st->src[0]->value; + rec = &ctx->mem_l; + } else + if (st->opcode == NV_OP_EXPORT) { + mem = st->src[0]->value; + if (mem->reg.file != NV_FILE_MEM_V) + continue; + rec = &ctx->mem_v; + } else + if (st->opcode == NV_OP_ST) { + /* TODO: purge */ + } + ofst = mem->reg.address; + base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0; + size = mem->reg.size; + + for (it = *rec; it; it = it->next) { + if (it->base == base && + (it->ofst <= ofst && (it->ofst + size) > ofst)) + break; + } + if (it) + eliminate_store(it, st); + else + add_mem_record(ctx, rec, base, ofst, st); + } + + DESCEND_ARBITRARY(s, nv_pass_mem_opt); + return 0; +} + +/* TODO: properly handle loads from l[] memory in the presence of stores */ +static int +nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b) +{ +#if 0 + struct load_record **rec, *it; + struct nv_instruction *ld, *next; + uint64_t data[2]; + struct nv_value *val; + int j; + + for (ld = b->entry; ld; ld = next) { + next = ld->next; + if (!ld->src[0]) + continue; + val = ld->src[0]->value; + rec = NULL; + + if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) { + data[0] = val->reg.id; + data[1] = 0; + rec = &ctx->mem_v; + } else + if (ld->opcode == NV_OP_LDA) { + data[0] = val->reg.id; + data[1] = ld->src[4] ? 
ld->src[4]->value->n : ~0ULL; + if (val->reg.file >= NV_FILE_MEM_C(0) && + val->reg.file <= NV_FILE_MEM_C(15)) + rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)]; + else + if (val->reg.file == NV_FILE_MEM_S) + rec = &ctx->mem_s; + else + if (val->reg.file == NV_FILE_MEM_L) + rec = &ctx->mem_l; + } else + if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) { + data[0] = val->reg.imm.u32; + data[1] = 0; + rec = &ctx->imm; + } + + if (!rec || !ld->def[0]->refc) + continue; + + for (it = *rec; it; it = it->next) + if (it->data[0] == data[0] && it->data[1] == data[1]) + break; + + if (it) { + if (ld->def[0]->reg.id >= 0) + it->value = ld->def[0]; + else + if (!ld->fixed) + nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value); + } else { + if (ctx->alloc == LOAD_RECORD_POOL_SIZE) + continue; + it = &ctx->pool[ctx->alloc++]; + it->next = *rec; + it->data[0] = data[0]; + it->data[1] = data[1]; + it->value = ld->def[0]; + *rec = it; + } + } + + ctx->imm = NULL; + ctx->mem_s = NULL; + ctx->mem_v = NULL; + for (j = 0; j < 16; ++j) + ctx->mem_c[j] = NULL; + ctx->mem_l = NULL; + ctx->alloc = 0; + + DESCEND_ARBITRARY(j, nv_pass_reload_elim); +#endif + return 0; +} + +static int +nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b) +{ + int i, c, j; + + for (i = 0; i < ctx->pc->num_instructions; ++i) { + struct nv_instruction *nvi = &ctx->pc->instructions[i]; + struct nv_value *def[4]; + + if (!nv_is_texture_op(nvi->opcode)) + continue; + nvi->tex_mask = 0; + + for (c = 0; c < 4; ++c) { + if (nvi->def[c]->refc) + nvi->tex_mask |= 1 << c; + def[c] = nvi->def[c]; + } + + j = 0; + for (c = 0; c < 4; ++c) + if (nvi->tex_mask & (1 << c)) + nvi->def[j++] = def[c]; + for (c = 0; c < 4; ++c) + if (!(nvi->tex_mask & (1 << c))) + nvi->def[j++] = def[c]; + assert(j == 4); + } + return 0; +} + +struct nv_pass_dce { + struct nv_pc *pc; + uint removed; +}; + +static int +nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b) +{ + int j; + struct nv_instruction *nvi, *next; + + for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) { + next = nvi->next; + + if (inst_removable(nvi)) { + nvc0_insn_delete(nvi); + ++ctx->removed; + } + } + DESCEND_ARBITRARY(j, nv_pass_dce); + + return 0; +} + +#if 0 +/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE. + * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with + * BREAK and dummy ELSE block. + */ +static INLINE boolean +bb_is_if_else_endif(struct nv_basic_block *bb) +{ + if (!bb->out[0] || !bb->out[1]) + return FALSE; + + if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) { + return (bb->out[0]->out[1] == bb->out[1]->out[0] && + !bb->out[1]->out[1]); + } else { + return (bb->out[0]->out[0] == bb->out[1]->out[0] && + !bb->out[0]->out[1] && + !bb->out[1]->out[1]); + } +} + +/* predicate instructions and remove branch at the end */ +static void +predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b, + struct nv_value *p, ubyte cc) +{ + +} +#endif + +/* NOTE: Run this after register allocation, we can just cut out the cflow + * instructions and hook the predicates to the conditional OPs if they are + * not using immediates; better than inserting SELECT to join definitions. + * + * NOTE: Should adapt prior optimization to make this possible more often. 
+ */ +static int +nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) +{ + return 0; +} + +/* local common subexpression elimination, stupid O(n^2) implementation */ +static int +nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) +{ + struct nv_instruction *ir, *ik, *next; + struct nv_instruction *entry = b->phi ? b->phi : b->entry; + int s; + unsigned int reps; + + do { + reps = 0; + for (ir = entry; ir; ir = next) { + next = ir->next; + for (ik = entry; ik != ir; ik = ik->next) { + if (ir->opcode != ik->opcode || ir->fixed) + continue; + + if (!ir->def[0] || !ik->def[0] || ir->def[1] || ik->def[1]) + continue; + + if (ik->indirect != ir->indirect || ik->predicate != ir->predicate) + continue; + + if (!values_equal(ik->def[0], ir->def[0])) + continue; + + for (s = 0; s < 3; ++s) { + struct nv_value *a, *b; + + if (!ik->src[s]) { + if (ir->src[s]) + break; + continue; + } + if (ik->src[s]->mod != ir->src[s]->mod) + break; + a = ik->src[s]->value; + b = ir->src[s]->value; + if (a == b) + continue; + if (a->reg.file != b->reg.file || + a->reg.id < 0 || + a->reg.id != b->reg.id) + break; + } + if (s == 3) { + nvc0_insn_delete(ir); + ++reps; + nvc0_pc_replace_value(ctx->pc, ir->def[0], ik->def[0]); + break; + } + } + } + } while(reps); + + DESCEND_ARBITRARY(s, nv_pass_cse); + + return 0; +} + +/* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy + * neighbouring registers. CSE might have messed this up. + */ +static int +nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b) +{ + struct nv_value *val; + struct nv_instruction *bnd, *nvi, *next; + int s, t; + + for (bnd = b->entry; bnd; bnd = next) { + next = bnd->next; + if (bnd->opcode != NV_OP_BIND) + continue; + for (s = 0; s < 4 && bnd->src[s]; ++s) { + val = bnd->src[s]->value; + for (t = s + 1; t < 4 && bnd->src[t]; ++t) { + if (bnd->src[t]->value != val) + continue; + nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV); + nvi->def[0] = new_value_like(ctx->pc, val); + nvi->def[0]->insn = nvi; + nv_reference(ctx->pc, nvi, 0, val); + nvc0_insn_insert_before(bnd, nvi); + + nv_reference(ctx->pc, bnd, t, nvi->def[0]); + } + } + } + DESCEND_ARBITRARY(t, nv_pass_fix_bind); + + return 0; +} + +static int +nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) +{ + struct pass_reld_elim *reldelim; + struct nv_pass pass; + struct nv_pass_dce dce; + int ret; + + pass.n = 0; + pass.pc = pc; + + /* Do this first, so we don't have to pay attention + * to whether sources are supported memory loads. 
+ */ + pc->pass_seq++; + ret = nv_pass_lower_arith(&pass, root); + if (ret) + return ret; + + pc->pass_seq++; + ret = nv_pass_lower_mods(&pass, root); + if (ret) + return ret; + + pc->pass_seq++; + ret = nvc0_pass_fold_loads(&pass, root); + if (ret) + return ret; + + if (pc->opt_reload_elim) { + reldelim = CALLOC_STRUCT(pass_reld_elim); + reldelim->pc = pc; + + pc->pass_seq++; + ret = nv_pass_reload_elim(reldelim, root); + if (ret) { + FREE(reldelim); + return ret; + } + memset(reldelim, 0, sizeof(struct pass_reld_elim)); + reldelim->pc = pc; + } + + pc->pass_seq++; + ret = nv_pass_cse(&pass, root); + if (ret) + return ret; + + dce.pc = pc; + do { + dce.removed = 0; + pc->pass_seq++; + ret = nv_pass_dce(&dce, root); + if (ret) + return ret; + } while (dce.removed); + + if (pc->opt_reload_elim) { + pc->pass_seq++; + ret = nv_pass_mem_opt(reldelim, root); + if (!ret) { + memset(reldelim, 0, sizeof(struct pass_reld_elim)); + reldelim->pc = pc; + + pc->pass_seq++; + ret = nv_pass_mem_opt(reldelim, root); + } + FREE(reldelim); + if (ret) + return ret; + } + + ret = nv_pass_tex_mask(&pass, root); + if (ret) + return ret; + + pc->pass_seq++; + ret = nv_pass_fix_bind(&pass, root); + + return ret; +} + +int +nvc0_pc_exec_pass0(struct nv_pc *pc) +{ + int i, ret; + + for (i = 0; i < pc->num_subroutines + 1; ++i) + if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i]))) + return ret; + return 0; +} diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c new file mode 100644 index 00000000000..b03826484e4 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -0,0 +1,377 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nvc0_pc.h" + +#define PRINT(args...) 
debug_printf(args) + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) +#endif + +static const char *norm = "\x1b[00m"; +static const char *gree = "\x1b[32m"; +static const char *blue = "\x1b[34m"; +static const char *cyan = "\x1b[36m"; +static const char *yllw = "\x1b[33m"; +static const char *mgta = "\x1b[35m"; + +static const char *nv_cond_names[] = +{ + "never", "lt" , "eq" , "le" , "gt" , "ne" , "ge" , "", + "never", "ltu", "equ", "leu", "gtu", "neu", "geu", "", + "o", "c", "a", "s" +}; + +static const char *nv_modifier_strings[] = +{ + "", + "neg", + "abs", + "neg abs", + "not", + "not neg" + "not abs", + "not neg abs", + "sat", + "BAD_MOD" +}; + +const char * +nvc0_opcode_name(uint opcode) +{ + return nvc0_op_info_table[MIN2(opcode, NV_OP_COUNT)].name; +} + +static INLINE const char * +nv_type_name(ubyte type, ubyte size) +{ + switch (type) { + case NV_TYPE_U16: return "u16"; + case NV_TYPE_S16: return "s16"; + case NV_TYPE_F32: return "f32"; + case NV_TYPE_U32: return "u32"; + case NV_TYPE_S32: return "s32"; + case NV_TYPE_P32: return "p32"; + case NV_TYPE_F64: return "f64"; + case NV_TYPE_ANY: + { + switch (size) { + case 1: return "b8"; + case 2: return "b16"; + case 4: return "b32"; + case 8: return "b64"; + case 12: return "b96"; + case 16: return "b128"; + default: + return "BAD_SIZE"; + } + } + default: + return "BAD_TYPE"; + } +} + +static INLINE const char * +nv_cond_name(ubyte cc) +{ + return nv_cond_names[MIN2(cc, 19)]; +} + +static INLINE const char * +nv_modifier_string(ubyte mod) +{ + return nv_modifier_strings[MIN2(mod, 9)]; +} + +static INLINE int +nv_value_id(struct nv_value *value) +{ + if (value->join->reg.id >= 0) + return value->join->reg.id; + return value->n; +} + +static INLINE boolean +nv_value_allocated(struct nv_value *value) +{ + return (value->reg.id >= 0) ? TRUE : FALSE; +} + +static INLINE void +nv_print_address(const char c, int buf, struct nv_value *a, int offset) +{ + const char ac = (a && nv_value_allocated(a)) ? '$' : '%'; + char sg; + + if (offset < 0) { + sg = '-'; + offset = -offset; + } else { + sg = '+'; + } + + if (buf >= 0) + PRINT(" %s%c%i[", cyan, c, buf); + else + PRINT(" %s%c[", cyan, c); + if (a) + PRINT("%s%ca%i%s%c", mgta, ac, nv_value_id(a), cyan, sg); + PRINT("%s0x%x%s]", yllw, offset, cyan); +} + +static INLINE void +nv_print_value(struct nv_value *value, struct nv_value *indir, ubyte type) +{ + char reg_pfx = nv_value_allocated(value->join) ? 
'$' : '%'; + + if (value->reg.file != NV_FILE_PRED) + PRINT(" %s%s", gree, nv_type_name(type, value->reg.size)); + + switch (value->reg.file) { + case NV_FILE_GPR: + PRINT(" %s%cr%i", blue, reg_pfx, nv_value_id(value)); + if (value->reg.size == 8) + PRINT("d"); + if (value->reg.size == 16) + PRINT("q"); + break; + case NV_FILE_PRED: + PRINT(" %s%cp%i", mgta, reg_pfx, nv_value_id(value)); + break; + case NV_FILE_COND: + PRINT(" %s%cc%i", mgta, reg_pfx, nv_value_id(value)); + break; + case NV_FILE_MEM_L: + nv_print_address('l', -1, indir, value->reg.address); + break; + case NV_FILE_MEM_G: + nv_print_address('g', -1, indir, value->reg.address); + break; + case NV_FILE_MEM_A: + nv_print_address('a', -1, indir, value->reg.address); + break; + case NV_FILE_MEM_V: + nv_print_address('v', -1, indir, value->reg.address); + break; + case NV_FILE_IMM: + switch (type) { + case NV_TYPE_U16: + case NV_TYPE_S16: + PRINT(" %s0x%04x", yllw, value->reg.imm.u32); + break; + case NV_TYPE_F32: + PRINT(" %s%f", yllw, value->reg.imm.f32); + break; + case NV_TYPE_F64: + PRINT(" %s%f", yllw, value->reg.imm.f64); + break; + case NV_TYPE_U32: + case NV_TYPE_S32: + case NV_TYPE_P32: + case NV_TYPE_ANY: + PRINT(" %s0x%08x", yllw, value->reg.imm.u32); + break; + } + break; + default: + if (value->reg.file >= NV_FILE_MEM_C(0) && + value->reg.file <= NV_FILE_MEM_C(15)) + nv_print_address('c', value->reg.file - NV_FILE_MEM_C(0), indir, + value->reg.address); + else + NOUVEAU_ERR(" BAD_FILE[%i]", nv_value_id(value)); + break; + } +} + +static INLINE void +nv_print_ref(struct nv_ref *ref, struct nv_value *indir, ubyte type) +{ + nv_print_value(ref->value, indir, type); +} + +void +nvc0_print_instruction(struct nv_instruction *i) +{ + int s; + + PRINT("%i: ", i->serial); + + if (i->predicate >= 0) { + PRINT("%s%s", gree, i->cc ? 
"fl" : "tr"); + nv_print_ref(i->src[i->predicate], NULL, NV_TYPE_U8); + PRINT(" "); + } + + PRINT("%s", gree); + if (NV_BASEOP(i->opcode) == NV_OP_SET) + PRINT("set %s", nv_cond_name(i->set_cond)); + else + if (i->saturate) + PRINT("sat %s", nvc0_opcode_name(i->opcode)); + else + PRINT("%s", nvc0_opcode_name(i->opcode)); + + if (i->opcode == NV_OP_CVT) + nv_print_value(i->def[0], NULL, i->ext.cvt.d); + else + if (i->def[0]) + nv_print_value(i->def[0], NULL, NV_OPTYPE(i->opcode)); + else + if (i->target) + PRINT(" %s(BB:%i)", yllw, i->target->id); + else + PRINT(" #"); + + for (s = 1; s < 4 && i->def[s]; ++s) + nv_print_value(i->def[s], NULL, NV_OPTYPE(i->opcode)); + if (s > 1) + PRINT("%s ,", norm); + + for (s = 0; s < 6 && i->src[s]; ++s) { + ubyte type; + if (s == i->indirect || s == i->predicate) + continue; + if (i->opcode == NV_OP_CVT) + type = i->ext.cvt.s; + else + type = NV_OPTYPE(i->opcode); + + if (i->src[s]->mod) + PRINT(" %s%s", gree, nv_modifier_string(i->src[s]->mod)); + + if (i->indirect >= 0 && + NV_IS_MEMORY_FILE(i->src[s]->value->reg.file)) + nv_print_ref(i->src[s], i->src[i->indirect]->value, type); + else + nv_print_ref(i->src[s], NULL, type); + } + PRINT(" %s\n", norm); +} + +#define NV_MOD_SGN NV_MOD_ABS | NV_MOD_NEG + +struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = +{ + { NV_OP_UNDEF, "undef", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, + { NV_OP_BIND, "bind", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 }, + { NV_OP_MERGE, "merge", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 }, + { NV_OP_PHI, "phi", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, + { NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, + { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 }, + + { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 }, + { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, + { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, + { NV_OP_XOR, "xor", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, + { NV_OP_SHL, "shl", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 }, + { NV_OP_SHR, "shr", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 }, + { NV_OP_NOT, "not", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SET, "set", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, + { NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, + { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, + { NV_OP_ABS, "abs", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_NEG, "neg", NV_TYPE_F32, NV_MOD_ABS, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, + { NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, + { NV_OP_CVT, "cvt", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_FLOOR, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_TRUNC, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_SAD, "sad", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, + + { NV_OP_VFETCH, "vfetch", NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_PFETCH, "pfetch", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_EXPORT, "export", NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_LINTERP, 
"linterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_PINTERP, "pinterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_EMIT, "emit", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_RESTART, "restart", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_TEX, "tex", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_TXB, "texbias", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_TXL, "texlod", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_TXF, "texfetch", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 }, + { NV_OP_TXQ, "texquery", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 }, + + { NV_OP_QUADOP, "quadop", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_DFDX, "dfdx", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_DFDY, "dfdy", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_KIL, "kil", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_BRA, "bra", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + { NV_OP_CALL, "call", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + { NV_OP_RET, "ret", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + { NV_OP_RET, "exit", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + { NV_OP_NOP, "ud", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + { NV_OP_NOP, "ud", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_JOINAT, "joinat", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + { NV_OP_JOIN, "join", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_ADD, "add", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 }, + { NV_OP_MUL, "mul", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 }, + { NV_OP_ABS, "abs", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_NEG, "neg", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_MAX, "max", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, + { NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, + { NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, + { NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, + { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, + { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 }, + { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SET_F32_OR, "or set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SET_F32_XOR, "xor set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_SELP, "selp", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_SLCT_F32, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SLCT_F32, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SLCT_F32, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + + { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 }, + + { NV_OP_FSET_F32, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, + + { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, + + { NV_OP_UNDEF, "BAD_OP", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 } +}; diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c 
new file mode 100644 index 00000000000..d24f09a1507 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c @@ -0,0 +1,927 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define NOUVEAU_DEBUG 1 + +/* #define NVC0_RA_DEBUG_LIVEI */ +/* #define NVC0_RA_DEBUG_LIVE_SETS */ +/* #define NVC0_RA_DEBUG_JOIN */ + +#include "nvc0_pc.h" +#include "util/u_simple_list.h" + +#define NVC0_NUM_REGISTER_FILES 3 + +/* @unit_shift: log2 of min allocation unit for register */ +struct register_set { + uint32_t bits[NVC0_NUM_REGISTER_FILES][2]; + uint32_t last[NVC0_NUM_REGISTER_FILES]; + int log2_unit[NVC0_NUM_REGISTER_FILES]; + struct nv_pc *pc; +}; + +struct nv_pc_pass { + struct nv_pc *pc; + struct nv_instruction **insns; + uint num_insns; + uint pass_seq; +}; + +static void +ranges_coalesce(struct nv_range *range) +{ + while (range->next && range->end >= range->next->bgn) { + struct nv_range *rnn = range->next->next; + assert(range->bgn <= range->next->bgn); + range->end = MAX2(range->end, range->next->end); + FREE(range->next); + range->next = rnn; + } +} + +static boolean +add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range) +{ + struct nv_range *range, **nextp = &val->livei; + + for (range = val->livei; range; range = range->next) { + if (end < range->bgn) + break; /* insert before */ + + if (bgn > range->end) { + nextp = &range->next; + continue; /* insert after */ + } + + /* overlap */ + if (bgn < range->bgn) { + range->bgn = bgn; + if (end > range->end) + range->end = end; + ranges_coalesce(range); + return TRUE; + } + if (end > range->end) { + range->end = end; + ranges_coalesce(range); + return TRUE; + } + assert(bgn >= range->bgn); + assert(end <= range->end); + return TRUE; + } + + if (!new_range) + new_range = CALLOC_STRUCT(nv_range); + + new_range->bgn = bgn; + new_range->end = end; + new_range->next = range; + *(nextp) = new_range; + return FALSE; +} + +static void +add_range(struct nv_value *val, struct nv_basic_block *b, int end) +{ + int bgn; + + if (!val->insn) /* ignore non-def values */ + return; + assert(b->entry->serial <= b->exit->serial); + assert(b->phi->serial <= end); + assert(b->exit->serial + 1 >= end); + + bgn = val->insn->serial; + if (bgn < b->entry->serial || bgn > b->exit->serial) + bgn = b->entry->serial; + + assert(bgn <= end); + + add_range_ex(val, bgn, end, NULL); +} + +#if defined(NVC0_RA_DEBUG_JOIN) || defined(NVC0_RA_DEBUG_LIVEI) +static void +livei_print(struct nv_value *a) 
+{ + struct nv_range *r = a->livei; + + debug_printf("livei %i: ", a->n); + while (r) { + debug_printf("[%i, %i) ", r->bgn, r->end); + r = r->next; + } + debug_printf("\n"); +} +#endif + +static void +livei_unify(struct nv_value *dst, struct nv_value *src) +{ + struct nv_range *range, *next; + + for (range = src->livei; range; range = next) { + next = range->next; + if (add_range_ex(dst, range->bgn, range->end, range)) + FREE(range); + } + src->livei = NULL; +} + +static void +livei_release(struct nv_value *val) +{ + struct nv_range *range, *next; + + for (range = val->livei; range; range = next) { + next = range->next; + FREE(range); + } +} + +static boolean +livei_have_overlap(struct nv_value *a, struct nv_value *b) +{ + struct nv_range *r_a, *r_b; + + for (r_a = a->livei; r_a; r_a = r_a->next) { + for (r_b = b->livei; r_b; r_b = r_b->next) { + if (r_b->bgn < r_a->end && + r_b->end > r_a->bgn) + return TRUE; + } + } + return FALSE; +} + +static int +livei_end(struct nv_value *a) +{ + struct nv_range *r = a->livei; + + assert(r); + while (r->next) + r = r->next; + return r->end; +} + +static boolean +livei_contains(struct nv_value *a, int pos) +{ + struct nv_range *r; + + for (r = a->livei; r && r->bgn <= pos; r = r->next) + if (r->end > pos) + return TRUE; + return FALSE; +} + +static boolean +reg_assign(struct register_set *set, struct nv_value **def, int n) +{ + int i, id, s, k; + uint32_t m; + int f = def[0]->reg.file; + + k = n; + if (k == 3) + k = 4; + s = (k * def[0]->reg.size) >> set->log2_unit[f]; + m = (1 << s) - 1; + + id = set->last[f]; + + for (i = 0; i * 32 < set->last[f]; ++i) { + if (set->bits[f][i] == 0xffffffff) + continue; + + for (id = 0; id < 32; id += s) + if (!(set->bits[f][i] & (m << id))) + break; + if (id < 32) + break; + } + if (i * 32 + id > set->last[f]) + return FALSE; + + set->bits[f][i] |= m << id; + + id += i * 32; + + set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1); + + for (i = 0; i < n; ++i) + if (def[i]->livei) + def[i]->reg.id = id++; + + return TRUE; +} + +static INLINE void +reg_occupy(struct register_set *set, struct nv_value *val) +{ + int id = val->reg.id, f = val->reg.file; + uint32_t m; + + if (id < 0) + return; + m = (1 << (val->reg.size >> set->log2_unit[f])) - 1; + + set->bits[f][id / 32] |= m << (id % 32); + + if (set->pc->max_reg[f] < id) + set->pc->max_reg[f] = id; +} + +static INLINE void +reg_release(struct register_set *set, struct nv_value *val) +{ + int id = val->reg.id, f = val->reg.file; + uint32_t m; + + if (id < 0) + return; + m = (1 << (val->reg.size >> set->log2_unit[f])) - 1; + + set->bits[f][id / 32] &= ~(m << (id % 32)); +} + +static INLINE boolean +join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) +{ + int i; + struct nv_value *val; + + if (a->reg.file != b->reg.file || a->reg.size != b->reg.size) + return FALSE; + + if (a->join->reg.id == b->join->reg.id) + return TRUE; + + /* either a or b or both have been assigned */ + + if (a->join->reg.id >= 0 && b->join->reg.id >= 0) + return FALSE; + else + if (b->join->reg.id >= 0) { + if (b->join->reg.id == 63) + return FALSE; + val = a; + a = b; + b = val; + } else + if (a->join->reg.id == 63) + return FALSE; + + for (i = 0; i < ctx->pc->num_values; ++i) { + val = &ctx->pc->values[i]; + + if (val->join->reg.id != a->join->reg.id) + continue; + if (val->join != a->join && livei_have_overlap(val->join, b->join)) + return FALSE; + } + return TRUE; +} + +static INLINE void +do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct 
nv_value *b) +{ + int j; + struct nv_value *bjoin = b->join; + + if (b->join->reg.id >= 0) + a->join->reg.id = b->join->reg.id; + + livei_unify(a->join, b->join); + +#ifdef NVC0_RA_DEBUG_JOIN + debug_printf("joining %i to %i\n", b->n, a->n); +#endif + + /* make a->join the new representative */ + for (j = 0; j < ctx->pc->num_values; ++j) + if (ctx->pc->values[j].join == bjoin) + ctx->pc->values[j].join = a->join; + + assert(b->join == a->join); +} + +static INLINE void +try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) +{ + if (!join_allowed(ctx, a, b)) { +#ifdef NVC0_RA_DEBUG_JOIN + debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n); +#endif + return; + } + if (livei_have_overlap(a->join, b->join)) { +#ifdef NVC0_RA_DEBUG_JOIN + debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n); + livei_print(a); + livei_print(b); +#endif + return; + } + + do_join_values(ctx, a, b); +} + +static INLINE boolean +need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p) +{ + int i = 0, n = 0; + + for (; i < 2; ++i) + if (p->out[i] && !IS_LOOP_EDGE(p->out_kind[i])) + ++n; + + return (b->num_in > 1) && (n == 2); +} + +static int +phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b, + struct nv_basic_block *tb) +{ + int i, j; + + for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) { + if (!nvc0_bblock_reachable_by(b, phi->src[i]->value->insn->bb, tb)) + continue; + /* NOTE: back-edges are ignored by the reachable-by check */ + if (j < 0 || !nvc0_bblock_reachable_by(phi->src[j]->value->insn->bb, + phi->src[i]->value->insn->bb, tb)) + j = i; + } + return j; +} + +/* For each operand of each PHI in b, generate a new value by inserting a MOV + * at the end of the block it is coming from and replace the operand with its + * result. This eliminates liveness conflicts and enables us to let values be + * copied to the right register if such a conflict exists nonetheless. + * + * These MOVs are also crucial in making sure the live intervals of phi srces + * are extended until the end of the loop, since they are not included in the + * live-in sets. 
+ */ +static int +pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) +{ + struct nv_instruction *i, *ni; + struct nv_value *val; + struct nv_basic_block *p, *pn; + int n, j; + + b->pass_seq = ctx->pc->pass_seq; + + for (n = 0; n < b->num_in; ++n) { + p = pn = b->in[n]; + assert(p); + + if (need_new_else_block(b, p)) { + pn = new_basic_block(ctx->pc); + + if (p->out[0] == b) + p->out[0] = pn; + else + p->out[1] = pn; + + if (p->exit->target == b) /* target to new else-block */ + p->exit->target = pn; + + b->in[n] = pn; + + pn->out[0] = b; + pn->in[0] = p; + pn->num_in = 1; + } + ctx->pc->current_block = pn; + + for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) { + if ((j = phi_opnd_for_bb(i, p, b)) < 0) + continue; + val = i->src[j]->value; + + if (i->src[j]->flags) { + /* value already encountered from a different in-block */ + val = val->insn->src[0]->value; + while (j < 6 && i->src[j]) + ++j; + assert(j < 6); + } + + ni = new_instruction(ctx->pc, NV_OP_MOV); + + /* TODO: insert instruction at correct position in the first place */ + if (ni->prev && ni->prev->target) + nvc0_insns_permute(ni->prev, ni); + + ni->def[0] = new_value_like(ctx->pc, val); + ni->def[0]->insn = ni; + nv_reference(ctx->pc, ni, 0, val); + nv_reference(ctx->pc, i, j, ni->def[0]); /* new phi source = MOV def */ + i->src[j]->flags = 1; + } + + if (pn != p && pn->exit) { + ctx->pc->current_block = b->in[n ? 0 : 1]; + ni = new_instruction(ctx->pc, NV_OP_BRA); + ni->target = b; + ni->terminator = 1; + } + } + + for (j = 0; j < 2; ++j) + if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) + pass_generate_phi_movs(ctx, b->out[j]); + + return 0; +} + +static int +pass_join_values(struct nv_pc_pass *ctx, int iter) +{ + int c, n; + + for (n = 0; n < ctx->num_insns; ++n) { + struct nv_instruction *i = ctx->insns[n]; + + switch (i->opcode) { + case NV_OP_PHI: + if (iter != 2) + break; + for (c = 0; c < 6 && i->src[c]; ++c) + try_join_values(ctx, i->def[0], i->src[c]->value); + break; + case NV_OP_MOV: + if ((iter == 2) && i->src[0]->value->insn && + !nv_is_texture_op(i->src[0]->value->join->insn->opcode)) + try_join_values(ctx, i->def[0], i->src[0]->value); + break; + case NV_OP_SELECT: + if (iter != 1) + break; + for (c = 0; c < 6 && i->src[c]; ++c) { + assert(join_allowed(ctx, i->def[0], i->src[c]->value)); + do_join_values(ctx, i->def[0], i->src[c]->value); + } + break; + case NV_OP_TEX: + case NV_OP_TXB: + case NV_OP_TXL: + case NV_OP_TXQ: + /* on nvc0, TEX src and dst can differ */ + break; + case NV_OP_BIND: + if (iter) + break; + for (c = 0; c < 6 && i->src[c]; ++c) + do_join_values(ctx, i->def[c], i->src[c]->value); + break; + default: + break; + } + } + return 0; +} + +/* Order the instructions so that live intervals can be expressed in numbers. 
*/ +static void +pass_order_instructions(void *priv, struct nv_basic_block *b) +{ + struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv; + struct nv_instruction *i; + + b->pass_seq = ctx->pc->pass_seq; + + assert(!b->exit || !b->exit->next); + for (i = b->phi; i; i = i->next) { + i->serial = ctx->num_insns; + ctx->insns[ctx->num_insns++] = i; + } +} + +static void +bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b) +{ +#ifdef NVC0_RA_DEBUG_LIVE_SETS + struct nv_value *val; + int j; + + debug_printf("LIVE-INs of BB:%i: ", b->id); + + for (j = 0; j < pc->num_values; ++j) { + if (!(b->live_set[j / 32] & (1 << (j % 32)))) + continue; + val = &pc->values[j]; + if (!val->insn) + continue; + debug_printf("%i ", val->n); + } + debug_printf("\n"); +#endif +} + +static INLINE void +live_set_add(struct nv_basic_block *b, struct nv_value *val) +{ + if (!val->insn) /* don't add non-def values */ + return; + b->live_set[val->n / 32] |= 1 << (val->n % 32); +} + +static INLINE void +live_set_rem(struct nv_basic_block *b, struct nv_value *val) +{ + b->live_set[val->n / 32] &= ~(1 << (val->n % 32)); +} + +static INLINE boolean +live_set_test(struct nv_basic_block *b, struct nv_ref *ref) +{ + int n = ref->value->n; + return b->live_set[n / 32] & (1 << (n % 32)); +} + +/* The live set of a block contains those values that are live immediately + * before the beginning of the block, so do a backwards scan. + */ +static int +pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b) +{ + struct nv_instruction *i; + int j, n, ret = 0; + + if (b->pass_seq >= ctx->pc->pass_seq) + return 0; + b->pass_seq = ctx->pc->pass_seq; + + /* slight hack for undecidedness: set phi = entry if it's undefined */ + if (!b->phi) + b->phi = b->entry; + + for (n = 0; n < 2; ++n) { + if (!b->out[n] || b->out[n] == b) + continue; + ret = pass_build_live_sets(ctx, b->out[n]); + if (ret) + return ret; + + if (n == 0) { + for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j) + b->live_set[j] = b->out[n]->live_set[j]; + } else { + for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j) + b->live_set[j] |= b->out[n]->live_set[j]; + } + } + + if (!b->entry) + return 0; + + bb_live_set_print(ctx->pc, b); + + for (i = b->exit; i != b->entry->prev; i = i->prev) { + for (j = 0; j < 5 && i->def[j]; j++) + live_set_rem(b, i->def[j]); + for (j = 0; j < 6 && i->src[j]; j++) + live_set_add(b, i->src[j]->value); + } + for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) + live_set_rem(b, i->def[0]); + + bb_live_set_print(ctx->pc, b); + + return 0; +} + +static void collect_live_values(struct nv_basic_block *b, const int n) +{ + int i; + + if (b->out[0]) { + if (b->out[1]) { /* what to do about back-edges ? */ + for (i = 0; i < n; ++i) + b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i]; + } else { + memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t)); + } + } else + if (b->out[1]) { + memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t)); + } else { + memset(b->live_set, 0, n * sizeof(uint32_t)); + } +} + +/* NOTE: the live intervals of phi functions start at the first non-phi insn. 
*/ +static int +pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b) +{ + struct nv_instruction *i, *i_stop; + int j, s; + const int n = (ctx->pc->num_values + 31) / 32; + + /* verify that first block does not have live-in values */ + if (b->num_in == 0) + for (j = 0; j < n; ++j) + assert(b->live_set[j] == 0); + + collect_live_values(b, n); + + /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */ + for (j = 0; j < 2; ++j) { + if (!b->out[j] || !b->out[j]->phi) + continue; + for (i = b->out[j]->phi; i->opcode == NV_OP_PHI; i = i->next) { + live_set_rem(b, i->def[0]); + + for (s = 0; s < 6 && i->src[s]; ++s) { + assert(i->src[s]->value->insn); + if (nvc0_bblock_reachable_by(b, i->src[s]->value->insn->bb, + b->out[j])) + live_set_add(b, i->src[s]->value); + else + live_set_rem(b, i->src[s]->value); + } + } + } + + /* remaining live-outs are live until the end */ + if (b->exit) { + for (j = 0; j < ctx->pc->num_values; ++j) { + if (!(b->live_set[j / 32] & (1 << (j % 32)))) + continue; + add_range(&ctx->pc->values[j], b, b->exit->serial + 1); +#ifdef NVC0_RA_DEBUG_LIVEI + debug_printf("adding range for live value %i: ", j); + livei_print(&ctx->pc->values[j]); +#endif + } + } + + i_stop = b->entry ? b->entry->prev : NULL; + + /* don't have to include phi functions here (will have 0 live range) */ + for (i = b->exit; i != i_stop; i = i->prev) { + assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial); + for (j = 0; j < 4 && i->def[j]; ++j) + live_set_rem(b, i->def[j]); + + for (j = 0; j < 6 && i->src[j]; ++j) { + if (!live_set_test(b, i->src[j])) { + live_set_add(b, i->src[j]->value); + add_range(i->src[j]->value, b, i->serial); +#ifdef NVC0_RA_DEBUG_LIVEI + debug_printf("adding range for source %i (ends living): ", + i->src[j]->value->n); + livei_print(i->src[j]->value); +#endif + } + } + } + + b->pass_seq = ctx->pc->pass_seq; + + if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq) + pass_build_intervals(ctx, b->out[0]); + + if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq) + pass_build_intervals(ctx, b->out[1]); + + return 0; +} + +static INLINE void +nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set) +{ + memset(set, 0, sizeof(*set)); + + set->last[NV_FILE_GPR] = 62; + set->last[NV_FILE_PRED] = 6; + set->last[NV_FILE_COND] = 1; + + set->log2_unit[NV_FILE_GPR] = 2; + set->log2_unit[NV_FILE_COND] = 0; + set->log2_unit[NV_FILE_PRED] = 0; + + set->pc = pc; +} + +static void +insert_ordered_tail(struct nv_value *list, struct nv_value *nval) +{ + struct nv_value *elem; + + for (elem = list->prev; + elem != list && elem->livei->bgn > nval->livei->bgn; + elem = elem->prev); + /* now elem begins before or at the same time as val */ + + nval->prev = elem; + nval->next = elem->next; + elem->next->prev = nval; + elem->next = nval; +} + +static int +pass_linear_scan(struct nv_pc_pass *ctx, int iter) +{ + struct nv_instruction *i; + struct register_set f, free; + int k, n; + struct nv_value *cur, *val, *tmp[2]; + struct nv_value active, inactive, handled, unhandled; + + make_empty_list(&active); + make_empty_list(&inactive); + make_empty_list(&handled); + make_empty_list(&unhandled); + + nvc0_ctor_register_set(ctx->pc, &free); + + /* joined values should have range = NULL and thus not be added; + * also, fixed memory values won't be added because they're not + * def'd, just used + */ + for (n = 0; n < ctx->num_insns; ++n) { + i = ctx->insns[n]; + + for (k = 0; k < 5; ++k) { + if (i->def[k] && i->def[k]->livei) + 
insert_ordered_tail(&unhandled, i->def[k]); + else + if (0 && i->def[k]) + debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n); + } + } + + for (val = unhandled.next; val != unhandled.prev; val = val->next) { + assert(val->join == val); + assert(val->livei->bgn <= val->next->livei->bgn); + } + + foreach_s(cur, tmp[0], &unhandled) { + remove_from_list(cur); + + foreach_s(val, tmp[1], &active) { + if (livei_end(val) <= cur->livei->bgn) { + reg_release(&free, val); + move_to_head(&handled, val); + } else + if (!livei_contains(val, cur->livei->bgn)) { + reg_release(&free, val); + move_to_head(&inactive, val); + } + } + + foreach_s(val, tmp[1], &inactive) { + if (livei_end(val) <= cur->livei->bgn) + move_to_head(&handled, val); + else + if (livei_contains(val, cur->livei->bgn)) { + reg_occupy(&free, val); + move_to_head(&active, val); + } + } + + f = free; + + foreach(val, &inactive) + if (livei_have_overlap(val, cur)) + reg_occupy(&f, val); + + foreach(val, &unhandled) + if (val->reg.id >= 0 && livei_have_overlap(val, cur)) + reg_occupy(&f, val); + + if (cur->reg.id < 0) { + boolean mem = FALSE; + int v = nvi_vector_size(cur->insn); + + if (v > 1) + mem = !reg_assign(&f, &cur->insn->def[0], v); + else + if (iter) + mem = !reg_assign(&f, &cur, 1); + + if (mem) { + NOUVEAU_ERR("out of registers\n"); + abort(); + } + } + insert_at_head(&active, cur); + reg_occupy(&free, cur); + } + + return 0; +} + +static int +nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) +{ + struct nv_pc_pass *ctx; + int i, ret; + + NOUVEAU_DBG("REGISTER ALLOCATION - entering\n"); + + ctx = CALLOC_STRUCT(nv_pc_pass); + if (!ctx) + return -1; + ctx->pc = pc; + + ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *)); + if (!ctx->insns) { + FREE(ctx); + return -1; + } + + pc->pass_seq++; + ret = pass_generate_phi_movs(ctx, root); + assert(!ret); + + for (i = 0; i < pc->loop_nesting_bound; ++i) { + pc->pass_seq++; + ret = pass_build_live_sets(ctx, root); + assert(!ret && "live sets"); + if (ret) { + NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i); + goto out; + } + } + + pc->pass_seq++; + nvc0_pc_pass_in_order(root, pass_order_instructions, ctx); + + pc->pass_seq++; + ret = pass_build_intervals(ctx, root); + assert(!ret && "build intervals"); + if (ret) { + NOUVEAU_ERR("failed to build live intervals\n"); + goto out; + } + +#ifdef NVC0_RA_DEBUG_LIVEI + for (i = 0; i < pc->num_values; ++i) + livei_print(&pc->values[i]); +#endif + + ret = pass_join_values(ctx, 0); + if (ret) + goto out; + ret = pass_linear_scan(ctx, 0); + if (ret) + goto out; + ret = pass_join_values(ctx, 1); + if (ret) + goto out; + ret = pass_join_values(ctx, 2); + if (ret) + goto out; + ret = pass_linear_scan(ctx, 1); + if (ret) + goto out; + + for (i = 0; i < pc->num_values; ++i) + livei_release(&pc->values[i]); + + NOUVEAU_DBG("REGISTER ALLOCATION - leaving\n"); + +out: + FREE(ctx->insns); + FREE(ctx); + return ret; +} + +int +nvc0_pc_exec_pass1(struct nv_pc *pc) +{ + int i, ret; + + for (i = 0; i < pc->num_subroutines + 1; ++i) + if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i]))) + return ret; + return 0; +} diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c new file mode 100644 index 00000000000..57a0874e679 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -0,0 +1,678 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated 
documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_shader_tokens.h" +#include "pipe/p_defines.h" + +#define NOUVEAU_DEBUG + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_dump.h" + +#include "nvc0_context.h" +#include "nvc0_pc.h" + +static unsigned +nvc0_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c) +{ + unsigned mask = inst->Dst[0].Register.WriteMask; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_COS: + case TGSI_OPCODE_SIN: + return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); + case TGSI_OPCODE_DP3: + return 0x7; + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + case TGSI_OPCODE_KIL: /* WriteMask ignored */ + return 0xf; + case TGSI_OPCODE_DST: + return mask & (c ? 0xa : 0x6); + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_EXP: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_LOG: + case TGSI_OPCODE_POW: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_SCS: + return 0x1; + case TGSI_OPCODE_IF: + return 0x1; + case TGSI_OPCODE_LIT: + return 0xb; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXP: + { + const struct tgsi_instruction_texture *tex; + + assert(inst->Instruction.Texture); + tex = &inst->Texture; + + mask = 0x7; + if (inst->Instruction.Opcode != TGSI_OPCODE_TEX && + inst->Instruction.Opcode != TGSI_OPCODE_TXD) + mask |= 0x8; /* bias, lod or proj */ + + switch (tex->Texture) { + case TGSI_TEXTURE_1D: + mask &= 0x9; + break; + case TGSI_TEXTURE_SHADOW1D: + mask &= 0x5; + break; + case TGSI_TEXTURE_2D: + mask &= 0xb; + break; + default: + break; + } + } + return mask; + case TGSI_OPCODE_XPD: + { + unsigned x = 0; + if (mask & 1) x |= 0x6; + if (mask & 2) x |= 0x5; + if (mask & 4) x |= 0x3; + return x; + } + default: + break; + } + + return mask; +} + +static void +nvc0_indirect_inputs(struct nvc0_translation_info *ti, int id) +{ + int i, c; + + for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) + for (c = 0; c < 4; ++c) + ti->input_access[i][c] = id; + + ti->indirect_inputs = TRUE; +} + +static void +nvc0_indirect_outputs(struct nvc0_translation_info *ti, int id) +{ + int i, c; + + for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) + for (c = 0; c < 4; ++c) + ti->output_access[i][c] = id; + + ti->indirect_outputs = TRUE; +} + +static INLINE unsigned +nvc0_system_value_location(unsigned sn, unsigned si) +{ + /* NOTE: locations 0xfxx indicate special regs */ + switch (sn) { + /* + case TGSI_SEMANTIC_VERTEXID: + return 0x2fc; + */ + case TGSI_SEMANTIC_PRIMID: + return 0x60; + /* + case TGSI_SEMANTIC_LAYER_INDEX: + return 0x64; + case TGSI_SEMANTIC_VIEWPORT_INDEX: + return 
0x68; + */ + case TGSI_SEMANTIC_INSTANCEID: + return 0x2f8; + case TGSI_SEMANTIC_FACE: + return 0x3fc; + /* + case TGSI_SEMANTIC_INVOCATIONID: + return 0xf11; + */ + default: + assert(0); + return 0x000; + } +} + +static INLINE unsigned +nvc0_varying_location(unsigned sn, unsigned si) +{ + switch (sn) { + case TGSI_SEMANTIC_POSITION: + return 0x70; + case TGSI_SEMANTIC_COLOR: + return 0x280 + (si * 16); /* are these hard-wired ? */ + case TGSI_SEMANTIC_BCOLOR: + return 0x2a0 + (si * 16); + case TGSI_SEMANTIC_FOG: + return 0x270; + case TGSI_SEMANTIC_PSIZE: + return 0x6c; + /* + case TGSI_SEMANTIC_PNTC: + return 0x2e0; + */ + case TGSI_SEMANTIC_GENERIC: + assert(si < 31); + return 0x80 + (si * 16); + case TGSI_SEMANTIC_NORMAL: + return 0x360; + case TGSI_SEMANTIC_PRIMID: + return 0x40; + case TGSI_SEMANTIC_FACE: + return 0x3fc; + /* + case TGSI_SEMANTIC_CLIP_DISTANCE: + return 0x2c0 + (si * 4); + */ + default: + assert(0); + return 0x000; + } +} + +static INLINE unsigned +nvc0_interp_mode(const struct tgsi_full_declaration *decl) +{ + unsigned mode; + + if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT) + mode = NVC0_INTERP_FLAT; + else + if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) + mode = NVC0_INTERP_PERSPECTIVE; + else + mode = NVC0_INTERP_LINEAR; + + if (decl->Declaration.Centroid) + mode |= NVC0_INTERP_CENTROID; + + return mode; +} + +static void +prog_immediate(struct nvc0_translation_info *ti, + const struct tgsi_full_immediate *imm) +{ + int c; + unsigned n = ti->immd32_nr++; + + assert(ti->immd32_nr <= ti->scan.immediate_count); + + for (c = 0; c < 4; ++c) + ti->immd32[n * 4 + c] = imm->u[c].Uint; + + ti->immd32_ty[n] = imm->Immediate.DataType; +} + +static boolean +prog_decl(struct nvc0_translation_info *ti, + const struct tgsi_full_declaration *decl) +{ + unsigned i, c; + unsigned sn = TGSI_SEMANTIC_GENERIC; + unsigned si = 0; + const unsigned first = decl->Range.First; + const unsigned last = decl->Range.Last; + + if (decl->Declaration.Semantic) { + sn = decl->Semantic.Name; + si = decl->Semantic.Index; + } + + switch (decl->Declaration.File) { + case TGSI_FILE_INPUT: + for (i = first; i <= last; ++i) { + if (ti->prog->type == PIPE_SHADER_VERTEX) { + sn = TGSI_SEMANTIC_GENERIC; + si = i; + } + for (c = 0; c < 4; ++c) + ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; + + if (ti->prog->type == PIPE_SHADER_FRAGMENT) + ti->interp_mode[i] = nvc0_interp_mode(decl); + } + break; + case TGSI_FILE_OUTPUT: + for (i = first; i <= last; ++i, ++si) { + if (ti->prog->type == PIPE_SHADER_FRAGMENT) { + si = i; + if (i == ti->fp_depth_output) { + ti->output_loc[i][2] = (ti->scan.num_outputs - 1) * 4; + } else { + if (i > ti->fp_depth_output) + si -= 1; + for (c = 0; c < 4; ++c) + ti->output_loc[i][c] = si * 4 + c; + } + } else { + for (c = 0; c < 4; ++c) + ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; + } + } + break; + case TGSI_FILE_SYSTEM_VALUE: + ti->sysval_loc[i] = nvc0_system_value_location(sn, si); + assert(first == last); + break; + case TGSI_FILE_NULL: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_SAMPLER: + case TGSI_FILE_ADDRESS: + case TGSI_FILE_IMMEDIATE: + case TGSI_FILE_PREDICATE: + break; + default: + NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl->Declaration.File); + return FALSE; + } + return TRUE; +} + +static void +prog_inst(struct nvc0_translation_info *ti, + const struct tgsi_full_instruction *inst, int id) +{ + const struct tgsi_dst_register *dst; + const struct tgsi_src_register 
*src; + int s, c, k; + unsigned mask; + + if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) { + ti->subr[ti->num_subrs].first_insn = id - 1; + ti->subr[ti->num_subrs].id = ti->num_subrs + 1; /* id 0 is main program */ + ++ti->num_subrs; + } + + if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { + dst = &inst->Dst[0].Register; + + for (c = 0; c < 4; ++c) { + if (dst->Indirect) + nvc0_indirect_outputs(ti, id); + if (!(dst->WriteMask & (1 << c))) + continue; + ti->output_access[dst->Index][c] = id; + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_MOV && + inst->Src[0].Register.File == TGSI_FILE_INPUT && + dst->Index == ti->edgeflag_out) + ti->prog->vp.edgeflag = inst->Src[0].Register.Index; + } else + if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) { + if (inst->Dst[0].Register.Indirect) + ti->require_stores = TRUE; + } + + for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) { + src = &inst->Src[s].Register; + if (src->File == TGSI_FILE_TEMPORARY) + if (inst->Src[s].Register.Indirect) + ti->require_stores = TRUE; + if (src->File != TGSI_FILE_INPUT) + continue; + mask = nvc0_tgsi_src_mask(inst, s); + + if (inst->Src[s].Register.Indirect) + nvc0_indirect_inputs(ti, id); + + for (c = 0; c < 4; ++c) { + if (!(mask & (1 << c))) + continue; + k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); + if (k <= TGSI_SWIZZLE_W) + ti->input_access[src->Index][k] = id; + } + } +} + +/* Probably should introduce something like struct tgsi_function_declaration + * instead of trying to guess inputs/outputs. + */ +static void +prog_subroutine_inst(struct nvc0_subroutine *subr, + const struct tgsi_full_instruction *inst) +{ + const struct tgsi_dst_register *dst; + const struct tgsi_src_register *src; + int s, c, k; + unsigned mask; + + for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) { + src = &inst->Src[s].Register; + if (src->File != TGSI_FILE_TEMPORARY) + continue; + mask = nvc0_tgsi_src_mask(inst, s); + + for (c = 0; c < 4; ++c) { + k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); + + if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W) + if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32)))) + subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32); + } + } + + if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) { + dst = &inst->Dst[0].Register; + + for (c = 0; c < 4; ++c) + if (dst->WriteMask & (1 << c)) + subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32); + } +} + +static int +nvc0_vp_gp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti) +{ + int i, c; + unsigned a; + + for (a = 0x80/4, i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) { + for (c = 0; c < 4; ++c, ++a) + if (ti->input_access[i][c]) + vp->hdr[5 + a / 32] |= 1 << (a % 32); /* VP_ATTR_EN */ + } + + for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) { + a = (ti->output_loc[i][0] - 0x40) / 4; + for (c = 0; c < 4; ++c, ++a) { + if (!ti->output_access[i][c]) + continue; + vp->hdr[13 + a / 32] |= 1 << (a % 32); /* VP_EXPORT_EN */ + } + } + + return 0; +} + +static int +nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti) +{ + vp->hdr[0] = 0x20461; + vp->hdr[4] = 0xff000; + + vp->hdr[18] = (1 << vp->vp.num_ucps) - 1; + + return nvc0_vp_gp_gen_header(vp, ti); +} + +static int +nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti) +{ + unsigned invocations = 1; + unsigned max_output_verts, output_prim; + unsigned i; + + gp->hdr[0] = 0x21061; + + for (i = 0; i < ti->scan.num_properties; ++i) { + switch 
(ti->scan.properties[i].name) { + case TGSI_PROPERTY_GS_OUTPUT_PRIM: + output_prim = ti->scan.properties[i].data[0]; + break; + case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: + max_output_verts = ti->scan.properties[i].data[0]; + assert(max_output_verts < 512); + break; + /* + case TGSI_PROPERTY_GS_INVOCATIONS: + invocations = ti->scan.properties[i].data[0]; + assert(invocations <= 32); + break; + */ + default: + break; + } + } + + gp->hdr[2] = MIN2(invocations, 32) << 24; + + switch (output_prim) { + case PIPE_PRIM_POINTS: + gp->hdr[3] = 0x01000000; + gp->hdr[0] |= 0xf0000000; + break; + case PIPE_PRIM_LINE_STRIP: + gp->hdr[3] = 0x06000000; + gp->hdr[0] |= 0x10000000; + break; + case PIPE_PRIM_TRIANGLE_STRIP: + gp->hdr[3] = 0x07000000; + gp->hdr[0] |= 0x10000000; + break; + default: + assert(0); + break; + } + + gp->hdr[4] = max_output_verts & 0x1ff; + + return nvc0_vp_gp_gen_header(gp, ti); +} + +static int +nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti) +{ + int i, c; + unsigned a, m; + + fp->hdr[0] = 0x21462; + fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ + + if (ti->scan.uses_kill) + fp->hdr[0] |= 0x8000; + if (ti->scan.writes_z) { + fp->hdr[19] |= 0x2; + if (ti->scan.num_outputs > 2) + fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */ + } else { + if (ti->scan.num_outputs > 1) + fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */ + } + + for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) { + m = ti->interp_mode[i]; + for (c = 0; c < 4; ++c) { + if (!ti->input_access[i][c]) + continue; + a = ti->input_loc[i][c] / 2; + if ((a & ~7) == 0x70/2) + fp->hdr[5] |= 1 << (28 + (a & 7) / 2); /* FRAG_COORD_UMASK */ + else + fp->hdr[4 + a / 32] |= m << (a % 32); + } + } + + for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) { + if (i != ti->fp_depth_output) + fp->hdr[18] |= 0xf << ti->output_loc[i][0]; + } + + return 0; +} + +static boolean +nvc0_prog_scan(struct nvc0_translation_info *ti) +{ + struct nvc0_program *prog = ti->prog; + struct tgsi_parse_context parse; + int ret; + unsigned i; + +#ifdef NOUVEAU_DEBUG + tgsi_dump(prog->pipe.tokens, 0); +#endif + + tgsi_scan_shader(prog->pipe.tokens, &ti->scan); + + if (ti->prog->type == PIPE_SHADER_FRAGMENT) { + ti->fp_depth_output = 255; + for (i = 0; i < ti->scan.num_outputs; ++i) + if (ti->scan.output_semantic_name[i] == TGSI_SEMANTIC_POSITION) + ti->fp_depth_output = i; + } + + ti->subr = + CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0])); + + ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16); + ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte)); + + ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0])); + + tgsi_parse_init(&parse, prog->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + prog_immediate(ti, &parse.FullToken.FullImmediate); + break; + case TGSI_TOKEN_TYPE_DECLARATION: + prog_decl(ti, &parse.FullToken.FullDeclaration); + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + ti->insns[ti->num_insns] = parse.FullToken.FullInstruction; + prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->num_insns); + break; + default: + break; + } + } + + for (i = 0; i < ti->num_subrs; ++i) { + unsigned pc = ti->subr[i].id; + while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB) + prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]); + } + + switch (prog->type) { + case PIPE_SHADER_VERTEX: 
+ ti->input_file = NV_FILE_MEM_A; + ti->output_file = NV_FILE_MEM_V; + ret = nvc0_vp_gen_header(prog, ti); + break; + /* + case PIPE_SHADER_TESSELLATION_CONTROL: + ret = nvc0_tcp_gen_header(ti); + break; + case PIPE_SHADER_TESSELLATION_EVALUATION: + ret = nvc0_tep_gen_header(ti); + break; + case PIPE_SHADER_GEOMETRY: + ret = nvc0_gp_gen_header(ti); + break; + */ + case PIPE_SHADER_FRAGMENT: + ti->input_file = NV_FILE_MEM_V; + ti->output_file = NV_FILE_GPR; + + if (ti->scan.writes_z) + prog->flags[0] = 0x11; /* ? */ + else + if (!ti->global_stores) + prog->fp.early_z = 1; + + ret = nvc0_fp_gen_header(prog, ti); + break; + default: + assert(!"unsupported program type"); + ret = -1; + break; + } + + assert(!ret); + return ret; +} + +boolean +nvc0_program_translate(struct nvc0_program *prog) +{ + struct nvc0_translation_info *ti; + int ret; + + ti = CALLOC_STRUCT(nvc0_translation_info); + ti->prog = prog; + + ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS; + + if (prog->type == PIPE_SHADER_VERTEX && prog->vp.num_ucps) + ti->append_ucp = TRUE; + + ret = nvc0_prog_scan(ti); + if (ret) { + NOUVEAU_ERR("unsupported shader program\n"); + goto out; + } + + ret = nvc0_generate_code(ti); + if (ret) + NOUVEAU_ERR("shader translation failed\n"); + + { + unsigned i; + for (i = 0; i < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++i) + debug_printf("HDR[%02lx] = 0x%08x\n", + i * sizeof(prog->hdr[0]), prog->hdr[i]); + } + +out: + if (ti->immd32) + FREE(ti->immd32); + if (ti->immd32_ty) + FREE(ti->immd32_ty); + if (ti->insns) + FREE(ti->insns); + if (ti->subr) + FREE(ti->subr); + FREE(ti); + return ret ? FALSE : TRUE; +} + +void +nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog) +{ + if (prog->res) + nouveau_resource_free(&prog->res); + + if (prog->code) + FREE(prog->code); + if (prog->relocs) + FREE(prog->relocs); + + memset(prog->hdr, 0, sizeof(prog->hdr)); + + prog->translated = FALSE; +} diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h new file mode 100644 index 00000000000..2e84caecc9e --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_program.h @@ -0,0 +1,88 @@ + +#ifndef __NVC0_PROGRAM_H__ +#define __NVC0_PROGRAM_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +#define NVC0_CAP_MAX_PROGRAM_TEMPS 64 + +#define NVC0_SHADER_HEADER_SIZE (20 * 4) + +struct nvc0_program { + struct pipe_shader_state pipe; + + ubyte type; + boolean translated; + ubyte max_gpr; + + uint32_t *code; + unsigned code_base; + unsigned code_size; + unsigned parm_size; + + uint32_t hdr[20]; + + uint32_t flags[2]; + + struct { + uint8_t edgeflag; + uint8_t num_ucps; + } vp; + struct { + uint8_t early_z; + } fp; + + void *relocs; + unsigned num_relocs; + + struct nouveau_resource *res; +}; + +/* first 2 bits are written into the program header, for each input */ +#define NVC0_INTERP_FLAT (1 << 0) +#define NVC0_INTERP_PERSPECTIVE (2 << 0) +#define NVC0_INTERP_LINEAR (3 << 0) +#define NVC0_INTERP_CENTROID (1 << 2) + +/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */ +struct nvc0_subroutine { + unsigned id; + unsigned first_insn; + uint32_t argv[NVC0_CAP_MAX_PROGRAM_TEMPS][4]; + uint32_t retv[NVC0_CAP_MAX_PROGRAM_TEMPS][4]; +}; + +struct nvc0_translation_info { + struct nvc0_program *prog; + struct tgsi_full_instruction *insns; + unsigned num_insns; + ubyte input_file; + ubyte output_file; + ubyte fp_depth_output; + uint16_t input_loc[PIPE_MAX_SHADER_INPUTS][4]; + uint16_t output_loc[PIPE_MAX_SHADER_OUTPUTS][4]; + uint16_t 
sysval_loc[TGSI_SEMANTIC_COUNT]; + int input_access[PIPE_MAX_SHADER_INPUTS][4]; + int output_access[PIPE_MAX_SHADER_OUTPUTS][4]; + ubyte interp_mode[PIPE_MAX_SHADER_INPUTS]; + boolean indirect_inputs; + boolean indirect_outputs; + boolean require_stores; + boolean global_stores; + uint32_t *immd32; + ubyte *immd32_ty; + unsigned immd32_nr; + ubyte edgeflag_out; + struct nvc0_subroutine *subr; + unsigned num_subrs; + boolean append_ucp; + struct tgsi_shader_info scan; +}; + +int nvc0_generate_code(struct nvc0_translation_info *); + +void nvc0_relocate_program(struct nvc0_program *, + uint32_t code_base, uint32_t data_base); + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c new file mode 100644 index 00000000000..941be678586 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_push.c @@ -0,0 +1,279 @@ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "translate/translate.h" + +#include "nvc0_context.h" +#include "nvc0_resource.h" + +#include "nvc0_3d.xml.h" + +struct push_context { + struct nouveau_channel *chan; + + void *idxbuf; + + float edgeflag; + int edgeflag_attr; + + uint32_t vertex_words; + uint32_t packet_vertex_limit; + + struct translate *translate; + + boolean primitive_restart; + uint32_t prim; + uint32_t restart_index; +}; + +static INLINE unsigned +prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index) +{ + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; +} + +static INLINE unsigned +prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index) +{ + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; +} + +static INLINE unsigned +prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index) +{ + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; +} + +static void +emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) +{ + uint8_t *elts = (uint8_t *)ctx->idxbuf + start; + + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; + + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i08(elts, push, ctx->restart_index); + + size = ctx->vertex_words * nr; + + BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); + + ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->chan->cur); + + ctx->chan->cur += size; + count -= nr; + elts += nr; + + if (nr != push) { + count--; + elts++; + BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2); + OUT_RING (ctx->chan, 0); + OUT_RING (ctx->chan, ctx->prim); + } + } +} + +static void +emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) +{ + uint16_t *elts = (uint16_t *)ctx->idxbuf + start; + + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; + + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i16(elts, push, ctx->restart_index); + + size = ctx->vertex_words * nr; + + BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); + + ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->chan->cur); + + ctx->chan->cur += size; + count -= nr; + elts += nr; + + if (nr != push) { + count--; + elts++; + BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2); + OUT_RING (ctx->chan, 0); + OUT_RING (ctx->chan, ctx->prim); + } + } +} + +static void +emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) +{ + uint32_t 
*elts = (uint32_t *)ctx->idxbuf + start; + + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; + + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i32(elts, push, ctx->restart_index); + + size = ctx->vertex_words * nr; + + BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); + + ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->chan->cur); + + ctx->chan->cur += size; + count -= nr; + elts += nr; + + if (nr != push) { + count--; + elts++; + BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2); + OUT_RING (ctx->chan, 0); + OUT_RING (ctx->chan, ctx->prim); + } + } +} + +static void +emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count) +{ + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size = ctx->vertex_words * push; + + BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); + + ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur); + ctx->chan->cur += size; + count -= push; + start += push; + } +} + + +#define NVC0_PRIM_GL_CASE(n) \ + case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n + +static INLINE unsigned +nvc0_prim_gl(unsigned prim) +{ + switch (prim) { + NVC0_PRIM_GL_CASE(POINTS); + NVC0_PRIM_GL_CASE(LINES); + NVC0_PRIM_GL_CASE(LINE_LOOP); + NVC0_PRIM_GL_CASE(LINE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLES); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLE_FAN); + NVC0_PRIM_GL_CASE(QUADS); + NVC0_PRIM_GL_CASE(QUAD_STRIP); + NVC0_PRIM_GL_CASE(POLYGON); + NVC0_PRIM_GL_CASE(LINES_ADJACENCY); + NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); + /* + NVC0_PRIM_GL_CASE(PATCHES); */ + default: + return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; + break; + } +} + +void +nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) +{ + struct push_context ctx; + unsigned i, index_size; + unsigned inst = info->instance_count; + + ctx.chan = nvc0->screen->base.channel; + ctx.translate = nvc0->vertex->translate; + ctx.packet_vertex_limit = nvc0->vertex->vtx_per_packet_max; + ctx.vertex_words = nvc0->vertex->vtx_size; + + for (i = 0; i < nvc0->num_vtxbufs; ++i) { + uint8_t *data; + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i]; + struct nvc0_resource *res = nvc0_resource(vb->buffer); + + data = nvc0_resource_map_offset(nvc0, res, + vb->buffer_offset, NOUVEAU_BO_RD); + if (info->indexed) + data += info->index_bias * vb->stride; + + ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); + } + + if (info->indexed) { + ctx.idxbuf = nvc0_resource_map_offset(nvc0, + nvc0_resource(nvc0->idxbuf.buffer), + nvc0->idxbuf.offset, NOUVEAU_BO_RD); + if (!ctx.idxbuf) + return; + index_size = nvc0->idxbuf.index_size; + ctx.primitive_restart = info->primitive_restart; + ctx.restart_index = info->restart_index; + } else { + ctx.idxbuf = NULL; + index_size = 0; + ctx.primitive_restart = FALSE; + ctx.restart_index = 0; + } + + ctx.prim = nvc0_prim_gl(info->mode); + + while (inst--) { + BEGIN_RING(ctx.chan, RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (ctx.chan, ctx.prim); + switch (index_size) { + case 0: + emit_vertices_seq(&ctx, info->start, info->count); + break; + case 1: + emit_vertices_i08(&ctx, info->start, info->count); + break; + case 2: + emit_vertices_i16(&ctx, info->start, info->count); + break; + case 4: + emit_vertices_i32(&ctx, info->start, info->count); + break; + default: + assert(0); + break; + } + IMMED_RING(ctx.chan, 
RING_3D(VERTEX_END_GL), 0); + + ctx.prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } + + if (info->indexed) + nvc0_resource_unmap(nvc0_resource(nvc0->idxbuf.buffer)); + + for (i = 0; i < nvc0->num_vtxbufs; ++i) + nvc0_resource_unmap(nvc0_resource(nvc0->vtxbuf[i].buffer)); +} diff --git a/src/gallium/drivers/nvc0/nvc0_push2.c b/src/gallium/drivers/nvc0/nvc0_push2.c new file mode 100644 index 00000000000..6f51600558a --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_push2.c @@ -0,0 +1,333 @@ + +#if 0 /* not used, kept for now to compare with util/translate */ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "translate/translate.h" + +#include "nvc0_context.h" +#include "nvc0_resource.h" + +#include "nvc0_3d.xml.h" + +struct push_context { + struct nvc0_context *nvc0; + + uint vertex_size; + + void *idxbuf; + uint idxsize; + + float edgeflag; + int edgeflag_input; + + struct { + void *map; + void (*push)(struct nouveau_channel *, void *); + uint32_t stride; + uint32_t divisor; + uint32_t step; + } attr[32]; + int num_attrs; +}; + +static void +emit_b32_1(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); +} + +static void +emit_b32_2(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); + OUT_RING(chan, v[1]); +} + +static void +emit_b32_3(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); + OUT_RING(chan, v[1]); + OUT_RING(chan, v[2]); +} + +static void +emit_b32_4(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); + OUT_RING(chan, v[1]); + OUT_RING(chan, v[2]); + OUT_RING(chan, v[3]); +} + +static void +emit_b16_1(struct nouveau_channel *chan, void *data) +{ + uint16_t *v = data; + + OUT_RING(chan, v[0]); +} + +static void +emit_b16_3(struct nouveau_channel *chan, void *data) +{ + uint16_t *v = data; + + OUT_RING(chan, (v[1] << 16) | v[0]); + OUT_RING(chan, v[2]); +} + +static void +emit_b08_1(struct nouveau_channel *chan, void *data) +{ + uint8_t *v = data; + + OUT_RING(chan, v[0]); +} + +static void +emit_b08_3(struct nouveau_channel *chan, void *data) +{ + uint8_t *v = data; + + OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]); +} + +static void +emit_b64_1(struct nouveau_channel *chan, void *data) +{ + double *v = data; + + OUT_RINGf(chan, v[0]); +} + +static void +emit_b64_2(struct nouveau_channel *chan, void *data) +{ + double *v = data; + + OUT_RINGf(chan, v[0]); + OUT_RINGf(chan, v[1]); +} + +static void +emit_b64_3(struct nouveau_channel *chan, void *data) +{ + double *v = data; + + OUT_RINGf(chan, v[0]); + OUT_RINGf(chan, v[1]); + OUT_RINGf(chan, v[2]); +} + +static void +emit_b64_4(struct nouveau_channel *chan, void *data) +{ + double *v = data; + + OUT_RINGf(chan, v[0]); + OUT_RINGf(chan, v[1]); + OUT_RINGf(chan, v[2]); + OUT_RINGf(chan, v[3]); +} + +static INLINE void +emit_vertex(struct push_context *ctx, unsigned n) +{ + struct nouveau_channel *chan = ctx->nvc0->screen->base.channel; + int i; + + if (ctx->edgeflag_input < 32) { + /* TODO */ + } + + BEGIN_RING_NI(chan, RING_3D(VERTEX_DATA), ctx->vertex_size); + for (i = 0; i < ctx->num_attrs; ++i) + ctx->attr[i].push(chan, + (uint8_t *)ctx->attr[i].map + n * ctx->attr[i].stride); +} + +static void +emit_edgeflag(struct push_context *ctx, boolean enabled) +{ + struct nouveau_channel *chan = ctx->nvc0->screen->base.channel; + + IMMED_RING(chan, RING_3D(EDGEFLAG_ENABLE), enabled); +} + 
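+/* Note on the path above: emit_vertex() pushes one vertex as
+ * ctx->vertex_size 32-bit words by running each attribute's push() callback
+ * on its source data at map + n * stride; small components are packed into
+ * words first (e.g. three ubytes become (v[2] << 16) | (v[1] << 8) | v[0],
+ * cf. emit_b08_3). The emit_elt08/16/32 and emit_seq helpers below differ
+ * only in where the vertex index n comes from: the index buffer or a
+ * running counter.
+ */
+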
+static void +emit_elt08(struct push_context *ctx, unsigned start, unsigned count) +{ + uint8_t *idxbuf = ctx->idxbuf; + + while (count--) + emit_vertex(ctx, idxbuf[start++]); +} + +static void +emit_elt16(struct push_context *ctx, unsigned start, unsigned count) +{ + uint16_t *idxbuf = ctx->idxbuf; + + while (count--) + emit_vertex(ctx, idxbuf[start++]); +} + +static void +emit_elt32(struct push_context *ctx, unsigned start, unsigned count) +{ + uint32_t *idxbuf = ctx->idxbuf; + + while (count--) + emit_vertex(ctx, idxbuf[start++]); +} + +static void +emit_seq(struct push_context *ctx, unsigned start, unsigned count) +{ + while (count--) + emit_vertex(ctx, start++); +} + +#define NVC0_PRIM_GL_CASE(n) \ + case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n + +static INLINE unsigned +nvc0_prim_gl(unsigned prim) +{ + switch (prim) { + NVC0_PRIM_GL_CASE(POINTS); + NVC0_PRIM_GL_CASE(LINES); + NVC0_PRIM_GL_CASE(LINE_LOOP); + NVC0_PRIM_GL_CASE(LINE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLES); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLE_FAN); + NVC0_PRIM_GL_CASE(QUADS); + NVC0_PRIM_GL_CASE(QUAD_STRIP); + NVC0_PRIM_GL_CASE(POLYGON); + NVC0_PRIM_GL_CASE(LINES_ADJACENCY); + NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); + /* + NVC0_PRIM_GL_CASE(PATCHES); */ + default: + return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; + break; + } +} + +void +nvc0_push_vbo2(struct nvc0_context *nvc0, const struct pipe_draw_info *info) +{ + struct push_context ctx; + unsigned i, n; + unsigned inst = info->instance_count; + unsigned prim = nvc0_prim_gl(info->mode); + + ctx.nvc0 = nvc0; + ctx.vertex_size = nvc0->vertex->vtx_size; + ctx.idxbuf = NULL; + ctx.num_attrs = 0; + ctx.edgeflag = 0.5f; + ctx.edgeflag_input = 32; + + for (i = 0; i < nvc0->vertex->num_elements; ++i) { + struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe; + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index]; + struct nouveau_bo *bo = nvc0_resource(vb->buffer)->bo; + unsigned nr_components; + + if (!(nvc0->vbo_fifo & (1 << i))) + continue; + n = ctx.num_attrs++; + + if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) + return; + ctx.attr[n].map = (uint8_t *)bo->map + vb->buffer_offset + ve->src_offset; + + nouveau_bo_unmap(bo); + + ctx.attr[n].stride = vb->stride; + ctx.attr[n].divisor = ve->instance_divisor; + + nr_components = util_format_get_nr_components(ve->src_format); + switch (util_format_get_component_bits(ve->src_format, + UTIL_FORMAT_COLORSPACE_RGB, 0)) { + case 8: + switch (nr_components) { + case 1: ctx.attr[n].push = emit_b08_1; break; + case 2: ctx.attr[n].push = emit_b16_1; break; + case 3: ctx.attr[n].push = emit_b08_3; break; + case 4: ctx.attr[n].push = emit_b32_1; break; + } + break; + case 16: + switch (nr_components) { + case 1: ctx.attr[n].push = emit_b16_1; break; + case 2: ctx.attr[n].push = emit_b32_1; break; + case 3: ctx.attr[n].push = emit_b16_3; break; + case 4: ctx.attr[n].push = emit_b32_2; break; + } + break; + case 32: + switch (nr_components) { + case 1: ctx.attr[n].push = emit_b32_1; break; + case 2: ctx.attr[n].push = emit_b32_2; break; + case 3: ctx.attr[n].push = emit_b32_3; break; + case 4: ctx.attr[n].push = emit_b32_4; break; + } + break; + default: + assert(0); + break; + } + } + + if (info->indexed) { + struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer); + if (!res || nouveau_bo_map(res->bo, NOUVEAU_BO_RD)) + return; + ctx.idxbuf = (uint8_t *)res->bo->map + 
nvc0->idxbuf.offset + res->offset; + nouveau_bo_unmap(res->bo); + ctx.idxsize = nvc0->idxbuf.index_size; + } else { + ctx.idxsize = 0; + } + + while (inst--) { + BEGIN_RING(nvc0->screen->base.channel, RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (nvc0->screen->base.channel, prim); + switch (ctx.idxsize) { + case 0: + emit_seq(&ctx, info->start, info->count); + break; + case 1: + emit_elt08(&ctx, info->start, info->count); + break; + case 2: + emit_elt16(&ctx, info->start, info->count); + break; + case 4: + emit_elt32(&ctx, info->start, info->count); + break; + } + IMMED_RING(nvc0->screen->base.channel, RING_3D(VERTEX_END_GL), 0); + + prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } +} + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c new file mode 100644 index 00000000000..cc83fbe771c --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_query.c @@ -0,0 +1,337 @@ +/* + * Copyright 2011 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: Christoph Bumiller + */ + +#include "nvc0_context.h" +#include "nouveau/nv_object.xml.h" + +/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts + * (since we use only a single GPU channel per screen) will not work properly. + * + * The first is not that big of an issue because OpenGL does not allow nested + * queries anyway. 
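+ *
+ * Result availability is tracked with a sequence number: nvc0_query_begin()
+ * pre-writes the previously used sequence into data[0], the GPU overwrites
+ * it with the current q->sequence when the result lands (nvc0_query_get()),
+ * and nvc0_query_ready() compares the two for the non-64-bit query types.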
+ */ + +struct nvc0_query { + uint32_t *data; + uint32_t type; + uint32_t sequence; + struct nouveau_bo *bo; + uint32_t base; + uint32_t offset; /* base + i * 16 */ + boolean ready; + boolean is64bit; + struct nvc0_mm_allocation *mm; +}; + +#define NVC0_QUERY_ALLOC_SPACE 128 + +static INLINE struct nvc0_query * +nvc0_query(struct pipe_query *pipe) +{ + return (struct nvc0_query *)pipe; +} + +static boolean +nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size) +{ + struct nvc0_screen *screen = nvc0->screen; + int ret; + + if (q->bo) { + nouveau_bo_ref(NULL, &q->bo); + if (q->mm) { + if (q->ready) + nvc0_mm_free(q->mm); + else + nvc0_fence_sched_release(screen->fence.current, q->mm); + } + } + if (size) { + q->mm = nvc0_mm_allocate(screen->mm_GART, size, &q->bo, &q->base); + if (!q->bo) + return FALSE; + q->offset = q->base; + + ret = nouveau_bo_map_range(q->bo, q->base, size, NOUVEAU_BO_RD | + NOUVEAU_BO_NOSYNC); + if (ret) { + nvc0_query_allocate(nvc0, q, 0); + return FALSE; + } + q->data = q->bo->map; + nouveau_bo_unmap(q->bo); + } + return TRUE; +} + +static void +nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0); + FREE(nvc0_query(pq)); +} + +static struct pipe_query * +nvc0_query_create(struct pipe_context *pipe, unsigned type) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nvc0_query *q; + + q = CALLOC_STRUCT(nvc0_query); + if (!q) + return NULL; + + if (!nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE)) { + FREE(q); + return NULL; + } + + q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || + type == PIPE_QUERY_PRIMITIVES_EMITTED || + type == PIPE_QUERY_SO_STATISTICS); + q->type = type; + + if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { + q->offset -= 16; + q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */ + } + + return (struct pipe_query *)q; +} + +static void +nvc0_query_get(struct nouveau_channel *chan, struct nvc0_query *q, + unsigned offset, uint32_t get) +{ + offset += q->offset; + + MARK_RING (chan, 5, 2); + BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4); + OUT_RELOCh(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + OUT_RELOCl(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + OUT_RING (chan, q->sequence); + OUT_RING (chan, get); +} + +static void +nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_query *q = nvc0_query(pq); + + /* For occlusion queries we have to change the storage, because a previous + * query might set the initial render conition to FALSE even *after* we re- + * initialized it to TRUE. + */ + if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { + q->offset += 16; + q->data += 16 / sizeof(*q->data); + if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE) + nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE); + + /* XXX: can we do this with the GPU, and sync with respect to a previous + * query ? + */ + q->data[1] = 1; /* initial render condition = TRUE */ + } + if (!q->is64bit) + q->data[0] = q->sequence++; /* the previously used one */ + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + IMMED_RING(chan, RING_3D(COUNTER_RESET), NVC0_3D_COUNTER_RESET_SAMPLECNT); + IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1); + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? 
*/ + IMMED_RING(chan, RING_3D(COUNTER_RESET), + NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + IMMED_RING(chan, RING_3D(COUNTER_RESET), + NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES); + break; + case PIPE_QUERY_SO_STATISTICS: + BEGIN_RING_NI(chan, RING_3D(COUNTER_RESET), 2); + OUT_RING (chan, NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES); + OUT_RING (chan, NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES); + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: + case PIPE_QUERY_TIME_ELAPSED: + nvc0_query_get(chan, q, 0x10, 0x00005002); + break; + default: + break; + } + q->ready = FALSE; +} + +static void +nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_query *q = nvc0_query(pq); + + const int index = 0; /* for multiple vertex streams */ + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + nvc0_query_get(chan, q, 0, 0x0100f002); + BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1); + OUT_RING (chan, 0); + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + nvc0_query_get(chan, q, 0, 0x09005002 | (index << 5)); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5)); + break; + case PIPE_QUERY_SO_STATISTICS: + nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5)); + nvc0_query_get(chan, q, 0x10, 0x09005002 | (index << 5)); + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: + case PIPE_QUERY_TIME_ELAPSED: + nvc0_query_get(chan, q, 0, 0x00005002); + break; + case PIPE_QUERY_GPU_FINISHED: + nvc0_query_get(chan, q, 0, 0x1000f010); + break; + default: + assert(0); + break; + } +} + +static INLINE boolean +nvc0_query_ready(struct nvc0_query *q) +{ + return q->ready || (!q->is64bit && (q->data[0] == q->sequence)); +} + +static INLINE boolean +nvc0_query_wait(struct nvc0_query *q) +{ + int ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD); + if (ret) + return FALSE; + nouveau_bo_unmap(q->bo); + return TRUE; +} + +static boolean +nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, void *result) +{ + struct nvc0_query *q = nvc0_query(pq); + uint64_t *res64 = result; + uint32_t *res32 = result; + boolean *res8 = result; + uint64_t *data64 = (uint64_t *)q->data; + + if (q->type == PIPE_QUERY_GPU_FINISHED) { + res8[0] = nvc0_query_ready(q); + return TRUE; + } + + if (!q->ready) /* update ? */ + q->ready = nvc0_query_ready(q); + if (!q->ready) { + struct nouveau_channel *chan = nvc0_context(pipe)->screen->base.channel; + if (!wait) { + if (nouveau_bo_pending(q->bo) & NOUVEAU_BO_WR) /* for daft apps */ + FIRE_RING(chan); + return FALSE; + } + if (!nvc0_query_wait(q)) + return FALSE; + } + q->ready = TRUE; + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ + res32[0] = q->data[1]; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ + case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ + res64[0] = data64[0]; + break; + case PIPE_QUERY_SO_STATISTICS: + res64[0] = data64[0]; + res64[1] = data64[1]; + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */ + res64[0] = 1000000000; + res8[8] = (data64[0] == data64[2]) ? 
FALSE : TRUE; + break; + case PIPE_QUERY_TIME_ELAPSED: + res64[0] = data64[1] - data64[3]; + break; + default: + return FALSE; + } + + return TRUE; +} + +static void +nvc0_render_condition(struct pipe_context *pipe, + struct pipe_query *pq, uint mode) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_query *q; + + if (!pq) { + IMMED_RING(chan, RING_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); + return; + } + q = nvc0_query(pq); + + if (mode == PIPE_RENDER_COND_WAIT || + mode == PIPE_RENDER_COND_BY_REGION_WAIT) { + BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4); + OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RING (chan, q->sequence); + OUT_RING (chan, 0x00001001); + } + + BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RING (chan, NVC0_3D_COND_MODE_RES_NON_ZERO); +} + +void +nvc0_init_query_functions(struct nvc0_context *nvc0) +{ + nvc0->pipe.create_query = nvc0_query_create; + nvc0->pipe.destroy_query = nvc0_query_destroy; + nvc0->pipe.begin_query = nvc0_query_begin; + nvc0->pipe.end_query = nvc0_query_end; + nvc0->pipe.get_query_result = nvc0_query_result; + nvc0->pipe.render_condition = nvc0_render_condition; +} diff --git a/src/gallium/drivers/nvc0/nvc0_resource.c b/src/gallium/drivers/nvc0/nvc0_resource.c new file mode 100644 index 00000000000..7e42cedd163 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_resource.c @@ -0,0 +1,71 @@ + +#include "pipe/p_context.h" +#include "nvc0_resource.h" +#include "nouveau/nouveau_screen.h" + +static unsigned +nvc0_resource_is_referenced(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned face, int layer) +{ + struct nvc0_resource *res = nvc0_resource(resource); + unsigned flags = 0; + +#ifdef NOUVEAU_USERSPACE_MM + flags = res->status; +#else + unsigned bo_flags = nouveau_bo_pending(res->bo); + if (bo_flags & NOUVEAU_BO_RD) + flags = PIPE_REFERENCED_FOR_READ; + if (bo_flags & NOUVEAU_BO_WR) + flags |= PIPE_REFERENCED_FOR_WRITE; +#endif + return flags; +} + +static struct pipe_resource * +nvc0_resource_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + switch (templ->target) { + case PIPE_BUFFER: + return nvc0_buffer_create(screen, templ); + default: + return nvc0_miptree_create(screen, templ); + } +} + +static struct pipe_resource * +nvc0_resource_from_handle(struct pipe_screen * screen, + const struct pipe_resource *templ, + struct winsys_handle *whandle) +{ + if (templ->target == PIPE_BUFFER) + return NULL; + else + return nvc0_miptree_from_handle(screen, templ, whandle); +} + +void +nvc0_init_resource_functions(struct pipe_context *pcontext) +{ + pcontext->get_transfer = u_get_transfer_vtbl; + pcontext->transfer_map = u_transfer_map_vtbl; + pcontext->transfer_flush_region = u_transfer_flush_region_vtbl; + pcontext->transfer_unmap = u_transfer_unmap_vtbl; + pcontext->transfer_destroy = u_transfer_destroy_vtbl; + pcontext->transfer_inline_write = u_transfer_inline_write_vtbl; + pcontext->is_resource_referenced = nvc0_resource_is_referenced; + pcontext->create_surface = nvc0_miptree_surface_new; + pcontext->surface_destroy = nvc0_miptree_surface_del; +} + +void +nvc0_screen_init_resource_functions(struct pipe_screen *pscreen) +{ + pscreen->resource_create = 
nvc0_resource_create; + pscreen->resource_from_handle = nvc0_resource_from_handle; + pscreen->resource_get_handle = u_resource_get_handle_vtbl; + pscreen->resource_destroy = u_resource_destroy_vtbl; + pscreen->user_buffer_create = nvc0_user_buffer_create; +} diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h new file mode 100644 index 00000000000..17e79642a6d --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_resource.h @@ -0,0 +1,201 @@ + +#ifndef __NVC0_RESOURCE_H__ +#define __NVC0_RESOURCE_H__ + +#include "util/u_transfer.h" +#include "util/u_double_list.h" +#define NOUVEAU_NVC0 +#include "nouveau/nouveau_winsys.h" +#undef NOUVEAU_NVC0 + +#include "nvc0_fence.h" + +struct pipe_resource; +struct nouveau_bo; +struct nvc0_context; + +#define NVC0_BUFFER_SCORE_MIN -25000 +#define NVC0_BUFFER_SCORE_MAX 25000 +#define NVC0_BUFFER_SCORE_VRAM_THRESHOLD 20000 + +/* DIRTY: buffer was (or will be after the next flush) written to by GPU and + * resource->data has not been updated to reflect modified VRAM contents + * + * USER_MEMORY: resource->data is a pointer to client memory and may change + * between GL calls + */ +#define NVC0_BUFFER_STATUS_DIRTY (1 << 0) +#define NVC0_BUFFER_STATUS_USER_MEMORY (1 << 7) + +/* Resources, if mapped into the GPU's address space, are guaranteed to + * have constant virtual addresses. + * The address of a resource will lie within the nouveau_bo referenced, + * and this bo should be added to the memory manager's validation list. + */ +struct nvc0_resource { + struct pipe_resource base; + const struct u_resource_vtbl *vtbl; + + uint8_t *data; + struct nouveau_bo *bo; + uint32_t offset; + + uint8_t status; + uint8_t domain; + + int16_t score; /* low if mapped very often, if high can move to VRAM */ + + struct nvc0_fence *fence; + struct nvc0_fence *fence_wr; + + struct nvc0_mm_allocation *mm; +}; + +boolean +nvc0_buffer_download(struct nvc0_context *, struct nvc0_resource *, + unsigned start, unsigned size); + +boolean +nvc0_buffer_migrate(struct nvc0_context *, + struct nvc0_resource *, unsigned domain); + +static INLINE void +nvc0_buffer_adjust_score(struct nvc0_context *nvc0, struct nvc0_resource *res, + int16_t score) +{ + if (score < 0) { + if (res->score > NVC0_BUFFER_SCORE_MIN) + res->score += score; + } else + if (score > 0){ + if (res->score < NVC0_BUFFER_SCORE_MAX) + res->score += score; + if (res->domain == NOUVEAU_BO_GART && + res->score > NVC0_BUFFER_SCORE_VRAM_THRESHOLD) + nvc0_buffer_migrate(nvc0, res, NOUVEAU_BO_VRAM); + } +} + +/* XXX: wait for fence (atm only using this for vertex push) */ +static INLINE void * +nvc0_resource_map_offset(struct nvc0_context *nvc0, + struct nvc0_resource *res, uint32_t offset, + uint32_t flags) +{ + void *map; + + nvc0_buffer_adjust_score(nvc0, res, -250); + + if ((res->domain == NOUVEAU_BO_VRAM) && + (res->status & NVC0_BUFFER_STATUS_DIRTY)) + nvc0_buffer_download(nvc0, res, 0, res->base.width0); + + if ((res->domain != NOUVEAU_BO_GART) || + (res->status & NVC0_BUFFER_STATUS_USER_MEMORY)) + return res->data + offset; + + if (res->mm) + flags |= NOUVEAU_BO_NOSYNC; + + if (nouveau_bo_map_range(res->bo, res->offset + offset, + res->base.width0, flags)) + return NULL; + + map = res->bo->map; + nouveau_bo_unmap(res->bo); + return map; +} + +static INLINE void +nvc0_resource_unmap(struct nvc0_resource *res) +{ + /* no-op */ +} + +#define NVC0_TILE_DIM_SHIFT(m, d) (((m) >> (d * 4)) & 0xf) + +#define NVC0_TILE_PITCH(m) (64 << NVC0_TILE_DIM_SHIFT(m, 0)) +#define NVC0_TILE_HEIGHT(m) 
( 8 << NVC0_TILE_DIM_SHIFT(m, 1)) +#define NVC0_TILE_DEPTH(m) ( 1 << NVC0_TILE_DIM_SHIFT(m, 2)) + +#define NVC0_TILE_SIZE_2D(m) (((64 * 8) << \ + NVC0_TILE_DIM_SHIFT(m, 0)) << \ + NVC0_TILE_DIM_SHIFT(m, 1)) + +#define NVC0_TILE_SIZE(m) (NVC0_TILE_SIZE_2D(m) << NVC0_TILE_DIM_SHIFT(m, 2)) + +struct nvc0_miptree_level { + uint32_t offset; + uint32_t pitch; + uint32_t tile_mode; +}; + +#define NVC0_MAX_TEXTURE_LEVELS 16 + +struct nvc0_miptree { + struct nvc0_resource base; + struct nvc0_miptree_level level[NVC0_MAX_TEXTURE_LEVELS]; + uint32_t total_size; + uint32_t layer_stride; + boolean layout_3d; /* TRUE if layer count varies with mip level */ +}; + +static INLINE struct nvc0_miptree * +nvc0_miptree(struct pipe_resource *pt) +{ + return (struct nvc0_miptree *)pt; +} + +static INLINE struct nvc0_resource * +nvc0_resource(struct pipe_resource *resource) +{ + return (struct nvc0_resource *)resource; +} + +/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? */ +static INLINE boolean +nvc0_resource_mapped_by_gpu(struct pipe_resource *resource) +{ + return nvc0_resource(resource)->domain != 0; +} + +void +nvc0_init_resource_functions(struct pipe_context *pcontext); + +void +nvc0_screen_init_resource_functions(struct pipe_screen *pscreen); + +/* Internal functions: + */ +struct pipe_resource * +nvc0_miptree_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmp); + +struct pipe_resource * +nvc0_miptree_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *template, + struct winsys_handle *whandle); + +struct pipe_resource * +nvc0_buffer_create(struct pipe_screen *pscreen, + const struct pipe_resource *templ); + +struct pipe_resource * +nvc0_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes, + unsigned usage); + + +struct pipe_surface * +nvc0_miptree_surface_new(struct pipe_context *, + struct pipe_resource *, + const struct pipe_surface *templ); + +void +nvc0_miptree_surface_del(struct pipe_context *, struct pipe_surface *); + +boolean +nvc0_user_buffer_upload(struct nvc0_resource *, unsigned base, unsigned size); + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c new file mode 100644 index 00000000000..54eec660b2a --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -0,0 +1,669 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "util/u_format_s3tc.h" +#include "pipe/p_screen.h" + +#include "nvc0_fence.h" +#include "nvc0_context.h" +#include "nvc0_screen.h" + +#include "nouveau/nv_object.xml.h" +#include "nvc0_graph_macros.h" + +static boolean +nvc0_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned bindings, unsigned geom_flags) +{ + if (sample_count > 1) + return FALSE; + + if (!util_format_s3tc_enabled) { + switch (format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return FALSE; + default: + break; + } + } + + /* transfers & shared are always supported */ + bindings &= ~(PIPE_BIND_TRANSFER_READ | + PIPE_BIND_TRANSFER_WRITE | + PIPE_BIND_SHARED); + + return (nvc0_format_table[format].usage & bindings) == bindings; +} + +static int +nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 32; + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 64; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_TEXTURE_SHADOW_MAP: + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + case PIPE_CAP_DEPTH_CLAMP: + case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_GLSL: + case PIPE_CAP_SM3: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 8; + case PIPE_CAP_TIMER_QUERY: + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_STREAM_OUTPUT: + return 0; + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_INDEP_BLEND_ENABLE: + case PIPE_CAP_INDEP_BLEND_FUNC: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; + case PIPE_CAP_SHADER_STENCIL_EXPORT: + return 0; + case PIPE_CAP_PRIMITIVE_RESTART: + return 1; + default: + NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static int +nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, + enum pipe_shader_cap param) +{ + switch (shader) { + case PIPE_SHADER_VERTEX: + /* + case PIPE_SHADER_TESSELLATION_CONTROL: + case PIPE_SHADER_TESSELLATION_EVALUATION: + */ + case PIPE_SHADER_GEOMETRY: + case PIPE_SHADER_FRAGMENT: + break; + default: + return 0; + } + + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return 4; + case PIPE_SHADER_CAP_MAX_INPUTS: + if (shader == PIPE_SHADER_VERTEX) + return 32; + return 0x300 / 16; + case PIPE_SHADER_CAP_MAX_CONSTS: + return 65536 / 16; + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return 14; + case PIPE_SHADER_CAP_MAX_ADDRS: + return 1; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + return shader != PIPE_SHADER_FRAGMENT; + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: 
+ return 1; + case PIPE_SHADER_CAP_MAX_PREDS: + return 0; + case PIPE_SHADER_CAP_MAX_TEMPS: + return NVC0_CAP_MAX_PROGRAM_TEMPS; + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 1; + case PIPE_SHADER_CAP_SUBROUTINES: + return 0; /* please inline, or provide function declarations */ + default: + NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); + return 0; + } +} + +static float +nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0f; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0f; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0f; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0f; + default: + NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); + return 0.0f; + } +} + +static void +nvc0_screen_destroy(struct pipe_screen *pscreen) +{ + struct nvc0_screen *screen = nvc0_screen(pscreen); + + nvc0_fence_wait(screen->fence.current); + nvc0_fence_reference(&screen->fence.current, NULL); + + nouveau_bo_ref(NULL, &screen->text); + nouveau_bo_ref(NULL, &screen->tls); + nouveau_bo_ref(NULL, &screen->txc); + nouveau_bo_ref(NULL, &screen->fence.bo); + nouveau_bo_ref(NULL, &screen->mp_stack_bo); + + nouveau_resource_destroy(&screen->text_heap); + + if (screen->tic.entries) + FREE(screen->tic.entries); + + nvc0_mm_destroy(screen->mm_GART); + nvc0_mm_destroy(screen->mm_VRAM); + nvc0_mm_destroy(screen->mm_VRAM_fe0); + + nouveau_grobj_free(&screen->fermi); + nouveau_grobj_free(&screen->eng2d); + nouveau_grobj_free(&screen->m2mf); + + nouveau_screen_fini(&screen->base); + + FREE(screen); +} + +static int +nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos, + unsigned size, const uint32_t *data) +{ + struct nouveau_channel *chan = screen->base.channel; + + size /= 4; + + BEGIN_RING(chan, RING_3D_(NVC0_GRAPH_MACRO_ID), 2); + OUT_RING (chan, (m - 0x3800) / 8); + OUT_RING (chan, pos); + BEGIN_RING_1I(chan, RING_3D_(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1); + OUT_RING (chan, pos); + OUT_RINGp (chan, data, size); + + return pos + size; +} + +static void +nvc0_screen_fence_reference(struct pipe_screen *pscreen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + nvc0_fence_reference((struct nvc0_fence **)ptr, nvc0_fence(fence)); +} + +static int +nvc0_screen_fence_signalled(struct pipe_screen *pscreen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + return !(nvc0_fence_signalled(nvc0_fence(fence))); +} + +static int +nvc0_screen_fence_finish(struct pipe_screen *pscreen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + return nvc0_fence_wait((struct nvc0_fence *)fence) != TRUE; +} + +static void +nvc0_magic_3d_init(struct nouveau_channel *chan) +{ + BEGIN_RING(chan, RING_3D_(0x10cc), 1); + OUT_RING (chan, 0xff); + BEGIN_RING(chan, RING_3D_(0x10e0), 2); + OUT_RING(chan, 0xff); + OUT_RING(chan, 0xff); + BEGIN_RING(chan, RING_3D_(0x10ec), 2); + OUT_RING(chan, 0xff); + OUT_RING(chan, 0xff); + BEGIN_RING(chan, RING_3D_(0x074c), 1); + OUT_RING (chan, 0x3f); + + BEGIN_RING(chan, RING_3D_(0x10f8), 1); + OUT_RING (chan, 0x0101); + + BEGIN_RING(chan, RING_3D_(0x16a8), 1); + OUT_RING (chan, (3 << 16) | 3); + BEGIN_RING(chan, RING_3D_(0x1794), 1); + OUT_RING (chan, (2 << 16) | 2); + BEGIN_RING(chan, RING_3D_(0x0de8), 1); + OUT_RING (chan, 1); + +#if 0 /* software method */ + BEGIN_RING(chan, RING_3D_(0x1528), 1); /* MP poke */ + OUT_RING (chan, 0); +#endif + + BEGIN_RING(chan, RING_3D_(0x12ac), 1); + 
OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D_(0x0218), 1); + OUT_RING (chan, 0x10); + BEGIN_RING(chan, RING_3D_(0x10fc), 1); + OUT_RING (chan, 0x10); + BEGIN_RING(chan, RING_3D_(0x1290), 1); + OUT_RING (chan, 0x10); + BEGIN_RING(chan, RING_3D_(0x12d8), 2); + OUT_RING (chan, 0x10); + OUT_RING (chan, 0x10); + BEGIN_RING(chan, RING_3D_(0x06d4), 1); + OUT_RING (chan, 8); + BEGIN_RING(chan, RING_3D_(0x1140), 1); + OUT_RING (chan, 0x10); + BEGIN_RING(chan, RING_3D_(0x1610), 1); + OUT_RING (chan, 0xe); + + BEGIN_RING(chan, RING_3D_(0x164c), 1); + OUT_RING (chan, 1 << 12); + BEGIN_RING(chan, RING_3D_(0x151c), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D_(0x020c), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D_(0x030c), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D_(0x0300), 1); + OUT_RING (chan, 3); +#if 0 /* software method */ + BEGIN_RING(chan, RING_3D_(0x1280), 1); /* PGRAPH poke */ + OUT_RING (chan, 0); +#endif + BEGIN_RING(chan, RING_3D_(0x02d0), 1); + OUT_RING (chan, 0x1f40); + BEGIN_RING(chan, RING_3D_(0x00fdc), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D_(0x19c0), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D_(0x075c), 1); + OUT_RING (chan, 3); + + BEGIN_RING(chan, RING_3D_(0x0fac), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D_(0x0f90), 1); + OUT_RING (chan, 0); +} + +#define FAIL_SCREEN_INIT(str, err) \ + do { \ + NOUVEAU_ERR(str, err); \ + nvc0_screen_destroy(pscreen); \ + return NULL; \ + } while(0) + +struct pipe_screen * +nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) +{ + struct nvc0_screen *screen; + struct nouveau_channel *chan; + struct pipe_screen *pscreen; + int ret; + unsigned i; + + screen = CALLOC_STRUCT(nvc0_screen); + if (!screen) + return NULL; + pscreen = &screen->base.base; + + ret = nouveau_screen_init(&screen->base, dev); + if (ret) { + nvc0_screen_destroy(pscreen); + return NULL; + } + chan = screen->base.channel; + + pscreen->winsys = ws; + pscreen->destroy = nvc0_screen_destroy; + pscreen->context_create = nvc0_create; + pscreen->is_format_supported = nvc0_screen_is_format_supported; + pscreen->get_param = nvc0_screen_get_param; + pscreen->get_shader_param = nvc0_screen_get_shader_param; + pscreen->get_paramf = nvc0_screen_get_paramf; + pscreen->fence_reference = nvc0_screen_fence_reference; + pscreen->fence_signalled = nvc0_screen_fence_signalled; + pscreen->fence_finish = nvc0_screen_fence_finish; + + nvc0_screen_init_resource_functions(pscreen); + + screen->base.vertex_buffer_flags = NOUVEAU_BO_GART; + screen->base.index_buffer_flags = 0; + + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, + &screen->fence.bo); + if (ret) + goto fail; + nouveau_bo_map(screen->fence.bo, NOUVEAU_BO_RDWR); + screen->fence.map = screen->fence.bo->map; + nouveau_bo_unmap(screen->fence.bo); + + for (i = 0; i < NVC0_SCRATCH_NR_BUFFERS; ++i) { + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART, 0, NVC0_SCRATCH_SIZE, + &screen->scratch.bo[i]); + if (ret) + goto fail; + } + + ret = nouveau_grobj_alloc(chan, 0xbeef9039, NVC0_M2MF, &screen->m2mf); + if (ret) + FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret); + + BIND_RING (chan, screen->m2mf, NVC0_SUBCH_MF); + BEGIN_RING(chan, RING_MF(NOTIFY_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, screen->fence.bo, 16, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, screen->fence.bo, 16, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); + OUT_RING (chan, 0); + + ret = nouveau_grobj_alloc(chan, 0xbeef902d, NVC0_2D, &screen->eng2d); + if (ret) + 
FAIL_SCREEN_INIT("Error allocating PGRAPH context for 2D: %d\n", ret); + + BIND_RING (chan, screen->eng2d, NVC0_SUBCH_2D); + BEGIN_RING(chan, RING_2D(OPERATION), 1); + OUT_RING (chan, NVC0_2D_OPERATION_SRCCOPY); + BEGIN_RING(chan, RING_2D(CLIP_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_2D(COLOR_KEY_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_2D_(0x0884), 1); + OUT_RING (chan, 0x3f); + BEGIN_RING(chan, RING_2D_(0x0888), 1); + OUT_RING (chan, 1); + + ret = nouveau_grobj_alloc(chan, 0xbeef9097, NVC0_3D, &screen->fermi); + if (ret) + FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret); + + BIND_RING (chan, screen->fermi, NVC0_SUBCH_3D); + BEGIN_RING(chan, RING_3D(NOTIFY_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, screen->fence.bo, 32, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, screen->fence.bo, 32, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); + OUT_RING (chan, 0); + + BEGIN_RING(chan, RING_3D(COND_MODE), 1); + OUT_RING (chan, NVC0_3D_COND_MODE_ALWAYS); + + BEGIN_RING(chan, RING_3D(RT_CONTROL), 1); + OUT_RING (chan, 1); + + BEGIN_RING(chan, RING_3D(CSAA_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(MULTISAMPLE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(MULTISAMPLE_MODE), 1); + OUT_RING (chan, NVC0_3D_MULTISAMPLE_MODE_1X); + BEGIN_RING(chan, RING_3D(MULTISAMPLE_CTRL), 1); + OUT_RING (chan, 0); + + nvc0_magic_3d_init(chan); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, &screen->text); + if (ret) + goto fail; + + nouveau_resource_init(&screen->text_heap, 0, 1 << 20); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16, + &screen->uniforms); + if (ret) + goto fail; + + /* auxiliary constants (6 user clip planes, base instance id) */ + BEGIN_RING(chan, RING_3D(CB_SIZE), 3); + OUT_RING (chan, 256); + OUT_RELOCh(chan, screen->uniforms, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, screen->uniforms, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + for (i = 0; i < 5; ++i) { + BEGIN_RING(chan, RING_3D(CB_BIND(i)), 1); + OUT_RING (chan, (15 << 4) | 1); + } + + screen->tls_size = 4 * 4 * 32 * 128 * 4; + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, + screen->tls_size, &screen->tls); + if (ret) + goto fail; + + BEGIN_RING(chan, RING_3D(CODE_ADDRESS_HIGH), 2); + OUT_RELOCh(chan, screen->text, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, screen->text, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + BEGIN_RING(chan, RING_3D(LOCAL_ADDRESS_HIGH), 4); + OUT_RELOCh(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RING (chan, screen->tls_size >> 32); + OUT_RING (chan, screen->tls_size); + BEGIN_RING(chan, RING_3D(LOCAL_BASE), 1); + OUT_RING (chan, 0); + + for (i = 0; i < 5; ++i) { + BEGIN_RING(chan, RING_3D(TEX_LIMITS(i)), 1); + OUT_RING (chan, 0x54); + } + BEGIN_RING(chan, RING_3D(LINKED_TSC), 1); + OUT_RING (chan, 0); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, + &screen->mp_stack_bo); + if (ret) + goto fail; + + BEGIN_RING(chan, RING_3D_(0x17bc), 3); + OUT_RELOCh(chan, screen->mp_stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, screen->mp_stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RING (chan, 1); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, &screen->txc); + if (ret) + goto fail; + + BEGIN_RING(chan, RING_3D(TIC_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, 
screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RING (chan, NVC0_TIC_MAX_ENTRIES - 1); + + BEGIN_RING(chan, RING_3D(TSC_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RING (chan, NVC0_TSC_MAX_ENTRIES - 1); + + BEGIN_RING(chan, RING_3D(SCREEN_Y_CONTROL), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(WINDOW_OFFSET_X), 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D_(0x1590), 1); /* deactivate ZCULL */ + OUT_RING (chan, 0x3f); + + BEGIN_RING(chan, RING_3D(VIEWPORT_CLIP_RECTS_EN), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(CLIPID_ENABLE), 1); + OUT_RING (chan, 0); + + BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2); + OUT_RINGf (chan, 0.0f); + OUT_RINGf (chan, 1.0f); + + /* We use scissors instead of exact view volume clipping, + * so they're always enabled. + */ + BEGIN_RING(chan, RING_3D(SCISSOR_ENABLE(0)), 3); + OUT_RING (chan, 1); + OUT_RING (chan, 8192 << 16); + OUT_RING (chan, 8192 << 16); + + BEGIN_RING(chan, RING_3D_(0x0fac), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D_(0x3484), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D_(0x0dbc), 1); + OUT_RING (chan, 0x00010000); + BEGIN_RING(chan, RING_3D_(0x0dd8), 1); + OUT_RING (chan, 0xff800006); + BEGIN_RING(chan, RING_3D_(0x3488), 1); + OUT_RING (chan, 0); + +#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n); + + i = 0; + MK_MACRO(NVC0_3D_BLEND_ENABLES, nvc0_9097_blend_enables); + MK_MACRO(NVC0_3D_VERTEX_ARRAY_SELECT, nvc0_9097_vertex_array_select); + MK_MACRO(NVC0_3D_TEP_SELECT, nvc0_9097_tep_select); + MK_MACRO(NVC0_3D_GP_SELECT, nvc0_9097_gp_select); + MK_MACRO(NVC0_3D_POLYGON_MODE_FRONT, nvc0_9097_poly_mode_front); + MK_MACRO(NVC0_3D_POLYGON_MODE_BACK, nvc0_9097_poly_mode_back); + MK_MACRO(NVC0_3D_COLOR_MASK_BROADCAST, nvc0_9097_color_mask_brdc); + + BEGIN_RING(chan, RING_3D(RASTERIZE_ENABLE), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(GP_SELECT), 1); + OUT_RING (chan, 0x40); + BEGIN_RING(chan, RING_3D(GP_BUILTIN_RESULT_EN), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); + OUT_RING (chan, 0x30); + BEGIN_RING(chan, RING_3D(PATCH_VERTICES), 1); + OUT_RING (chan, 3); + BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 1); + OUT_RING (chan, 0x20); + BEGIN_RING(chan, RING_3D(SP_SELECT(0)), 1); + OUT_RING (chan, 0x00); + + BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(POINT_RASTER_RULES), 1); + OUT_RING (chan, NVC0_3D_POINT_RASTER_RULES_OGL); + + BEGIN_RING(chan, RING_3D(FRAG_COLOR_CLAMP_EN), 1); + OUT_RING (chan, 0x11111111); + BEGIN_RING(chan, RING_3D(EDGEFLAG_ENABLE), 1); + OUT_RING (chan, 1); + + BEGIN_RING(chan, RING_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2); + OUT_RING (chan, 0xab); + OUT_RING (chan, 0x00000000); + + FIRE_RING (chan); + + screen->tic.entries = CALLOC(4096, sizeof(void *)); + screen->tsc.entries = screen->tic.entries + 2048; + + screen->mm_GART = nvc0_mm_create(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, + 0x000); + screen->mm_VRAM = nvc0_mm_create(dev, NOUVEAU_BO_VRAM, 0x000); + screen->mm_VRAM_fe0 = nvc0_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0); + + nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); + + return pscreen; + +fail: + nvc0_screen_destroy(pscreen); + return NULL; +} + +void +nvc0_screen_make_buffers_resident(struct nvc0_screen *screen) +{ + 
struct nouveau_channel *chan = screen->base.channel; + + const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; + + nouveau_bo_validate(chan, screen->text, flags); + nouveau_bo_validate(chan, screen->uniforms, flags); + nouveau_bo_validate(chan, screen->txc, flags); + nouveau_bo_validate(chan, screen->tls, flags); + nouveau_bo_validate(chan, screen->mp_stack_bo, flags); +} + +int +nvc0_screen_tic_alloc(struct nvc0_screen *screen, void *entry) +{ + int i = screen->tic.next; + + while (screen->tic.lock[i / 32] & (1 << (i % 32))) + i = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1); + + screen->tic.next = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1); + + if (screen->tic.entries[i]) + nvc0_tic_entry(screen->tic.entries[i])->id = -1; + + screen->tic.entries[i] = entry; + return i; +} + +int +nvc0_screen_tsc_alloc(struct nvc0_screen *screen, void *entry) +{ + int i = screen->tsc.next; + + while (screen->tsc.lock[i / 32] & (1 << (i % 32))) + i = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1); + + screen->tsc.next = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1); + + if (screen->tsc.entries[i]) + nvc0_tsc_entry(screen->tsc.entries[i])->id = -1; + + screen->tsc.entries[i] = entry; + return i; +} diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h new file mode 100644 index 00000000000..1fac142e2be --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -0,0 +1,192 @@ +#ifndef __NVC0_SCREEN_H__ +#define __NVC0_SCREEN_H__ + +#define NOUVEAU_NVC0 +#include "nouveau/nouveau_screen.h" +#undef NOUVEAU_NVC0 +#include "nvc0_winsys.h" +#include "nvc0_stateobj.h" + +#define NVC0_TIC_MAX_ENTRIES 2048 +#define NVC0_TSC_MAX_ENTRIES 2048 + +struct nvc0_mman; +struct nvc0_context; +struct nvc0_fence; + +#define NVC0_SCRATCH_SIZE (2 << 20) +#define NVC0_SCRATCH_NR_BUFFERS 2 + +struct nvc0_screen { + struct nouveau_screen base; + struct nouveau_winsys *nvws; + + struct nvc0_context *cur_ctx; + + struct nouveau_bo *text; + struct nouveau_bo *uniforms; + struct nouveau_bo *tls; + struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ + struct nouveau_bo *mp_stack_bo; + + uint64_t tls_size; + + struct nouveau_resource *text_heap; + + struct { + struct nouveau_bo *bo[NVC0_SCRATCH_NR_BUFFERS]; + uint8_t *buf; + int index; + uint32_t offset; + } scratch; + + struct { + void **entries; + int next; + uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32]; + } tic; + + struct { + void **entries; + int next; + uint32_t lock[NVC0_TSC_MAX_ENTRIES / 32]; + } tsc; + + struct { + uint32_t *map; + struct nvc0_fence *head; + struct nvc0_fence *tail; + struct nvc0_fence *current; + uint32_t sequence; + uint32_t sequence_ack; + struct nouveau_bo *bo; + } fence; + + struct nvc0_mman *mm_GART; + struct nvc0_mman *mm_VRAM; + struct nvc0_mman *mm_VRAM_fe0; + + struct nouveau_grobj *fermi; + struct nouveau_grobj *eng2d; + struct nouveau_grobj *m2mf; +}; + +static INLINE struct nvc0_screen * +nvc0_screen(struct pipe_screen *screen) +{ + return (struct nvc0_screen *)screen; +} + +/* Since a resource can be migrated, we need to decouple allocations from + * them. This struct is linked with fences for delayed freeing of allocs. 
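+ * An allocation that may still be referenced by commands in flight is not
+ * freed directly with nvc0_mm_free() but chained onto the current fence via
+ * nvc0_fence_sched_release() (see e.g. nvc0_query_allocate()).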
+ */ +struct nvc0_mm_allocation { + struct nvc0_mm_allocation *next; + void *priv; + uint32_t offset; +}; + +static INLINE void +nvc0_fence_sched_release(struct nvc0_fence *nf, struct nvc0_mm_allocation *mm) +{ + mm->next = nf->buffers; + nf->buffers = mm; +} + +extern struct nvc0_mman * +nvc0_mm_create(struct nouveau_device *, uint32_t domain, uint32_t storage_type); + +extern void +nvc0_mm_destroy(struct nvc0_mman *); + +extern struct nvc0_mm_allocation * +nvc0_mm_allocate(struct nvc0_mman *, + uint32_t size, struct nouveau_bo **, uint32_t *offset); +extern void +nvc0_mm_free(struct nvc0_mm_allocation *); + +void nvc0_screen_make_buffers_resident(struct nvc0_screen *); + +int nvc0_screen_tic_alloc(struct nvc0_screen *, void *); +int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *); + +static INLINE void +nvc0_resource_fence(struct nvc0_resource *res, uint32_t flags) +{ + struct nvc0_screen *screen = nvc0_screen(res->base.screen); + + if (res->mm) { + nvc0_fence_reference(&res->fence, screen->fence.current); + + if (flags & NOUVEAU_BO_WR) + nvc0_fence_reference(&res->fence_wr, screen->fence.current); + } +} + +static INLINE void +nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags) +{ + struct nvc0_screen *screen = nvc0_screen(res->base.screen); + + nouveau_bo_validate(screen->base.channel, res->bo, flags); + + nvc0_resource_fence(res, flags); +} + + +boolean +nvc0_screen_fence_new(struct nvc0_screen *, struct nvc0_fence **, boolean emit); + +void +nvc0_screen_fence_next(struct nvc0_screen *); + +static INLINE boolean +nvc0_screen_fence_emit(struct nvc0_screen *screen) +{ + nvc0_fence_emit(screen->fence.current); + + return nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); +} + +struct nvc0_format { + uint32_t rt; + uint32_t tic; + uint32_t vtx; + uint32_t usage; +}; + +extern const struct nvc0_format nvc0_format_table[]; + +static INLINE void +nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nvc0_tic_entry *tic) +{ + if (tic->id >= 0) + screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32)); +} + +static INLINE void +nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nvc0_tsc_entry *tsc) +{ + if (tsc->id >= 0) + screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32)); +} + +static INLINE void +nvc0_screen_tic_free(struct nvc0_screen *screen, struct nvc0_tic_entry *tic) +{ + if (tic->id >= 0) { + screen->tic.entries[tic->id] = NULL; + screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32)); + } +} + +static INLINE void +nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nvc0_tsc_entry *tsc) +{ + if (tsc->id >= 0) { + screen->tsc.entries[tsc->id] = NULL; + screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32)); + } +} + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c new file mode 100644 index 00000000000..981b5488d08 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c @@ -0,0 +1,180 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included 
in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" + +#include "nvc0_context.h" + +static boolean +nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) +{ + int ret; + unsigned size; + + if (prog->translated) + return TRUE; + + prog->translated = nvc0_program_translate(prog); + if (!prog->translated) + return FALSE; + + size = align(prog->code_size + NVC0_SHADER_HEADER_SIZE, 0x100); + + ret = nouveau_resource_alloc(nvc0->screen->text_heap, size, prog, + &prog->res); + if (ret) + return FALSE; + + prog->code_base = prog->res->start; + + nvc0_m2mf_push_linear(nvc0, nvc0->screen->text, NOUVEAU_BO_VRAM, + prog->code_base, NVC0_SHADER_HEADER_SIZE, prog->hdr); + nvc0_m2mf_push_linear(nvc0, nvc0->screen->text, NOUVEAU_BO_VRAM, + prog->code_base + NVC0_SHADER_HEADER_SIZE, + prog->code_size, prog->code); + + BEGIN_RING(nvc0->screen->base.channel, RING_3D_(0x021c), 1); + OUT_RING (nvc0->screen->base.channel, 0x1111); + + return TRUE; +} + +void +nvc0_vertprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_program *vp = nvc0->vertprog; + + if (nvc0->clip.nr > vp->vp.num_ucps) { + assert(nvc0->clip.nr <= 6); + vp->vp.num_ucps = 6; + + if (vp->translated) + nvc0_program_destroy(nvc0, vp); + } + + if (!nvc0_program_validate(nvc0, vp)) + return; + + BEGIN_RING(chan, RING_3D(SP_SELECT(1)), 2); + OUT_RING (chan, 0x11); + OUT_RING (chan, vp->code_base); + BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(1)), 1); + OUT_RING (chan, vp->max_gpr); + + // BEGIN_RING(chan, RING_3D_(0x163c), 1); + // OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(VERT_COLOR_CLAMP_EN), 1); + OUT_RING (chan, 1); +} + +void +nvc0_fragprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_program *fp = nvc0->fragprog; + + if (!nvc0_program_validate(nvc0, fp)) + return; + + BEGIN_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), 1); + OUT_RING (chan, fp->fp.early_z); + BEGIN_RING(chan, RING_3D(SP_SELECT(5)), 2); + OUT_RING (chan, 0x51); + OUT_RING (chan, fp->code_base); + BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(5)), 1); + OUT_RING (chan, fp->max_gpr); + + BEGIN_RING(chan, RING_3D_(0x0360), 2); + OUT_RING (chan, 0x20164010); + OUT_RING (chan, 0x20); + BEGIN_RING(chan, RING_3D_(0x196c), 1); + OUT_RING (chan, fp->flags[0]); +} + +void +nvc0_tctlprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_program *tp = nvc0->tctlprog; + + if (!tp) { + BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 1); + OUT_RING (chan, 0x20); + return; + } + if (!nvc0_program_validate(nvc0, tp)) + return; + + BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 2); + OUT_RING (chan, 0x21); + OUT_RING (chan, tp->code_base); + BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(2)), 1); + OUT_RING (chan, tp->max_gpr); +} + +void +nvc0_tevlprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = 
nvc0->screen->base.channel; + struct nvc0_program *tp = nvc0->tevlprog; + + if (!tp) { + BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); + OUT_RING (chan, 0x30); + return; + } + if (!nvc0_program_validate(nvc0, tp)) + return; + + BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); + OUT_RING (chan, 0x31); + BEGIN_RING(chan, RING_3D(SP_START_ID(3)), 1); + OUT_RING (chan, tp->code_base); + BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(3)), 1); + OUT_RING (chan, tp->max_gpr); +} + +void +nvc0_gmtyprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_program *gp = nvc0->gmtyprog; + + if (!gp) { + BEGIN_RING(chan, RING_3D(GP_SELECT), 1); + OUT_RING (chan, 0x40); + return; + } + if (!nvc0_program_validate(nvc0, gp)) + return; + + BEGIN_RING(chan, RING_3D(GP_SELECT), 1); + OUT_RING (chan, 0x41); + BEGIN_RING(chan, RING_3D(SP_START_ID(4)), 1); + OUT_RING (chan, gp->code_base); + BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(4)), 1); + OUT_RING (chan, gp->max_gpr); +} diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c new file mode 100644 index 00000000000..c08f3693f5e --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -0,0 +1,865 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pipe/p_defines.h" +#include "util/u_inlines.h" + +#include "tgsi/tgsi_parse.h" + +#include "nvc0_stateobj.h" +#include "nvc0_context.h" + +#include "nvc0_3d.xml.h" +#include "nv50_texture.xml.h" + +#include "nouveau/nouveau_gldefs.h" + +static INLINE uint32_t +nvc0_colormask(unsigned mask) +{ + uint32_t ret = 0; + + if (mask & PIPE_MASK_R) + ret |= 0x0001; + if (mask & PIPE_MASK_G) + ret |= 0x0010; + if (mask & PIPE_MASK_B) + ret |= 0x0100; + if (mask & PIPE_MASK_A) + ret |= 0x1000; + + return ret; +} + +static INLINE uint32_t +nvc0_blend_fac(unsigned factor) +{ + static const uint16_t bf[] = { + NV50_3D_BLEND_FACTOR_ZERO, /* 0x00 */ + NV50_3D_BLEND_FACTOR_ONE, + NV50_3D_BLEND_FACTOR_SRC_COLOR, + NV50_3D_BLEND_FACTOR_SRC_ALPHA, + NV50_3D_BLEND_FACTOR_DST_ALPHA, + NV50_3D_BLEND_FACTOR_DST_COLOR, + NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE, + NV50_3D_BLEND_FACTOR_CONSTANT_COLOR, + NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA, + NV50_3D_BLEND_FACTOR_SRC1_COLOR, + NV50_3D_BLEND_FACTOR_SRC1_ALPHA, + NV50_3D_BLEND_FACTOR_ZERO, /* 0x0b */ + NV50_3D_BLEND_FACTOR_ZERO, /* 0x0c */ + NV50_3D_BLEND_FACTOR_ZERO, /* 0x0d */ + NV50_3D_BLEND_FACTOR_ZERO, /* 0x0e */ + NV50_3D_BLEND_FACTOR_ZERO, /* 0x0f */ + NV50_3D_BLEND_FACTOR_ZERO, /* 0x10 */ + NV50_3D_BLEND_FACTOR_ZERO, /* 0x11 */ + NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, + NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, + NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, + NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR, + NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, + NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, + NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR, + NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA + }; + + assert(factor < (sizeof(bf) / sizeof(bf[0]))); + return bf[factor]; +} + +static void * +nvc0_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nvc0_blend_stateobj *so = CALLOC_STRUCT(nvc0_blend_stateobj); + int i; + + so->pipe = *cso; + + SB_IMMED_3D(so, BLEND_INDEPENDENT, cso->independent_blend_enable); + + if (!cso->independent_blend_enable) { + SB_BEGIN_3D(so, BLEND_ENABLES, 1); + SB_DATA (so, cso->rt[0].blend_enable ? 
0xff : 0); + + if (cso->rt[0].blend_enable) { + SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5); + SB_DATA (so, nvgl_blend_eqn(cso->rt[0].rgb_func)); + SB_DATA (so, nvc0_blend_fac(cso->rt[0].rgb_src_factor)); + SB_DATA (so, nvc0_blend_fac(cso->rt[0].rgb_dst_factor)); + SB_DATA (so, nvgl_blend_eqn(cso->rt[0].alpha_func)); + SB_DATA (so, nvc0_blend_fac(cso->rt[0].alpha_src_factor)); + SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1); + SB_DATA (so, nvc0_blend_fac(cso->rt[0].alpha_dst_factor)); + } + + SB_BEGIN_3D(so, COLOR_MASK_BROADCAST, 1); + SB_DATA (so, nvc0_colormask(cso->rt[0].colormask)); + } else { + uint8_t en = 0; + + for (i = 0; i < 8; ++i) { + if (!cso->rt[i].blend_enable) + continue; + en |= 1 << i; + + SB_BEGIN_3D(so, IBLEND_EQUATION_RGB(i), 6); + SB_DATA (so, nvgl_blend_eqn(cso->rt[i].rgb_func)); + SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_src_factor)); + SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_dst_factor)); + SB_DATA (so, nvgl_blend_eqn(cso->rt[i].alpha_func)); + SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_src_factor)); + SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_dst_factor)); + } + SB_BEGIN_3D(so, BLEND_ENABLES, 1); + SB_DATA (so, en); + + SB_BEGIN_3D(so, COLOR_MASK(0), 8); + for (i = 0; i < 8; ++i) + SB_DATA(so, nvc0_colormask(cso->rt[i].colormask)); + } + + if (cso->logicop_enable) { + SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2); + SB_DATA (so, 1); + SB_DATA (so, nvgl_logicop_func(cso->logicop_func)); + } else { + SB_IMMED_3D(so, LOGIC_OP_ENABLE, 0); + } + + assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); + return so; +} + +static void +nvc0_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->blend = hwcso; + nvc0->dirty |= NVC0_NEW_BLEND; +} + +static void +nvc0_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void * +nvc0_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nvc0_rasterizer_stateobj *so; + + so = CALLOC_STRUCT(nvc0_rasterizer_stateobj); + if (!so) + return NULL; + so->pipe = *cso; + +#ifndef NVC0_SCISSORS_CLIPPING + SB_IMMED_3D(so, SCISSOR_ENABLE(0), cso->scissor); +#endif + + SB_BEGIN_3D(so, SHADE_MODEL, 1); + SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT : + NVC0_3D_SHADE_MODEL_SMOOTH); + SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first); + SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside); + + SB_BEGIN_3D(so, LINE_WIDTH, 1); + SB_DATA (so, fui(cso->line_width)); + SB_IMMED_3D(so, LINE_SMOOTH_ENABLE, cso->line_smooth); + + SB_BEGIN_3D(so, LINE_STIPPLE_ENABLE, 1); + if (cso->line_stipple_enable) { + SB_DATA (so, 1); + SB_BEGIN_3D(so, LINE_STIPPLE_PATTERN, 1); + SB_DATA (so, (cso->line_stipple_pattern << 8) | + cso->line_stipple_factor); + + } else { + SB_DATA (so, 0); + } + + SB_IMMED_3D(so, VP_POINT_SIZE_EN, cso->point_size_per_vertex); + if (!cso->point_size_per_vertex) { + SB_BEGIN_3D(so, POINT_SIZE, 1); + SB_DATA (so, fui(cso->point_size)); + } + SB_IMMED_3D(so, POINT_SPRITE_ENABLE, cso->point_quad_rasterization); + SB_IMMED_3D(so, POINT_SMOOTH_ENABLE, cso->point_smooth); + + SB_BEGIN_3D(so, POLYGON_MODE_FRONT, 1); + SB_DATA (so, nvgl_polygon_mode(cso->fill_front)); + SB_BEGIN_3D(so, POLYGON_MODE_BACK, 1); + SB_DATA (so, nvgl_polygon_mode(cso->fill_back)); + SB_IMMED_3D(so, POLYGON_SMOOTH_ENABLE, cso->poly_smooth); + + SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3); + SB_DATA (so, cso->cull_face != PIPE_FACE_NONE); + SB_DATA (so, cso->front_ccw ? 
NVC0_3D_FRONT_FACE_CCW : + NVC0_3D_FRONT_FACE_CW); + switch (cso->cull_face) { + case PIPE_FACE_FRONT_AND_BACK: + SB_DATA(so, NVC0_3D_CULL_FACE_FRONT_AND_BACK); + break; + case PIPE_FACE_FRONT: + SB_DATA(so, NVC0_3D_CULL_FACE_FRONT); + break; + case PIPE_FACE_BACK: + default: + SB_DATA(so, NVC0_3D_CULL_FACE_BACK); + break; + } + + SB_IMMED_3D(so, POLYGON_STIPPLE_ENABLE, cso->poly_stipple_enable); + SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3); + SB_DATA (so, cso->offset_point); + SB_DATA (so, cso->offset_line); + SB_DATA (so, cso->offset_tri); + + if (cso->offset_point || cso->offset_line || cso->offset_tri) { + SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1); + SB_DATA (so, fui(cso->offset_scale)); + SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1); + SB_DATA (so, fui(cso->offset_units)); /* XXX: multiply by 2 ? */ + } + + assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); + return (void *)so; +} + +static void +nvc0_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->rast = hwcso; + nvc0->dirty |= NVC0_NEW_RASTERIZER; +} + +static void +nvc0_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void * +nvc0_zsa_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nvc0_zsa_stateobj *so = CALLOC_STRUCT(nvc0_zsa_stateobj); + + so->pipe = *cso; + + SB_IMMED_3D(so, DEPTH_WRITE_ENABLE, cso->depth.writemask); + SB_BEGIN_3D(so, DEPTH_TEST_ENABLE, 1); + if (cso->depth.enabled) { + SB_DATA (so, 1); + SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1); + SB_DATA (so, nvgl_comparison_op(cso->depth.func)); + } else { + SB_DATA (so, 0); + } + + if (cso->stencil[0].enabled) { + SB_BEGIN_3D(so, STENCIL_FRONT_ENABLE, 5); + SB_DATA (so, 1); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func)); + SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2); + SB_DATA (so, cso->stencil[0].writemask); + SB_DATA (so, cso->stencil[0].valuemask); + } else { + SB_IMMED_3D(so, STENCIL_FRONT_ENABLE, 0); + } + + if (cso->stencil[1].enabled) { + SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5); + SB_DATA (so, 1); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + SB_DATA (so, nvgl_comparison_op(cso->stencil[1].func)); + SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2); + SB_DATA (so, cso->stencil[1].writemask); + SB_DATA (so, cso->stencil[1].valuemask); + } else { + SB_IMMED_3D(so, STENCIL_TWO_SIDE_ENABLE, 0); + } + + SB_BEGIN_3D(so, ALPHA_TEST_ENABLE, 1); + if (cso->alpha.enabled) { + SB_DATA (so, 1); + SB_BEGIN_3D(so, ALPHA_TEST_REF, 2); + SB_DATA (so, fui(cso->alpha.ref_value)); + SB_DATA (so, nvgl_comparison_op(cso->alpha.func)); + } else { + SB_DATA (so, 0); + } + + assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); + return (void *)so; +} + +static void +nvc0_zsa_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->zsa = hwcso; + nvc0->dirty |= NVC0_NEW_ZSA; +} + +static void +nvc0_zsa_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +/* ====================== SAMPLERS AND TEXTURES ================================ + */ + +#define NV50_TSC_WRAP_CASE(n) \ + case PIPE_TEX_WRAP_##n: return 
NV50_TSC_WRAP_##n + +static INLINE unsigned +nv50_tsc_wrap_mode(unsigned wrap) +{ + switch (wrap) { + NV50_TSC_WRAP_CASE(REPEAT); + NV50_TSC_WRAP_CASE(MIRROR_REPEAT); + NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE); + NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER); + NV50_TSC_WRAP_CASE(CLAMP); + NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE); + NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER); + NV50_TSC_WRAP_CASE(MIRROR_CLAMP); + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + return NV50_TSC_WRAP_REPEAT; + } +} + +static void * +nvc0_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nvc0_tsc_entry *so = CALLOC_STRUCT(nvc0_tsc_entry); + float f[2]; + + so->id = -1; + + so->tsc[0] = (0x00026000 | + (nv50_tsc_wrap_mode(cso->wrap_s) << 0) | + (nv50_tsc_wrap_mode(cso->wrap_t) << 3) | + (nv50_tsc_wrap_mode(cso->wrap_r) << 6)); + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + so->tsc[1] |= NV50_TSC_1_MAGF_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + so->tsc[1] |= NV50_TSC_1_MAGF_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + so->tsc[1] |= NV50_TSC_1_MINF_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + so->tsc[1] |= NV50_TSC_1_MINF_NEAREST; + break; + } + + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_LINEAR: + so->tsc[1] |= NV50_TSC_1_MIPF_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NEAREST: + so->tsc[1] |= NV50_TSC_1_MIPF_NEAREST; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + so->tsc[1] |= NV50_TSC_1_MIPF_NONE; + break; + } + + if (cso->max_anisotropy >= 16) + so->tsc[0] |= (7 << 20); + else + if (cso->max_anisotropy >= 12) + so->tsc[0] |= (6 << 20); + else { + so->tsc[0] |= (cso->max_anisotropy >> 1) << 20; + + if (cso->max_anisotropy >= 4) + so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_35; + else + if (cso->max_anisotropy >= 2) + so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_15; + } + + if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + /* NOTE: must be deactivated for non-shadow textures */ + so->tsc[0] |= (1 << 9); + so->tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10; + } + + f[0] = CLAMP(cso->lod_bias, -16.0f, 15.0f); + so->tsc[1] |= ((int)(f[0] * 256.0f) & 0x1fff) << 12; + + f[0] = CLAMP(cso->min_lod, 0.0f, 15.0f); + f[1] = CLAMP(cso->max_lod, 0.0f, 15.0f); + so->tsc[2] |= + (((int)(f[1] * 256.0f) & 0xfff) << 12) | ((int)(f[0] * 256.0f) & 0xfff); + + so->tsc[4] = fui(cso->border_color[0]); + so->tsc[5] = fui(cso->border_color[1]); + so->tsc[6] = fui(cso->border_color[2]); + so->tsc[7] = fui(cso->border_color[3]); + + return (void *)so; +} + +static void +nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + unsigned s, i; + + for (s = 0; s < 5; ++s) + for (i = 0; i < nvc0_context(pipe)->num_samplers[s]; ++i) + if (nvc0_context(pipe)->samplers[s][i] == hwcso) + nvc0_context(pipe)->samplers[s][i] = NULL; + + nvc0_screen_tsc_free(nvc0_context(pipe)->screen, nvc0_tsc_entry(hwcso)); + + FREE(hwcso); +} + +static INLINE void +nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s, + unsigned nr, void **hwcso) +{ + unsigned i; + + for (i = 0; i < nr; ++i) { + struct nvc0_tsc_entry *old = nvc0->samplers[s][i]; + + nvc0->samplers[s][i] = nvc0_tsc_entry(hwcso[i]); + if (old) + nvc0_screen_tsc_unlock(nvc0->screen, old); + } + for (; i < nvc0->num_samplers[s]; ++i) + if (nvc0->samplers[s][i]) + nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]); + + nvc0->num_samplers[s] = nr; + + nvc0->dirty |= NVC0_NEW_SAMPLERS; +} + 
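+/* Note: shader stages are addressed by fixed indices in this driver: 0 = vertex, 1 = tess control, 2 = tess eval, 3 = geometry, 4 = fragment (cf. the bind helpers below and the switch in nvc0_set_constant_buffer). */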
+static void +nvc0_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ + nvc0_stage_sampler_states_bind(nvc0_context(pipe), 0, nr, s); +} + +static void +nvc0_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ + nvc0_stage_sampler_states_bind(nvc0_context(pipe), 4, nr, s); +} + +static void +nvc0_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ + nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s); +} + +/* NOTE: only called when not referenced anywhere, won't be bound */ +static void +nvc0_sampler_view_destroy(struct pipe_context *pipe, + struct pipe_sampler_view *view) +{ + pipe_resource_reference(&view->texture, NULL); + + nvc0_screen_tic_free(nvc0_context(pipe)->screen, nvc0_tic_entry(view)); + + FREE(nvc0_tic_entry(view)); +} + +static INLINE void +nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, + unsigned nr, + struct pipe_sampler_view **views) +{ + unsigned i; + + for (i = 0; i < nr; ++i) { + struct nvc0_tic_entry *old = nvc0_tic_entry(nvc0->textures[s][i]); + if (old) + nvc0_screen_tic_unlock(nvc0->screen, old); + + pipe_sampler_view_reference(&nvc0->textures[s][i], views[i]); + } + + for (i = nr; i < nvc0->num_textures[s]; ++i) { + struct nvc0_tic_entry *old = nvc0_tic_entry(nvc0->textures[s][i]); + if (!old) + continue; + nvc0_screen_tic_unlock(nvc0->screen, old); + + pipe_sampler_view_reference(&nvc0->textures[s][i], NULL); + } + + nvc0->num_textures[s] = nr; + + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_TEXTURES); + + nvc0->dirty |= NVC0_NEW_TEXTURES; +} + +static void +nvc0_vp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + nvc0_stage_set_sampler_views(nvc0_context(pipe), 0, nr, views); +} + +static void +nvc0_fp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + nvc0_stage_set_sampler_views(nvc0_context(pipe), 4, nr, views); +} + +static void +nvc0_gp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + nvc0_stage_set_sampler_views(nvc0_context(pipe), 3, nr, views); +} + +/* ============================= SHADERS ======================================= + */ + +static void * +nvc0_sp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso, unsigned type) +{ + struct nvc0_program *prog; + + prog = CALLOC_STRUCT(nvc0_program); + if (!prog) + return NULL; + + prog->type = type; + prog->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + return (void *)prog; +} + +static void +nvc0_sp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_program *prog = (struct nvc0_program *)hwcso; + + nvc0_program_destroy(nvc0_context(pipe), prog); + + FREE((void *)prog->pipe.tokens); + FREE(prog); +} + +static void * +nvc0_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX); +} + +static void +nvc0_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->vertprog = hwcso; + nvc0->dirty |= NVC0_NEW_VERTPROG; +} + +static void * +nvc0_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT); +} + +static void +nvc0_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->fragprog = hwcso; + nvc0->dirty |= NVC0_NEW_FRAGPROG; 
+} + +static void * +nvc0_gp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_GEOMETRY); +} + +static void +nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->gmtyprog = hwcso; + nvc0->dirty |= NVC0_NEW_GMTYPROG; +} + +static void +nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + struct pipe_resource *res) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + switch (shader) { + case PIPE_SHADER_VERTEX: shader = 0; break; + /* + case PIPE_SHADER_TESSELLATION_CONTROL: shader = 1; break; + case PIPE_SHADER_TESSELLATION_EVALUATION: shader = 2; break; + */ + case PIPE_SHADER_GEOMETRY: shader = 3; break; + case PIPE_SHADER_FRAGMENT: shader = 4; break; + default: + assert(0); + break; + } + + if (nvc0->constbuf[shader][index]) + nvc0_bufctx_del_resident(nvc0, NVC0_BUFCTX_CONSTANT, + nvc0_resource( + nvc0->constbuf[shader][index])); + + pipe_resource_reference(&nvc0->constbuf[shader][index], res); + + nvc0->constbuf_dirty[shader] |= 1 << index; + + nvc0->dirty |= NVC0_NEW_CONSTBUF; +} + +/* ============================================================================= + */ + +static void +nvc0_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->blend_colour = *bcol; + nvc0->dirty |= NVC0_NEW_BLEND_COLOUR; +} + +static void +nvc0_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *sr) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->stencil_ref = *sr; + nvc0->dirty |= NVC0_NEW_STENCIL_REF; +} + +static void +nvc0_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + const unsigned size = clip->nr * sizeof(clip->ucp[0]); + + memcpy(&nvc0->clip.ucp[0][0], &clip->ucp[0][0], size); + nvc0->clip.nr = clip->nr; + + nvc0->clip.depth_clamp = clip->depth_clamp; + + nvc0->dirty |= NVC0_NEW_CLIP; +} + +static void +nvc0_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->sample_mask = sample_mask; + nvc0->dirty |= NVC0_NEW_SAMPLE_MASK; +} + + +static void +nvc0_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->framebuffer = *fb; + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; +} + +static void +nvc0_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->stipple = *stipple; + nvc0->dirty |= NVC0_NEW_STIPPLE; +} + +static void +nvc0_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *scissor) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->scissor = *scissor; + nvc0->dirty |= NVC0_NEW_SCISSOR; +} + +static void +nvc0_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->viewport = *vpt; + nvc0->dirty |= NVC0_NEW_VIEWPORT; +} + +static void +nvc0_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + unsigned i; + + for (i = 0; i < count; ++i) + pipe_resource_reference(&nvc0->vtxbuf[i].buffer, vb[i].buffer); + for 
(; i < nvc0->num_vtxbufs; ++i) + pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL); + + memcpy(nvc0->vtxbuf, vb, sizeof(*vb) * count); + nvc0->num_vtxbufs = count; + + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX); + + nvc0->dirty |= NVC0_NEW_ARRAYS; +} + +static void +nvc0_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + if (ib) + memcpy(&nvc0->idxbuf, ib, sizeof(nvc0->idxbuf)); + else + nvc0->idxbuf.buffer = NULL; +} + +static void +nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->vertex = hwcso; + nvc0->dirty |= NVC0_NEW_VERTEX; +} + +void +nvc0_init_state_functions(struct nvc0_context *nvc0) +{ + nvc0->pipe.create_blend_state = nvc0_blend_state_create; + nvc0->pipe.bind_blend_state = nvc0_blend_state_bind; + nvc0->pipe.delete_blend_state = nvc0_blend_state_delete; + + nvc0->pipe.create_rasterizer_state = nvc0_rasterizer_state_create; + nvc0->pipe.bind_rasterizer_state = nvc0_rasterizer_state_bind; + nvc0->pipe.delete_rasterizer_state = nvc0_rasterizer_state_delete; + + nvc0->pipe.create_depth_stencil_alpha_state = nvc0_zsa_state_create; + nvc0->pipe.bind_depth_stencil_alpha_state = nvc0_zsa_state_bind; + nvc0->pipe.delete_depth_stencil_alpha_state = nvc0_zsa_state_delete; + + nvc0->pipe.create_sampler_state = nvc0_sampler_state_create; + nvc0->pipe.delete_sampler_state = nvc0_sampler_state_delete; + nvc0->pipe.bind_vertex_sampler_states = nvc0_vp_sampler_states_bind; + nvc0->pipe.bind_fragment_sampler_states = nvc0_fp_sampler_states_bind; + nvc0->pipe.bind_geometry_sampler_states = nvc0_gp_sampler_states_bind; + + nvc0->pipe.create_sampler_view = nvc0_create_sampler_view; + nvc0->pipe.sampler_view_destroy = nvc0_sampler_view_destroy; + nvc0->pipe.set_vertex_sampler_views = nvc0_vp_set_sampler_views; + nvc0->pipe.set_fragment_sampler_views = nvc0_fp_set_sampler_views; + nvc0->pipe.set_geometry_sampler_views = nvc0_gp_set_sampler_views; + + nvc0->pipe.create_vs_state = nvc0_vp_state_create; + nvc0->pipe.create_fs_state = nvc0_fp_state_create; + nvc0->pipe.create_gs_state = nvc0_gp_state_create; + nvc0->pipe.bind_vs_state = nvc0_vp_state_bind; + nvc0->pipe.bind_fs_state = nvc0_fp_state_bind; + nvc0->pipe.bind_gs_state = nvc0_gp_state_bind; + nvc0->pipe.delete_vs_state = nvc0_sp_state_delete; + nvc0->pipe.delete_fs_state = nvc0_sp_state_delete; + nvc0->pipe.delete_gs_state = nvc0_sp_state_delete; + + nvc0->pipe.set_blend_color = nvc0_set_blend_color; + nvc0->pipe.set_stencil_ref = nvc0_set_stencil_ref; + nvc0->pipe.set_clip_state = nvc0_set_clip_state; + nvc0->pipe.set_sample_mask = nvc0_set_sample_mask; + nvc0->pipe.set_constant_buffer = nvc0_set_constant_buffer; + nvc0->pipe.set_framebuffer_state = nvc0_set_framebuffer_state; + nvc0->pipe.set_polygon_stipple = nvc0_set_polygon_stipple; + nvc0->pipe.set_scissor_state = nvc0_set_scissor_state; + nvc0->pipe.set_viewport_state = nvc0_set_viewport_state; + + nvc0->pipe.create_vertex_elements_state = nvc0_vertex_state_create; + nvc0->pipe.delete_vertex_elements_state = nvc0_vertex_state_delete; + nvc0->pipe.bind_vertex_elements_state = nvc0_vertex_state_bind; + + nvc0->pipe.set_vertex_buffers = nvc0_set_vertex_buffers; + nvc0->pipe.set_index_buffer = nvc0_set_index_buffer; +} + diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c new file mode 100644 index 00000000000..25aec0244db --- /dev/null +++ 
b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -0,0 +1,430 @@ + +#include "nvc0_context.h" +#include "os/os_time.h" + +static void +nvc0_validate_zcull(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct pipe_framebuffer_state *fb = &nvc0->framebuffer; + struct nvc0_surface *sf = nvc0_surface(fb->zsbuf); + struct nvc0_miptree *mt = nvc0_miptree(sf->base.texture); + struct nouveau_bo *bo = mt->base.bo; + uint32_t size; + uint32_t offset = align(mt->total_size, 1 << 17); + unsigned width, height; + + assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2); + + size = mt->total_size * 2; + + height = align(fb->height, 32); + width = fb->width % 224; + if (width) + width = fb->width + (224 - width); + else + width = fb->width; + + BEGIN_RING(chan, RING_3D_(0x1590), 1); /* ZCULL_REGION_INDEX (bits 0x3f) */ + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D_(0x07e8), 2); /* ZCULL_ADDRESS_A_HIGH */ + OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + offset += 1 << 17; + BEGIN_RING(chan, RING_3D_(0x07f0), 2); /* ZCULL_ADDRESS_B_HIGH */ + OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + BEGIN_RING(chan, RING_3D_(0x07e0), 2); + OUT_RING (chan, size); + OUT_RING (chan, size >> 16); + BEGIN_RING(chan, RING_3D_(0x15c8), 1); /* bits 0x3 */ + OUT_RING (chan, 2); + BEGIN_RING(chan, RING_3D_(0x07c0), 4); /* ZCULL dimensions */ + OUT_RING (chan, width); + OUT_RING (chan, height); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D_(0x15fc), 2); + OUT_RING (chan, 0); /* bits 0xffff */ + OUT_RING (chan, 0); /* bits 0xffff */ + BEGIN_RING(chan, RING_3D_(0x1958), 1); + OUT_RING (chan, 0); /* bits ~0 */ +} + +static void +nvc0_validate_fb(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct pipe_framebuffer_state *fb = &nvc0->framebuffer; + unsigned i; + + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_FRAME); + + BEGIN_RING(chan, RING_3D(RT_CONTROL), 1); + OUT_RING (chan, (076543210 << 4) | fb->nr_cbufs); + BEGIN_RING(chan, RING_3D(SCREEN_SCISSOR_HORIZ), 2); + OUT_RING (chan, fb->width << 16); + OUT_RING (chan, fb->height << 16); + + for (i = 0; i < fb->nr_cbufs; ++i) { + struct nvc0_miptree *mt = nvc0_miptree(fb->cbufs[i]->texture); + struct nvc0_surface *sf = nvc0_surface(fb->cbufs[i]); + struct nouveau_bo *bo = mt->base.bo; + uint32_t offset = sf->offset; + + BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(i)), 8); + OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RING (chan, sf->width); + OUT_RING (chan, sf->height); + OUT_RING (chan, nvc0_format_table[sf->base.format].rt); + OUT_RING (chan, (mt->layout_3d << 16) | + mt->level[sf->base.u.tex.level].tile_mode); + OUT_RING (chan, sf->depth); + OUT_RING (chan, mt->layer_stride >> 2); + + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + } + + if (fb->zsbuf) { + struct nvc0_miptree *mt = nvc0_miptree(fb->zsbuf->texture); + struct nvc0_surface *sf = nvc0_surface(fb->zsbuf); + struct nouveau_bo *bo = mt->base.bo; + int unk = mt->base.base.target == PIPE_TEXTURE_2D; + uint32_t offset = sf->offset; + + BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5); + OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, bo, offset, 
NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RING (chan, nvc0_format_table[fb->zsbuf->format].rt); + OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode); + OUT_RING (chan, mt->layer_stride >> 2); + BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3); + OUT_RING (chan, sf->width); + OUT_RING (chan, sf->height); + OUT_RING (chan, (unk << 16) | sf->depth); + + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + } else { + BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); + OUT_RING (chan, 0); + } + +#ifndef NVC0_SCISSORS_CLIPPING + BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); + OUT_RING (chan, fb->width << 16); + OUT_RING (chan, fb->height << 16); +#endif +} + +static void +nvc0_validate_blend_colour(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + + BEGIN_RING(chan, RING_3D(BLEND_COLOR(0)), 4); + OUT_RINGf (chan, nvc0->blend_colour.color[0]); + OUT_RINGf (chan, nvc0->blend_colour.color[1]); + OUT_RINGf (chan, nvc0->blend_colour.color[2]); + OUT_RINGf (chan, nvc0->blend_colour.color[3]); +} + +static void +nvc0_validate_stencil_ref(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + + BEGIN_RING(chan, RING_3D(STENCIL_FRONT_FUNC_REF), 1); + OUT_RING (chan, nvc0->stencil_ref.ref_value[0]); + BEGIN_RING(chan, RING_3D(STENCIL_BACK_FUNC_REF), 1); + OUT_RING (chan, nvc0->stencil_ref.ref_value[1]); +} + +static void +nvc0_validate_stipple(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + unsigned i; + + BEGIN_RING(chan, RING_3D(POLYGON_STIPPLE_PATTERN(0)), 32); + for (i = 0; i < 32; ++i) + OUT_RING(chan, util_bswap32(nvc0->stipple.stipple[i])); +} + +static void +nvc0_validate_scissor(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct pipe_scissor_state *s = &nvc0->scissor; +#ifdef NVC0_SCISSORS_CLIPPING + struct pipe_viewport_state *vp = &nvc0->viewport; + int minx, maxx, miny, maxy; + + if (!(nvc0->dirty & + (NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | NVC0_NEW_FRAMEBUFFER)) && + nvc0->state.scissor == nvc0->rast->pipe.scissor) + return; + nvc0->state.scissor = nvc0->rast->pipe.scissor; + + if (nvc0->state.scissor) { + minx = s->minx; + maxx = s->maxx; + miny = s->miny; + maxy = s->maxy; + } else { + minx = 0; + maxx = nvc0->framebuffer.width; + miny = 0; + maxy = nvc0->framebuffer.height; + } + + minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0]))); + maxx = MIN2(maxx, (int)(vp->translate[0] + fabsf(vp->scale[0]))); + miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1]))); + maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1]))); + + BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); + OUT_RING (chan, (maxx << 16) | minx); + OUT_RING (chan, (maxy << 16) | miny); + BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); + OUT_RING (chan, ((maxx - minx) << 16) | minx); + OUT_RING (chan, ((maxy - miny) << 16) | miny); +#else + BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); + OUT_RING (chan, (s->maxx << 16) | s->minx); + OUT_RING (chan, (s->maxy << 16) | s->miny); +#endif +} + +static void +nvc0_validate_viewport(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + + BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSLATE_X(0)), 3); + OUT_RINGf (chan, nvc0->viewport.translate[0]); + OUT_RINGf (chan, nvc0->viewport.translate[1]); + OUT_RINGf (chan, nvc0->viewport.translate[2]); 
+ BEGIN_RING(chan, RING_3D(VIEWPORT_SCALE_X(0)), 3); + OUT_RINGf (chan, nvc0->viewport.scale[0]); + OUT_RINGf (chan, nvc0->viewport.scale[1]); + OUT_RINGf (chan, nvc0->viewport.scale[2]); + +#ifdef NVC0_SCISSORS_CLIPPING + BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2); + OUT_RINGf (chan, nvc0->viewport.translate[2] - nvc0->viewport.scale[2]); + OUT_RINGf (chan, nvc0->viewport.translate[2] + nvc0->viewport.scale[2]); +#endif +} + +static void +nvc0_validate_clip(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + uint32_t clip; + + clip = nvc0->clip.depth_clamp ? 0x201a : 0x0002; +#ifndef NVC0_SCISSORS_CLIPPING + clip |= 0x1080; +#endif + + BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1); + OUT_RING (chan, clip); + + if (nvc0->clip.nr) { + struct nouveau_bo *bo = nvc0->screen->uniforms; + + BEGIN_RING(chan, RING_3D(CB_SIZE), 3); + OUT_RING (chan, 256); + OUT_RELOCh(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + BEGIN_RING_1I(chan, RING_3D(CB_POS), nvc0->clip.nr * 4 + 1); + OUT_RING (chan, 0); + OUT_RINGp (chan, &nvc0->clip.ucp[0][0], nvc0->clip.nr * 4); + + BEGIN_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 1); + OUT_RING (chan, (1 << nvc0->clip.nr) - 1); + } else { + IMMED_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 0); + } +} + +static void +nvc0_validate_blend(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + + WAIT_RING(chan, nvc0->blend->size); + OUT_RINGp(chan, nvc0->blend->state, nvc0->blend->size); +} + +static void +nvc0_validate_zsa(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + + WAIT_RING(chan, nvc0->zsa->size); + OUT_RINGp(chan, nvc0->zsa->state, nvc0->zsa->size); +} + +static void +nvc0_validate_rasterizer(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + + WAIT_RING(chan, nvc0->rast->size); + OUT_RINGp(chan, nvc0->rast->state, nvc0->rast->size); +} + +static void +nvc0_constbufs_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nouveau_bo *bo; + unsigned s; + + for (s = 0; s < 5; ++s) { + struct nvc0_resource *res; + int i; + + while (nvc0->constbuf_dirty[s]) { + unsigned base = 0; + unsigned offset = 0, words = 0; + boolean rebind = TRUE; + + i = ffs(nvc0->constbuf_dirty[s]) - 1; + nvc0->constbuf_dirty[s] &= ~(1 << i); + + res = nvc0_resource(nvc0->constbuf[s][i]); + if (!res) { + BEGIN_RING(chan, RING_3D(CB_BIND(s)), 1); + OUT_RING (chan, (i << 4) | 0); + if (i == 0) + nvc0->state.uniform_buffer_bound[s] = 0; + continue; + } + + if (!nvc0_resource_mapped_by_gpu(&res->base)) { + if (i == 0) { + base = s << 16; + bo = nvc0->screen->uniforms; + + if (nvc0->state.uniform_buffer_bound[s] >= res->base.width0) + rebind = FALSE; + else + nvc0->state.uniform_buffer_bound[s] = + align(res->base.width0, 0x100); + } else { + bo = res->bo; + } +#if 0 + nvc0_m2mf_push_linear(nvc0, bo, NOUVEAU_BO_VRAM, + base, res->base.width0, res->data); + BEGIN_RING(chan, RING_3D_(0x021c), 1); + OUT_RING (chan, 0x1111); +#else + words = res->base.width0 / 4; +#endif + } else { + bo = res->bo; + if (i == 0) + nvc0->state.uniform_buffer_bound[s] = 0; + } + + if (bo != nvc0->screen->uniforms) + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_CONSTANT, res, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + + if (rebind) { + BEGIN_RING(chan, RING_3D(CB_SIZE), 3); + OUT_RING (chan, align(res->base.width0, 0x100)); + 
OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + BEGIN_RING(chan, RING_3D(CB_BIND(s)), 1); + OUT_RING (chan, (i << 4) | 1); + } + + while (words) { + unsigned nr = AVAIL_RING(chan); + + if (nr < 16) { + FIRE_RING(chan); + continue; + } + nr = MIN2(MIN2(nr - 6, words), NV04_PFIFO_MAX_PACKET_LEN - 1); + + BEGIN_RING(chan, RING_3D(CB_SIZE), 3); + OUT_RING (chan, align(res->base.width0, 0x100)); + OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + BEGIN_RING_1I(chan, RING_3D(CB_POS), nr + 1); + OUT_RING (chan, offset); + OUT_RINGp (chan, &res->data[offset], nr); + + offset += nr * 4; + words -= nr; + } + } + } +} + +static struct state_validate { + void (*func)(struct nvc0_context *); + uint32_t states; +} validate_list[] = { + { nvc0_validate_fb, NVC0_NEW_FRAMEBUFFER }, + { nvc0_validate_blend, NVC0_NEW_BLEND }, + { nvc0_validate_zsa, NVC0_NEW_ZSA }, + { nvc0_validate_rasterizer, NVC0_NEW_RASTERIZER }, + { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR }, + { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF }, + { nvc0_validate_stipple, NVC0_NEW_STIPPLE }, +#ifdef NVC0_SCISSORS_CLIPPING + { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | + NVC0_NEW_RASTERIZER | + NVC0_NEW_FRAMEBUFFER }, +#else + { nvc0_validate_scissor, NVC0_NEW_SCISSOR }, +#endif + { nvc0_validate_viewport, NVC0_NEW_VIEWPORT }, + { nvc0_validate_clip, NVC0_NEW_CLIP }, + { nvc0_vertprog_validate, NVC0_NEW_VERTPROG }, + { nvc0_tctlprog_validate, NVC0_NEW_TCTLPROG }, + { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG }, + { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG }, + { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG }, + { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF }, + { nvc0_validate_textures, NVC0_NEW_TEXTURES }, + { nvc0_validate_samplers, NVC0_NEW_SAMPLERS }, + { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS } +}; +#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) + +boolean +nvc0_state_validate(struct nvc0_context *nvc0) +{ + unsigned i; +#if 0 + if (nvc0->screen->cur_ctx != nvc0) /* FIXME: not everything is valid */ + nvc0->dirty = 0xffffffff; +#endif + nvc0->screen->cur_ctx = nvc0; + + if (nvc0->dirty) { + for (i = 0; i < validate_list_len; ++i) { + struct state_validate *validate = &validate_list[i]; + + if (nvc0->dirty & validate->states) + validate->func(nvc0); + } + nvc0->dirty = 0; + } + + nvc0_bufctx_emit_relocs(nvc0); + + return TRUE; +} diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h new file mode 100644 index 00000000000..ee788c5bb9c --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h @@ -0,0 +1,81 @@ + +#ifndef __NVC0_STATEOBJ_H__ +#define __NVC0_STATEOBJ_H__ + +#include "pipe/p_state.h" + +#define NVC0_SCISSORS_CLIPPING + +#define SB_BEGIN_3D(so, m, s) \ + (so)->state[(so)->size++] = \ + (0x2 << 28) | ((s) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2) + +#define SB_IMMED_3D(so, m, d) \ + (so)->state[(so)->size++] = \ + (0x8 << 28) | ((d) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2) + +#define SB_DATA(so, u) (so)->state[(so)->size++] = (u) + +struct nvc0_blend_stateobj { + struct pipe_blend_state pipe; + int size; + uint32_t state[72]; +}; + +struct nvc0_tsc_entry { + int id; + uint32_t tsc[8]; +}; + +static INLINE struct nvc0_tsc_entry * +nvc0_tsc_entry(void *hwcso) +{ + return (struct nvc0_tsc_entry *)hwcso; +} + +struct 
nvc0_tic_entry { + struct pipe_sampler_view pipe; + int id; + uint32_t tic[8]; +}; + +static INLINE struct nvc0_tic_entry * +nvc0_tic_entry(struct pipe_sampler_view *view) +{ + return (struct nvc0_tic_entry *)view; +} + +struct nvc0_rasterizer_stateobj { + struct pipe_rasterizer_state pipe; + int size; + uint32_t state[36]; +}; + +struct nvc0_zsa_stateobj { + struct pipe_depth_stencil_alpha_state pipe; + int size; + uint32_t state[29]; +}; + +struct nvc0_vertex_element { + struct pipe_vertex_element pipe; + uint32_t state; +}; + +struct nvc0_vertex_stateobj { + struct translate *translate; + unsigned num_elements; + uint32_t instance_bits; + unsigned vtx_size; + unsigned vtx_per_packet_max; + struct nvc0_vertex_element element[1]; +}; + +/* will have to lookup index -> location qualifier from nvc0_program */ +struct nvc0_tfb_state { + uint8_t varying_count[4]; + uint32_t stride[4]; + uint8_t varying_indices[1]; +}; + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c new file mode 100644 index 00000000000..cc0a65687dc --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_surface.c @@ -0,0 +1,377 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdint.h> + +#include "pipe/p_defines.h" + +#include "util/u_inlines.h" +#include "util/u_pack_color.h" +#include "util/u_format.h" + +#include "nvc0_context.h" +#include "nvc0_resource.h" + +#include "nv50_defs.xml.h" + +/* return TRUE for formats that can be converted among each other by NVC0_2D */ +static INLINE boolean +nvc0_2d_format_faithful(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_B8G8R8X8_SRGB: + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + return TRUE; + default: + return FALSE; + } +} + +static INLINE uint8_t +nvc0_2d_format(enum pipe_format format) +{ + uint8_t id = nvc0_format_table[format].rt; + + /* Hardware values for color formats range from 0xc0 to 0xff, + * but the 2D engine doesn't support all of them. 
+ */ + if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0)))) + return id; + + switch (util_format_get_blocksize(format)) { + case 1: + return NV50_SURFACE_FORMAT_R8_UNORM; + case 2: + return NV50_SURFACE_FORMAT_R16_UNORM; + case 4: + return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM; + default: + return 0; + } +} + +static int +nvc0_2d_texture_set(struct nouveau_channel *chan, int dst, + struct nvc0_miptree *mt, unsigned level, unsigned layer) +{ + struct nouveau_bo *bo = mt->base.bo; + uint32_t width, height, depth; + uint32_t format; + uint32_t mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT; + uint32_t flags = mt->base.domain | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD); + uint32_t offset = mt->level[level].offset; + + format = nvc0_2d_format(mt->base.base.format); + if (!format) { + NOUVEAU_ERR("invalid/unsupported surface format: %s\n", + util_format_name(mt->base.base.format)); + return 1; + } + + width = u_minify(mt->base.base.width0, level); + height = u_minify(mt->base.base.height0, level); + + offset = mt->level[level].offset; + if (!mt->layout_3d) { + offset += mt->layer_stride * layer; + depth = 1; + layer = 0; + } else { + depth = u_minify(mt->base.base.depth0, level); + } + + if (!(bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK)) { + BEGIN_RING(chan, RING_2D_(mthd), 2); + OUT_RING (chan, format); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_2D_(mthd + 0x14), 5); + OUT_RING (chan, mt->level[level].pitch); + OUT_RING (chan, width); + OUT_RING (chan, height); + OUT_RELOCh(chan, bo, offset, flags); + OUT_RELOCl(chan, bo, offset, flags); + } else { + BEGIN_RING(chan, RING_2D_(mthd), 5); + OUT_RING (chan, format); + OUT_RING (chan, 0); + OUT_RING (chan, mt->level[level].tile_mode); + OUT_RING (chan, depth); + OUT_RING (chan, layer); + BEGIN_RING(chan, RING_2D_(mthd + 0x18), 4); + OUT_RING (chan, width); + OUT_RING (chan, height); + OUT_RELOCh(chan, bo, offset, flags); + OUT_RELOCl(chan, bo, offset, flags); + } + +#if 0 + if (dst) { + BEGIN_RING(chan, RING_2D_(NVC0_2D_CLIP_X), 4); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, width); + OUT_RING (chan, height); + } +#endif + return 0; +} + +static int +nvc0_2d_texture_do_copy(struct nouveau_channel *chan, + struct nvc0_miptree *dst, unsigned dst_level, + unsigned dx, unsigned dy, unsigned dz, + struct nvc0_miptree *src, unsigned src_level, + unsigned sx, unsigned sy, unsigned sz, + unsigned w, unsigned h) +{ + int ret; + + ret = MARK_RING(chan, 2 * 16 + 32, 4); + if (ret) + return ret; + + ret = nvc0_2d_texture_set(chan, 1, dst, dst_level, dz); + if (ret) + return ret; + + ret = nvc0_2d_texture_set(chan, 0, src, src_level, sz); + if (ret) + return ret; + + /* 0/1 = CENTER/CORNER, 10/00 = POINT/BILINEAR */ + BEGIN_RING(chan, RING_2D(BLIT_CONTROL), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_2D(BLIT_DST_X), 4); + OUT_RING (chan, dx); + OUT_RING (chan, dy); + OUT_RING (chan, w); + OUT_RING (chan, h); + BEGIN_RING(chan, RING_2D(BLIT_DU_DX_FRACT), 4); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_2D(BLIT_SRC_X_FRACT), 4); + OUT_RING (chan, 0); + OUT_RING (chan, sx); + OUT_RING (chan, 0); + OUT_RING (chan, sy); + + return 0; +} + +static void +nvc0_resource_copy_region(struct pipe_context *pipe, + struct pipe_resource *dst, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) +{ + struct nvc0_screen *screen = nvc0_context(pipe)->screen; + int ret; + 
unsigned dst_layer = dstz, src_layer = src_box->z; + + assert((src->format == dst->format) || + (nvc0_2d_format_faithful(src->format) && + nvc0_2d_format_faithful(dst->format))); + + for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) { + ret = nvc0_2d_texture_do_copy(screen->base.channel, + nvc0_miptree(dst), dst_level, + dstx, dsty, dst_layer, + nvc0_miptree(src), src_level, + src_box->x, src_box->y, src_layer, + src_box->width, src_box->height); + if (ret) + return; + } +} + +static void +nvc0_clear_render_target(struct pipe_context *pipe, + struct pipe_surface *dst, + const float *rgba, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct nvc0_context *nv50 = nvc0_context(pipe); + struct nvc0_screen *screen = nv50->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nvc0_miptree *mt = nvc0_miptree(dst->texture); + struct nvc0_surface *sf = nvc0_surface(dst); + struct nouveau_bo *bo = mt->base.bo; + + BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4); + OUT_RINGf (chan, rgba[0]); + OUT_RINGf (chan, rgba[1]); + OUT_RINGf (chan, rgba[2]); + OUT_RINGf (chan, rgba[3]); + + if (MARK_RING(chan, 18, 2)) + return; + + BEGIN_RING(chan, RING_3D(RT_CONTROL), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(0)), 8); + OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RING (chan, sf->width); + OUT_RING (chan, sf->height); + OUT_RING (chan, nvc0_format_table[dst->format].rt); + OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + + /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */ + + BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); + OUT_RING (chan, (width << 16) | dstx); + OUT_RING (chan, (height << 16) | dsty); + + BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); + OUT_RING (chan, 0x3c); + + nv50->dirty |= NVC0_NEW_FRAMEBUFFER; +} + +static void +nvc0_clear_depth_stencil(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct nvc0_context *nv50 = nvc0_context(pipe); + struct nvc0_screen *screen = nv50->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nvc0_miptree *mt = nvc0_miptree(dst->texture); + struct nvc0_surface *sf = nvc0_surface(dst); + struct nouveau_bo *bo = mt->base.bo; + uint32_t mode = 0; + + if (clear_flags & PIPE_CLEAR_DEPTH) { + BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1); + OUT_RINGf (chan, depth); + mode |= NVC0_3D_CLEAR_BUFFERS_Z; + } + + if (clear_flags & PIPE_CLEAR_STENCIL) { + BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1); + OUT_RING (chan, stencil & 0xff); + mode |= NVC0_3D_CLEAR_BUFFERS_S; + } + + if (MARK_RING(chan, 17, 2)) + return; + + BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5); + OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RING (chan, nvc0_format_table[dst->format].rt); + OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3); + OUT_RING (chan, sf->width); + OUT_RING (chan, sf->height); + OUT_RING (chan, (1 << 16) | 1); + + BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); + OUT_RING (chan, (width << 16) | dstx); + OUT_RING (chan, (height << 16) | dsty); 
+ + BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); + OUT_RING (chan, mode); + + nv50->dirty |= NVC0_NEW_FRAMEBUFFER; +} + +void +nvc0_clear(struct pipe_context *pipe, unsigned buffers, + const float *rgba, double depth, unsigned stencil) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct pipe_framebuffer_state *fb = &nvc0->framebuffer; + unsigned i; + const unsigned dirty = nvc0->dirty; + uint32_t mode = 0; + + /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */ + nvc0->dirty &= NVC0_NEW_FRAMEBUFFER; + if (!nvc0_state_validate(nvc0)) + return; + + if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { + BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4); + OUT_RINGf (chan, rgba[0]); + OUT_RINGf (chan, rgba[1]); + OUT_RINGf (chan, rgba[2]); + OUT_RINGf (chan, rgba[3]); + mode = + NVC0_3D_CLEAR_BUFFERS_R | NVC0_3D_CLEAR_BUFFERS_G | + NVC0_3D_CLEAR_BUFFERS_B | NVC0_3D_CLEAR_BUFFERS_A; + } + + if (buffers & PIPE_CLEAR_DEPTH) { + BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1); + OUT_RING (chan, fui(depth)); + mode |= NVC0_3D_CLEAR_BUFFERS_Z; + } + + if (buffers & PIPE_CLEAR_STENCIL) { + BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1); + OUT_RING (chan, stencil & 0xff); + mode |= NVC0_3D_CLEAR_BUFFERS_S; + } + + BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); + OUT_RING (chan, mode); + + for (i = 1; i < fb->nr_cbufs; i++) { + BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); + OUT_RING (chan, (i << 6) | 0x3c); + } + + nvc0->dirty = dirty & ~NVC0_NEW_FRAMEBUFFER; +} + +void +nvc0_init_surface_functions(struct nvc0_context *nvc0) +{ + nvc0->pipe.resource_copy_region = nvc0_resource_copy_region; + nvc0->pipe.clear_render_target = nvc0_clear_render_target; + nvc0->pipe.clear_depth_stencil = nvc0_clear_depth_stencil; +} + + diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c new file mode 100644 index 00000000000..b219f82c903 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_tex.c @@ -0,0 +1,277 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "nvc0_context.h" +#include "nvc0_resource.h" +#include "nv50_texture.xml.h" + +#include "util/u_format.h" + +static INLINE uint32_t +nv50_tic_swizzle(uint32_t tc, unsigned swz) +{ + switch (swz) { + case PIPE_SWIZZLE_RED: + return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT; + case PIPE_SWIZZLE_GREEN: + return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT; + case PIPE_SWIZZLE_BLUE: + return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT; + case PIPE_SWIZZLE_ALPHA: + return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT; + case PIPE_SWIZZLE_ONE: + return NV50_TIC_MAP_ONE; + case PIPE_SWIZZLE_ZERO: + default: + return NV50_TIC_MAP_ZERO; + } +} + +struct pipe_sampler_view * +nvc0_create_sampler_view(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_sampler_view *templ) +{ + const struct util_format_description *desc; + uint32_t *tic; + uint32_t swz[4]; + uint32_t depth; + struct nvc0_tic_entry *view; + struct nvc0_miptree *mt = nvc0_miptree(texture); + + view = MALLOC_STRUCT(nvc0_tic_entry); + if (!view) + return NULL; + + view->pipe = *templ; + view->pipe.reference.count = 1; + view->pipe.texture = NULL; + view->pipe.context = pipe; + + view->id = -1; + + pipe_resource_reference(&view->pipe.texture, texture); + + tic = &view->tic[0]; + + desc = util_format_description(mt->base.base.format); + + /* TIC[0] */ + + tic[0] = nvc0_format_table[view->pipe.format].tic; + + swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r); + swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g); + swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b); + swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a); + tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) | + (swz[0] << NV50_TIC_0_MAPR__SHIFT) | + (swz[1] << NV50_TIC_0_MAPG__SHIFT) | + (swz[2] << NV50_TIC_0_MAPB__SHIFT) | + (swz[3] << NV50_TIC_0_MAPA__SHIFT); + + /* tic[1] = mt->base.bo->offset; */ + tic[2] = /* mt->base.bo->offset >> 32 */ 0; + + tic[2] |= 0x10001000 | /* NV50_TIC_2_NO_BORDER */ 0x40000000; + + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + tic[2] |= NV50_TIC_2_COLORSPACE_SRGB; + + if (mt->base.base.target != PIPE_TEXTURE_RECT) + tic[2] |= NV50_TIC_2_NORMALIZED_COORDS; + + tic[2] |= + ((mt->base.bo->tile_mode & 0x0f0) << (22 - 4)) | + ((mt->base.bo->tile_mode & 0xf00) << (25 - 8)); + + depth = MAX2(mt->base.base.array_size, mt->base.base.depth0); + + switch (mt->base.base.target) { + case PIPE_TEXTURE_1D: + tic[2] |= NV50_TIC_2_TARGET_1D; + break; + case PIPE_TEXTURE_2D: + tic[2] |= NV50_TIC_2_TARGET_2D; + break; + case PIPE_TEXTURE_RECT: + tic[2] |= NV50_TIC_2_TARGET_RECT; + break; + case PIPE_TEXTURE_3D: + tic[2] |= NV50_TIC_2_TARGET_3D; + break; + case PIPE_TEXTURE_CUBE: + depth /= 6; + if (depth > 1) + tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY; + else + tic[2] |= NV50_TIC_2_TARGET_CUBE; + break; + case PIPE_TEXTURE_1D_ARRAY: + tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY; + break; + case PIPE_TEXTURE_2D_ARRAY: + tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY; + break; + case PIPE_BUFFER: + tic[2] |= NV50_TIC_2_TARGET_BUFFER | /* NV50_TIC_2_LINEAR */ (1 << 18); + break; + default: + NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target); + return NULL; + } + + if (mt->base.base.target == PIPE_BUFFER) + tic[3] = mt->base.base.width0; + else + tic[3] = 0x00300000; + + tic[4] = (1 << 31) | mt->base.base.width0; + + tic[5] = mt->base.base.height0 & 0xffff; + tic[5] |= depth << 16; + tic[5] |= mt->base.base.last_level << 28; + + tic[6] = 0x03000000; + + tic[7] =
(view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level; + + return &view->pipe; +} + +static boolean +nvc0_validate_tic(struct nvc0_context *nvc0, int s) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nouveau_bo *txc = nvc0->screen->txc; + unsigned i; + boolean need_flush = FALSE; + + for (i = 0; i < nvc0->num_textures[s]; ++i) { + struct nvc0_tic_entry *tic = nvc0_tic_entry(nvc0->textures[s][i]); + struct nvc0_resource *res; + + if (!tic) { + BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1); + OUT_RING (chan, (i << 1) | 0); + continue; + } + res = &nvc0_miptree(tic->pipe.texture)->base; + + if (tic->id < 0) { + tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); + + MARK_RING (chan, 9 + 8, 4); + BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); + OUT_RELOCh(chan, txc, tic->id * 32, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, txc, tic->id * 32, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); + OUT_RING (chan, 32); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_MF(EXEC), 1); + OUT_RING (chan, 0x100111); + BEGIN_RING_NI(chan, RING_MF(DATA), 8); + OUT_RING (chan, tic->tic[0]); + OUT_RELOCl(chan, res->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOC (chan, res->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH | NOUVEAU_BO_OR, tic->tic[2], tic->tic[2]); + OUT_RINGp (chan, &tic->tic[3], 5); + + need_flush = TRUE; + } + nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); + + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_TEXTURES, res, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + + BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1); + OUT_RING (chan, (tic->id << 9) | (i << 1) | 1); + } + for (; i < nvc0->state.num_textures[s]; ++i) { + BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1); + OUT_RING (chan, (i << 1) | 0); + } + nvc0->state.num_textures[s] = nvc0->num_textures[s]; + + return need_flush; +} + +void nvc0_validate_textures(struct nvc0_context *nvc0) +{ + boolean need_flush; + + need_flush = nvc0_validate_tic(nvc0, 0); + need_flush |= nvc0_validate_tic(nvc0, 4); + + if (need_flush) { + BEGIN_RING(nvc0->screen->base.channel, RING_3D(TIC_FLUSH), 1); + OUT_RING (nvc0->screen->base.channel, 0); + } +} + +static boolean +nvc0_validate_tsc(struct nvc0_context *nvc0, int s) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + unsigned i; + boolean need_flush = FALSE; + + for (i = 0; i < nvc0->num_samplers[s]; ++i) { + struct nvc0_tsc_entry *tsc = nvc0_tsc_entry(nvc0->samplers[s][i]); + + if (!tsc) { + BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1); + OUT_RING (chan, (i << 4) | 0); + continue; + } + if (tsc->id < 0) { + tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc); + + nvc0_m2mf_push_linear(nvc0, nvc0->screen->txc, NOUVEAU_BO_VRAM, + 65536 + tsc->id * 32, 32, tsc->tsc); + need_flush = TRUE; + } + nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); + + BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1); + OUT_RING (chan, (tsc->id << 12) | (i << 4) | 1); + } + for (; i < nvc0->state.num_samplers[s]; ++i) { + BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1); + OUT_RING (chan, (i << 4) | 0); + } + nvc0->state.num_samplers[s] = nvc0->num_samplers[s]; + + return need_flush; +} + +void nvc0_validate_samplers(struct nvc0_context *nvc0) +{ + boolean need_flush; + + need_flush = nvc0_validate_tsc(nvc0, 0); + need_flush |= nvc0_validate_tsc(nvc0, 4); + + if (need_flush) { + BEGIN_RING(nvc0->screen->base.channel, RING_3D(TSC_FLUSH), 1); + OUT_RING (nvc0->screen->base.channel, 0); + } +} diff --git 
a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c new file mode 100644 index 00000000000..fecfc76fb79 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -0,0 +1,2004 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <unistd.h> + +#define NOUVEAU_DEBUG 1 + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_dump.h" +#include "util/u_dynarray.h" + +#include "nvc0_pc.h" +#include "nvc0_program.h" + +/* Arbitrary internal limits. */ +#define BLD_MAX_TEMPS 64 +#define BLD_MAX_ADDRS 4 +#define BLD_MAX_PREDS 4 +#define BLD_MAX_IMMDS 128 +#define BLD_MAX_OUTPS PIPE_MAX_SHADER_OUTPUTS + +#define BLD_MAX_COND_NESTING 8 +#define BLD_MAX_LOOP_NESTING 4 +#define BLD_MAX_CALL_NESTING 2 + +/* This structure represents a TGSI register. 
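+ * Each channel keeps its current SSA value plus a list of every value that
+ * has been assigned to it, so phi functions can be built on demand where
+ * control flow merges.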
*/ +struct bld_register { + struct nv_value *current; + /* collect all SSA values assigned to it */ + struct util_dynarray vals; + /* 1 bit per loop level, indicates if used/defd, reset when loop ends */ + uint16_t loop_use; + uint16_t loop_def; +}; + +static INLINE struct nv_value ** +bld_register_access(struct bld_register *reg, unsigned i) +{ + return util_dynarray_element(®->vals, struct nv_value *, i); +} + +static INLINE void +bld_register_add_val(struct bld_register *reg, struct nv_value *val) +{ + util_dynarray_append(®->vals, struct nv_value *, val); +} + +static INLINE boolean +bld_register_del_val(struct bld_register *reg, struct nv_value *val) +{ + unsigned i; + + for (i = reg->vals.size / sizeof(struct nv_value *); i > 0; --i) + if (*bld_register_access(reg, i - 1) == val) + break; + if (!i) + return FALSE; + + if (i != reg->vals.size / sizeof(struct nv_value *)) + *bld_register_access(reg, i - 1) = util_dynarray_pop(®->vals, + struct nv_value *); + else + reg->vals.size -= sizeof(struct nv_value *); + + return TRUE; +} + +struct bld_context { + struct nvc0_translation_info *ti; + + struct nv_pc *pc; + struct nv_basic_block *b; + + struct tgsi_parse_context parse[BLD_MAX_CALL_NESTING]; + int call_lvl; + + struct nv_basic_block *cond_bb[BLD_MAX_COND_NESTING]; + struct nv_basic_block *join_bb[BLD_MAX_COND_NESTING]; + struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING]; + int cond_lvl; + struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING]; + struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING]; + int loop_lvl; + + ubyte out_kind; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */ + + struct bld_register tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */ + struct bld_register avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */ + struct bld_register pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */ + struct bld_register ovs[BLD_MAX_OUTPS][4]; /* TGSI_FILE_OUTPUT, FP only */ + + uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 7) / 8]; + int hpos_index; + + struct nv_value *zero; + struct nv_value *frag_coord[4]; + + /* wipe on new BB */ + struct nv_value *saved_sysvals[4]; + struct nv_value *saved_addr[4][2]; + struct nv_value *saved_inputs[PIPE_MAX_SHADER_INPUTS][4]; + struct nv_value *saved_immd[BLD_MAX_IMMDS]; + uint num_immds; +}; + +static INLINE ubyte +bld_register_file(struct bld_context *bld, struct bld_register *reg) +{ + if (reg < &bld->avs[0][0]) return NV_FILE_GPR; + else + if (reg < &bld->pvs[0][0]) return NV_FILE_GPR; + else + if (reg < &bld->ovs[0][0]) return NV_FILE_PRED; + else + return NV_FILE_MEM_V; +} + +static INLINE struct nv_value * +bld_fetch(struct bld_context *bld, struct bld_register *regs, int i, int c) +{ + regs[i * 4 + c].loop_use |= 1 << bld->loop_lvl; + return regs[i * 4 + c].current; +} + +static struct nv_value * +bld_loop_phi(struct bld_context *, struct bld_register *, struct nv_value *); + +/* If a variable is defined in a loop without prior use, we don't need + * a phi in the loop header to account for backwards flow. + * + * However, if this variable is then also used outside the loop, we do + * need a phi after all. But we must not use this phi's def inside the + * loop, so we can eliminate the phi if it is unused later. 
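+ * (Redundant loop-header phis are cleaned up again in bld_loop_end or left
+ * to dead-code elimination.)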
+ */ +static INLINE void +bld_store(struct bld_context *bld, + struct bld_register *regs, int i, int c, struct nv_value *val) +{ + const uint16_t m = 1 << bld->loop_lvl; + struct bld_register *reg = ®s[i * 4 + c]; + + if (bld->loop_lvl && !(m & (reg->loop_def | reg->loop_use))) + bld_loop_phi(bld, reg, val); + + reg->current = val; + bld_register_add_val(reg, reg->current); + + reg->loop_def |= 1 << bld->loop_lvl; +} + +#define FETCH_TEMP(i, c) bld_fetch(bld, &bld->tvs[0][0], i, c) +#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v)) +#define FETCH_ADDR(i, c) bld_fetch(bld, &bld->avs[0][0], i, c) +#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v)) +#define FETCH_PRED(i, c) bld_fetch(bld, &bld->pvs[0][0], i, c) +#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v)) +#define STORE_OUTP(i, c, v) \ + do { \ + bld_store(bld, &bld->ovs[0][0], i, c, (v)); \ + bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \ + } while (0) + +static INLINE void +bld_clear_def_use(struct bld_register *regs, int n, int lvl) +{ + int i; + const uint16_t mask = ~(1 << lvl); + + for (i = 0; i < n * 4; ++i) { + regs[i].loop_def &= mask; + regs[i].loop_use &= mask; + } +} + +static INLINE void +bld_warn_uninitialized(struct bld_context *bld, int kind, + struct bld_register *reg, struct nv_basic_block *b) +{ +#ifdef NOUVEAU_DEBUG + long i = (reg - &bld->tvs[0][0]) / 4; + long c = (reg - &bld->tvs[0][0]) & 3; + + if (c == 3) + c = -1; + debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n", + i, (int)('x' + c), kind ? "may be" : "is", b->id); +#endif +} + +static INLINE struct nv_value * +bld_def(struct nv_instruction *i, int c, struct nv_value *value) +{ + i->def[c] = value; + value->insn = i; + return value; +} + +static INLINE struct nv_value * +find_by_bb(struct bld_register *reg, struct nv_basic_block *b) +{ + int i; + + if (reg->current && reg->current->insn->bb == b) + return reg->current; + + for (i = 0; i < reg->vals.size / sizeof(struct nv_value *); ++i) + if ((*bld_register_access(reg, i))->insn->bb == b) + return *bld_register_access(reg, i); + return NULL; +} + +/* Fetch value from register that was defined in the specified BB, + * or search for first definitions in all of its predecessors. 
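+ * Wall edges are not followed, so values do not propagate along control
+ * flow edges that cannot actually carry them.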
+ */ +static void +fetch_by_bb(struct bld_register *reg, + struct nv_value **vals, int *n, + struct nv_basic_block *b) +{ + int i; + struct nv_value *val; + + assert(*n < 16); /* MAX_COND_NESTING */ + + val = find_by_bb(reg, b); + if (val) { + for (i = 0; i < *n; ++i) + if (vals[i] == val) + return; + vals[(*n)++] = val; + return; + } + for (i = 0; i < b->num_in; ++i) + if (!IS_WALL_EDGE(b->in_kind[i])) + fetch_by_bb(reg, vals, n, b->in[i]); +} + +static INLINE struct nv_value * +bld_load_imm_u32(struct bld_context *bld, uint32_t u); + +static INLINE struct nv_value * +bld_undef(struct bld_context *bld, ubyte file) +{ + struct nv_instruction *nvi = new_instruction(bld->pc, NV_OP_UNDEF); + + return bld_def(nvi, 0, new_value(bld->pc, file, 4)); +} + +static struct nv_value * +bld_phi(struct bld_context *bld, struct nv_basic_block *b, + struct bld_register *reg) +{ + struct nv_basic_block *in; + struct nv_value *vals[16] = { NULL }; + struct nv_value *val; + struct nv_instruction *phi; + int i, j, n; + + do { + i = n = 0; + fetch_by_bb(reg, vals, &n, b); + + if (!n) { + bld_warn_uninitialized(bld, 0, reg, b); + return NULL; + } + + if (n == 1) { + if (nvc0_bblock_dominated_by(b, vals[0]->insn->bb)) + break; + + bld_warn_uninitialized(bld, 1, reg, b); + + /* back-tracking to insert missing value of other path */ + in = b; + while (in->in[0]) { + if (in->num_in == 1) { + in = in->in[0]; + } else { + if (!nvc0_bblock_reachable_by(in->in[0], vals[0]->insn->bb, b)) + in = in->in[0]; + else + if (!nvc0_bblock_reachable_by(in->in[1], vals[0]->insn->bb, b)) + in = in->in[1]; + else + in = in->in[0]; + } + } + bld->pc->current_block = in; + + /* should make this a no-op */ + bld_register_add_val(reg, bld_undef(bld, vals[0]->reg.file)); + continue; + } + + for (i = 0; i < n; ++i) { + /* if value dominates b, continue to the redefinitions */ + if (nvc0_bblock_dominated_by(b, vals[i]->insn->bb)) + continue; + + /* if value dominates any in-block, b should be the dom frontier */ + for (j = 0; j < b->num_in; ++j) + if (nvc0_bblock_dominated_by(b->in[j], vals[i]->insn->bb)) + break; + /* otherwise, find the dominance frontier and put the phi there */ + if (j == b->num_in) { + in = nvc0_bblock_dom_frontier(vals[i]->insn->bb); + val = bld_phi(bld, in, reg); + bld_register_add_val(reg, val); + break; + } + } + } while(i < n); + + bld->pc->current_block = b; + + if (n == 1) + return vals[0]; + + phi = new_instruction(bld->pc, NV_OP_PHI); + + bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.size)); + for (i = 0; i < n; ++i) + nv_reference(bld->pc, phi, i, vals[i]); + + return phi->def[0]; +} + +/* Insert a phi function in the loop header. + * For nested loops, we need to insert phi functions in all the outer + * loop headers if they don't have one yet. 
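+ * (This is done by recursing towards the outermost loop level before the
+ * phi for the current header is created.)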
+ * + * @def: redefinition from inside loop, or NULL if to be replaced later + */ +static struct nv_value * +bld_loop_phi(struct bld_context *bld, struct bld_register *reg, + struct nv_value *def) +{ + struct nv_instruction *phi; + struct nv_basic_block *bb = bld->pc->current_block; + struct nv_value *val = NULL; + + if (bld->loop_lvl > 1) { + --bld->loop_lvl; + if (!((reg->loop_def | reg->loop_use) & (1 << bld->loop_lvl))) + val = bld_loop_phi(bld, reg, NULL); + ++bld->loop_lvl; + } + + if (!val) + val = bld_phi(bld, bld->pc->current_block, reg); /* old definition */ + if (!val) { + bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0]; + val = bld_undef(bld, bld_register_file(bld, reg)); + } + + bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]; + + phi = new_instruction(bld->pc, NV_OP_PHI); + + bld_def(phi, 0, new_value_like(bld->pc, val)); + if (!def) + def = phi->def[0]; + + bld_register_add_val(reg, phi->def[0]); + + phi->target = (struct nv_basic_block *)reg; /* cheat */ + + nv_reference(bld->pc, phi, 0, val); + nv_reference(bld->pc, phi, 1, def); + + bld->pc->current_block = bb; + + return phi->def[0]; +} + +static INLINE struct nv_value * +bld_fetch_global(struct bld_context *bld, struct bld_register *reg) +{ + const uint16_t m = 1 << bld->loop_lvl; + const uint16_t use = reg->loop_use; + + reg->loop_use |= m; + + /* If neither used nor def'd inside the loop, build a phi in foresight, + * so we don't have to replace stuff later on, which requires tracking. + */ + if (bld->loop_lvl && !((use | reg->loop_def) & m)) + return bld_loop_phi(bld, reg, NULL); + + return bld_phi(bld, bld->pc->current_block, reg); +} + +static INLINE struct nv_value * +bld_imm_u32(struct bld_context *bld, uint32_t u) +{ + int i; + unsigned n = bld->num_immds; + + for (i = 0; i < n; ++i) + if (bld->saved_immd[i]->reg.imm.u32 == u) + return bld->saved_immd[i]; + + assert(n < BLD_MAX_IMMDS); + bld->num_immds++; + + bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, 4); + bld->saved_immd[n]->reg.imm.u32 = u; + return bld->saved_immd[n]; +} + +static void +bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *, + struct nv_value *); + +/* Replace the source of the phi in the loop header by the last assignment, + * or eliminate the phi function if there is no assignment inside the loop. 
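+ * bld_loop_phi stores the register a phi belongs to in phi->target, which
+ * is read back here to find the corresponding bld_register.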
+ * + * Redundancy situation 1 - (used) but (not redefined) value: + * %3 = phi %0, %3 = %3 is used + * %3 = phi %0, %4 = is new definition + * + * Redundancy situation 2 - (not used) but (redefined) value: + * %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE + */ +static void +bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) +{ + struct nv_basic_block *save = bld->pc->current_block; + struct nv_instruction *phi, *next; + struct nv_value *val; + struct bld_register *reg; + int i, s, n; + + for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) { + next = phi->next; + + reg = (struct bld_register *)phi->target; + phi->target = NULL; + + for (s = 1, n = 0; n < bb->num_in; ++n) { + if (bb->in_kind[n] != CFG_EDGE_BACK) + continue; + + assert(s < 4); + bld->pc->current_block = bb->in[n]; + val = bld_fetch_global(bld, reg); + + for (i = 0; i < 4; ++i) + if (phi->src[i] && phi->src[i]->value == val) + break; + if (i == 4) + nv_reference(bld->pc, phi, s++, val); + } + bld->pc->current_block = save; + + if (phi->src[0]->value == phi->def[0] || + phi->src[0]->value == phi->src[1]->value) + s = 1; + else + if (phi->src[1]->value == phi->def[0]) + s = 0; + else + continue; + + if (s >= 0) { + /* eliminate the phi */ + bld_register_del_val(reg, phi->def[0]); + + ++bld->pc->pass_seq; + bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value); + + nvc0_insn_delete(phi); + } + } +} + +static INLINE struct nv_value * +bld_imm_f32(struct bld_context *bld, float f) +{ + return bld_imm_u32(bld, fui(f)); +} + +static struct nv_value * +bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0) +{ + struct nv_instruction *insn = new_instruction(bld->pc, opcode); + + nv_reference(bld->pc, insn, 0, src0); + + return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size)); +} + +static struct nv_value * +bld_insn_2(struct bld_context *bld, uint opcode, + struct nv_value *src0, struct nv_value *src1) +{ + struct nv_instruction *insn = new_instruction(bld->pc, opcode); + + nv_reference(bld->pc, insn, 0, src0); + nv_reference(bld->pc, insn, 1, src1); + + return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size)); +} + +static struct nv_value * +bld_insn_3(struct bld_context *bld, uint opcode, + struct nv_value *src0, struct nv_value *src1, + struct nv_value *src2) +{ + struct nv_instruction *insn = new_instruction(bld->pc, opcode); + + nv_reference(bld->pc, insn, 0, src0); + nv_reference(bld->pc, insn, 1, src1); + nv_reference(bld->pc, insn, 2, src2); + + return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size)); +} + +static INLINE void +bld_src_predicate(struct bld_context *bld, + struct nv_instruction *nvi, int s, struct nv_value *val) +{ + nvi->predicate = s; + nv_reference(bld->pc, nvi, s, val); +} + +static INLINE void +bld_src_pointer(struct bld_context *bld, + struct nv_instruction *nvi, int s, struct nv_value *val) +{ + nvi->indirect = s; + nv_reference(bld->pc, nvi, s, val); +} + +static void +bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst, + struct nv_value *val) +{ + struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_ST); + struct nv_value *loc; + + loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32)); + + loc->reg.id = ofst * 4; + + nv_reference(bld->pc, insn, 0, loc); + nv_reference(bld->pc, insn, 1, ptr); + nv_reference(bld->pc, insn, 2, val); +} + +static struct nv_value * +bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst) +{ 
+ struct nv_value *loc, *val; + + loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32)); + + loc->reg.address = ofst * 4; + + val = bld_insn_2(bld, NV_OP_LD, loc, ptr); + + return val; +} + +static struct nv_value * +bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e) +{ + struct nv_value *val; + + val = bld_insn_1(bld, NV_OP_LG2, x); + val = bld_insn_2(bld, NV_OP_MUL_F32, e, val); + + val = bld_insn_1(bld, NV_OP_PREEX2, val); + val = bld_insn_1(bld, NV_OP_EX2, val); + + return val; +} + +static INLINE struct nv_value * +bld_load_imm_f32(struct bld_context *bld, float f) +{ + if (f == 0.0f) + return bld->zero; + return bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f)); +} + +static INLINE struct nv_value * +bld_load_imm_u32(struct bld_context *bld, uint32_t u) +{ + if (u == 0) + return bld->zero; + return bld_insn_1(bld, NV_OP_MOV, bld_imm_u32(bld, u)); +} + +static INLINE struct nv_value * +bld_setp(struct bld_context *bld, uint op, uint8_t cc, + struct nv_value *src0, struct nv_value *src1) +{ + struct nv_value *val = bld_insn_2(bld, op, src0, src1); + + val->reg.file = NV_FILE_PRED; + val->reg.size = 1; + val->insn->set_cond = cc & 0xf; + return val; +} + +static INLINE struct nv_value * +bld_cvt(struct bld_context *bld, uint8_t dt, uint8_t st, struct nv_value *src) +{ + struct nv_value *val = bld_insn_1(bld, NV_OP_CVT, src); + val->insn->ext.cvt.d = dt; + val->insn->ext.cvt.s = st; + return val; +} + +static void +bld_kil(struct bld_context *bld, struct nv_value *src) +{ + struct nv_instruction *nvi; + + src = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src, bld->zero); + + nvi = new_instruction(bld->pc, NV_OP_KIL); + nvi->fixed = 1; + + bld_src_predicate(bld, nvi, 0, src); +} + +static void +bld_flow(struct bld_context *bld, uint opcode, + struct nv_value *src, struct nv_basic_block *target, + boolean reconverge) +{ + struct nv_instruction *nvi; + + if (reconverge) + new_instruction(bld->pc, NV_OP_JOINAT)->fixed = 1; + + nvi = new_instruction(bld->pc, opcode); + nvi->target = target; + nvi->terminator = 1; + if (src) + bld_src_predicate(bld, nvi, 0, src); +} + +static ubyte +translate_setcc(unsigned opcode) +{ + switch (opcode) { + case TGSI_OPCODE_SLT: return NV_CC_LT; + case TGSI_OPCODE_SGE: return NV_CC_GE; + case TGSI_OPCODE_SEQ: return NV_CC_EQ; + case TGSI_OPCODE_SGT: return NV_CC_GT; + case TGSI_OPCODE_SLE: return NV_CC_LE; + case TGSI_OPCODE_SNE: return NV_CC_NE | NV_CC_U; + case TGSI_OPCODE_STR: return NV_CC_TR; + case TGSI_OPCODE_SFL: return NV_CC_FL; + + case TGSI_OPCODE_ISLT: return NV_CC_LT; + case TGSI_OPCODE_ISGE: return NV_CC_GE; + case TGSI_OPCODE_USEQ: return NV_CC_EQ; + case TGSI_OPCODE_USGE: return NV_CC_GE; + case TGSI_OPCODE_USLT: return NV_CC_LT; + case TGSI_OPCODE_USNE: return NV_CC_NE; + default: + assert(0); + return NV_CC_FL; + } +} + +static uint +translate_opcode(uint opcode) +{ + switch (opcode) { + case TGSI_OPCODE_ABS: return NV_OP_ABS_F32; + case TGSI_OPCODE_ADD: return NV_OP_ADD_F32; + case TGSI_OPCODE_SUB: return NV_OP_SUB_F32; + case TGSI_OPCODE_UADD: return NV_OP_ADD_B32; + case TGSI_OPCODE_AND: return NV_OP_AND; + case TGSI_OPCODE_EX2: return NV_OP_EX2; + case TGSI_OPCODE_CEIL: return NV_OP_CEIL; + case TGSI_OPCODE_FLR: return NV_OP_FLOOR; + case TGSI_OPCODE_TRUNC: return NV_OP_TRUNC; + case TGSI_OPCODE_COS: return NV_OP_COS; + case TGSI_OPCODE_SIN: return NV_OP_SIN; + case TGSI_OPCODE_DDX: return NV_OP_DFDX; + case TGSI_OPCODE_DDY: return NV_OP_DFDY; + case TGSI_OPCODE_F2I: + case TGSI_OPCODE_F2U: + case 
TGSI_OPCODE_I2F: + case TGSI_OPCODE_U2F: return NV_OP_CVT; + case TGSI_OPCODE_INEG: return NV_OP_NEG_S32; + case TGSI_OPCODE_LG2: return NV_OP_LG2; + case TGSI_OPCODE_ISHR: return NV_OP_SAR; + case TGSI_OPCODE_USHR: return NV_OP_SHR; + case TGSI_OPCODE_MAD: return NV_OP_MAD_F32; + case TGSI_OPCODE_MAX: return NV_OP_MAX_F32; + case TGSI_OPCODE_IMAX: return NV_OP_MAX_S32; + case TGSI_OPCODE_UMAX: return NV_OP_MAX_U32; + case TGSI_OPCODE_MIN: return NV_OP_MIN_F32; + case TGSI_OPCODE_IMIN: return NV_OP_MIN_S32; + case TGSI_OPCODE_UMIN: return NV_OP_MIN_U32; + case TGSI_OPCODE_MUL: return NV_OP_MUL_F32; + case TGSI_OPCODE_UMUL: return NV_OP_MUL_B32; + case TGSI_OPCODE_OR: return NV_OP_OR; + case TGSI_OPCODE_RCP: return NV_OP_RCP; + case TGSI_OPCODE_RSQ: return NV_OP_RSQ; + case TGSI_OPCODE_SAD: return NV_OP_SAD; + case TGSI_OPCODE_SHL: return NV_OP_SHL; + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SNE: return NV_OP_FSET_F32; + case TGSI_OPCODE_ISLT: + case TGSI_OPCODE_ISGE: return NV_OP_SET_S32; + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_USGE: + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_USNE: return NV_OP_SET_U32; + case TGSI_OPCODE_TEX: return NV_OP_TEX; + case TGSI_OPCODE_TXP: return NV_OP_TEX; + case TGSI_OPCODE_TXB: return NV_OP_TXB; + case TGSI_OPCODE_TXL: return NV_OP_TXL; + case TGSI_OPCODE_XOR: return NV_OP_XOR; + default: + return NV_OP_NOP; + } +} + +#if 0 +static ubyte +infer_src_type(unsigned opcode) +{ + switch (opcode) { + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_AND: + case TGSI_OPCODE_OR: + case TGSI_OPCODE_XOR: + case TGSI_OPCODE_SAD: + case TGSI_OPCODE_U2F: + case TGSI_OPCODE_UADD: + case TGSI_OPCODE_UDIV: + case TGSI_OPCODE_UMOD: + case TGSI_OPCODE_UMAD: + case TGSI_OPCODE_UMUL: + case TGSI_OPCODE_UMAX: + case TGSI_OPCODE_UMIN: + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_USGE: + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_USNE: + case TGSI_OPCODE_USHR: + return NV_TYPE_U32; + case TGSI_OPCODE_I2F: + case TGSI_OPCODE_IDIV: + case TGSI_OPCODE_IMAX: + case TGSI_OPCODE_IMIN: + case TGSI_OPCODE_INEG: + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_ISLT: + return NV_TYPE_S32; + default: + return NV_TYPE_F32; + } +} + +static ubyte +infer_dst_type(unsigned opcode) +{ + switch (opcode) { + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_F2U: + case TGSI_OPCODE_AND: + case TGSI_OPCODE_OR: + case TGSI_OPCODE_XOR: + case TGSI_OPCODE_SAD: + case TGSI_OPCODE_UADD: + case TGSI_OPCODE_UDIV: + case TGSI_OPCODE_UMOD: + case TGSI_OPCODE_UMAD: + case TGSI_OPCODE_UMUL: + case TGSI_OPCODE_UMAX: + case TGSI_OPCODE_UMIN: + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_USGE: + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_USNE: + case TGSI_OPCODE_USHR: + return NV_TYPE_U32; + case TGSI_OPCODE_F2I: + case TGSI_OPCODE_IDIV: + case TGSI_OPCODE_IMAX: + case TGSI_OPCODE_IMIN: + case TGSI_OPCODE_INEG: + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_ISLT: + return NV_TYPE_S32; + default: + return NV_TYPE_F32; + } +} +#endif + +static void +emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst, + unsigned chan, struct nv_value *res) +{ + const struct tgsi_full_dst_register *reg = &inst->Dst[0]; + struct nv_instruction *nvi; + struct nv_value *mem; + struct nv_value *ptr = NULL; + int idx; + + idx = reg->Register.Index; + assert(chan < 4); + + if (reg->Register.Indirect) + ptr = FETCH_ADDR(reg->Indirect.Index, + tgsi_util_get_src_register_swizzle(®->Indirect, 0)); + + 
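+   /* Apply the instruction's saturate mode before the result is stored. */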
switch (inst->Instruction.Saturate) { + case TGSI_SAT_NONE: + break; + case TGSI_SAT_ZERO_ONE: + res = bld_insn_1(bld, NV_OP_SAT, res); + break; + case TGSI_SAT_MINUS_PLUS_ONE: + res = bld_insn_2(bld, NV_OP_MAX_F32, res, bld_load_imm_f32(bld, -1.0f)); + res = bld_insn_2(bld, NV_OP_MIN_F32, res, bld_load_imm_f32(bld, 1.0f)); + break; + } + + switch (reg->Register.File) { + case TGSI_FILE_OUTPUT: + if (!res->insn) + res = bld_insn_1(bld, NV_OP_MOV, res); + + if (bld->pc->is_fragprog) { + assert(!ptr); + STORE_OUTP(idx, chan, res); + } else { + nvi = new_instruction(bld->pc, NV_OP_EXPORT); + mem = new_value(bld->pc, bld->ti->output_file, res->reg.size); + nv_reference(bld->pc, nvi, 0, mem); + nv_reference(bld->pc, nvi, 1, res); + if (!ptr) + mem->reg.address = bld->ti->output_loc[idx][chan]; + else + mem->reg.address = 0x80 + idx * 16 + chan * 4; + nvi->fixed = 1; + } + break; + case TGSI_FILE_TEMPORARY: + assert(idx < BLD_MAX_TEMPS); + if (!res->insn) + res = bld_insn_1(bld, NV_OP_MOV, res); + + assert(res->reg.file == NV_FILE_GPR); + assert(res->insn->bb = bld->pc->current_block); + + if (bld->ti->require_stores) + bld_lmem_store(bld, ptr, idx * 4 + chan, res); + else + STORE_TEMP(idx, chan, res); + break; + case TGSI_FILE_ADDRESS: + assert(idx < BLD_MAX_ADDRS); + STORE_ADDR(idx, chan, res); + break; + } +} + +static INLINE uint32_t +bld_is_output_written(struct bld_context *bld, int i, int c) +{ + if (c < 0) + return bld->outputs_written[i / 8] & (0xf << ((i * 4) % 32)); + return bld->outputs_written[i / 8] & (1 << ((i * 4 + c) % 32)); +} + +static void +bld_append_vp_ucp(struct bld_context *bld) +{ + struct nv_value *res[6]; + struct nv_value *ucp, *vtx, *out; + struct nv_instruction *insn; + int i, c; + + assert(bld->ti->prog->vp.num_ucps <= 6); + + for (c = 0; c < 4; ++c) { + vtx = bld_fetch_global(bld, &bld->ovs[bld->hpos_index][c]); + + for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) { + ucp = new_value(bld->pc, NV_FILE_MEM_C(15), 4); + ucp->reg.address = i * 16 + c * 4; + + if (c == 0) + res[i] = bld_insn_2(bld, NV_OP_MUL_F32, vtx, ucp); + else + res[i] = bld_insn_3(bld, NV_OP_MAD_F32, vtx, ucp, res[i]); + } + } + + for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) { + (out = new_value(bld->pc, NV_FILE_MEM_V, 4))->reg.address = 0x2c0 + i * 4; + (insn = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1; + nv_reference(bld->pc, insn, 0, out); + nv_reference(bld->pc, insn, 1, res[i]); + } +} + +static void +bld_export_fp_outputs(struct bld_context *bld) +{ + struct nv_value *vals[4]; + struct nv_instruction *nvi; + int i, c, n; + + for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) { + if (!bld_is_output_written(bld, i, -1)) + continue; + for (n = 0, c = 0; c < 4; ++c) { + if (!bld_is_output_written(bld, i, c)) + continue; + vals[n] = bld_fetch_global(bld, &bld->ovs[i][c]); + assert(vals[n]); + vals[n] = bld_insn_1(bld, NV_OP_MOV, vals[n]); + vals[n++]->reg.id = bld->ti->output_loc[i][c]; + } + assert(n); + + (nvi = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1; + for (c = 0; c < n; ++c) + nv_reference(bld->pc, nvi, c, vals[c]); + } +} + +static void +bld_new_block(struct bld_context *bld, struct nv_basic_block *b) +{ + int i, c; + + bld->pc->current_block = b; + + for (i = 0; i < 4; ++i) + bld->saved_addr[i][0] = NULL; + for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) + for (c = 0; c < 4; ++c) + bld->saved_inputs[i][c] = NULL; + + bld->out_kind = CFG_EDGE_FORWARD; +} + +static struct nv_value * +bld_get_saved_input(struct bld_context *bld, unsigned i, unsigned c) +{ + if 
(bld->saved_inputs[i][c]) + return bld->saved_inputs[i][c]; + return NULL; +} + +static struct nv_value * +bld_interp(struct bld_context *bld, unsigned mode, struct nv_value *val) +{ + unsigned cent = mode & NVC0_INTERP_CENTROID; + + mode &= ~NVC0_INTERP_CENTROID; + + if (val->reg.address == 0x3fc) { + /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */ + val = bld_insn_1(bld, NV_OP_LINTERP, val); + val->insn->flat = 1; + val = bld_insn_2(bld, NV_OP_SHL, val, bld_imm_u32(bld, 31)); + val = bld_insn_2(bld, NV_OP_XOR, val, bld_imm_f32(bld, -1.0f)); + return val; + } else + if (mode == NVC0_INTERP_PERSPECTIVE) { + val = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frag_coord[3]); + } else { + val = bld_insn_1(bld, NV_OP_LINTERP, val); + } + + val->insn->flat = mode == NVC0_INTERP_FLAT ? 1 : 0; + val->insn->centroid = cent ? 1 : 0; + return val; +} + +static struct nv_value * +emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, + const unsigned s, const unsigned chan) +{ + const struct tgsi_full_src_register *src = &insn->Src[s]; + struct nv_value *res = NULL; + struct nv_value *ptr = NULL; + int idx, ind_idx, dim_idx; + unsigned swz, ind_swz, sgn; + + idx = src->Register.Index; + swz = tgsi_util_get_full_src_register_swizzle(src, chan); + + if (src->Register.Indirect) { + ind_idx = src->Indirect.Index; + ind_swz = tgsi_util_get_src_register_swizzle(&src->Indirect, 0); + + ptr = FETCH_ADDR(ind_idx, ind_swz); + } + + if (src->Register.Dimension) + dim_idx = src->Dimension.Index; + else + dim_idx = 0; + + switch (src->Register.File) { + case TGSI_FILE_CONSTANT: + assert(dim_idx < 14); + res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), 4); + res->reg.address = idx * 16 + swz * 4; + res = bld_insn_1(bld, NV_OP_LD, res); + if (ptr) + bld_src_pointer(bld, res->insn, 1, ptr); + break; + case TGSI_FILE_IMMEDIATE: /* XXX: type for MOV TEMP[0], -IMM[0] */ + assert(idx < bld->ti->immd32_nr); + res = bld_load_imm_u32(bld, bld->ti->immd32[idx * 4 + swz]); + break; + case TGSI_FILE_INPUT: + assert(!src->Register.Dimension); + if (!ptr) { + res = bld_get_saved_input(bld, idx, swz); + if (res) + return res; + } + res = new_value(bld->pc, bld->ti->input_file, 4); + if (ptr) + res->reg.address = 0x80 + idx * 16 + swz * 4; + else + res->reg.address = bld->ti->input_loc[idx][swz]; + + if (bld->pc->is_fragprog) + res = bld_interp(bld, bld->ti->interp_mode[idx], res); + else + res = bld_insn_1(bld, NV_OP_VFETCH, res); + + if (ptr) + bld_src_pointer(bld, res->insn, res->insn->src[1] ? 
2 : 1, ptr); + else + bld->saved_inputs[idx][swz] = res; + break; + case TGSI_FILE_TEMPORARY: + if (bld->ti->require_stores) + res = bld_lmem_load(bld, ptr, idx * 4 + swz); + else + res = bld_fetch_global(bld, &bld->tvs[idx][swz]); + break; + case TGSI_FILE_ADDRESS: + res = bld_fetch_global(bld, &bld->avs[idx][swz]); + break; + case TGSI_FILE_PREDICATE: + res = bld_fetch_global(bld, &bld->pvs[idx][swz]); + break; + default: + NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File); + abort(); + break; + } + if (!res) + return bld_undef(bld, NV_FILE_GPR); + + sgn = tgsi_util_get_full_src_register_sign_mode(src, chan); + + switch (sgn) { + case TGSI_UTIL_SIGN_KEEP: + break; + case TGSI_UTIL_SIGN_CLEAR: + res = bld_insn_1(bld, NV_OP_ABS_F32, res); + break; + case TGSI_UTIL_SIGN_TOGGLE: + res = bld_insn_1(bld, NV_OP_NEG_F32, res); + break; + case TGSI_UTIL_SIGN_SET: + res = bld_insn_1(bld, NV_OP_ABS_F32, res); + res = bld_insn_1(bld, NV_OP_NEG_F32, res); + break; + default: + NOUVEAU_ERR("illegal/unhandled src reg sign mode\n"); + abort(); + break; + } + + return res; +} + +static void +bld_lit(struct bld_context *bld, struct nv_value *dst0[4], + const struct tgsi_full_instruction *insn) +{ + struct nv_value *val0 = NULL; + unsigned mask = insn->Dst[0].Register.WriteMask; + + if (mask & ((1 << 0) | (1 << 3))) + dst0[3] = dst0[0] = bld_load_imm_f32(bld, 1.0f); + + if (mask & (3 << 1)) { + val0 = bld_insn_2(bld, NV_OP_MAX, emit_fetch(bld, insn, 0, 0), bld->zero); + if (mask & (1 << 1)) + dst0[1] = val0; + } + + if (mask & (1 << 2)) { + struct nv_value *val1, *val3, *src1, *src3, *pred; + struct nv_value *pos128 = bld_load_imm_f32(bld, 127.999999f); + struct nv_value *neg128 = bld_load_imm_f32(bld, -127.999999f); + + src1 = emit_fetch(bld, insn, 0, 1); + src3 = emit_fetch(bld, insn, 0, 3); + + pred = bld_setp(bld, NV_OP_SET_F32, NV_CC_LE, val0, bld->zero); + + val1 = bld_insn_2(bld, NV_OP_MAX_F32, src1, bld->zero); + val3 = bld_insn_2(bld, NV_OP_MAX_F32, src3, neg128); + val3 = bld_insn_2(bld, NV_OP_MIN_F32, val3, pos128); + val3 = bld_pow(bld, val1, val3); + + dst0[2] = bld_insn_1(bld, NV_OP_MOV, bld->zero); + bld_src_predicate(bld, dst0[2]->insn, 1, pred); + + dst0[2] = bld_insn_2(bld, NV_OP_SELECT, val3, dst0[2]); + } +} + +static INLINE void +describe_texture_target(unsigned target, int *dim, + int *array, int *cube, int *shadow) +{ + *array = *cube = *shadow = 0; + + switch (target) { + case TGSI_TEXTURE_1D: + *dim = 1; + break; + case TGSI_TEXTURE_SHADOW1D: + *dim = *shadow = 1; + break; + case TGSI_TEXTURE_UNKNOWN: + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + *dim = 2; + break; + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + *dim = 2; + *shadow = 1; + break; + case TGSI_TEXTURE_3D: + *dim = 3; + break; + case TGSI_TEXTURE_CUBE: + *dim = 2; + *cube = 1; + break; + /* + case TGSI_TEXTURE_CUBE_ARRAY: + *dim = 2; + *cube = *array = 1; + break; + case TGSI_TEXTURE_1D_ARRAY: + *dim = *array = 1; + break; + case TGSI_TEXTURE_2D_ARRAY: + *dim = 2; + *array = 1; + break; + case TGSI_TEXTURE_SHADOW1D_ARRAY: + *dim = *array = *shadow = 1; + break; + case TGSI_TEXTURE_SHADOW2D_ARRAY: + *dim = 2; + *array = *shadow = 1; + break; + case TGSI_TEXTURE_CUBE_ARRAY: + *dim = 2; + *array = *cube = 1; + break; + */ + default: + assert(0); + break; + } +} + +static struct nv_value * +bld_clone(struct bld_context *bld, struct nv_instruction *nvi) +{ + struct nv_instruction *dupi = new_instruction(bld->pc, nvi->opcode); + struct nv_instruction *next, *prev; + int c; + + next 
= dupi->next; + prev = dupi->prev; + + *dupi = *nvi; + + dupi->next = next; + dupi->prev = prev; + + for (c = 0; c < 5 && nvi->def[c]; ++c) + bld_def(dupi, c, new_value_like(bld->pc, nvi->def[c])); + + for (c = 0; c < 6 && nvi->src[c]; ++c) { + dupi->src[c] = NULL; + nv_reference(bld->pc, dupi, c, nvi->src[c]->value); + } + + return dupi->def[0]; +} + +/* NOTE: proj(t0) = (t0 / w) / (tc3 / w) = tc0 / tc2 handled by optimizer */ +static void +load_proj_tex_coords(struct bld_context *bld, + struct nv_value *t[4], int dim, int shadow, + const struct tgsi_full_instruction *insn) +{ + int c; + unsigned mask = (1 << dim) - 1; + + if (shadow) + mask |= 4; /* depth comparison value */ + + t[3] = emit_fetch(bld, insn, 0, 3); + if (t[3]->insn->opcode == NV_OP_PINTERP) { + t[3] = bld_clone(bld, t[3]->insn); + t[3]->insn->opcode = NV_OP_LINTERP; + nv_reference(bld->pc, t[3]->insn, 1, NULL); + } + t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]); + + for (c = 0; c < 4; ++c) { + if (!(mask & (1 << c))) + continue; + t[c] = emit_fetch(bld, insn, 0, c); + + if (t[c]->insn->opcode != NV_OP_PINTERP) + continue; + mask &= ~(1 << c); + + t[c] = bld_clone(bld, t[c]->insn); + nv_reference(bld->pc, t[c]->insn, 1, t[3]); + } + if (mask == 0) + return; + + t[3] = emit_fetch(bld, insn, 0, 3); + t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]); + + for (c = 0; c < 4; ++c) + if (mask & (1 << c)) + t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], t[3]); +} + +/* For a quad of threads / top left, top right, bottom left, bottom right + * pixels, do a different operation, and take src0 from a specific thread. + */ +#define QOP_ADD 0 +#define QOP_SUBR 1 +#define QOP_SUB 2 +#define QOP_MOV1 3 + +#define QOP(a, b, c, d) \ + ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6)) + +static INLINE struct nv_value * +bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane, + struct nv_value *src1, boolean wp) +{ + struct nv_value *val = bld_insn_2(bld, NV_OP_QUADOP, src0, src1); + val->insn->lanes = lane; + val->insn->quadop = qop; + if (wp) { + assert(!"quadop predicate write"); + } + return val; +} + +/* order of TGSI operands: x y z layer shadow lod/bias */ +/* order of native operands: layer x y z | lod/bias shadow */ +static struct nv_instruction * +emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc, + struct nv_value *dst[4], struct nv_value *arg[4], + int dim, int array, int cube, int shadow) +{ + struct nv_value *src[4]; + struct nv_instruction *nvi, *bnd; + int c; + int s = 0; + boolean lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL; + + if (array) + arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]); + + /* ensure that all inputs reside in a GPR */ + for (c = 0; c < dim + array + cube + shadow; ++c) + (src[c] = bld_insn_1(bld, NV_OP_MOV, arg[c]))->insn->fixed = 1; + + /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */ + + bnd = new_instruction(bld->pc, NV_OP_BIND); + if (array) { + src[s] = new_value(bld->pc, NV_FILE_GPR, 4); + bld_def(bnd, s, src[s]); + nv_reference(bld->pc, bnd, s++, arg[dim + cube]); + } + for (c = 0; c < dim + cube; ++c, ++s) { + src[s] = bld_def(bnd, s, new_value(bld->pc, NV_FILE_GPR, 4)); + nv_reference(bld->pc, bnd, s, arg[c]); + } + + if (shadow || lodbias) { + bnd = new_instruction(bld->pc, NV_OP_BIND); + + if (lodbias) { + src[s] = new_value(bld->pc, NV_FILE_GPR, 4); + bld_def(bnd, 0, src[s++]); + nv_reference(bld->pc, bnd, 0, arg[dim + cube + array + shadow]); + } + if (shadow) { + src[s] = new_value(bld->pc, NV_FILE_GPR, 4); + 
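+         /* the shadow compare value is bound after the lod/bias value */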
bld_def(bnd, lodbias, src[s++]); + nv_reference(bld->pc, bnd, lodbias, arg[dim + cube + array]); + } + } + + nvi = new_instruction(bld->pc, opcode); + for (c = 0; c < 4; ++c) + dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, 4)); + for (c = 0; c < s; ++c) + nv_reference(bld->pc, nvi, c, src[c]); + + nvi->ext.tex.t = tic; + nvi->ext.tex.s = tsc; + nvi->tex_mask = 0xf; + nvi->tex_cube = cube; + nvi->tex_dim = dim; + nvi->tex_cube = cube; + nvi->tex_shadow = shadow; + nvi->tex_live = 0; + + return nvi; +} + +/* +static boolean +bld_is_constant(struct nv_value *val) +{ + if (val->reg.file == NV_FILE_IMM) + return TRUE; + return val->insn && nvCG_find_constant(val->insn->src[0]); +} +*/ + +static void +bld_tex(struct bld_context *bld, struct nv_value *dst0[4], + const struct tgsi_full_instruction *insn) +{ + struct nv_value *t[4], *s[3]; + uint opcode = translate_opcode(insn->Instruction.Opcode); + int c, dim, array, cube, shadow; + const int lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL; + const int tic = insn->Src[1].Register.Index; + const int tsc = tic; + + describe_texture_target(insn->Texture.Texture, &dim, &array, &cube, &shadow); + + assert(dim + array + shadow + lodbias <= 5); + + if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP) + load_proj_tex_coords(bld, t, dim, shadow, insn); + else { + for (c = 0; c < dim + cube + array; ++c) + t[c] = emit_fetch(bld, insn, 0, c); + if (shadow) + t[c] = emit_fetch(bld, insn, 0, MAX2(c, 2)); + } + + if (cube) { + for (c = 0; c < 3; ++c) + s[c] = bld_insn_1(bld, NV_OP_ABS_F32, t[c]); + + s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[1]); + s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[2]); + s[0] = bld_insn_1(bld, NV_OP_RCP, s[0]); + + for (c = 0; c < 3; ++c) + t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], s[0]); + } + + if (lodbias) + t[dim + cube + array + shadow] = emit_fetch(bld, insn, 0, 3); + + emit_tex(bld, opcode, tic, tsc, dst0, t, dim, array, cube, shadow); +} + +static INLINE struct nv_value * +bld_dot(struct bld_context *bld, const struct tgsi_full_instruction *insn, + int n) +{ + struct nv_value *dotp, *src0, *src1; + int c; + + src0 = emit_fetch(bld, insn, 0, 0); + src1 = emit_fetch(bld, insn, 1, 0); + dotp = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1); + + for (c = 1; c < n; ++c) { + src0 = emit_fetch(bld, insn, 0, c); + src1 = emit_fetch(bld, insn, 1, c); + dotp = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dotp); + } + return dotp; +} + +#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \ + for (chan = 0; chan < 4; ++chan) \ + if ((inst)->Dst[0].Register.WriteMask & (1 << chan)) + +static void +bld_instruction(struct bld_context *bld, + const struct tgsi_full_instruction *insn) +{ + struct nv_value *src0; + struct nv_value *src1; + struct nv_value *src2; + struct nv_value *dst0[4] = { NULL }; + struct nv_value *temp; + int c; + uint opcode = translate_opcode(insn->Instruction.Opcode); + uint8_t mask = insn->Dst[0].Register.WriteMask; + +#ifdef NOUVEAU_DEBUG + debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1); +#endif + + switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_MAX: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_MUL: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + src0 = emit_fetch(bld, insn, 0, c); + src1 = emit_fetch(bld, insn, 1, c); + dst0[c] = bld_insn_2(bld, opcode, src0, src1); + } + break; + case TGSI_OPCODE_ARL: + src1 = bld_imm_u32(bld, 4); + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + src0 = emit_fetch(bld, insn, 0, c); + src0 = bld_insn_1(bld, NV_OP_FLOOR, 
src0); + src0->insn->ext.cvt.d = NV_TYPE_S32; + src0->insn->ext.cvt.s = NV_TYPE_F32; + dst0[c] = bld_insn_2(bld, NV_OP_SHL, src0, src1); + } + break; + case TGSI_OPCODE_CMP: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + src0 = emit_fetch(bld, insn, 0, c); + src0 = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src0, bld->zero); + src1 = emit_fetch(bld, insn, 1, c); + src2 = emit_fetch(bld, insn, 2, c); + dst0[c] = bld_insn_3(bld, NV_OP_SELP, src1, src2, src0); + } + break; + case TGSI_OPCODE_COS: + case TGSI_OPCODE_SIN: + src0 = emit_fetch(bld, insn, 0, 0); + temp = bld_insn_1(bld, NV_OP_PRESIN, src0); + if (insn->Dst[0].Register.WriteMask & 7) + temp = bld_insn_1(bld, opcode, temp); + for (c = 0; c < 3; ++c) + if (insn->Dst[0].Register.WriteMask & (1 << c)) + dst0[c] = temp; + if (!(insn->Dst[0].Register.WriteMask & (1 << 3))) + break; + src0 = emit_fetch(bld, insn, 0, 3); + temp = bld_insn_1(bld, NV_OP_PRESIN, src0); + dst0[3] = bld_insn_1(bld, opcode, temp); + break; + case TGSI_OPCODE_DP2: + temp = bld_dot(bld, insn, 2); + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) + dst0[c] = temp; + break; + case TGSI_OPCODE_DP3: + temp = bld_dot(bld, insn, 3); + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) + dst0[c] = temp; + break; + case TGSI_OPCODE_DP4: + temp = bld_dot(bld, insn, 4); + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) + dst0[c] = temp; + break; + case TGSI_OPCODE_DPH: + src0 = bld_dot(bld, insn, 3); + src1 = emit_fetch(bld, insn, 1, 3); + temp = bld_insn_2(bld, NV_OP_ADD_F32, src0, src1); + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) + dst0[c] = temp; + break; + case TGSI_OPCODE_DST: + if (insn->Dst[0].Register.WriteMask & 1) + dst0[0] = bld_imm_f32(bld, 1.0f); + if (insn->Dst[0].Register.WriteMask & 2) { + src0 = emit_fetch(bld, insn, 0, 1); + src1 = emit_fetch(bld, insn, 1, 1); + dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1); + } + if (insn->Dst[0].Register.WriteMask & 4) + dst0[2] = emit_fetch(bld, insn, 0, 2); + if (insn->Dst[0].Register.WriteMask & 8) + dst0[3] = emit_fetch(bld, insn, 1, 3); + break; + case TGSI_OPCODE_EXP: + src0 = emit_fetch(bld, insn, 0, 0); + temp = bld_insn_1(bld, NV_OP_FLOOR, src0); + + if (insn->Dst[0].Register.WriteMask & 2) + dst0[1] = bld_insn_2(bld, NV_OP_SUB_F32, src0, temp); + if (insn->Dst[0].Register.WriteMask & 1) { + temp = bld_insn_1(bld, NV_OP_PREEX2, temp); + dst0[0] = bld_insn_1(bld, NV_OP_EX2, temp); + } + if (insn->Dst[0].Register.WriteMask & 4) { + temp = bld_insn_1(bld, NV_OP_PREEX2, src0); + dst0[2] = bld_insn_1(bld, NV_OP_EX2, temp); + } + if (insn->Dst[0].Register.WriteMask & 8) + dst0[3] = bld_imm_f32(bld, 1.0f); + break; + case TGSI_OPCODE_EX2: + src0 = emit_fetch(bld, insn, 0, 0); + temp = bld_insn_1(bld, NV_OP_PREEX2, src0); + temp = bld_insn_1(bld, NV_OP_EX2, temp); + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) + dst0[c] = temp; + break; + case TGSI_OPCODE_FRC: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + src0 = emit_fetch(bld, insn, 0, c); + dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0); + dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, dst0[c]); + } + break; + case TGSI_OPCODE_KIL: + for (c = 0; c < 4; ++c) + bld_kil(bld, emit_fetch(bld, insn, 0, c)); + break; + case TGSI_OPCODE_KILP: + (new_instruction(bld->pc, NV_OP_KIL))->fixed = 1; + break; + case TGSI_OPCODE_IF: + { + struct nv_basic_block *b = new_basic_block(bld->pc); + + assert(bld->cond_lvl < BLD_MAX_COND_NESTING); + + nvc0_bblock_attach(bld->pc->current_block, b, CFG_EDGE_FORWARD); + + bld->join_bb[bld->cond_lvl] = bld->pc->current_block; + bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; + + 
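+      /* branch if the condition is zero; the branch target is not known
+       * yet and is patched in by ELSE/ENDIF
+       */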
src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_EQ, + emit_fetch(bld, insn, 0, 0), bld->zero); + + bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0)); + + ++bld->cond_lvl; + bld_new_block(bld, b); + } + break; + case TGSI_OPCODE_ELSE: + { + struct nv_basic_block *b = new_basic_block(bld->pc); + + --bld->cond_lvl; + nvc0_bblock_attach(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); + + bld->cond_bb[bld->cond_lvl]->exit->target = b; + bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; + + new_instruction(bld->pc, NV_OP_BRA)->terminator = 1; + + ++bld->cond_lvl; + bld_new_block(bld, b); + } + break; + case TGSI_OPCODE_ENDIF: + { + struct nv_basic_block *b = new_basic_block(bld->pc); + + --bld->cond_lvl; + nvc0_bblock_attach(bld->pc->current_block, b, bld->out_kind); + nvc0_bblock_attach(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); + + bld->cond_bb[bld->cond_lvl]->exit->target = b; + + bld_new_block(bld, b); + + if (!bld->cond_lvl && bld->join_bb[bld->cond_lvl]) { + bld->join_bb[bld->cond_lvl]->exit->prev->target = b; + new_instruction(bld->pc, NV_OP_JOIN)->join = 1; + } + } + break; + case TGSI_OPCODE_BGNLOOP: + { + struct nv_basic_block *bl = new_basic_block(bld->pc); + struct nv_basic_block *bb = new_basic_block(bld->pc); + + assert(bld->loop_lvl < BLD_MAX_LOOP_NESTING); + + bld->loop_bb[bld->loop_lvl] = bl; + bld->brkt_bb[bld->loop_lvl] = bb; + + nvc0_bblock_attach(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER); + + bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]); + + if (bld->loop_lvl == bld->pc->loop_nesting_bound) + bld->pc->loop_nesting_bound++; + + bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl); + bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl); + bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl); + } + break; + case TGSI_OPCODE_BRK: + { + struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1]; + + bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + + if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */ + nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE); + + bld->out_kind = CFG_EDGE_FAKE; + } + break; + case TGSI_OPCODE_CONT: + { + struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; + + bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + + nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); + + if ((bb = bld->join_bb[bld->cond_lvl - 1])) { + bld->join_bb[bld->cond_lvl - 1] = NULL; + nvc0_insn_delete(bb->exit->prev); + } + bld->out_kind = CFG_EDGE_FAKE; + } + break; + case TGSI_OPCODE_ENDLOOP: + { + struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; + + bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + + nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); + + bld_loop_end(bld, bb); /* replace loop-side operand of the phis */ + + bld_new_block(bld, bld->brkt_bb[--bld->loop_lvl]); + } + break; + case TGSI_OPCODE_ABS: + case TGSI_OPCODE_CEIL: + case TGSI_OPCODE_FLR: + case TGSI_OPCODE_TRUNC: + case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDY: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + src0 = emit_fetch(bld, insn, 0, c); + dst0[c] = bld_insn_1(bld, opcode, src0); + } + break; + case TGSI_OPCODE_LIT: + bld_lit(bld, dst0, insn); + break; + case TGSI_OPCODE_LRP: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + src0 = emit_fetch(bld, insn, 0, c); + src1 = emit_fetch(bld, insn, 1, c); + src2 = emit_fetch(bld, insn, 2, c); + dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2); + dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, dst0[c], src0, src2); + } + break; + case 
TGSI_OPCODE_MOV: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) + dst0[c] = emit_fetch(bld, insn, 0, c); + break; + case TGSI_OPCODE_MAD: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + src0 = emit_fetch(bld, insn, 0, c); + src1 = emit_fetch(bld, insn, 1, c); + src2 = emit_fetch(bld, insn, 2, c); + dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2); + } + break; + case TGSI_OPCODE_POW: + src0 = emit_fetch(bld, insn, 0, 0); + src1 = emit_fetch(bld, insn, 1, 0); + temp = bld_pow(bld, src0, src1); + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) + dst0[c] = temp; + break; + case TGSI_OPCODE_LOG: + src0 = emit_fetch(bld, insn, 0, 0); + src0 = bld_insn_1(bld, NV_OP_ABS_F32, src0); + temp = bld_insn_1(bld, NV_OP_LG2, src0); + dst0[2] = temp; + if (insn->Dst[0].Register.WriteMask & 3) { + temp = bld_insn_1(bld, NV_OP_FLOOR, temp); + dst0[0] = temp; + } + if (insn->Dst[0].Register.WriteMask & 2) { + temp = bld_insn_1(bld, NV_OP_PREEX2, temp); + temp = bld_insn_1(bld, NV_OP_EX2, temp); + temp = bld_insn_1(bld, NV_OP_RCP, temp); + dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, temp); + } + if (insn->Dst[0].Register.WriteMask & 8) + dst0[3] = bld_imm_f32(bld, 1.0f); + break; + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_LG2: + src0 = emit_fetch(bld, insn, 0, 0); + temp = bld_insn_1(bld, opcode, src0); + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) + dst0[c] = temp; + break; + case TGSI_OPCODE_RSQ: + src0 = emit_fetch(bld, insn, 0, 0); + temp = bld_insn_1(bld, NV_OP_ABS_F32, src0); + temp = bld_insn_1(bld, NV_OP_RSQ, temp); + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) + dst0[c] = temp; + break; + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SNE: + case TGSI_OPCODE_ISLT: + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_USGE: + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_USNE: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + src0 = emit_fetch(bld, insn, 0, c); + src1 = emit_fetch(bld, insn, 1, c); + dst0[c] = bld_insn_2(bld, opcode, src0, src1); + dst0[c]->insn->set_cond = translate_setcc(insn->Instruction.Opcode); + } + break; + case TGSI_OPCODE_SCS: + if (insn->Dst[0].Register.WriteMask & 0x3) { + src0 = emit_fetch(bld, insn, 0, 0); + temp = bld_insn_1(bld, NV_OP_PRESIN, src0); + if (insn->Dst[0].Register.WriteMask & 0x1) + dst0[0] = bld_insn_1(bld, NV_OP_COS, temp); + if (insn->Dst[0].Register.WriteMask & 0x2) + dst0[1] = bld_insn_1(bld, NV_OP_SIN, temp); + } + if (insn->Dst[0].Register.WriteMask & 0x4) + dst0[2] = bld_imm_f32(bld, 0.0f); + if (insn->Dst[0].Register.WriteMask & 0x8) + dst0[3] = bld_imm_f32(bld, 1.0f); + break; + case TGSI_OPCODE_SSG: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { /* XXX: set lt, set gt, sub */ + src0 = emit_fetch(bld, insn, 0, c); + src1 = bld_setp(bld, NV_OP_SET_F32, NV_CC_EQ, src0, bld->zero); + temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000)); + temp = bld_insn_2(bld, NV_OP_OR, temp, bld_imm_f32(bld, 1.0f)); + dst0[c] = bld_insn_1(bld, NV_OP_MOV, temp); + bld_src_predicate(bld, dst0[c]->insn, 1, src1); + } + break; + case TGSI_OPCODE_SUB: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + src0 = emit_fetch(bld, insn, 0, c); + src1 = emit_fetch(bld, insn, 1, c); + dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, src1); + } + break; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXP: + bld_tex(bld, dst0, insn); + break; + case TGSI_OPCODE_XPD: + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { + if (c == 3) { + dst0[3] = bld_imm_f32(bld, 1.0f); + 
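+            /* nothing more to do for the w component */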
break; + } + src0 = emit_fetch(bld, insn, 1, (c + 1) % 3); + src1 = emit_fetch(bld, insn, 0, (c + 2) % 3); + dst0[c] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1); + + src0 = emit_fetch(bld, insn, 0, (c + 1) % 3); + src1 = emit_fetch(bld, insn, 1, (c + 2) % 3); + dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dst0[c]); + + dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG; + } + break; + case TGSI_OPCODE_RET: + (new_instruction(bld->pc, NV_OP_RET))->fixed = 1; + break; + case TGSI_OPCODE_END: + /* VP outputs are exported in-place as scalars, optimization later */ + if (bld->pc->is_fragprog) + bld_export_fp_outputs(bld); + if (bld->ti->append_ucp) + bld_append_vp_ucp(bld); + return; + default: + NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode); + abort(); + return; + } + + if (insn->Dst[0].Register.File == TGSI_FILE_OUTPUT && + !bld->pc->is_fragprog) { + struct nv_instruction *mi = NULL; + uint size; + + if (bld->ti->append_ucp) { + if (bld->ti->output_loc[insn->Dst[0].Register.Index][0] == 0x70) { + bld->hpos_index = insn->Dst[0].Register.Index; + for (c = 0; c < 4; ++c) + if (mask & (1 << c)) + STORE_OUTP(insn->Dst[0].Register.Index, c, dst0[c]); + } + } + + for (c = 0; c < 4; ++c) + if ((mask & (1 << c)) && + ((dst0[c]->reg.file == NV_FILE_IMM) || + (dst0[c]->reg.id == 63 && dst0[c]->reg.file == NV_FILE_GPR))) + dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]); + + c = 0; + if ((mask & 0x3) == 0x3) { + mask &= ~0x3; + size = 8; + mi = bld_insn_2(bld, NV_OP_BIND, dst0[0], dst0[1])->insn; + } + if ((mask & 0xc) == 0xc) { + mask &= ~0xc; + if (mi) { + size = 16; + nv_reference(bld->pc, mi, 2, dst0[2]); + nv_reference(bld->pc, mi, 3, dst0[3]); + } else { + c = 2; + size = 8; + mi = bld_insn_2(bld, NV_OP_BIND, dst0[2], dst0[3])->insn; + } + } else + if (mi && (mask & 0x4)) { + size = 12; + mask &= ~0x4; + nv_reference(bld->pc, mi, 2, dst0[2]); + } + + if (mi) { + struct nv_instruction *ex = new_instruction(bld->pc, NV_OP_EXPORT); + int s; + + nv_reference(bld->pc, ex, 0, new_value(bld->pc, NV_FILE_MEM_V, 4)); + nv_reference(bld->pc, ex, 1, mi->def[0]); + + for (s = 1; s < size / 4; ++s) { + bld_def(mi, s, new_value(bld->pc, NV_FILE_GPR, 4)); + nv_reference(bld->pc, ex, s + 1, mi->def[s]); + } + + ex->fixed = 1; + ex->src[0]->value->reg.size = size; + ex->src[0]->value->reg.address = + bld->ti->output_loc[insn->Dst[0].Register.Index][c]; + } + } + + for (c = 0; c < 4; ++c) + if (mask & (1 << c)) + emit_store(bld, insn, c, dst0[c]); +} + +static INLINE void +bld_free_registers(struct bld_register *base, int n) +{ + int i, c; + + for (i = 0; i < n; ++i) + for (c = 0; c < 4; ++c) + util_dynarray_fini(&base[i * 4 + c].vals); +} + +int +nvc0_tgsi_to_nc(struct nv_pc *pc, struct nvc0_translation_info *ti) +{ + struct bld_context *bld = CALLOC_STRUCT(bld_context); + unsigned ip; + + pc->root[0] = pc->current_block = new_basic_block(pc); + + bld->pc = pc; + bld->ti = ti; + + pc->loop_nesting_bound = 1; + + bld->zero = new_value(pc, NV_FILE_GPR, 4); + bld->zero->reg.id = 63; + + if (pc->is_fragprog) { + struct nv_value *mem = new_value(pc, NV_FILE_MEM_V, 4); + mem->reg.address = 0x7c; + + bld->frag_coord[3] = bld_insn_1(bld, NV_OP_LINTERP, mem); + bld->frag_coord[3] = bld_insn_1(bld, NV_OP_RCP, bld->frag_coord[3]); + } + + for (ip = 0; ip < ti->num_insns; ++ip) + bld_instruction(bld, &ti->insns[ip]); + + bld_free_registers(&bld->tvs[0][0], BLD_MAX_TEMPS); + bld_free_registers(&bld->avs[0][0], BLD_MAX_ADDRS); + bld_free_registers(&bld->pvs[0][0], BLD_MAX_PREDS); + 
bld_free_registers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS); + + FREE(bld); + return 0; +} + +/* If a variable is assigned in a loop, replace all references to the value + * from outside the loop with a phi value. + */ +static void +bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b, + struct nv_value *old_val, + struct nv_value *new_val) +{ + struct nv_instruction *nvi; + + for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = nvi->next) { + int s; + for (s = 0; s < 6 && nvi->src[s]; ++s) + if (nvi->src[s]->value == old_val) + nv_reference(pc, nvi, s, new_val); + } + + b->pass_seq = pc->pass_seq; + + if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq) + bld_replace_value(pc, b->out[0], old_val, new_val); + + if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq) + bld_replace_value(pc, b->out[1], old_val, new_val); +} diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c new file mode 100644 index 00000000000..286b382f58e --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_transfer.c @@ -0,0 +1,381 @@ + +#include "util/u_format.h" + +#include "nvc0_context.h" +#include "nvc0_transfer.h" + +#include "nv50_defs.xml.h" + +struct nvc0_transfer { + struct pipe_transfer base; + struct nvc0_m2mf_rect rect[2]; + uint32_t nblocksx; + uint32_t nblocksy; +}; + +static void +nvc0_m2mf_transfer_rect(struct pipe_screen *pscreen, + const struct nvc0_m2mf_rect *dst, + const struct nvc0_m2mf_rect *src, + uint32_t nblocksx, uint32_t nblocksy) +{ + struct nouveau_channel *chan = nouveau_screen(pscreen)->channel; + const int cpp = dst->cpp; + uint32_t src_ofst = src->base; + uint32_t dst_ofst = dst->base; + uint32_t height = nblocksy; + uint32_t sy = src->y; + uint32_t dy = dst->y; + uint32_t exec = (1 << 20); + + assert(dst->cpp == src->cpp); + + if (nouveau_bo_tile_layout(src->bo)) { + BEGIN_RING(chan, RING_MF(TILING_MODE_IN), 5); + OUT_RING (chan, src->tile_mode); + OUT_RING (chan, src->width * cpp); + OUT_RING (chan, src->height); + OUT_RING (chan, src->depth); + OUT_RING (chan, src->z); + } else { + src_ofst += src->y * src->pitch + src->x * cpp; + + BEGIN_RING(chan, RING_MF(PITCH_IN), 1); + OUT_RING (chan, src->width * cpp); + + exec |= NVC0_M2MF_EXEC_LINEAR_IN; + } + + if (nouveau_bo_tile_layout(dst->bo)) { + BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5); + OUT_RING (chan, dst->tile_mode); + OUT_RING (chan, dst->width * cpp); + OUT_RING (chan, dst->height); + OUT_RING (chan, dst->depth); + OUT_RING (chan, dst->z); + } else { + dst_ofst += dst->y * dst->pitch + dst->x * cpp; + + BEGIN_RING(chan, RING_MF(PITCH_OUT), 1); + OUT_RING (chan, dst->width * cpp); + + exec |= NVC0_M2MF_EXEC_LINEAR_OUT; + } + + while (height) { + int line_count = height > 2047 ? 
2047 : height; + + MARK_RING (chan, 17, 4); + + BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2); + OUT_RELOCh(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD); + OUT_RELOCl(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD); + + BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); + OUT_RELOCh(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR); + OUT_RELOCl(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR); + + if (!(exec & NVC0_M2MF_EXEC_LINEAR_IN)) { + BEGIN_RING(chan, RING_MF(TILING_POSITION_IN_X), 2); + OUT_RING (chan, src->x * cpp); + OUT_RING (chan, sy); + } else { + src_ofst += line_count * src->pitch; + } + if (!(exec & NVC0_M2MF_EXEC_LINEAR_OUT)) { + BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2); + OUT_RING (chan, dst->x * cpp); + OUT_RING (chan, dy); + } else { + dst_ofst += line_count * dst->pitch; + } + + BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); + OUT_RING (chan, nblocksx * cpp); + OUT_RING (chan, line_count); + BEGIN_RING(chan, RING_MF(EXEC), 1); + OUT_RING (chan, exec); + + height -= line_count; + sy += line_count; + dy += line_count; + } +} + +void +nvc0_m2mf_push_linear(struct nvc0_context *nvc0, + struct nouveau_bo *dst, unsigned domain, int offset, + unsigned size, void *data) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + uint32_t *src = (uint32_t *)data; + unsigned count = (size + 3) / 4; + + MARK_RING (chan, 8, 2); + + BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); + OUT_RELOCh(chan, dst, offset, domain | NOUVEAU_BO_WR); + OUT_RELOCl(chan, dst, offset, domain | NOUVEAU_BO_WR); + BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); + OUT_RING (chan, size); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_MF(EXEC), 1); + OUT_RING (chan, 0x100111); + + while (count) { + unsigned nr = AVAIL_RING(chan); + + if (nr < 9) { + FIRE_RING(chan); + nouveau_bo_validate(chan, dst, NOUVEAU_BO_WR); + continue; + } + nr = MIN2(count, nr - 1); + nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); + + BEGIN_RING_NI(chan, RING_MF(DATA), nr); + OUT_RINGp (chan, src, nr); + + src += nr; + count -= nr; + } +} + +void +nvc0_m2mf_copy_linear(struct nvc0_context *nvc0, + struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, + struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, + unsigned size) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + + while (size) { + unsigned bytes = MIN2(size, 1 << 17); + + MARK_RING (chan, 11, 4); + + BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); + OUT_RELOCh(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR); + OUT_RELOCl(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR); + BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2); + OUT_RELOCh(chan, src, srcoff, srcdom | NOUVEAU_BO_RD); + OUT_RELOCl(chan, src, srcoff, srcdom | NOUVEAU_BO_RD); + BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); + OUT_RING (chan, bytes); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_MF(EXEC), 1); + OUT_RING (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) | + NVC0_M2MF_EXEC_LINEAR_IN | NVC0_M2MF_EXEC_LINEAR_OUT); + + srcoff += bytes; + dstoff += bytes; + size -= bytes; + } +} + +static void +nvc0_m2mf_push_rect(struct pipe_screen *pscreen, + const struct nvc0_m2mf_rect *dst, + const void *data, + unsigned nblocksx, unsigned nblocksy) +{ + struct nouveau_channel *chan; + const uint8_t *src = (const uint8_t *)data; + const int cpp = dst->cpp; + const int line_len = nblocksx * cpp; + int dy = dst->y; + + assert(nouveau_bo_tile_layout(dst->bo)); + + BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5); + OUT_RING (chan, dst->tile_mode); + OUT_RING (chan, dst->width * cpp); + OUT_RING (chan, 
dst->height); + OUT_RING (chan, dst->depth); + OUT_RING (chan, dst->z); + + while (nblocksy) { + int line_count, words; + int size = MIN2(AVAIL_RING(chan), NV04_PFIFO_MAX_PACKET_LEN); + + if (size < (12 + words)) { + FIRE_RING(chan); + continue; + } + line_count = (size * 4) / line_len; + words = (line_count * line_len + 3) / 4; + + BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); + OUT_RELOCh(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR); + OUT_RELOCl(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR); + + BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2); + OUT_RING (chan, dst->x * cpp); + OUT_RING (chan, dy); + BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); + OUT_RING (chan, line_len); + OUT_RING (chan, line_count); + BEGIN_RING(chan, RING_MF(EXEC), 1); + OUT_RING (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) | + NVC0_M2MF_EXEC_PUSH | NVC0_M2MF_EXEC_LINEAR_IN); + + BEGIN_RING_NI(chan, RING_MF(DATA), words); + OUT_RINGp (chan, src, words); + + dy += line_count; + src += line_len * line_count; + nblocksy -= line_count; + } +} + +struct pipe_transfer * +nvc0_miptree_transfer_new(struct pipe_context *pctx, + struct pipe_resource *res, + unsigned level, + unsigned usage, + const struct pipe_box *box) +{ + struct nvc0_context *nvc0 = nvc0_context(pctx); + struct pipe_screen *pscreen = pctx->screen; + struct nouveau_device *dev = nvc0->screen->base.device; + struct nvc0_miptree *mt = nvc0_miptree(res); + struct nvc0_miptree_level *lvl = &mt->level[level]; + struct nvc0_transfer *tx; + uint32_t size; + uint32_t w, h, d, z, layer; + int ret; + + if (mt->layout_3d) { + z = box->z; + d = u_minify(res->depth0, level); + layer = 0; + } else { + z = 0; + d = 1; + layer = box->z; + } + + tx = CALLOC_STRUCT(nvc0_transfer); + if (!tx) + return NULL; + + pipe_resource_reference(&tx->base.resource, res); + + tx->base.level = level; + tx->base.usage = usage; + tx->base.box = *box; + + tx->nblocksx = util_format_get_nblocksx(res->format, box->width); + tx->nblocksy = util_format_get_nblocksy(res->format, box->height); + + tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format); + tx->base.layer_stride = tx->nblocksy * tx->base.stride; + + w = u_minify(res->width0, level); + h = u_minify(res->height0, level); + + tx->rect[0].cpp = tx->rect[1].cpp = util_format_get_blocksize(res->format); + + tx->rect[0].bo = mt->base.bo; + tx->rect[0].base = lvl->offset + layer * mt->layer_stride; + tx->rect[0].tile_mode = lvl->tile_mode; + tx->rect[0].x = util_format_get_nblocksx(res->format, box->x); + tx->rect[0].y = util_format_get_nblocksy(res->format, box->y); + tx->rect[0].z = z; + tx->rect[0].width = util_format_get_nblocksx(res->format, w); + tx->rect[0].height = util_format_get_nblocksy(res->format, h); + tx->rect[0].depth = d; + tx->rect[0].pitch = lvl->pitch; + tx->rect[0].domain = NOUVEAU_BO_VRAM; + + size = tx->base.layer_stride; + + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, + size * tx->base.box.depth, &tx->rect[1].bo); + if (ret) { + FREE(tx); + return NULL; + } + + tx->rect[1].width = tx->nblocksx; + tx->rect[1].height = tx->nblocksy; + tx->rect[1].depth = 1; + tx->rect[1].pitch = tx->base.stride; + tx->rect[1].domain = NOUVEAU_BO_GART; + + if (usage & PIPE_TRANSFER_READ) { + unsigned i; + for (i = 0; i < box->depth; ++i) { + nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0], + tx->nblocksx, tx->nblocksy); + if (mt->layout_3d) + tx->rect[0].z++; + else + tx->rect[0].base += mt->layer_stride; + tx->rect[1].base += size; + } + } + tx->rect[0].z = z; + 
tx->rect[1].base = 0; + + return &tx->base; +} + +void +nvc0_miptree_transfer_del(struct pipe_context *pctx, + struct pipe_transfer *transfer) +{ + struct pipe_screen *pscreen = pctx->screen; + struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer; + struct nvc0_miptree *mt = nvc0_miptree(tx->base.resource); + unsigned i; + + if (tx->base.usage & PIPE_TRANSFER_WRITE) { + for (i = 0; i < tx->base.box.depth; ++i) { + nvc0_m2mf_transfer_rect(pscreen, &tx->rect[0], &tx->rect[1], + tx->nblocksx, tx->nblocksy); + if (mt->layout_3d) + tx->rect[0].z++; + else + tx->rect[0].base += mt->layer_stride; + tx->rect[1].base += tx->nblocksy * tx->base.stride; + } + } + + nouveau_bo_ref(NULL, &tx->rect[1].bo); + pipe_resource_reference(&transfer->resource, NULL); + + FREE(tx); +} + +void * +nvc0_miptree_transfer_map(struct pipe_context *pctx, + struct pipe_transfer *transfer) +{ + struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer; + int ret; + unsigned flags = 0; + + if (tx->rect[1].bo->map) + return tx->rect[1].bo->map; + + if (transfer->usage & PIPE_TRANSFER_READ) + flags = NOUVEAU_BO_RD; + if (transfer->usage & PIPE_TRANSFER_WRITE) + flags |= NOUVEAU_BO_WR; + + ret = nouveau_bo_map(tx->rect[1].bo, flags); + if (ret) + return NULL; + return tx->rect[1].bo->map; +} + +void +nvc0_miptree_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *transfer) +{ + struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer; + + nouveau_bo_unmap(tx->rect[1].bo); +} + diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.h b/src/gallium/drivers/nvc0/nvc0_transfer.h new file mode 100644 index 00000000000..222f72d2748 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_transfer.h @@ -0,0 +1,38 @@ + +#ifndef __NVC0_TRANSFER_H__ +#define __NVC0_TRANSFER_H__ + +#include "pipe/p_state.h" + +struct pipe_transfer * +nvc0_miptree_transfer_new(struct pipe_context *pcontext, + struct pipe_resource *pt, + unsigned level, + unsigned usage, + const struct pipe_box *box); +void +nvc0_miptree_transfer_del(struct pipe_context *pcontext, + struct pipe_transfer *ptx); +void * +nvc0_miptree_transfer_map(struct pipe_context *pcontext, + struct pipe_transfer *ptx); +void +nvc0_miptree_transfer_unmap(struct pipe_context *pcontext, + struct pipe_transfer *ptx); + +struct nvc0_m2mf_rect { + struct nouveau_bo *bo; + uint32_t base; + unsigned domain; + uint32_t pitch; + uint32_t width; + uint32_t x; + uint32_t height; + uint32_t y; + uint16_t depth; + uint16_t z; + uint16_t tile_mode; + uint16_t cpp; +}; + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c new file mode 100644 index 00000000000..a14e9557382 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -0,0 +1,654 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "translate/translate.h" + +#include "nvc0_context.h" +#include "nvc0_resource.h" + +#include "nvc0_3d.xml.h" + +void +nvc0_vertex_state_delete(struct pipe_context *pipe, + void *hwcso) +{ + struct nvc0_vertex_stateobj *so = hwcso; + + if (so->translate) + so->translate->release(so->translate); + FREE(hwcso); +} + +void * +nvc0_vertex_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct nvc0_vertex_stateobj *so; + struct translate_key transkey; + unsigned i; + + assert(num_elements); + + so = MALLOC(sizeof(*so) + + (num_elements - 1) * sizeof(struct nvc0_vertex_element)); + if (!so) + return NULL; + so->num_elements = num_elements; + so->instance_bits = 0; + + transkey.nr_elements = 0; + transkey.output_stride = 0; + + for (i = 0; i < num_elements; ++i) { + const struct pipe_vertex_element *ve = &elements[i]; + const unsigned vbi = ve->vertex_buffer_index; + enum pipe_format fmt = ve->src_format; + + so->element[i].pipe = elements[i]; + so->element[i].state = nvc0_format_table[fmt].vtx; + + if (!so->element[i].state) { + switch (util_format_get_nr_components(fmt)) { + case 1: fmt = PIPE_FORMAT_R32_FLOAT; break; + case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break; + case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break; + case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break; + default: + assert(0); + return NULL; + } + so->element[i].state = nvc0_format_table[fmt].vtx; + } + so->element[i].state |= i; + + if (likely(!ve->instance_divisor)) { + unsigned j = transkey.nr_elements++; + + transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL; + transkey.element[j].input_format = ve->src_format; + transkey.element[j].input_buffer = vbi; + transkey.element[j].input_offset = ve->src_offset; + transkey.element[j].instance_divisor = ve->instance_divisor; + + transkey.element[j].output_format = fmt; + transkey.element[j].output_offset = transkey.output_stride; + transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3; + } else { + so->instance_bits |= 1 << i; + } + } + + so->translate = translate_create(&transkey); + so->vtx_size = transkey.output_stride / 4; + so->vtx_per_packet_max = NV04_PFIFO_MAX_PACKET_LEN / MAX2(so->vtx_size, 1); + + return so; +} + +#define NVC0_3D_VERTEX_ATTRIB_INACTIVE \ + NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | \ + NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST + +#define VTX_ATTR(a, c, t, s) \ + ((NVC0_3D_VTX_ATTR_DEFINE_TYPE_##t) | \ + (NVC0_3D_VTX_ATTR_DEFINE_SIZE_##s) | \ + ((a) << NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT) | \ + ((c) << NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT)) + +static void +nvc0_emit_vtxattr(struct nvc0_context *nvc0, struct pipe_vertex_buffer *vb, + struct pipe_vertex_element *ve, unsigned attr) +{ + const void *data; + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_resource *res = nvc0_resource(vb->buffer); + float v[4]; + int i; + const unsigned 
nc = util_format_get_nr_components(ve->src_format); + + data = nvc0_resource_map_offset(nvc0, res, vb->buffer_offset + + ve->src_offset, NOUVEAU_BO_RD); + + util_format_read_4f(ve->src_format, v, 0, data, 0, 0, 0, 1, 1); + + BEGIN_RING(chan, RING_3D(VTX_ATTR_DEFINE), nc + 1); + OUT_RING (chan, VTX_ATTR(attr, nc, FLOAT, 32)); + for (i = 0; i < nc; ++i) + OUT_RINGf(chan, v[i]); +} + +static void +nvc0_prevalidate_vbufs(struct nvc0_context *nvc0) +{ + struct pipe_vertex_buffer *vb; + struct nvc0_resource *buf; + int i; + uint32_t base, size; + + nvc0->vbo_fifo = nvc0->vbo_user = 0; + + for (i = 0; i < nvc0->num_vtxbufs; ++i) { + vb = &nvc0->vtxbuf[i]; + if (!vb->stride) + continue; + buf = nvc0_resource(vb->buffer); + + if (!nvc0_resource_mapped_by_gpu(vb->buffer)) { + if (nvc0->vbo_push_hint) { + nvc0->vbo_fifo = ~0; + continue; + } else { + if (buf->status & NVC0_BUFFER_STATUS_USER_MEMORY) { + nvc0->vbo_user |= 1 << i; + assert(vb->stride > vb->buffer_offset); + size = vb->stride * (nvc0->vbo_max_index - + nvc0->vbo_min_index + 1); + base = vb->stride * nvc0->vbo_min_index; + nvc0_user_buffer_upload(buf, base, size); + } else { + nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_GART); + } + nvc0->vbo_dirty = TRUE; + } + } + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_VERTEX, buf, NOUVEAU_BO_RD); + nvc0_buffer_adjust_score(nvc0, buf, 1); + } +} + +static void +nvc0_update_user_vbufs(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + const uint32_t vertex_count = nvc0->vbo_max_index - nvc0->vbo_min_index + 1; + uint32_t base, offset, size; + int i; + uint32_t written = 0; + + for (i = 0; i < nvc0->vertex->num_elements; ++i) { + struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe; + const int b = ve->vertex_buffer_index; + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b]; + struct nvc0_resource *buf = nvc0_resource(vb->buffer); + + if (!(nvc0->vbo_user & (1 << b))) + continue; + + if (!vb->stride) { + nvc0_emit_vtxattr(nvc0, vb, ve, i); + continue; + } + size = vb->stride * vertex_count; + base = vb->stride * nvc0->vbo_min_index; + + if (!(written & (1 << b))) { + written |= 1 << b; + nvc0_user_buffer_upload(buf, base, size); + } + offset = vb->buffer_offset + ve->src_offset; + + BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5); + OUT_RING (chan, i); + OUT_RESRCh(chan, buf, size - 1, NOUVEAU_BO_RD); + OUT_RESRCl(chan, buf, size - 1, NOUVEAU_BO_RD); + OUT_RESRCh(chan, buf, offset, NOUVEAU_BO_RD); + OUT_RESRCl(chan, buf, offset, NOUVEAU_BO_RD); + } + nvc0->vbo_dirty = TRUE; +} + +void +nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_vertex_stateobj *vertex = nvc0->vertex; + struct pipe_vertex_buffer *vb; + struct nvc0_vertex_element *ve; + unsigned i; + + nvc0_prevalidate_vbufs(nvc0); + + BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(0)), vertex->num_elements); + for (i = 0; i < vertex->num_elements; ++i) { + ve = &vertex->element[i]; + vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index]; + + if (likely(vb->stride) || nvc0->vbo_fifo) { + OUT_RING(chan, ve->state); + } else { + OUT_RING(chan, ve->state | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST); + nvc0->vbo_fifo &= ~(1 << i); + } + } + + for (i = 0; i < vertex->num_elements; ++i) { + struct nvc0_resource *res; + unsigned size, offset; + + ve = &vertex->element[i]; + vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index]; + + if (unlikely(ve->pipe.instance_divisor)) { + if (!(nvc0->state.instance_bits & (1 << i))) { + 
IMMED_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1); + } + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_DIVISOR(i)), 1); + OUT_RING (chan, ve->pipe.instance_divisor); + } else + if (unlikely(nvc0->state.instance_bits & (1 << i))) { + IMMED_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0); + } + + res = nvc0_resource(vb->buffer); + + if (nvc0->vbo_fifo || unlikely(vb->stride == 0)) { + if (!nvc0->vbo_fifo) + nvc0_emit_vtxattr(nvc0, vb, &ve->pipe, i); + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); + OUT_RING (chan, 0); + continue; + } + + size = vb->buffer->width0; + offset = ve->pipe.src_offset + vb->buffer_offset; + + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); + OUT_RING (chan, (1 << 12) | vb->stride); + BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5); + OUT_RING (chan, i); + OUT_RESRCh(chan, res, size - 1, NOUVEAU_BO_RD); + OUT_RESRCl(chan, res, size - 1, NOUVEAU_BO_RD); + OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD); + OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD); + } + for (; i < nvc0->state.num_vtxelts; ++i) { + BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(i)), 1); + OUT_RING (chan, NVC0_3D_VERTEX_ATTRIB_INACTIVE); + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); + OUT_RING (chan, 0); + } + + nvc0->state.num_vtxelts = vertex->num_elements; + nvc0->state.instance_bits = vertex->instance_bits; +} + +#define NVC0_PRIM_GL_CASE(n) \ + case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n + +static INLINE unsigned +nvc0_prim_gl(unsigned prim) +{ + switch (prim) { + NVC0_PRIM_GL_CASE(POINTS); + NVC0_PRIM_GL_CASE(LINES); + NVC0_PRIM_GL_CASE(LINE_LOOP); + NVC0_PRIM_GL_CASE(LINE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLES); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLE_FAN); + NVC0_PRIM_GL_CASE(QUADS); + NVC0_PRIM_GL_CASE(QUAD_STRIP); + NVC0_PRIM_GL_CASE(POLYGON); + NVC0_PRIM_GL_CASE(LINES_ADJACENCY); + NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); + /* + NVC0_PRIM_GL_CASE(PATCHES); */ + default: + return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; + break; + } +} + +static void +nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan) +{ + struct nvc0_context *nvc0 = chan->user_private; + + nvc0_bufctx_emit_relocs(nvc0); +} + +#if 0 +static struct nouveau_bo * +nvc0_tfb_setup(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nouveau_bo *tfb = NULL; + int ret, i; + + ret = nouveau_bo_new(nvc0->screen->base.device, + NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &tfb); + if (ret) + return NULL; + + ret = nouveau_bo_map(tfb, NOUVEAU_BO_WR); + if (ret) + return NULL; + memset(tfb->map, 0xee, 8 * 4 * 3); + nouveau_bo_unmap(tfb); + + BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(0)), 5); + OUT_RING (chan, 1); + OUT_RELOCh(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + OUT_RELOCl(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + OUT_RING (chan, tfb->size); + OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID(0) */ + BEGIN_RING(chan, RING_3D(TFB_UNK0700(0)), 3); + OUT_RING (chan, 0); + OUT_RING (chan, 8); /* TFB_VARYING_COUNT(0) */ + OUT_RING (chan, 32); /* TFB_BUFFER_STRIDE(0) */ + BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(0)), 2); + OUT_RING (chan, 0x1f1e1d1c); + OUT_RING (chan, 0xa3a2a1a0); + for (i = 1; i < 4; ++i) { + BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(i)), 1); + OUT_RING (chan, 0); + } + BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); + OUT_RING (chan, 1); 
+ BEGIN_RING(chan, RING_3D_(0x135c), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D_(0x135c), 1); + OUT_RING (chan, 0); + + return tfb; +} +#endif + +static void +nvc0_draw_arrays(struct nvc0_context *nvc0, + unsigned mode, unsigned start, unsigned count, + unsigned instance_count) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + unsigned prim; + + chan->flush_notify = nvc0_draw_vbo_flush_notify; + chan->user_private = nvc0; + + prim = nvc0_prim_gl(mode); + + while (instance_count--) { + BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (chan, prim); + BEGIN_RING(chan, RING_3D(VERTEX_BUFFER_FIRST), 2); + OUT_RING (chan, start); + OUT_RING (chan, count); + IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0); + + prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } + + chan->flush_notify = NULL; +} + +static void +nvc0_draw_elements_inline_u08(struct nouveau_channel *chan, uint8_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 3) { + unsigned i; + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), count & 3); + for (i = 0; i < (count & 3); ++i) + OUT_RING(chan, *map++); + count &= ~3; + } + while (count) { + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4; + + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U8), nr); + for (i = 0; i < nr; ++i) { + OUT_RING(chan, + (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]); + map += 4; + } + count -= nr * 4; + } +} + +static void +nvc0_draw_elements_inline_u16(struct nouveau_channel *chan, uint16_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 1) { + count &= ~1; + BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1); + OUT_RING (chan, *map++); + } + while (count) { + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2; + + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr); + for (i = 0; i < nr; ++i) { + OUT_RING(chan, (map[1] << 16) | map[0]); + map += 2; + } + count -= nr * 2; + } +} + +static void +nvc0_draw_elements_inline_u32(struct nouveau_channel *chan, uint32_t *map, + unsigned start, unsigned count) +{ + map += start; + + while (count) { + const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN); + + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), nr); + OUT_RINGp (chan, map, nr); + + map += nr; + count -= nr; + } +} + +static void +nvc0_draw_elements_inline_u32_short(struct nouveau_channel *chan, uint32_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 1) { + count--; + BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1); + OUT_RING (chan, *map++); + } + while (count) { + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2; + + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr); + for (i = 0; i < nr; ++i) { + OUT_RING(chan, (map[1] << 16) | map[0]); + map += 2; + } + count -= nr * 2; + } +} + +static void +nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten, + unsigned mode, unsigned start, unsigned count, + unsigned instance_count, int32_t index_bias) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + void *data; + unsigned prim; + const unsigned index_size = nvc0->idxbuf.index_size; + + chan->flush_notify = nvc0_draw_vbo_flush_notify; + chan->user_private = nvc0; + + prim = nvc0_prim_gl(mode); + + if (index_bias != nvc0->state.index_bias) { + BEGIN_RING(chan, RING_3D(VB_ELEMENT_BASE), 1); + OUT_RING (chan, index_bias); + nvc0->state.index_bias = index_bias; + } + + if (nvc0_resource_mapped_by_gpu(nvc0->idxbuf.buffer)) { + struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer); + unsigned 
offset = nvc0->idxbuf.offset; + unsigned limit = nvc0->idxbuf.buffer->width0 - 1; + + nvc0_buffer_adjust_score(nvc0, res, 1); + + while (instance_count--) { + MARK_RING (chan, 11, 4); + BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (chan, mode); + BEGIN_RING(chan, RING_3D(INDEX_ARRAY_START_HIGH), 7); + OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD); + OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD); + OUT_RESRCh(chan, res, limit, NOUVEAU_BO_RD); + OUT_RESRCl(chan, res, limit, NOUVEAU_BO_RD); + OUT_RING (chan, index_size >> 1); + OUT_RING (chan, start); + OUT_RING (chan, count); + IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0); + + nvc0_resource_fence(res, NOUVEAU_BO_RD); + + mode |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } + } else { + data = nvc0_resource_map_offset(nvc0, nvc0_resource(nvc0->idxbuf.buffer), + nvc0->idxbuf.offset, NOUVEAU_BO_RD); + if (!data) + return; + + while (instance_count--) { + BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (chan, prim); + switch (index_size) { + case 1: + nvc0_draw_elements_inline_u08(chan, data, start, count); + break; + case 2: + nvc0_draw_elements_inline_u16(chan, data, start, count); + break; + case 4: + if (shorten) + nvc0_draw_elements_inline_u32_short(chan, data, start, count); + else + nvc0_draw_elements_inline_u32(chan, data, start, count); + break; + default: + assert(0); + return; + } + IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0); + + prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } + } + + chan->flush_notify = NULL; +} + +void +nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_channel *chan = nvc0->screen->base.channel; + + /* For picking only a few vertices from a large user buffer, push is better, + * if index count is larger and we expect repeated vertices, suggest upload. 
+ */ + nvc0->vbo_push_hint = /* the 64 is heuristic */ + !(info->indexed && + ((info->max_index - info->min_index + 64) < info->count)); + + nvc0->vbo_min_index = info->min_index; + nvc0->vbo_max_index = info->max_index; + + if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS))) + nvc0_update_user_vbufs(nvc0); + + nvc0_state_validate(nvc0); + + if (nvc0->state.instance_base != info->start_instance) { + nvc0->state.instance_base = info->start_instance; + BEGIN_RING(chan, RING_3D(VB_INSTANCE_BASE), 1); + OUT_RING (chan, info->start_instance); + } + + if (nvc0->vbo_fifo) { + nvc0_push_vbo(nvc0, info); + return; + } + + if (nvc0->vbo_dirty) { + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FLUSH), 1); + OUT_RING (chan, 0); + nvc0->vbo_dirty = FALSE; + } + + if (!info->indexed) { + nvc0_draw_arrays(nvc0, + info->mode, info->start, info->count, + info->instance_count); + } else { + boolean shorten = info->max_index <= 65535; + + assert(nvc0->idxbuf.buffer); + + if (info->primitive_restart != nvc0->state.prim_restart) { + if (info->primitive_restart) { + BEGIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 2); + OUT_RING (chan, 1); + OUT_RING (chan, info->restart_index); + + if (info->restart_index > 65535) + shorten = FALSE; + } else { + IMMED_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 0); + } + nvc0->state.prim_restart = info->primitive_restart; + } else + if (info->primitive_restart) { + BEGIN_RING(chan, RING_3D(PRIM_RESTART_INDEX), 1); + OUT_RING (chan, info->restart_index); + + if (info->restart_index > 65535) + shorten = FALSE; + } + + nvc0_draw_elements(nvc0, shorten, + info->mode, info->start, info->count, + info->instance_count, info->index_bias); + } +} diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h new file mode 100644 index 00000000000..1544fb7a1de --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_winsys.h @@ -0,0 +1,120 @@ + +#ifndef __NVC0_WINSYS_H__ +#define __NVC0_WINSYS_H__ + +#include <stdint.h> +#include <unistd.h> +#include "pipe/p_defines.h" + +#include "nouveau/nouveau_bo.h" +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_grobj.h" +#include "nouveau/nouveau_device.h" +#include "nouveau/nouveau_resource.h" +#include "nouveau/nouveau_pushbuf.h" +#include "nouveau/nouveau_reloc.h" + +#include "nvc0_resource.h" /* OUT_RESRC */ + +#ifndef NV04_PFIFO_MAX_PACKET_LEN +#define NV04_PFIFO_MAX_PACKET_LEN 2047 +#endif + +#define NVC0_SUBCH_3D 1 +#define NVC0_SUBCH_2D 2 +#define NVC0_SUBCH_MF 3 + +#define NVC0_MF_(n) NVC0_M2MF_##n + +#define RING_3D(n) ((NVC0_SUBCH_3D << 13) | (NVC0_3D_##n >> 2)) +#define RING_2D(n) ((NVC0_SUBCH_2D << 13) | (NVC0_2D_##n >> 2)) +#define RING_MF(n) ((NVC0_SUBCH_MF << 13) | (NVC0_MF_(n) >> 2)) + +#define RING_3D_(m) ((NVC0_SUBCH_3D << 13) | ((m) >> 2)) +#define RING_2D_(m) ((NVC0_SUBCH_2D << 13) | ((m) >> 2)) +#define RING_MF_(m) ((NVC0_SUBCH_MF << 13) | ((m) >> 2)) + +#define RING_GR(gr, m) (((gr)->subc << 13) | ((m) >> 2)) + +int nouveau_pushbuf_flush(struct nouveau_channel *, unsigned min); + +static inline uint32_t +nouveau_bo_tile_layout(struct nouveau_bo *bo) +{ + return bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK; +} + +static INLINE void +nouveau_bo_validate(struct nouveau_channel *chan, + struct nouveau_bo *bo, unsigned flags) +{ + nouveau_reloc_emit(chan, NULL, 0, NULL, bo, 0, 0, flags, 0, 0); +} + +/* incremental methods */ +static INLINE void +BEGIN_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned size) +{ + WAIT_RING(chan, size + 1); + OUT_RING (chan, (0x2 << 28) | 
(size << 16) | mthd); +} + +/* non-incremental */ +static INLINE void +BEGIN_RING_NI(struct nouveau_channel *chan, uint32_t mthd, unsigned size) +{ + WAIT_RING(chan, size + 1); + OUT_RING (chan, (0x6 << 28) | (size << 16) | mthd); +} + +/* increment-once */ +static INLINE void +BEGIN_RING_1I(struct nouveau_channel *chan, uint32_t mthd, unsigned size) +{ + WAIT_RING(chan, size + 1); + OUT_RING (chan, (0xa << 28) | (size << 16) | mthd); +} + +/* inline-data */ +static INLINE void +IMMED_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned data) +{ + WAIT_RING(chan, 1); + OUT_RING (chan, (0x8 << 28) | (data << 16) | mthd); +} + +static INLINE int +OUT_RESRCh(struct nouveau_channel *chan, struct nvc0_resource *res, + unsigned delta, unsigned flags) +{ + return OUT_RELOCh(chan, res->bo, res->offset + delta, res->domain | flags); +} + +static INLINE int +OUT_RESRCl(struct nouveau_channel *chan, struct nvc0_resource *res, + unsigned delta, unsigned flags) +{ + if (flags & NOUVEAU_BO_WR) + res->status |= NVC0_BUFFER_STATUS_DIRTY; + return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags); +} + +static INLINE void +BIND_RING(struct nouveau_channel *chan, struct nouveau_grobj *gr, unsigned s) +{ + struct nouveau_subchannel *subc = &gr->channel->subc[s]; + + assert(s < 8); + if (subc->gr) { + assert(subc->gr->bound != NOUVEAU_GROBJ_BOUND_EXPLICIT); + subc->gr->bound = NOUVEAU_GROBJ_UNBOUND; + } + subc->gr = gr; + subc->gr->subc = s; + subc->gr->bound = NOUVEAU_GROBJ_BOUND_EXPLICIT; + + BEGIN_RING(chan, RING_GR(gr, 0x0000), 1); + OUT_RING (chan, gr->grclass); +} + +#endif diff --git a/src/gallium/drivers/nvfx/nv04_2d.c b/src/gallium/drivers/nvfx/nv04_2d.c index e0e65e7a87f..e2fadd33e1c 100644 --- a/src/gallium/drivers/nvfx/nv04_2d.c +++ b/src/gallium/drivers/nvfx/nv04_2d.c @@ -34,11 +34,11 @@ #include <stdio.h> #include <stdint.h> #include <nouveau/nouveau_device.h> -#include <nouveau/nouveau_pushbuf.h> #include <nouveau/nouveau_channel.h> #include <nouveau/nouveau_bo.h> #include <nouveau/nouveau_notifier.h> #include <nouveau/nouveau_grobj.h> +#include <nouveau/nv04_pushbuf.h> #include "nv04_2d.h" #include "nouveau/nv_object.xml.h" diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c index d6ede5b40a1..b609891d316 100644 --- a/src/gallium/drivers/nvfx/nv30_fragtex.c +++ b/src/gallium/drivers/nvfx/nv30_fragtex.c @@ -37,12 +37,12 @@ nv30_sampler_view_init(struct pipe_context *pipe, struct pipe_resource* pt = sv->base.texture; struct nvfx_texture_format *tf = &nvfx_texture_formats[sv->base.format]; unsigned txf; - unsigned level = pt->target == PIPE_TEXTURE_CUBE ? 0 : sv->base.first_level; + unsigned level = pt->target == PIPE_TEXTURE_CUBE ? 0 : sv->base.u.tex.first_level; assert(tf->fmt[0] >= 0); txf = sv->u.init_fmt; - txf |= (level != sv->base.last_level ? NV30_3D_TEX_FORMAT_MIPMAP : 0); + txf |= (level != sv->base.u.tex.last_level ? 
NV30_3D_TEX_FORMAT_MIPMAP : 0); txf |= util_logbase2(u_minify(pt->width0, level)) << NV30_3D_TEX_FORMAT_BASE_SIZE_U__SHIFT; txf |= util_logbase2(u_minify(pt->height0, level)) << NV30_3D_TEX_FORMAT_BASE_SIZE_V__SHIFT; txf |= util_logbase2(u_minify(pt->depth0, level)) << NV30_3D_TEX_FORMAT_BASE_SIZE_W__SHIFT; @@ -60,8 +60,8 @@ nv30_sampler_view_init(struct pipe_context *pipe, else sv->u.nv30.rect = !!(pt->flags & NVFX_RESOURCE_FLAG_LINEAR); - sv->lod_offset = sv->base.first_level - level; - sv->max_lod_limit = sv->base.last_level - level; + sv->lod_offset = sv->base.u.tex.first_level - level; + sv->max_lod_limit = sv->base.u.tex.last_level - level; } void @@ -71,6 +71,7 @@ nv30_fragtex_set(struct nvfx_context *nvfx, int unit) struct nvfx_sampler_view* sv = (struct nvfx_sampler_view*)nvfx->fragment_sampler_views[unit]; struct nouveau_bo *bo = ((struct nvfx_miptree *)sv->base.texture)->base.bo; struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; unsigned txf; unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; unsigned use_rect; @@ -102,7 +103,7 @@ nv30_fragtex_set(struct nvfx_context *nvfx, int unit) txf = sv->u.nv30.fmt[ps->compare + (use_rect ? 2 : 0)]; MARK_RING(chan, 9, 2); - OUT_RING(chan, RING_3D(NV30_3D_TEX_OFFSET(unit), 8)); + BEGIN_RING(chan, eng3d, NV30_3D_TEX_OFFSET(unit), 8); OUT_RELOC(chan, bo, sv->offset, tex_flags | NOUVEAU_BO_LOW, 0, 0); OUT_RELOC(chan, bo, txf, tex_flags | NOUVEAU_BO_OR, diff --git a/src/gallium/drivers/nvfx/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c index d4fb73702da..563183d9d0c 100644 --- a/src/gallium/drivers/nvfx/nv40_fragtex.c +++ b/src/gallium/drivers/nvfx/nv40_fragtex.c @@ -46,7 +46,7 @@ nv40_sampler_view_init(struct pipe_context *pipe, struct nvfx_miptree* mt = (struct nvfx_miptree*)pt; struct nvfx_texture_format *tf = &nvfx_texture_formats[sv->base.format]; unsigned txf; - unsigned level = pt->target == PIPE_TEXTURE_CUBE ? 0 : sv->base.first_level; + unsigned level = pt->target == PIPE_TEXTURE_CUBE ? 
0 : sv->base.u.tex.first_level; assert(tf->fmt[4] >= 0); txf = sv->u.init_fmt; @@ -54,7 +54,7 @@ nv40_sampler_view_init(struct pipe_context *pipe, if(pt->target == PIPE_TEXTURE_CUBE) txf |= ((pt->last_level + 1) << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT); else - txf |= (((sv->base.last_level - sv->base.first_level) + 1) << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT); + txf |= (((sv->base.u.tex.last_level - sv->base.u.tex.first_level) + 1) << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT); if (!mt->linear_pitch) sv->u.nv40.npot_size2 = 0; @@ -68,14 +68,15 @@ nv40_sampler_view_init(struct pipe_context *pipe, sv->u.nv40.npot_size2 |= (u_minify(pt->depth0, level) << NV40_3D_TEX_SIZE1_DEPTH__SHIFT); - sv->lod_offset = (sv->base.first_level - level) * 256; - sv->max_lod_limit = (sv->base.last_level - level) * 256; + sv->lod_offset = (sv->base.u.tex.first_level - level) * 256; + sv->max_lod_limit = (sv->base.u.tex.last_level - level) * 256; } void nv40_fragtex_set(struct nvfx_context *nvfx, int unit) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit]; struct nvfx_sampler_view* sv = (struct nvfx_sampler_view*)nvfx->fragment_sampler_views[unit]; struct nouveau_bo *bo = ((struct nvfx_miptree *)sv->base.texture)->base.bo; @@ -87,7 +88,7 @@ nv40_fragtex_set(struct nvfx_context *nvfx, int unit) txf = sv->u.nv40.fmt[ps->compare] | ps->fmt; MARK_RING(chan, 11, 2); - OUT_RING(chan, RING_3D(NV30_3D_TEX_OFFSET(unit), 8)); + BEGIN_RING(chan, eng3d, NV30_3D_TEX_OFFSET(unit), 8); OUT_RELOC(chan, bo, sv->offset, tex_flags | NOUVEAU_BO_LOW, 0, 0); OUT_RELOC(chan, bo, txf, tex_flags | NOUVEAU_BO_OR, NV30_3D_TEX_FORMAT_DMA0, NV30_3D_TEX_FORMAT_DMA1); @@ -97,7 +98,7 @@ nv40_fragtex_set(struct nvfx_context *nvfx, int unit) OUT_RING(chan, ps->filt | sv->filt); OUT_RING(chan, sv->npot_size); OUT_RING(chan, ps->bcol); - OUT_RING(chan, RING_3D(NV40_3D_TEX_SIZE1(unit), 1)); + BEGIN_RING(chan, eng3d, NV40_3D_TEX_SIZE1(unit), 1); OUT_RING(chan, sv->u.nv40.npot_size2); nvfx->hw_txf[unit] = txf; diff --git a/src/gallium/drivers/nvfx/nvfx_buffer.c b/src/gallium/drivers/nvfx/nvfx_buffer.c index 041099e0e56..b407429731f 100644 --- a/src/gallium/drivers/nvfx/nvfx_buffer.c +++ b/src/gallium/drivers/nvfx/nvfx_buffer.c @@ -64,6 +64,7 @@ nvfx_user_buffer_create(struct pipe_screen *pscreen, buffer->base.base.width0 = bytes; buffer->base.base.height0 = 1; buffer->base.base.depth0 = 1; + buffer->base.base.array_size = 1; buffer->data = ptr; buffer->size = bytes; buffer->bytes_to_draw_until_static = bytes * screen->static_reuse_threshold; diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c index 95834d23273..6c8934d3a4a 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.c +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -13,13 +13,13 @@ nvfx_flush(struct pipe_context *pipe, unsigned flags, struct nvfx_context *nvfx = nvfx_context(pipe); struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; /* XXX: we need to actually be intelligent here */ if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - WAIT_RING(chan, 4); - OUT_RING(chan, RING_3D(0x1fd8, 1)); + BEGIN_RING(chan, eng3d, 0x1fd8, 1); OUT_RING(chan, 2); - OUT_RING(chan, RING_3D(0x1fd8, 1)); + BEGIN_RING(chan, eng3d, 0x1fd8, 1); OUT_RING(chan, 1); } diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 
6ef2a6945d7..2238aa1ad0e 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -339,30 +339,31 @@ extern void nvfx_init_vertprog_functions(struct nvfx_context *nvfx); /* nvfx_push.c */ extern void nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info); -/* must WAIT_RING(chan, ncomp + 1) or equivalent beforehand! */ -static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan, unsigned attrib, const float* v, unsigned ncomp) +static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan, + struct nouveau_grobj *eng3d, unsigned attrib, const float* v, + unsigned ncomp) { switch (ncomp) { case 4: - OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_4F_X(attrib), 4)); + BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_4F_X(attrib), 4); OUT_RING(chan, fui(v[0])); OUT_RING(chan, fui(v[1])); OUT_RING(chan, fui(v[2])); OUT_RING(chan, fui(v[3])); break; case 3: - OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_3F_X(attrib), 3)); + BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_3F_X(attrib), 3); OUT_RING(chan, fui(v[0])); OUT_RING(chan, fui(v[1])); OUT_RING(chan, fui(v[2])); break; case 2: - OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_2F_X(attrib), 2)); + BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_2F_X(attrib), 2); OUT_RING(chan, fui(v[0])); OUT_RING(chan, fui(v[1])); break; case 1: - OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_1F(attrib), 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_1F(attrib), 1); OUT_RING(chan, fui(v[0])); break; } diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c index 61f888a8ea2..81f1ec485d3 100644 --- a/src/gallium/drivers/nvfx/nvfx_draw.c +++ b/src/gallium/drivers/nvfx/nvfx_draw.c @@ -28,10 +28,10 @@ nvfx_render_flush(struct draw_stage *stage, unsigned flags) struct nvfx_render_stage *rs = nvfx_render_stage(stage); struct nvfx_context *nvfx = rs->nvfx; struct nouveau_channel *chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; if (rs->prim != NV30_3D_VERTEX_BEGIN_END_STOP) { - assert(AVAIL_RING(chan) >= 2); - OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1); OUT_RING(chan, NV30_3D_VERTEX_BEGIN_END_STOP); rs->prim = NV30_3D_VERTEX_BEGIN_END_STOP; } @@ -46,6 +46,7 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim, struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; boolean no_elements = nvfx->vertprog->draw_no_elements; unsigned num_attribs = nvfx->vertprog->draw_elements; @@ -63,7 +64,7 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim, /* Switch primitive modes if necessary */ if (rs->prim != mode) { if (rs->prim != NV30_3D_VERTEX_BEGIN_END_STOP) { - OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1); OUT_RING(chan, NV30_3D_VERTEX_BEGIN_END_STOP); } @@ -74,23 +75,24 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim, int i; for(i = 0; i < 32; ++i) { - OUT_RING(chan, RING_3D(0x1dac, 1)); + BEGIN_RING(chan, eng3d, 0x1dac, 1); OUT_RING(chan, 0); } } - OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1); OUT_RING (chan, mode); rs->prim = mode; } - OUT_RING(chan, RING_3D_NI(NV30_3D_VERTEX_DATA, num_attribs * 4 * count)); if(no_elements) { + BEGIN_RING_NI(chan, eng3d, NV30_3D_VERTEX_DATA, 4); OUT_RING(chan, 0); OUT_RING(chan, 0); OUT_RING(chan, 
0); OUT_RING(chan, 0); } else { + BEGIN_RING_NI(chan, eng3d, NV30_3D_VERTEX_DATA, num_attribs * 4 * count); for (unsigned i = 0; i < count; ++i) { struct vertex_header* v = prim->v[i]; diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index 13e8beed479..dbd7c773465 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -1189,12 +1189,12 @@ out_err: static inline void nvfx_fp_memcpy(void* dst, const void* src, size_t len) { -#ifndef WORDS_BIGENDIAN +#ifndef PIPE_ARCH_BIG_ENDIAN memcpy(dst, src, len); #else size_t i; for(i = 0; i < len; i += 4) { - uint32_t v = (uint32_t*)((char*)src + i); + uint32_t v = *(uint32_t*)((char*)src + i); *(uint32_t*)((char*)dst + i) = (v >> 16) | (v << 16); } #endif @@ -1233,6 +1233,7 @@ void nvfx_fragprog_validate(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; struct nvfx_pipe_fragment_program *pfp = nvfx->fragprog; struct nvfx_vertex_program* vp; @@ -1499,17 +1500,17 @@ update: nvfx->hw_fragprog = fp; MARK_RING(chan, 8, 1); - OUT_RING(chan, RING_3D(NV30_3D_FP_ACTIVE_PROGRAM, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_FP_ACTIVE_PROGRAM, 1); OUT_RELOC(chan, fp->fpbo->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, NV30_3D_FP_ACTIVE_PROGRAM_DMA0, NV30_3D_FP_ACTIVE_PROGRAM_DMA1); - OUT_RING(chan, RING_3D(NV30_3D_FP_CONTROL, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_FP_CONTROL, 1); OUT_RING(chan, fp->fp_control); if(!nvfx->is_nv4x) { - OUT_RING(chan, RING_3D(NV30_3D_FP_REG_CONTROL, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_FP_REG_CONTROL, 1); OUT_RING(chan, (1<<16)|0x4); - OUT_RING(chan, RING_3D(NV30_3D_TEX_UNITS_ENABLE, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_TEX_UNITS_ENABLE, 1); OUT_RING(chan, fp->samplers); } } @@ -1518,8 +1519,7 @@ update: unsigned pointsprite_control = fp->point_sprite_control | nvfx->rasterizer->pipe.point_quad_rasterization; if(pointsprite_control != nvfx->hw_pointsprite_control) { - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV30_3D_POINT_SPRITE, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_POINT_SPRITE, 1); OUT_RING(chan, pointsprite_control); nvfx->hw_pointsprite_control = pointsprite_control; } diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c index 1d6b4e24cbc..1c4901df0e2 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragtex.c +++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c @@ -122,8 +122,8 @@ nvfx_create_sampler_view(struct pipe_context *pipe, } else { - sv->offset = nvfx_subresource_offset(pt, 0, sv->base.first_level, 0); - sv->npot_size = (u_minify(pt->width0, sv->base.first_level) << NV30_3D_TEX_NPOT_SIZE_W__SHIFT) | u_minify(pt->height0, sv->base.first_level); + sv->offset = nvfx_subresource_offset(pt, 0, sv->base.u.tex.first_level, 0); + sv->npot_size = (u_minify(pt->width0, sv->base.u.tex.first_level) << NV30_3D_TEX_NPOT_SIZE_W__SHIFT) | u_minify(pt->height0, sv->base.u.tex.first_level); /* apparently, we need to ignore the t coordinate for 1D textures to fix piglit tex1d-2dborder */ if(pt->target == PIPE_TEXTURE_1D) @@ -177,6 +177,7 @@ void nvfx_fragtex_validate(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; unsigned samplers, unit; samplers = nvfx->dirty_samplers; @@ -197,9 +198,8 @@ nvfx_fragtex_validate(struct nvfx_context *nvfx) else nv40_fragtex_set(nvfx, unit); } else { 
- WAIT_RING(chan, 2); /* this is OK for nv40 too */ - OUT_RING(chan, RING_3D(NV30_3D_TEX_ENABLE(unit), 1)); + BEGIN_RING(chan, eng3d, NV30_3D_TEX_ENABLE(unit), 1); OUT_RING(chan, 0); nvfx->hw_samplers &= ~(1 << unit); } diff --git a/src/gallium/drivers/nvfx/nvfx_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c index 7677fde40cb..8c043b867ba 100644 --- a/src/gallium/drivers/nvfx/nvfx_miptree.c +++ b/src/gallium/drivers/nvfx/nvfx_miptree.c @@ -190,25 +190,27 @@ nvfx_miptree_from_handle(struct pipe_screen *pscreen, const struct pipe_resource } struct pipe_surface * -nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned flags) +nvfx_miptree_surface_new(struct pipe_context *pipe, struct pipe_resource *pt, + const struct pipe_surface *surf_tmpl) { - struct nvfx_miptree* mt = (struct nvfx_miptree*)pt; - struct nvfx_surface *ns; - - ns = (struct nvfx_surface*)util_surfaces_get(&mt->surfaces, sizeof(struct nvfx_surface), pscreen, pt, face, level, zslice, flags); - if(ns->base.base.offset == ~0) { - util_dirty_surface_init(&ns->base); - ns->pitch = nvfx_subresource_pitch(pt, level); - ns->base.base.offset = nvfx_subresource_offset(pt, face, level, zslice); + struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; + unsigned level = surf_tmpl->u.tex.level; + struct nvfx_surface *ns = NULL; + + assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); + if(util_surfaces_get(&mt->surfaces, sizeof(struct nvfx_surface), pipe, + pt, level, surf_tmpl->u.tex.first_layer, + surf_tmpl->usage, (struct pipe_surface **)&ns)) { + util_dirty_surface_init(&ns->base); + ns->pitch = nvfx_subresource_pitch(pt, level); + ns->offset = nvfx_subresource_offset(pt, surf_tmpl->u.tex.first_layer, level, surf_tmpl->u.tex.first_layer); } return &ns->base.base; } void -nvfx_miptree_surface_del(struct pipe_surface *ps) +nvfx_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps) { struct nvfx_surface* ns = (struct nvfx_surface*)ps; diff --git a/src/gallium/drivers/nvfx/nvfx_push.c b/src/gallium/drivers/nvfx/nvfx_push.c index ebf47e6ed30..6391741a2e5 100644 --- a/src/gallium/drivers/nvfx/nvfx_push.c +++ b/src/gallium/drivers/nvfx/nvfx_push.c @@ -10,6 +10,7 @@ struct push_context { struct nouveau_channel* chan; + struct nouveau_grobj *eng3d; void *idxbuf; int32_t idxbias; @@ -27,9 +28,10 @@ static void emit_edgeflag(void *priv, boolean enabled) { struct push_context* ctx = priv; + struct nouveau_grobj *eng3d = ctx->eng3d; struct nouveau_channel *chan = ctx->chan; - OUT_RING(chan, RING_3D(NV30_3D_EDGEFLAG, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_EDGEFLAG, 1); OUT_RING(chan, enabled ? 
1 : 0); } @@ -37,6 +39,7 @@ static void emit_vertices_lookup8(void *priv, unsigned start, unsigned count) { struct push_context *ctx = priv; + struct nouveau_grobj *eng3d = ctx->eng3d; uint8_t* elts = (uint8_t*)ctx->idxbuf + start; while(count) @@ -44,7 +47,7 @@ emit_vertices_lookup8(void *priv, unsigned start, unsigned count) unsigned push = MIN2(count, ctx->max_vertices_per_packet); unsigned length = push * ctx->vertex_length; - OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length)); + BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length); ctx->translate->run_elts8(ctx->translate, elts, push, 0, ctx->chan->cur); ctx->chan->cur += length; @@ -57,6 +60,7 @@ static void emit_vertices_lookup16(void *priv, unsigned start, unsigned count) { struct push_context *ctx = priv; + struct nouveau_grobj *eng3d = ctx->eng3d; uint16_t* elts = (uint16_t*)ctx->idxbuf + start; while(count) @@ -64,7 +68,7 @@ emit_vertices_lookup16(void *priv, unsigned start, unsigned count) unsigned push = MIN2(count, ctx->max_vertices_per_packet); unsigned length = push * ctx->vertex_length; - OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length)); + BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length); ctx->translate->run_elts16(ctx->translate, elts, push, 0, ctx->chan->cur); ctx->chan->cur += length; @@ -77,6 +81,7 @@ static void emit_vertices_lookup32(void *priv, unsigned start, unsigned count) { struct push_context *ctx = priv; + struct nouveau_grobj *eng3d = ctx->eng3d; uint32_t* elts = (uint32_t*)ctx->idxbuf + start; while(count) @@ -84,7 +89,7 @@ emit_vertices_lookup32(void *priv, unsigned start, unsigned count) unsigned push = MIN2(count, ctx->max_vertices_per_packet); unsigned length = push * ctx->vertex_length; - OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length)); + BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length); ctx->translate->run_elts(ctx->translate, elts, push, 0, ctx->chan->cur); ctx->chan->cur += length; @@ -97,13 +102,14 @@ static void emit_vertices(void *priv, unsigned start, unsigned count) { struct push_context *ctx = priv; + struct nouveau_grobj *eng3d = ctx->eng3d; while(count) { unsigned push = MIN2(count, ctx->max_vertices_per_packet); unsigned length = push * ctx->vertex_length; - OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length)); + BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length); ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur); ctx->chan->cur += length; @@ -116,10 +122,11 @@ static void emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg) { struct push_context* ctx = priv; + struct nouveau_grobj *eng3d = ctx->eng3d; struct nouveau_channel *chan = ctx->chan; unsigned nr = (vc & 0xff); if (nr) { - OUT_RING(chan, RING_3D(reg, 1)); + BEGIN_RING(chan, eng3d, reg, 1); OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -130,7 +137,7 @@ emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg) nr -= push; - OUT_RING(chan, RING_3D_NI(reg, push)); + BEGIN_RING_NI(chan, eng3d, reg, push); while (push--) { OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; @@ -154,12 +161,13 @@ static INLINE void emit_elt8(void* priv, unsigned start, unsigned vc) { struct push_context* ctx = priv; + struct nouveau_grobj *eng3d = ctx->eng3d; struct nouveau_channel *chan = ctx->chan; uint8_t *elts = (uint8_t *)ctx->idxbuf + start; int idxbias = ctx->idxbias; if (vc & 1) { - OUT_RING(chan, RING_3D(NV30_3D_VB_ELEMENT_U32, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VB_ELEMENT_U32, 1); OUT_RING 
(chan, elts[0]); elts++; vc--; } @@ -168,7 +176,7 @@ emit_elt8(void* priv, unsigned start, unsigned vc) unsigned i; unsigned push = MIN2(vc, 2047 * 2); - OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U16, push >> 1)); + BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias)); @@ -181,12 +189,13 @@ static INLINE void emit_elt16(void* priv, unsigned start, unsigned vc) { struct push_context* ctx = priv; + struct nouveau_grobj *eng3d = ctx->eng3d; struct nouveau_channel *chan = ctx->chan; uint16_t *elts = (uint16_t *)ctx->idxbuf + start; int idxbias = ctx->idxbias; if (vc & 1) { - OUT_RING(chan, RING_3D(NV30_3D_VB_ELEMENT_U32, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VB_ELEMENT_U32, 1); OUT_RING (chan, elts[0]); elts++; vc--; } @@ -195,7 +204,7 @@ emit_elt16(void* priv, unsigned start, unsigned vc) unsigned i; unsigned push = MIN2(vc, 2047 * 2); - OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U16, push >> 1)); + BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias)); @@ -208,6 +217,7 @@ static INLINE void emit_elt32(void* priv, unsigned start, unsigned vc) { struct push_context* ctx = priv; + struct nouveau_grobj *eng3d = ctx->eng3d; struct nouveau_channel *chan = ctx->chan; uint32_t *elts = (uint32_t *)ctx->idxbuf + start; int idxbias = ctx->idxbias; @@ -215,8 +225,7 @@ emit_elt32(void* priv, unsigned start, unsigned vc) while (vc) { unsigned push = MIN2(vc, 2047); - OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U32, push)); - assert(AVAIL_RING(chan) >= push); + BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U32, push); if(idxbias) { for(unsigned i = 0; i < push; ++i) @@ -235,6 +244,7 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct nvfx_context *nvfx = nvfx_context(pipe); struct nouveau_channel *chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; struct push_context ctx; struct util_split_prim s; unsigned instances_left = info->instance_count; @@ -251,6 +261,7 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) + 4; /* potential edgeflag enable/disable */ ctx.chan = nvfx->screen->base.channel; + ctx.eng3d = nvfx->screen->eng3d; ctx.translate = nvfx->vtxelt->translate; ctx.idxbuf = NULL; ctx.vertex_length = nvfx->vtxelt->vertex_length; @@ -333,8 +344,9 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0); - WAIT_RING(chan, 5); - nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp); + nvfx_emit_vtx_attr(chan, eng3d, + nvfx->vtxelt->per_instance[i].base.idx, v, + nvfx->vtxelt->per_instance[i].base.ncomp); } /* per-instance loop */ @@ -374,15 +386,18 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) int i; for(i = 0; i < 32; ++i) { - OUT_RING(chan, RING_3D(0x1dac, 1)); + BEGIN_RING(chan, eng3d, + 0x1dac, 1); OUT_RING(chan, 0); } } - OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); + BEGIN_RING(chan, eng3d, + NV30_3D_VERTEX_BEGIN_END, 1); OUT_RING(chan, hw_mode); done = util_split_prim_next(&s, max_verts); - OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); + BEGIN_RING(chan, eng3d, + NV30_3D_VERTEX_BEGIN_END, 1); OUT_RING(chan, 0); if(done) @@ -406,8 +421,10 @@ nvfx_push_vbo(struct pipe_context 
*pipe, const struct pipe_draw_info *info) per_instance[i].step = 0; nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0); - WAIT_RING(chan, 5); - nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp); + nvfx_emit_vtx_attr(chan, eng3d, + nvfx->vtxelt->per_instance[i].base.idx, + v, + nvfx->vtxelt->per_instance[i].base.ncomp); } } } diff --git a/src/gallium/drivers/nvfx/nvfx_query.c b/src/gallium/drivers/nvfx/nvfx_query.c index 3935ffd7f92..3cd6bf1e477 100644 --- a/src/gallium/drivers/nvfx/nvfx_query.c +++ b/src/gallium/drivers/nvfx/nvfx_query.c @@ -49,6 +49,7 @@ nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq) struct nvfx_query *q = nvfx_query(pq); struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; uint64_t tmp; assert(!nvfx->query); @@ -72,10 +73,9 @@ nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq) nouveau_notifier_reset(nvfx->screen->query, q->object->start); - WAIT_RING(chan, 4); - OUT_RING(chan, RING_3D(NV30_3D_QUERY_RESET, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_QUERY_RESET, 1); OUT_RING(chan, 1); - OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1); OUT_RING(chan, 1); q->ready = FALSE; @@ -88,15 +88,15 @@ nvfx_query_end(struct pipe_context *pipe, struct pipe_query *pq) { struct nvfx_context *nvfx = nvfx_context(pipe); struct nouveau_channel *chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; struct nvfx_query *q = nvfx_query(pq); assert(nvfx->query == pq); - WAIT_RING(chan, 4); - OUT_RING(chan, RING_3D(NV30_3D_QUERY_GET, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_QUERY_GET, 1); OUT_RING (chan, (0x01 << NV30_3D_QUERY_GET_UNK24__SHIFT) | ((q->object->start * 32) << NV30_3D_QUERY_GET_OFFSET__SHIFT)); - OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1); OUT_RING(chan, 0); FIRE_RING(chan); diff --git a/src/gallium/drivers/nvfx/nvfx_resource.c b/src/gallium/drivers/nvfx/nvfx_resource.c index 39ae893f1b3..c60a7bb8b93 100644 --- a/src/gallium/drivers/nvfx/nvfx_resource.c +++ b/src/gallium/drivers/nvfx/nvfx_resource.c @@ -7,7 +7,7 @@ static unsigned int nvfx_resource_is_referenced(struct pipe_context *pipe, struct pipe_resource *pr, - unsigned face, unsigned level) + unsigned level, int layer) { return !!nouveau_reference_flags(nvfx_resource(pr)->bo); } @@ -59,6 +59,9 @@ void nvfx_init_resource_functions(struct pipe_context *pipe) { pipe->is_resource_referenced = nvfx_resource_is_referenced; + + pipe->create_surface = nvfx_miptree_surface_new; + pipe->surface_destroy = nvfx_miptree_surface_del; } void @@ -69,7 +72,4 @@ nvfx_screen_init_resource_functions(struct pipe_screen *pscreen) pscreen->resource_get_handle = nvfx_resource_get_handle; pscreen->resource_destroy = nvfx_resource_destroy; pscreen->user_buffer_create = nvfx_user_buffer_create; - - pscreen->get_tex_surface = nvfx_miptree_surface_new; - pscreen->tex_surface_destroy = nvfx_miptree_surface_del; } diff --git a/src/gallium/drivers/nvfx/nvfx_resource.h b/src/gallium/drivers/nvfx/nvfx_resource.h index 583be4de2ae..070f8979442 100644 --- a/src/gallium/drivers/nvfx/nvfx_resource.h +++ b/src/gallium/drivers/nvfx/nvfx_resource.h @@ -74,6 +74,7 @@ struct nvfx_miptree { struct nvfx_surface { struct util_dirty_surface base; unsigned pitch; + unsigned offset; struct nvfx_miptree* temp; }; @@ -116,12 
+117,11 @@ nvfx_miptree_from_handle(struct pipe_screen *pscreen, struct winsys_handle *whandle); void -nvfx_miptree_surface_del(struct pipe_surface *ps); +nvfx_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps); struct pipe_surface * -nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned flags); +nvfx_miptree_surface_new(struct pipe_context *pipe, struct pipe_resource *pt, + const struct pipe_surface *surf_tmpl); /* only for miptrees, don't use for buffers */ diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 92e1d330907..aa1e9567d31 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -301,98 +301,100 @@ nvfx_screen_destroy(struct pipe_screen *pscreen) static void nv30_screen_init(struct nvfx_screen *screen) { struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; int i; /* TODO: perhaps we should do some of this on nv40 too? */ for (i=1; i<8; i++) { - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_HORIZ(i), 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_HORIZ(i), 1); OUT_RING(chan, 0); - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_VERT(i), 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_VERT(i), 1); OUT_RING(chan, 0); } - OUT_RING(chan, RING_3D(0x220, 1)); + BEGIN_RING(chan, eng3d, 0x220, 1); OUT_RING(chan, 1); - OUT_RING(chan, RING_3D(0x03b0, 1)); + BEGIN_RING(chan, eng3d, 0x03b0, 1); OUT_RING(chan, 0x00100000); - OUT_RING(chan, RING_3D(0x1454, 1)); + BEGIN_RING(chan, eng3d, 0x1454, 1); OUT_RING(chan, 0); - OUT_RING(chan, RING_3D(0x1d80, 1)); + BEGIN_RING(chan, eng3d, 0x1d80, 1); OUT_RING(chan, 3); - OUT_RING(chan, RING_3D(0x1450, 1)); + BEGIN_RING(chan, eng3d, 0x1450, 1); OUT_RING(chan, 0x00030004); /* NEW */ - OUT_RING(chan, RING_3D(0x1e98, 1)); + BEGIN_RING(chan, eng3d, 0x1e98, 1); OUT_RING(chan, 0); - OUT_RING(chan, RING_3D(0x17e0, 3)); + BEGIN_RING(chan, eng3d, 0x17e0, 3); OUT_RING(chan, fui(0.0)); OUT_RING(chan, fui(0.0)); OUT_RING(chan, fui(1.0)); - OUT_RING(chan, RING_3D(0x1f80, 16)); + BEGIN_RING(chan, eng3d, 0x1f80, 16); for (i=0; i<16; i++) { OUT_RING(chan, (i==8) ? 
0x0000ffff : 0); } - OUT_RING(chan, RING_3D(0x120, 3)); + BEGIN_RING(chan, eng3d, 0x120, 3); OUT_RING(chan, 0); OUT_RING(chan, 1); OUT_RING(chan, 2); - OUT_RING(chan, RING_3D(0x1d88, 1)); + BEGIN_RING(chan, eng3d, 0x1d88, 1); OUT_RING(chan, 0x00001200); - OUT_RING(chan, RING_3D(NV30_3D_RC_ENABLE, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_RC_ENABLE, 1); OUT_RING(chan, 0); - OUT_RING(chan, RING_3D(NV30_3D_DEPTH_RANGE_NEAR, 2)); + BEGIN_RING(chan, eng3d, NV30_3D_DEPTH_RANGE_NEAR, 2); OUT_RING(chan, fui(0.0)); OUT_RING(chan, fui(1.0)); - OUT_RING(chan, RING_3D(NV30_3D_MULTISAMPLE_CONTROL, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_MULTISAMPLE_CONTROL, 1); OUT_RING(chan, 0xffff0000); /* enables use of vp rather than fixed-function somehow */ - OUT_RING(chan, RING_3D(0x1e94, 1)); + BEGIN_RING(chan, eng3d, 0x1e94, 1); OUT_RING(chan, 0x13); } static void nv40_screen_init(struct nvfx_screen *screen) { struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; - OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR2, 2)); + BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR2, 2); OUT_RING(chan, screen->base.channel->vram->handle); OUT_RING(chan, screen->base.channel->vram->handle); - OUT_RING(chan, RING_3D(0x1450, 1)); + BEGIN_RING(chan, eng3d, 0x1450, 1); OUT_RING(chan, 0x00000004); - OUT_RING(chan, RING_3D(0x1ea4, 3)); + BEGIN_RING(chan, eng3d, 0x1ea4, 3); OUT_RING(chan, 0x00000010); OUT_RING(chan, 0x01000100); OUT_RING(chan, 0xff800006); /* vtxprog output routing */ - OUT_RING(chan, RING_3D(0x1fc4, 1)); + BEGIN_RING(chan, eng3d, 0x1fc4, 1); OUT_RING(chan, 0x06144321); - OUT_RING(chan, RING_3D(0x1fc8, 2)); + BEGIN_RING(chan, eng3d, 0x1fc8, 2); OUT_RING(chan, 0xedcba987); OUT_RING(chan, 0x0000006f); - OUT_RING(chan, RING_3D(0x1fd0, 1)); + BEGIN_RING(chan, eng3d, 0x1fd0, 1); OUT_RING(chan, 0x00171615); - OUT_RING(chan, RING_3D(0x1fd4, 1)); + BEGIN_RING(chan, eng3d, 0x1fd4, 1); OUT_RING(chan, 0x001b1a19); - OUT_RING(chan, RING_3D(0x1ef8, 1)); + BEGIN_RING(chan, eng3d, 0x1ef8, 1); OUT_RING(chan, 0x0020ffff); - OUT_RING(chan, RING_3D(0x1d64, 1)); + BEGIN_RING(chan, eng3d, 0x1d64, 1); OUT_RING(chan, 0x01d300d4); - OUT_RING(chan, RING_3D(0x1e94, 1)); + BEGIN_RING(chan, eng3d, 0x1e94, 1); OUT_RING(chan, 0x00000001); - OUT_RING(chan, RING_3D(NV40_3D_MIPMAP_ROUNDING, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_MIPMAP_ROUNDING, 1); OUT_RING(chan, NV40_3D_MIPMAP_ROUNDING_MODE_DOWN); } @@ -571,25 +573,25 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) /* Static eng3d initialisation */ /* note that we just started using the channel, so we must have space in the pushbuffer */ - OUT_RING(chan, RING_3D(NV30_3D_DMA_NOTIFY, 1)); + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_NOTIFY, 1); OUT_RING(chan, screen->sync->handle); - OUT_RING(chan, RING_3D(NV30_3D_DMA_TEXTURE0, 2)); + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_TEXTURE0, 2); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->gart->handle); - OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1)); + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_COLOR1, 1); OUT_RING(chan, chan->vram->handle); - OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 2)); + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_COLOR0, 2); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->vram->handle); - OUT_RING(chan, RING_3D(NV30_3D_DMA_VTXBUF0, 2)); + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_VTXBUF0, 2); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->gart->handle); - OUT_RING(chan, RING_3D(NV30_3D_DMA_FENCE, 2)); + BEGIN_RING(chan, screen->eng3d, 
NV30_3D_DMA_FENCE, 2); OUT_RING(chan, 0); OUT_RING(chan, screen->query->handle); - OUT_RING(chan, RING_3D(NV30_3D_DMA_UNK1AC, 2)); + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_UNK1AC, 2); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->vram->handle); diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c index 54619037d82..f3dcb205c61 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.c +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -304,7 +304,7 @@ nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, { struct nvfx_context *nvfx = nvfx_context(pipe); - nvfx->constbuf[shader] = buf; + pipe_resource_reference(&nvfx->constbuf[shader], buf); nvfx->constbuf_nr[shader] = buf ? (buf->width0 / (4 * sizeof(float))) : 0; if (shader == PIPE_SHADER_VERTEX) { diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index 501fdd4430c..40ae4f5bd21 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -7,11 +7,11 @@ void nvfx_state_viewport_validate(struct nvfx_context *nvfx) { struct nouveau_channel *chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; struct pipe_viewport_state *vpt = &nvfx->viewport; - WAIT_RING(chan, 11); if(nvfx->render_mode == HW) { - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TRANSLATE_X, 8)); + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TRANSLATE_X, 8); OUT_RINGf(chan, vpt->translate[0]); OUT_RINGf(chan, vpt->translate[1]); OUT_RINGf(chan, vpt->translate[2]); @@ -20,10 +20,10 @@ nvfx_state_viewport_validate(struct nvfx_context *nvfx) OUT_RINGf(chan, vpt->scale[1]); OUT_RINGf(chan, vpt->scale[2]); OUT_RINGf(chan, vpt->scale[3]); - OUT_RING(chan, RING_3D(0x1d78, 1)); + BEGIN_RING(chan, eng3d, 0x1d78, 1); OUT_RING(chan, 1); } else { - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TRANSLATE_X, 8)); + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TRANSLATE_X, 8); OUT_RINGf(chan, 0.0f); OUT_RINGf(chan, 0.0f); OUT_RINGf(chan, 0.0f); @@ -32,7 +32,7 @@ nvfx_state_viewport_validate(struct nvfx_context *nvfx) OUT_RINGf(chan, 1.0f); OUT_RINGf(chan, 1.0f); OUT_RINGf(chan, 1.0f); - OUT_RING(chan, RING_3D(0x1d78, 1)); + BEGIN_RING(chan, eng3d, 0x1d78, 1); OUT_RING(chan, nvfx->is_nv4x ? 
0x110 : 1); } } @@ -41,6 +41,7 @@ void nvfx_state_scissor_validate(struct nvfx_context *nvfx) { struct nouveau_channel *chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe; struct pipe_scissor_state *s = &nvfx->scissor; @@ -48,8 +49,7 @@ nvfx_state_scissor_validate(struct nvfx_context *nvfx) return; nvfx->state.scissor_enabled = rast->scissor; - WAIT_RING(chan, 3); - OUT_RING(chan, RING_3D(NV30_3D_SCISSOR_HORIZ, 2)); + BEGIN_RING(chan, eng3d, NV30_3D_SCISSOR_HORIZ, 2); if (nvfx->state.scissor_enabled) { OUT_RING(chan, ((s->maxx - s->minx) << 16) | s->minx); OUT_RING(chan, ((s->maxy - s->miny) << 16) | s->miny); @@ -63,12 +63,12 @@ void nvfx_state_sr_validate(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; struct pipe_stencil_ref *sr = &nvfx->stencil_ref; - WAIT_RING(chan, 4); - OUT_RING(chan, RING_3D(NV30_3D_STENCIL_FUNC_REF(0), 1)); + BEGIN_RING(chan, eng3d, NV30_3D_STENCIL_FUNC_REF(0), 1); OUT_RING(chan, sr->ref_value[0]); - OUT_RING(chan, RING_3D(NV30_3D_STENCIL_FUNC_REF(1), 1)); + BEGIN_RING(chan, eng3d, NV30_3D_STENCIL_FUNC_REF(1), 1); OUT_RING(chan, sr->ref_value[1]); } @@ -76,10 +76,10 @@ void nvfx_state_blend_colour_validate(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; struct pipe_blend_color *bcol = &nvfx->blend_colour; - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV30_3D_BLEND_COLOR, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_BLEND_COLOR, 1); OUT_RING(chan, ((float_to_ubyte(bcol->color[3]) << 24) | (float_to_ubyte(bcol->color[0]) << 16) | (float_to_ubyte(bcol->color[1]) << 8) | @@ -90,9 +90,9 @@ void nvfx_state_stipple_validate(struct nvfx_context *nvfx) { struct nouveau_channel *chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - WAIT_RING(chan, 33); - OUT_RING(chan, RING_3D(NV30_3D_POLYGON_STIPPLE_PATTERN(0), 32)); + BEGIN_RING(chan, eng3d, NV30_3D_POLYGON_STIPPLE_PATTERN(0), 32); OUT_RINGp(chan, nvfx->stipple, 32); } @@ -100,12 +100,12 @@ static void nvfx_coord_conventions_validate(struct nvfx_context* nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; unsigned value = nvfx->hw_fragprog->coord_conventions; if(value & NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED) value |= nvfx->framebuffer.height << NV30_3D_COORD_CONVENTIONS_HEIGHT__SHIFT; - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV30_3D_COORD_CONVENTIONS, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_COORD_CONVENTIONS, 1); OUT_RING(chan, value); } @@ -113,6 +113,7 @@ static void nvfx_ucp_validate(struct nvfx_context* nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; unsigned enables[7] = { 0, @@ -126,17 +127,15 @@ nvfx_ucp_validate(struct nvfx_context* nvfx) if(!nvfx->use_vp_clipping) { - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANES_ENABLE, 1); OUT_RING(chan, 0); - WAIT_RING(chan, 6 * 4 + 1); - OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANE(0, 0), nvfx->clip.nr * 4)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANE(0, 0), + nvfx->clip.nr * 4); OUT_RINGp(chan, &nvfx->clip.ucp[0][0], nvfx->clip.nr * 4); } - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1)); + 
BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANES_ENABLE, 1); OUT_RING(chan, enables[nvfx->clip.nr]); } @@ -144,38 +143,37 @@ static void nvfx_vertprog_ucp_validate(struct nvfx_context* nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; unsigned i; struct nvfx_vertex_program* vp = nvfx->hw_vertprog; if(nvfx->clip.nr != vp->clip_nr) { unsigned idx; - WAIT_RING(chan, 14); /* remove last instruction bit */ if(vp->clip_nr >= 0) { idx = vp->nr_insns - 7 + vp->clip_nr; - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1); OUT_RING(chan, vp->exec->start + idx); - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4); OUT_RINGp (chan, vp->insns[idx].data, 4); } /* set last instruction bit */ idx = vp->nr_insns - 7 + nvfx->clip.nr; - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1); OUT_RING(chan, vp->exec->start + idx); - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4); OUT_RINGp(chan, vp->insns[idx].data, 3); OUT_RING(chan, vp->insns[idx].data[3] | 1); vp->clip_nr = nvfx->clip.nr; } // TODO: only do this for the ones changed - WAIT_RING(chan, 6 * 6); for(i = 0; i < nvfx->clip.nr; ++i) { - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_CONST_ID, 5); OUT_RING(chan, vp->data->start + i); OUT_RINGp (chan, nvfx->clip.ucp[i], 4); } @@ -185,6 +183,7 @@ static boolean nvfx_state_validate_common(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; unsigned dirty; unsigned still_dirty = 0; int new_fb_mode = -1; /* 1 = all swizzled, 0 = make all linear */ @@ -287,8 +286,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(vp_output != nvfx->hw_vp_output) { - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV40_3D_VP_RESULT_EN, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_VP_RESULT_EN, 1); OUT_RING(chan, vp_output); nvfx->hw_vp_output = vp_output; } @@ -320,8 +318,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(dirty & NVFX_NEW_ZSA || (new_fb_mode >= 0)) { - WAIT_RING(chan, 3); - OUT_RING(chan, RING_3D(NV30_3D_DEPTH_WRITE_ENABLE, 2)); + BEGIN_RING(chan, eng3d, NV30_3D_DEPTH_WRITE_ENABLE, 2); OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.writemask); OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.enabled); } @@ -334,10 +331,9 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) // TODO: what about nv30? 
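A note on the recurring conversion in the nvfx hunks above: manual pushbuffer bookkeeping (WAIT_RING to reserve dwords, then OUT_RING with a raw RING_3D()/RING_3D_NI() method header) is replaced by BEGIN_RING/BEGIN_RING_NI calls that name the eng3d graphics object explicitly. The WAIT_RING calls disappear, presumably because BEGIN_RING reserves the required pushbuffer space and selects the object's subchannel itself. A minimal sketch of the new idiom, reusing the scissor method seen above; the helper name is illustrative and not part of the patch:

static void
example_emit_scissor(struct nouveau_channel *chan,
                     struct nouveau_grobj *eng3d,
                     unsigned w, unsigned h)
{
	/* header: method NV30_3D_SCISSOR_HORIZ, 2 data dwords follow */
	BEGIN_RING(chan, eng3d, NV30_3D_SCISSOR_HORIZ, 2);
	OUT_RING(chan, (w << 16) | 0); /* SCISSOR_HORIZ: width << 16 | x */
	OUT_RING(chan, (h << 16) | 0); /* SCISSOR_VERT:  height << 16 | y */
}

BEGIN_RING_NI is the non-incrementing variant, used for streams of data aimed at a single method such as NV30_3D_VERTEX_DATA or NV30_3D_VB_ELEMENT_U16 above.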
if(nvfx->is_nv4x) { - WAIT_RING(chan, 4); - OUT_RING(chan, RING_3D(NV40_3D_TEX_CACHE_CTL, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_TEX_CACHE_CTL, 1); OUT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV40_3D_TEX_CACHE_CTL, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_TEX_CACHE_CTL, 1); OUT_RING(chan, 1); } } diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c index 30e48c80735..f9fed94044a 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_fb.c +++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c @@ -7,7 +7,7 @@ nvfx_surface_linear_renderable(struct pipe_surface* surf) { /* TODO: precompute this in nvfx_surface creation */ return (surf->texture->flags & NVFX_RESOURCE_FLAG_LINEAR) - && !(surf->offset & 63) + && !(((struct nvfx_surface*)surf)->offset & 63) && !(((struct nvfx_surface*)surf)->pitch & 63); } @@ -16,8 +16,8 @@ nvfx_surface_swizzled_renderable(struct pipe_framebuffer_state* fb, struct pipe_ { /* TODO: precompute this in nvfx_surface creation */ return !((struct nvfx_miptree*)surf->texture)->linear_pitch - && (surf->texture->target != PIPE_TEXTURE_3D || u_minify(surf->texture->depth0, surf->level) <= 1) - && !(surf->offset & 127) + && (surf->texture->target != PIPE_TEXTURE_3D || u_minify(surf->texture->depth0, surf->u.tex.level) <= 1) + && !(((struct nvfx_surface*)surf)->offset & 127) && (surf->width == fb->width) && (surf->height == fb->height) && !((struct nvfx_surface*)surf)->temp @@ -31,7 +31,7 @@ nvfx_surface_get_render_target(struct pipe_surface* surf, int all_swizzled, stru if(!ns->temp) { target->bo = ((struct nvfx_miptree*)surf->texture)->base.bo; - target->offset = surf->offset; + target->offset = ns->offset; target->pitch = align(ns->pitch, 64); assert(target->pitch); return FALSE; @@ -54,7 +54,7 @@ nvfx_framebuffer_prepare(struct nvfx_context *nvfx) int all_swizzled = 1; if(!nvfx->is_nv4x) - assert(fb->nr_cbufs <= 2); + assert(fb->nr_cbufs <= 1); else assert(fb->nr_cbufs <= 4); @@ -96,6 +96,7 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) { struct pipe_framebuffer_state *fb = &nvfx->framebuffer; struct nouveau_channel *chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; uint32_t rt_enable, rt_format; int i; unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; @@ -113,7 +114,9 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->cbufs[i], prepare_result, &nvfx->hw_rt[i]) << i; for(; i < 4; ++i) - nvfx->hw_rt[i].bo = 0; + nvfx->hw_rt[i].bo = NULL; + + nvfx->hw_zeta.bo = NULL; if (fb->zsbuf) { nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->zsbuf, prepare_result, &nvfx->hw_zeta) << 7; @@ -202,11 +205,11 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) //printf("rendering to bo %p [%i] at offset %i with pitch %i\n", rt0->bo, rt0->bo->handle, rt0->offset, pitch); - OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_DMA_COLOR0, 1); OUT_RELOC(chan, rt0->bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - OUT_RING(chan, RING_3D(NV30_3D_COLOR0_PITCH, 2)); + BEGIN_RING(chan, eng3d, NV30_3D_COLOR0_PITCH, 2); OUT_RING(chan, pitch); OUT_RELOC(chan, rt0->bo, rt0->offset, rt_flags | NOUVEAU_BO_LOW, @@ -214,11 +217,11 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) } if (rt_enable & NV30_3D_RT_ENABLE_COLOR1) { - OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1)); + 
BEGIN_RING(chan, eng3d, NV30_3D_DMA_COLOR1, 1); OUT_RELOC(chan, nvfx->hw_rt[1].bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - OUT_RING(chan, RING_3D(NV30_3D_COLOR1_OFFSET, 2)); + BEGIN_RING(chan, eng3d, NV30_3D_COLOR1_OFFSET, 2); OUT_RELOC(chan, nvfx->hw_rt[1].bo, nvfx->hw_rt[1].offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); @@ -228,68 +231,68 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) if(nvfx->is_nv4x) { if (rt_enable & NV40_3D_RT_ENABLE_COLOR2) { - OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR2, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR2, 1); OUT_RELOC(chan, nvfx->hw_rt[2].bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - OUT_RING(chan, RING_3D(NV40_3D_COLOR2_OFFSET, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_COLOR2_OFFSET, 1); OUT_RELOC(chan, nvfx->hw_rt[2].bo, nvfx->hw_rt[2].offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - OUT_RING(chan, RING_3D(NV40_3D_COLOR2_PITCH, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_COLOR2_PITCH, 1); OUT_RING(chan, nvfx->hw_rt[2].pitch); } if (rt_enable & NV40_3D_RT_ENABLE_COLOR3) { - OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR3, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR3, 1); OUT_RELOC(chan, nvfx->hw_rt[3].bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - OUT_RING(chan, RING_3D(NV40_3D_COLOR3_OFFSET, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_COLOR3_OFFSET, 1); OUT_RELOC(chan, nvfx->hw_rt[3].bo, nvfx->hw_rt[3].offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - OUT_RING(chan, RING_3D(NV40_3D_COLOR3_PITCH, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_COLOR3_PITCH, 1); OUT_RING(chan, nvfx->hw_rt[3].pitch); } } if (fb->zsbuf) { - OUT_RING(chan, RING_3D(NV30_3D_DMA_ZETA, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_DMA_ZETA, 1); OUT_RELOC(chan, nvfx->hw_zeta.bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - OUT_RING(chan, RING_3D(NV30_3D_ZETA_OFFSET, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_ZETA_OFFSET, 1); /* TODO: reverse engineer LMA */ OUT_RELOC(chan, nvfx->hw_zeta.bo, nvfx->hw_zeta.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); if(nvfx->is_nv4x) { - OUT_RING(chan, RING_3D(NV40_3D_ZETA_PITCH, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_ZETA_PITCH, 1); OUT_RING(chan, nvfx->hw_zeta.pitch); } } else if(nvfx->is_nv4x) { - OUT_RING(chan, RING_3D(NV40_3D_ZETA_PITCH, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_ZETA_PITCH, 1); OUT_RING(chan, 64); } - OUT_RING(chan, RING_3D(NV30_3D_RT_ENABLE, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_RT_ENABLE, 1); OUT_RING(chan, rt_enable); - OUT_RING(chan, RING_3D(NV30_3D_RT_HORIZ, 3)); + BEGIN_RING(chan, eng3d, NV30_3D_RT_HORIZ, 3); OUT_RING(chan, (w << 16) | 0); OUT_RING(chan, (h << 16) | 0); OUT_RING(chan, rt_format); - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_HORIZ, 2)); + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_HORIZ, 2); OUT_RING(chan, (w << 16) | 0); OUT_RING(chan, (h << 16) | 0); - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_HORIZ(0), 2)); + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_HORIZ(0), 2); OUT_RING(chan, ((w - 1) << 16) | 0); OUT_RING(chan, ((h - 1) << 16) | 0); if(!nvfx->is_nv4x) { /* Wonder why this is needed, context should all be set to zero on init */ /* TODO: we can most likely remove this, after putting it in context init */ - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TX_ORIGIN, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TX_ORIGIN, 1); OUT_RING(chan, 0); } nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAMEBUFFER; diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c index 
70adebc1be5..be31853d717 100644 --- a/src/gallium/drivers/nvfx/nvfx_surface.c +++ b/src/gallium/drivers/nvfx/nvfx_surface.c @@ -99,17 +99,17 @@ nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(&surf->base.base), &surf->base); } else { rgn->bo = ((struct nvfx_resource*)surf->base.base.texture)->bo; - rgn->offset = surf->base.base.offset; + rgn->offset = surf->offset; if(surf->base.base.texture->flags & NVFX_RESOURCE_FLAG_LINEAR) rgn->pitch = surf->pitch; else { rgn->pitch = 0; - rgn->z = surf->base.base.zslice; + rgn->z = surf->base.base.u.tex.first_layer; rgn->w = surf->base.base.width; rgn->h = surf->base.base.height; - rgn->d = u_minify(surf->base.base.texture->depth0, surf->base.base.level); + rgn->d = u_minify(surf->base.base.texture->depth0, surf->base.base.u.tex.level); } } @@ -119,11 +119,11 @@ nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, } static INLINE void -nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, struct pipe_subresource sub, unsigned x, unsigned y, unsigned z, bool for_write) +nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, unsigned level, unsigned x, unsigned y, unsigned z, bool for_write) { if(pt->target != PIPE_BUFFER) { - struct nvfx_surface* ns = (struct nvfx_surface*)util_surfaces_peek(&((struct nvfx_miptree*)pt)->surfaces, pt, sub.face, sub.level, z); + struct nvfx_surface* ns = (struct nvfx_surface*)util_surfaces_peek(&((struct nvfx_miptree*)pt)->surfaces, pt, level, z); if(ns && util_dirty_surface_is_dirty(&ns->base)) { nvfx_region_init_for_surface(rgn, ns, x, y, for_write); @@ -132,22 +132,22 @@ nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* } rgn->bo = ((struct nvfx_resource*)pt)->bo; - rgn->offset = nvfx_subresource_offset(pt, sub.face, sub.level, z); + rgn->offset = nvfx_subresource_offset(pt, z, level, z); rgn->x = x; rgn->y = y; if(pt->flags & NVFX_RESOURCE_FLAG_LINEAR) { - rgn->pitch = nvfx_subresource_pitch(pt, sub.level); + rgn->pitch = nvfx_subresource_pitch(pt, level); rgn->z = 0; } else { rgn->pitch = 0; rgn->z = z; - rgn->w = u_minify(pt->width0, sub.level); - rgn->h = u_minify(pt->height0, sub.level); - rgn->d = u_minify(pt->depth0, sub.level); + rgn->w = u_minify(pt->width0, level); + rgn->h = u_minify(pt->height0, level); + rgn->d = u_minify(pt->depth0, level); } nvfx_region_set_format(rgn, pt->format); @@ -168,8 +168,8 @@ nvfx_get_blitter(struct pipe_context* pipe, int copy) if(nvfx->query && !nvfx->blitters_in_use) { struct nouveau_channel* chan = nvfx->screen->base.channel; - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1)); + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1); OUT_RING(chan, 0); } @@ -209,8 +209,8 @@ nvfx_put_blitter(struct pipe_context* pipe, struct blitter_context* blitter) if(nvfx->query && !nvfx->blitters_in_use) { struct nouveau_channel* chan = nvfx->screen->base.channel; - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1)); + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1); OUT_RING(chan, 1); } } @@ -234,11 +234,10 @@ nvfx_region_clone(struct nv04_2d_context* ctx, struct nv04_region* rgn, unsigned static void nvfx_resource_copy_region(struct pipe_context *pipe, - struct pipe_resource *dstr, struct pipe_subresource subdst, - unsigned dstx, unsigned 
dsty, unsigned dstz, - struct pipe_resource *srcr, struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned w, unsigned h) + struct pipe_resource *dstr, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *srcr, unsigned src_level, + const struct pipe_box *src_box) { static int copy_threshold = -1; struct nv04_2d_context *ctx = nvfx_screen(pipe->screen)->eng2d; @@ -247,6 +246,8 @@ nvfx_resource_copy_region(struct pipe_context *pipe, int src_on_gpu; boolean small; int ret; + unsigned w = src_box->width; + unsigned h = src_box->height; if(!w || !h) return; @@ -257,8 +258,8 @@ nvfx_resource_copy_region(struct pipe_context *pipe, dst_to_gpu = dstr->usage != PIPE_USAGE_DYNAMIC && dstr->usage != PIPE_USAGE_STAGING; src_on_gpu = nvfx_resource_on_gpu(srcr); - nvfx_region_init_for_subresource(&dst, dstr, subdst, dstx, dsty, dstz, TRUE); - nvfx_region_init_for_subresource(&src, srcr, subsrc, srcx, srcy, srcz, FALSE); + nvfx_region_init_for_subresource(&dst, dstr, dst_level, dstx, dsty, dstz, TRUE); + nvfx_region_init_for_subresource(&src, srcr, src_level, src_box->x, src_box->y, src_box->z, FALSE); w = util_format_get_stride(dstr->format, w) >> dst.bpps; h = util_format_get_nblocksy(dstr->format, h); @@ -279,7 +280,7 @@ nvfx_resource_copy_region(struct pipe_context *pipe, * TODO: perhaps support reinterpreting the formats */ struct blitter_context* blitter = nvfx_get_blitter(pipe, 1); - util_blitter_copy_region(blitter, dstr, subdst, dstx, dsty, dstz, srcr, subsrc, srcx, srcy, srcz, w, h, TRUE); + util_blitter_copy_region(blitter, dstr, dst_level, dstx, dsty, dstz, srcr, src_level, src_box, TRUE); nvfx_put_blitter(pipe, blitter); } else @@ -371,7 +372,7 @@ static void nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int to_temp) { struct nvfx_surface* ns = (struct nvfx_surface*)surf; - struct pipe_subresource tempsr, surfsr; + struct pipe_box box; struct nvfx_context* nvfx = nvfx_context(pipe); struct nvfx_miptree* temp; unsigned use_vertex_buffers; @@ -387,15 +388,20 @@ nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int use_index_buffer = nvfx->use_index_buffer; base_vertex = nvfx->base_vertex; - tempsr.face = 0; - tempsr.level = 0; - surfsr.face = surf->face; - surfsr.level = surf->level; + box.x = box.y = 0; + assert(surf->u.tex.first_layer == surf->u.tex.last_layer); + box.width = surf->width; + box.height = surf->height; + box.depth = 1; - if(to_temp) - nvfx_resource_copy_region(pipe, &temp->base.base, tempsr, 0, 0, 0, surf->texture, surfsr, 0, 0, surf->zslice, surf->width, surf->height); - else - nvfx_resource_copy_region(pipe, surf->texture, surfsr, 0, 0, surf->zslice, &temp->base.base, tempsr, 0, 0, 0, surf->width, surf->height); + if(to_temp) { + box.z = surf->u.tex.first_layer; + nvfx_resource_copy_region(pipe, &temp->base.base, 0, 0, 0, 0, surf->texture, surf->u.tex.level, &box); + } + else { + box.z = 0; + nvfx_resource_copy_region(pipe, surf->texture, surf->u.tex.level, 0, 0, surf->u.tex.first_layer, &temp->base.base, 0, &box); + } /* If this triggers, it probably means we attempted to use the blitter * but failed due to non-renderability of the target. 
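The nvfx_resource.c/nvfx_surface.c hunks above track a Gallium interface change: pipe_subresource (face + level) and the separate source x/y/z/width/height arguments give way to a plain mipmap level plus a pipe_box, and surfaces are now created from the context via create_surface() with a pipe_surface template instead of screen->get_tex_surface(). A hedged sketch of a caller under the new interface (the helper and variable names are illustrative; assumes pipe/p_context.h, pipe/p_state.h, util/u_inlines.h, util/u_math.h and string.h):

static void
copy_level_then_bind_as_rt(struct pipe_context *pipe,
                           struct pipe_resource *dst,
                           struct pipe_resource *src,
                           unsigned level)
{
	struct pipe_box box;
	struct pipe_surface surf_tmpl, *surf;

	/* Source region: the whole mip level, one layer. */
	box.x = box.y = box.z = 0;
	box.width  = u_minify(src->width0, level);
	box.height = u_minify(src->height0, level);
	box.depth  = 1;

	/* New signature: destination coordinates stay explicit, the source
	 * region travels in the pipe_box. */
	pipe->resource_copy_region(pipe, dst, level, 0, 0, 0, src, level, &box);

	/* Surface creation moved from the screen to the context and is
	 * driven by a template, as in r300_flush_depth_stencil() further below. */
	memset(&surf_tmpl, 0, sizeof(surf_tmpl));
	surf_tmpl.format = dst->format;
	surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
	surf_tmpl.u.tex.level = level;
	surf_tmpl.u.tex.first_layer = 0;
	surf_tmpl.u.tex.last_layer = 0;
	surf = pipe->create_surface(pipe, dst, &surf_tmpl);

	/* ... draw to surf ... */
	pipe_surface_reference(&surf, NULL);
}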
diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.c b/src/gallium/drivers/nvfx/nvfx_transfer.c index 7cb47a20f64..2debcb6eb8f 100644 --- a/src/gallium/drivers/nvfx/nvfx_transfer.c +++ b/src/gallium/drivers/nvfx/nvfx_transfer.c @@ -21,10 +21,10 @@ struct nvfx_staging_transfer struct pipe_transfer * nvfx_transfer_new(struct pipe_context *pipe, - struct pipe_resource *pt, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box) + struct pipe_resource *pt, + unsigned level, + unsigned usage, + const struct pipe_box *box) { if((usage & (PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_DONTBLOCK)) == PIPE_TRANSFER_DONTBLOCK) { @@ -44,11 +44,11 @@ nvfx_transfer_new(struct pipe_context *pipe, return NULL; pipe_resource_reference(&tx->resource, pt); - tx->sr = sr; + tx->level = level; tx->usage = usage; tx->box = *box; - tx->slice_stride = tx->stride = util_format_get_stride(pt->format, box->width); + tx->layer_stride = tx->stride = util_format_get_stride(pt->format, box->width); tx->data = buffer->data + util_format_get_stride(pt->format, box->x); return tx; @@ -62,20 +62,20 @@ nvfx_transfer_new(struct pipe_context *pipe, if(!tx) return NULL; - util_staging_transfer_init(pipe, pt, sr, usage, box, direct, &tx->base); + util_staging_transfer_init(pipe, pt, level, usage, box, direct, &tx->base); if(direct) { - tx->base.base.stride = nvfx_subresource_pitch(pt, sr.level); - tx->base.base.slice_stride = tx->base.base.stride * u_minify(pt->height0, sr.level); - tx->offset = nvfx_subresource_offset(pt, sr.face, sr.level, box->z) + tx->base.base.stride = nvfx_subresource_pitch(pt, level); + tx->base.base.layer_stride = tx->base.base.stride * u_minify(pt->height0, level); + tx->offset = nvfx_subresource_offset(pt, box->z, level, box->z) + util_format_get_2d_size(pt->format, tx->base.base.stride, box->y) + util_format_get_stride(pt->format, box->x); } else { tx->base.base.stride = nvfx_subresource_pitch(tx->base.staging_resource, 0); - tx->base.base.slice_stride = tx->base.base.stride * tx->base.staging_resource->height0; + tx->base.base.layer_stride = tx->base.base.stride * tx->base.staging_resource->height0; tx->offset = 0; } @@ -187,7 +187,7 @@ nvfx_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *ptx) static void nvfx_transfer_inline_write( struct pipe_context *pipe, struct pipe_resource *pr, - struct pipe_subresource sr, + unsigned level, unsigned usage, const struct pipe_box *box, const void *data, @@ -196,7 +196,7 @@ static void nvfx_transfer_inline_write( struct pipe_context *pipe, { if(pr->target != PIPE_BUFFER) { - u_default_transfer_inline_write(pipe, pr, sr, usage, box, data, stride, slice_stride); + u_default_transfer_inline_write(pipe, pr, level, usage, box, data, stride, slice_stride); } else { diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.h b/src/gallium/drivers/nvfx/nvfx_transfer.h index 20f20d5b0b8..682f428b793 100644 --- a/src/gallium/drivers/nvfx/nvfx_transfer.h +++ b/src/gallium/drivers/nvfx/nvfx_transfer.h @@ -9,7 +9,7 @@ struct pipe_transfer * nvfx_transfer_new(struct pipe_context *pcontext, struct pipe_resource *pt, - struct pipe_subresource sr, + unsigned level, unsigned usage, const struct pipe_box *box); diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 597664e7716..01dacb43dad 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -9,8 +9,7 @@ #include "nvfx_resource.h" #include "nouveau/nouveau_channel.h" - -#include "nouveau/nouveau_pushbuf.h" +#include 
"nouveau/nv04_pushbuf.h" static inline unsigned util_guess_unique_indices_count(unsigned mode, unsigned indices) @@ -247,6 +246,7 @@ boolean nvfx_vbo_validate(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; int i; int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr); unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD; @@ -262,11 +262,11 @@ nvfx_vbo_validate(struct nvfx_context *nvfx) struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer); float v[4]; ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0); - nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp); + nvfx_emit_vtx_attr(chan, eng3d, ve->idx, v, ve->ncomp); } - OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements)); + BEGIN_RING(chan, eng3d, NV30_3D_VTXFMT(0), elements); if(nvfx->use_vertex_buffers) { unsigned idx = 0; @@ -297,12 +297,12 @@ nvfx_vbo_validate(struct nvfx_context *nvfx) unsigned i; /* seems to be some kind of cache flushing */ for(i = 0; i < 3; ++i) { - OUT_RING(chan, RING_3D(0x1718, 1)); + BEGIN_RING(chan, eng3d, 0x1718, 1); OUT_RING(chan, 0); } } - OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements)); + BEGIN_RING(chan, eng3d, NV30_3D_VTXBUF(0), elements); if(nvfx->use_vertex_buffers) { unsigned idx = 0; @@ -330,7 +330,7 @@ nvfx_vbo_validate(struct nvfx_context *nvfx) OUT_RING(chan, 0); } - OUT_RING(chan, RING_3D(0x1710, 1)); + BEGIN_RING(chan, eng3d, 0x1710, 1); OUT_RING(chan, 0); nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements; @@ -342,15 +342,14 @@ void nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; unsigned num_outputs = nvfx->vertprog->draw_elements; int elements = MAX2(num_outputs, nvfx->hw_vtxelt_nr); if (!elements) return; - WAIT_RING(chan, (1 + 6 + 1 + 2) + elements * 2); - - OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements)); + BEGIN_RING(chan, eng3d, NV30_3D_VTXFMT(0), elements); for(unsigned i = 0; i < num_outputs; ++i) OUT_RING(chan, (4 << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT); for(unsigned i = num_outputs; i < elements; ++i) @@ -360,16 +359,16 @@ nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx) unsigned i; /* seems to be some kind of cache flushing */ for(i = 0; i < 3; ++i) { - OUT_RING(chan, RING_3D(0x1718, 1)); + BEGIN_RING(chan, eng3d, 0x1718, 1); OUT_RING(chan, 0); } } - OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements)); + BEGIN_RING(chan, eng3d, NV30_3D_VTXBUF(0), elements); for (unsigned i = 0; i < elements; i++) OUT_RING(chan, 0); - OUT_RING(chan, RING_3D(0x1710, 1)); + BEGIN_RING(chan, eng3d, 0x1710, 1); OUT_RING(chan, 0); nvfx->hw_vtxelt_nr = num_outputs; @@ -592,18 +591,10 @@ nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count, { struct nvfx_context *nvfx = nvfx_context(pipe); - for(unsigned i = 0; i < count; ++i) - { - pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer); - nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset; - nvfx->vtxbuf[i].max_index = vb[i].max_index; - nvfx->vtxbuf[i].stride = vb[i].stride; - } - - for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i) - pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0); + util_copy_vertex_buffers(nvfx->vtxbuf, + &nvfx->vtxbuf_nr, + vb, count); - nvfx->vtxbuf_nr = count; nvfx->use_vertex_buffers = -1; nvfx->draw_dirty |= NVFX_NEW_ARRAYS; } diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c 
b/src/gallium/drivers/nvfx/nvfx_vertprog.c index e543fda50ef..a11941f3d51 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -1182,6 +1182,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) { struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; struct nvfx_pipe_vertex_program *pvp = nvfx->vertprog; struct nvfx_vertex_program* vp; struct pipe_resource *constbuf; @@ -1341,7 +1342,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) } */ - WAIT_RING(chan, 6 * vp->nr_consts); for (i = nvfx->use_vp_clipping ? 6 : 0; i < vp->nr_consts; i++) { struct nvfx_vertex_program_data *vpd = &vp->consts[i]; @@ -1356,7 +1356,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) //printf("upload into %i + %i: %f %f %f %f\n", vp->data->start, i, vpd->value[0], vpd->value[1], vpd->value[2], vpd->value[3]); - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_CONST_ID, 5); OUT_RING(chan, i + vp->data->start); OUT_RINGp(chan, (uint32_t *)vpd->value, 4); } @@ -1364,11 +1364,10 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) /* Upload vtxprog */ if (upload_code) { - WAIT_RING(chan, 2 + 5 * vp->nr_insns); - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1); OUT_RING(chan, vp->exec->start); for (i = 0; i < vp->nr_insns; i++) { - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4); //printf("%08x %08x %08x %08x\n", vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]); OUT_RINGp(chan, vp->insns[i].data, 4); } @@ -1377,11 +1376,10 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) if(nvfx->dirty & (NVFX_NEW_VERTPROG)) { - WAIT_RING(chan, 6); - OUT_RING(chan, RING_3D(NV30_3D_VP_START_FROM_ID, 1)); + BEGIN_RING(chan, eng3d, NV30_3D_VP_START_FROM_ID, 1); OUT_RING(chan, vp->exec->start); if(nvfx->is_nv4x) { - OUT_RING(chan, RING_3D(NV40_3D_VP_ATTRIB_EN, 1)); + BEGIN_RING(chan, eng3d, NV40_3D_VP_ATTRIB_EN, 1); OUT_RING(chan, vp->ir); } } diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 0ac4e4c6f12..a43e83c0d36 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -179,6 +179,12 @@ static void r300_clear(struct pipe_context* pipe, boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); uint32_t hyperz_dcv = hyperz->zb_depthclearvalue; + /* Decompress zbuffers that are bound as textures. If we didn't flush here, + * it would happen inside the blitter when updating derived state, + * causing a blitter operation to be called from inside the blitter, + * which would overwrite saved states and they would never get restored. */ + r300_flush_depth_textures(r300); + /* Enable fast Z clear. * The zbuffer must be in micro-tiled mode, otherwise it locks up. 
*/ if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) { @@ -186,12 +192,12 @@ static void r300_clear(struct pipe_context* pipe, r300_depth_clear_value(fb->zsbuf->format, depth, stencil); r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG); - if (zstex->zmask_mem[fb->zsbuf->level]) { - r300->zmask_clear.dirty = TRUE; + if (zstex->zmask_mem[fb->zsbuf->u.tex.level]) { + r300_mark_atom_dirty(r300, &r300->zmask_clear); buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; } - if (zstex->hiz_mem[fb->zsbuf->level]) - r300->hiz_clear.dirty = TRUE; + if (zstex->hiz_mem[fb->zsbuf->u.tex.level]) + r300_mark_atom_dirty(r300, &r300->hiz_clear); } /* Enable CBZB clear. */ @@ -230,7 +236,7 @@ static void r300_clear(struct pipe_context* pipe, r300_get_num_cs_end_dwords(r300); /* Reserve CS space. */ - if (dwords > (r300->cs->ndw - r300->cs->cdw)) { + if (dwords > (R300_MAX_CMDBUF_DWORDS - r300->cs->cdw)) { r300->context.flush(&r300->context, 0, NULL); } @@ -259,9 +265,9 @@ static void r300_clear(struct pipe_context* pipe, * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update * looks if zmask/hiz is in use and enables fastfill accordingly. */ if (zstex && - (zstex->zmask_in_use[fb->zsbuf->level] || - zstex->hiz_in_use[fb->zsbuf->level])) { - r300->hyperz_state.dirty = TRUE; + (zstex->zmask_in_use[fb->zsbuf->u.tex.level] || + zstex->hiz_in_use[fb->zsbuf->u.tex.level])) { + r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -274,6 +280,12 @@ static void r300_clear_render_target(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); + /* Decompress zbuffers that are bound as textures. If we didn't flush here, + * it would happen inside the blitter when updating derived state, + * causing a blitter operation to be called from inside the blitter, + * which would overwrite saved states and they would never get restored. */ + r300_flush_depth_textures(r300); + r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_clear_render_target(r300->blitter, dst, rgba, dstx, dsty, width, height); @@ -291,6 +303,12 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); + /* Decompress zbuffers that are bound as textures. If we didn't flush here, + * it would happen inside the blitter when updating derived state, + * causing a blitter operation to be called from inside the blitter, + * which would overwrite saved states and they would never get restored. */ + r300_flush_depth_textures(r300); + r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); @@ -298,64 +316,103 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, } /* Flush a depth stencil buffer. 
*/ -void r300_flush_depth_stencil(struct pipe_context *pipe, - struct pipe_resource *dst, - struct pipe_subresource subdst, - unsigned zslice) +static void r300_flush_depth_stencil(struct pipe_context *pipe, + struct pipe_resource *dst, + unsigned level, + unsigned layer) { struct r300_context *r300 = r300_context(pipe); - struct pipe_surface *dstsurf; + struct pipe_surface *dstsurf, surf_tmpl; struct r300_texture *tex = r300_texture(dst); - if (!tex->zmask_mem[subdst.level]) + if (!tex->zmask_mem[level]) return; - if (!tex->zmask_in_use[subdst.level]) + if (!tex->zmask_in_use[level]) return; - dstsurf = pipe->screen->get_tex_surface(pipe->screen, dst, - subdst.face, subdst.level, zslice, - PIPE_BIND_DEPTH_STENCIL); + surf_tmpl.format = dst->format; + surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL; + surf_tmpl.u.tex.level = level; + surf_tmpl.u.tex.first_layer = layer; + surf_tmpl.u.tex.last_layer = layer; + dstsurf = pipe->create_surface(pipe, dst, &surf_tmpl); + r300->z_decomp_rd = TRUE; + r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_flush_depth_stencil(r300->blitter, dstsurf); r300_blitter_end(r300); r300->z_decomp_rd = FALSE; - tex->zmask_in_use[subdst.level] = FALSE; + tex->zmask_in_use[level] = FALSE; + pipe_surface_reference(&dstsurf, NULL); +} + +/* We can't use compressed zbuffers as samplers. */ +void r300_flush_depth_textures(struct r300_context *r300) +{ + struct r300_textures_state *state = + (struct r300_textures_state*)r300->textures_state.state; + unsigned i, level; + unsigned count = MIN2(state->sampler_view_count, + state->sampler_state_count); + + if (r300->z_decomp_rd) + return; + + for (i = 0; i < count; i++) + if (state->sampler_views[i] && state->sampler_states[i]) { + struct pipe_resource *tex = state->sampler_views[i]->base.texture; + + if (tex->target == PIPE_TEXTURE_3D || + tex->target == PIPE_TEXTURE_CUBE) + continue; + + /* Ignore non-depth textures. + * Also ignore reinterpreted depth textures, e.g. resource_copy. */ + if (!util_format_is_depth_or_stencil(tex->format)) + continue; + + for (level = 0; level <= tex->last_level; level++) + if (r300_texture(tex)->zmask_in_use[level]) { + /* We don't handle 3D textures and cubemaps yet. */ + r300_flush_depth_stencil(&r300->context, tex, level, 0); + } + } } /* Copy a block of pixels from one surface to another using HW. */ static void r300_hw_copy_region(struct pipe_context* pipe, struct pipe_resource *dst, - struct pipe_subresource subdst, + unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, struct pipe_resource *src, - struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height) + unsigned src_level, + const struct pipe_box *src_box) { struct r300_context* r300 = r300_context(pipe); r300_blitter_begin(r300, R300_COPY); - util_blitter_copy_region(r300->blitter, dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, width, height, - TRUE); + + /* Do a copy */ + util_blitter_copy_region(r300->blitter, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box, TRUE); r300_blitter_end(r300); } /* Copy a block of pixels from one surface to another. 
*/ static void r300_resource_copy_region(struct pipe_context *pipe, struct pipe_resource *dst, - struct pipe_subresource subdst, + unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, struct pipe_resource *src, - struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height) + unsigned src_level, + const struct pipe_box *src_box) { enum pipe_format old_format = dst->format; enum pipe_format new_format = old_format; boolean is_depth; + if (!pipe->screen->is_format_supported(pipe->screen, old_format, src->target, src->nr_samples, @@ -384,8 +441,9 @@ static void r300_resource_copy_region(struct pipe_context *pipe, is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; if (is_depth) { - r300_flush_depth_stencil(pipe, src, subsrc, srcz); + r300_flush_depth_stencil(pipe, src, src_level, src_box->z); } + if (old_format != new_format) { r300_texture_reinterpret_format(pipe->screen, dst, new_format); @@ -393,8 +451,8 @@ static void r300_resource_copy_region(struct pipe_context *pipe, src, new_format); } - r300_hw_copy_region(pipe, dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, width, height); + r300_hw_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); if (old_format != new_format) { r300_texture_reinterpret_format(pipe->screen, diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 48c24092114..2b183f62c56 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -366,7 +366,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV530; caps->num_vert_fpus = 5; caps->is_r500 = TRUE; - /*caps->hiz_ram = RV530_HIZ_LIMIT;*/ + caps->hiz_ram = RV530_HIZ_LIMIT; caps->zmask_ram = PIPE_ZMASK_SIZE; break; @@ -424,4 +424,5 @@ void r300_parse_chipset(struct r300_capabilities* caps) } caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350; + caps->dxtc_swizzle = caps->is_r400 || caps->is_r500; } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index e7ca642b4f3..f2035d20092 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -79,6 +79,10 @@ struct r300_capabilities { boolean is_r500; /* Whether or not the second pixel pipe is accessed with the high bit */ boolean high_second_pipe; + /* DXTC texture swizzling. */ + boolean dxtc_swizzle; + /* Index bias (AKA index offset). */ + boolean index_bias_supported; }; /* Enumerations for legibility and telling which card we're running on. 
*/ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index e8c09b214af..91263ad7bcd 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -35,7 +35,9 @@ #include "r300_screen_buffer.h" #include "r300_winsys.h" -#include <inttypes.h> +#ifdef HAVE_LLVM +#include "gallivm/lp_bld_init.h" +#endif static void r300_update_num_contexts(struct r300_screen *r300screen, int diff) @@ -44,14 +46,14 @@ static void r300_update_num_contexts(struct r300_screen *r300screen, p_atomic_inc(&r300screen->num_contexts); if (r300screen->num_contexts > 1) - util_mempool_set_thread_safety(&r300screen->pool_buffers, - UTIL_MEMPOOL_MULTITHREADED); + util_slab_set_thread_safety(&r300screen->pool_buffers, + UTIL_SLAB_MULTITHREADED); } else { p_atomic_dec(&r300screen->num_contexts); if (r300screen->num_contexts <= 1) - util_mempool_set_thread_safety(&r300screen->pool_buffers, - UTIL_MEMPOOL_SINGLETHREADED); + util_slab_set_thread_safety(&r300screen->pool_buffers, + UTIL_SLAB_SINGLETHREADED); } } @@ -88,6 +90,7 @@ static void r300_release_referenced_objects(struct r300_context *r300) /* Vertex buffers. */ for (i = 0; i < r300->vertex_buffer_count; i++) { pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL); + pipe_resource_reference(&r300->valid_vertex_buffer[i], NULL); } /* If there are any queries pending or not destroyed, remove them now. */ @@ -100,21 +103,15 @@ static void r300_release_referenced_objects(struct r300_context *r300) static void r300_destroy_context(struct pipe_context* context) { struct r300_context* r300 = r300_context(context); - struct r300_atom *atom; if (r300->blitter) util_blitter_destroy(r300->blitter); - if (r300->draw) + if (r300->draw) { draw_destroy(r300->draw); - /* Print stats, if enabled. */ - if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { - fprintf(stderr, "r300: Stats for context %p:\n", r300); - fprintf(stderr, " : Flushes: %" PRIu64 "\n", r300->flush_counter); - foreach(atom, &r300->atom_list) { - fprintf(stderr, " : %s: %" PRIu64 " emits\n", - atom->name, atom->counter); - } +#ifdef HAVE_LLVM + gallivm_destroy(r300->gallivm); +#endif } if (r300->upload_vb) @@ -135,7 +132,7 @@ static void r300_destroy_context(struct pipe_context* context) r300->rws->cs_destroy(r300->cs); /* XXX: No way to tell if this was initialized or not? */ - util_mempool_destroy(&r300->pool_transfers); + util_slab_destroy(&r300->pool_transfers); r300_update_num_contexts(r300->screen, -1); @@ -177,10 +174,16 @@ void r300_flush_cb(void *data) r300->atomname.size = atomsize; \ r300->atomname.emit = r300_emit_##atomname; \ r300->atomname.dirty = FALSE; \ - insert_at_tail(&r300->atom_list, &r300->atomname); \ } while (0) -static void r300_setup_atoms(struct r300_context* r300) +#define R300_ALLOC_ATOM(atomname, statetype) \ +do { \ + r300->atomname.state = CALLOC_STRUCT(statetype); \ + if (r300->atomname.state == NULL) \ + return FALSE; \ +} while (0) + +static boolean r300_setup_atoms(struct r300_context* r300) { boolean is_rv350 = r300->screen->caps.is_rv350; boolean is_r500 = r300->screen->caps.is_r500; @@ -192,9 +195,6 @@ static void r300_setup_atoms(struct r300_context* r300) /* Create the actual atom list. * - * Each atom is examined and emitted in the order it appears here, which - * can affect performance and conformance if not handled with care. - * * Some atoms never change size, others change every emit - those have * the size of 0 here. 
* @@ -206,7 +206,6 @@ static void r300_setup_atoms(struct r300_context* r300) * - fb_state_pipelined (pipelined regs) * The motivation behind this is to be able to emit a strict * subset of the regs, and to have reasonable register ordering. */ - make_empty_list(&r300->atom_list); /* SC, GB (unpipelined), RB3D (unpipelined), ZB (unpipelined). */ R300_INIT_ATOM(gpu_flush, 9); R300_INIT_ATOM(aa_state, 4); @@ -261,23 +260,23 @@ static void r300_setup_atoms(struct r300_context* r300) } /* Some non-CSO atoms need explicit space to store the state locally. */ - r300->aa_state.state = CALLOC_STRUCT(r300_aa_state); - r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); - r300->clip_state.state = CALLOC_STRUCT(r300_clip_state); - r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state); - r300->gpu_flush.state = CALLOC_STRUCT(pipe_framebuffer_state); - r300->hyperz_state.state = CALLOC_STRUCT(r300_hyperz_state); - r300->invariant_state.state = CALLOC_STRUCT(r300_invariant_state); - r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); - r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); - r300->textures_state.state = CALLOC_STRUCT(r300_textures_state); - r300->vap_invariant_state.state = CALLOC_STRUCT(r300_vap_invariant_state); - r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); - r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); - r300->fs_constants.state = CALLOC_STRUCT(r300_constant_buffer); - r300->vs_constants.state = CALLOC_STRUCT(r300_constant_buffer); + R300_ALLOC_ATOM(aa_state, r300_aa_state); + R300_ALLOC_ATOM(blend_color_state, r300_blend_color_state); + R300_ALLOC_ATOM(clip_state, r300_clip_state); + R300_ALLOC_ATOM(hyperz_state, r300_hyperz_state); + R300_ALLOC_ATOM(invariant_state, r300_invariant_state); + R300_ALLOC_ATOM(textures_state, r300_textures_state); + R300_ALLOC_ATOM(vap_invariant_state, r300_vap_invariant_state); + R300_ALLOC_ATOM(viewport_state, r300_viewport_state); + R300_ALLOC_ATOM(ztop_state, r300_ztop_state); + R300_ALLOC_ATOM(fb_state, pipe_framebuffer_state); + R300_ALLOC_ATOM(gpu_flush, pipe_framebuffer_state); + R300_ALLOC_ATOM(scissor_state, pipe_scissor_state); + R300_ALLOC_ATOM(rs_block_state, r300_rs_block); + R300_ALLOC_ATOM(fs_constants, r300_constant_buffer); + R300_ALLOC_ATOM(vs_constants, r300_constant_buffer); if (!r300->screen->caps.has_tcl) { - r300->vertex_stream_state.state = CALLOC_STRUCT(r300_vertex_stream_state); + R300_ALLOC_ATOM(vertex_stream_state, r300_vertex_stream_state); } /* Some non-CSO atoms don't use the state pointer. */ @@ -289,11 +288,13 @@ static void r300_setup_atoms(struct r300_context* r300) /* Some states must be marked as dirty here to properly set up * hardware in the first command stream. */ - r300->invariant_state.dirty = TRUE; - r300->pvs_flush.dirty = TRUE; - r300->vap_invariant_state.dirty = TRUE; - r300->texture_cache_inval.dirty = TRUE; - r300->textures_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->invariant_state); + r300_mark_atom_dirty(r300, &r300->pvs_flush); + r300_mark_atom_dirty(r300, &r300->vap_invariant_state); + r300_mark_atom_dirty(r300, &r300->texture_cache_inval); + r300_mark_atom_dirty(r300, &r300->textures_state); + + return TRUE; } /* Not every state tracker calls every driver function before the first draw @@ -319,7 +320,7 @@ static void r300_init_states(struct pipe_context *pipe) pipe->set_scissor_state(pipe, &ss); /* Initialize the clip state. 
*/ - if (r300_context(pipe)->screen->caps.has_tcl) { + if (r300->screen->caps.has_tcl) { pipe->set_clip_state(pipe, &cs); } else { BEGIN_CB(clip->cb, 2); @@ -421,9 +422,9 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, make_empty_list(&r300->query_list); - util_mempool_create(&r300->pool_transfers, - sizeof(struct pipe_transfer), 64, - UTIL_MEMPOOL_SINGLETHREADED); + util_slab_create(&r300->pool_transfers, + sizeof(struct pipe_transfer), 64, + UTIL_SLAB_SINGLETHREADED); r300->cs = rws->cs_create(rws); if (r300->cs == NULL) @@ -431,7 +432,14 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, if (!r300screen->caps.has_tcl) { /* Create a Draw. This is used for SW TCL. */ +#ifdef HAVE_LLVM + r300->gallivm = gallivm_create(); + r300->draw = draw_create_gallivm(&r300->context, r300->gallivm); +#else r300->draw = draw_create(&r300->context); +#endif + if (r300->draw == NULL) + goto fail; /* Enable our renderer. */ draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300)); /* Disable converting points/lines to triangles. */ @@ -439,7 +447,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, draw_wide_point_threshold(r300->draw, 10000000.f); } - r300_setup_atoms(r300); + if (!r300_setup_atoms(r300)) + goto fail; r300_init_blit_functions(r300); r300_init_flush_functions(r300); @@ -462,14 +471,14 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, goto fail; r300->upload_ib = u_upload_create(&r300->context, - 32 * 1024, 16, + 64 * 1024, 16, PIPE_BIND_INDEX_BUFFER); if (r300->upload_ib == NULL) goto fail; r300->upload_vb = u_upload_create(&r300->context, - 128 * 1024, 16, + 1024 * 1024, 16, PIPE_BIND_VERTEX_BUFFER); if (r300->upload_vb == NULL) goto fail; diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 7217c51b951..e09cf87f733 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -43,12 +43,8 @@ struct r300_vertex_shader; struct r300_stencilref_context; struct r300_atom { - /* List pointers. */ - struct r300_atom *prev, *next; /* Name, for debugging. */ const char* name; - /* Stat counter. */ - uint64_t counter; /* Opaque state. */ void* state; /* Emit the state to the context. */ @@ -258,6 +254,8 @@ struct r300_constant_buffer { uint32_t *ptr; /* Remapping table. */ unsigned *remap_table; + /* const buffer base */ + uint32_t buffer_base; }; /* Query object. @@ -282,6 +280,7 @@ struct r300_query { /* The buffer where query results are stored. */ struct r300_winsys_buffer *buffer; + struct r300_winsys_cs_buffer *cs_buffer; /* The size of the buffer. */ unsigned buffer_size; /* The domain of the buffer. */ @@ -313,6 +312,7 @@ struct r300_surface { /* Winsys buffer backing the texture. */ struct r300_winsys_buffer *buffer; + struct r300_winsys_cs_buffer *cs_buffer; enum r300_buffer_domain domain; @@ -396,6 +396,7 @@ struct r300_texture { /* Pipe buffer backing this texture. */ struct r300_winsys_buffer *buffer; + struct r300_winsys_cs_buffer *cs_buffer; /* Registers carrying texture format data. */ /* Only format-independent bits should be filled in. */ @@ -439,9 +440,6 @@ struct r300_translate_context { /* Translate cache for incompatible vertex offset/stride/format fallback. */ struct translate_cache *translate_cache; - /* The vertex buffer slot containing the translated buffer. */ - unsigned vb_slot; - /* Saved and new vertex element state. 
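The r300_create_context() hunk above is the consumer side of draw_create_gallivm(): with LLVM available, the context creates its own gallivm_state, hands it to the draw module for SW TCL, and destroys it in r300_destroy_context() right after draw_destroy(). A rough sketch of that ownership pattern, assuming the in-tree draw and gallivm headers touched by this diff (not a standalone program, error paths trimmed):

#ifdef HAVE_LLVM
#include "gallivm/lp_bld_init.h"
#endif
#include "draw/draw_context.h"

struct swtcl_state {
    struct gallivm_state *gallivm;   /* owned here, unused without LLVM */
    struct draw_context *draw;       /* borrows the gallivm state */
};

/* Create the SW TCL draw context; with LLVM available, give it a
 * context-owned gallivm_state so JIT data is freed with the context. */
static int swtcl_create(struct swtcl_state *s, struct pipe_context *pipe)
{
#ifdef HAVE_LLVM
    s->gallivm = gallivm_create();
    s->draw = draw_create_gallivm(pipe, s->gallivm);
#else
    s->draw = draw_create(pipe);
#endif
    return s->draw != NULL;
}

static void swtcl_destroy(struct swtcl_state *s)
{
    if (s->draw) {
        draw_destroy(s->draw);
#ifdef HAVE_LLVM
        gallivm_destroy(s->gallivm);
#endif
    }
}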
*/ void *saved_velems, *new_velems; }; @@ -458,6 +456,7 @@ struct r300_context { struct r300_screen *screen; /* Draw module. Used mostly for SW TCL. */ + struct gallivm_state *gallivm; struct draw_context* draw; /* Vertex buffer for SW TCL. */ struct pipe_resource* vbo; @@ -492,73 +491,79 @@ struct r300_context { struct r300_query query_list; /* Various CSO state objects. */ - /* Beginning of atom list. */ - struct r300_atom atom_list; + + /* Each atom is emitted in the order it appears here, which can affect + * performance and stability if not handled with care. */ + /* GPU flush. */ + struct r300_atom gpu_flush; /* Anti-aliasing (MSAA) state. */ struct r300_atom aa_state; + /* Framebuffer state. */ + struct r300_atom fb_state; + /* HyperZ state (various SC/ZB bits). */ + struct r300_atom hyperz_state; + /* ZTOP state. */ + struct r300_atom ztop_state; + /* Depth, stencil, and alpha state. */ + struct r300_atom dsa_state; /* Blend state. */ struct r300_atom blend_state; /* Blend color state. */ struct r300_atom blend_color_state; + /* Scissor state. */ + struct r300_atom scissor_state; + /* Invariant state. This must be emitted to get the engine started. */ + struct r300_atom invariant_state; + /* Viewport state. */ + struct r300_atom viewport_state; + /* PVS flush. */ + struct r300_atom pvs_flush; + /* VAP invariant state. */ + struct r300_atom vap_invariant_state; + /* Vertex stream formatting state. */ + struct r300_atom vertex_stream_state; + /* Vertex shader. */ + struct r300_atom vs_state; /* User clip planes. */ struct r300_atom clip_state; - /* Depth, stencil, and alpha state. */ - struct r300_atom dsa_state; + /* RS block state + VAP (vertex shader) output mapping state. */ + struct r300_atom rs_block_state; + /* Rasterizer state. */ + struct r300_atom rs_state; + /* Framebuffer state (pipelined regs). */ + struct r300_atom fb_state_pipelined; /* Fragment shader. */ struct r300_atom fs; /* Fragment shader RC_CONSTANT_STATE variables. */ struct r300_atom fs_rc_constant_state; /* Fragment shader constant buffer. */ struct r300_atom fs_constants; - /* Framebuffer state. */ - struct r300_atom fb_state; - /* Framebuffer state (pipelined regs). */ - struct r300_atom fb_state_pipelined; - /* HyperZ state (various SC/ZB bits). */ - struct r300_atom hyperz_state; - /* Occlusion query. */ - struct r300_atom query_start; - /* Rasterizer state. */ - struct r300_atom rs_state; - /* RS block state + VAP (vertex shader) output mapping state. */ - struct r300_atom rs_block_state; - /* Scissor state. */ - struct r300_atom scissor_state; - /* Textures state. */ - struct r300_atom textures_state; - /* Vertex stream formatting state. */ - struct r300_atom vertex_stream_state; - /* Vertex shader. */ - struct r300_atom vs_state; /* Vertex shader constant buffer. */ struct r300_atom vs_constants; - /* Viewport state. */ - struct r300_atom viewport_state; - /* ZTOP state. */ - struct r300_atom ztop_state; - /* PVS flush. */ - struct r300_atom pvs_flush; - /* VAP invariant state. */ - struct r300_atom vap_invariant_state; /* Texture cache invalidate. */ struct r300_atom texture_cache_inval; - /* GPU flush. */ - struct r300_atom gpu_flush; + /* Textures state. */ + struct r300_atom textures_state; /* HiZ clear */ struct r300_atom hiz_clear; /* zmask clear */ struct r300_atom zmask_clear; + /* Occlusion query. */ + struct r300_atom query_start; - /* Invariant state. This must be emitted to get the engine started. */ - struct r300_atom invariant_state; + /* The pointers to the first and the last atom. 
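The atom fields above are now declared in emission order, which is what lets the driver drop the linked list: dirty atoms are tracked as a contiguous [first_dirty, last_dirty) pointer range that r300_mark_atom_dirty() widens and r300_emit_dirty_state() walks. A standalone sketch of the same bookkeeping with stand-in types (like the driver, it relies on the atoms being consecutive struct members):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct atom {
    const char *name;
    bool dirty;
};

/* Atoms laid out contiguously in the order they must be emitted. */
struct ctx {
    struct atom gpu_flush, aa_state, fb_state, query_start;
    struct atom *first_dirty, *last_dirty;   /* half-open range */
};

/* Same idea as r300_mark_atom_dirty(): grow the dirty range to cover
 * the newly dirtied atom instead of touching a list. */
static void mark_atom_dirty(struct ctx *c, struct atom *atom)
{
    atom->dirty = true;

    if (!c->first_dirty) {
        c->first_dirty = atom;
        c->last_dirty = atom + 1;
    } else {
        if (atom < c->first_dirty)
            c->first_dirty = atom;
        else if (atom + 1 > c->last_dirty)
            c->last_dirty = atom + 1;
    }
}

static void emit_dirty(struct ctx *c)
{
    struct atom *a;
    /* Walk only the dirty range; atoms inside it that are clean are
     * skipped by the per-atom flag, as in r300_emit_dirty_state(). */
    for (a = c->first_dirty; a != c->last_dirty; a++) {
        if (a->dirty) {
            printf("emit %s\n", a->name);
            a->dirty = false;
        }
    }
    c->first_dirty = c->last_dirty = NULL;
}

int main(void)
{
    struct ctx c = { { "gpu_flush" }, { "aa_state" },
                     { "fb_state" }, { "query_start" }, NULL, NULL };
    mark_atom_dirty(&c, &c.fb_state);
    mark_atom_dirty(&c, &c.aa_state);
    emit_dirty(&c);   /* emits aa_state and fb_state in declaration order */
    return 0;
}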
*/ + struct r300_atom *first_dirty, *last_dirty; /* Vertex buffers for Gallium. */ + /* May contain user buffers. */ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + /* Contains only non-user buffers. */ + struct pipe_resource *valid_vertex_buffer[PIPE_MAX_ATTRIBS]; int vertex_buffer_count; int vertex_buffer_max_index; + boolean any_user_vbs; /* Vertex elements for Gallium. */ struct r300_vertex_element_state *velems; - bool any_user_vbs; struct pipe_index_buffer index_buffer; @@ -599,12 +604,32 @@ struct r300_context { struct u_upload_mgr *upload_vb; struct u_upload_mgr *upload_ib; - struct util_mempool pool_transfers; + struct util_slab_mempool pool_transfers; /* Stat counter. */ uint64_t flush_counter; + + /* const tracking for VS */ + int vs_const_base; + + /* AOS (PACKET3_3D_LOAD_VBPNTR) command buffer for the case offset=0. */ + uint32_t vertex_arrays_cb[(16 * 3 + 1) / 2]; + boolean vertex_arrays_dirty; + + /* Whether any buffer (FB, textures, VBOs) has been set, but buffers + * haven't been validated yet. */ + boolean validate_buffers; + /* Whether user buffers have been validated. */ + boolean upload_vb_validated; + boolean upload_ib_validated; }; +#define foreach_atom(r300, atom) \ + for (atom = &r300->gpu_flush; atom != (&r300->query_start)+1; atom++) + +#define foreach_dirty_atom(r300, atom) \ + for (atom = r300->first_dirty; atom != r300->last_dirty; atom++) + /* Convenience cast wrappers. */ static INLINE struct r300_query* r300_query(struct pipe_query* q) { @@ -631,6 +656,22 @@ static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300) return (struct r300_fragment_shader*)r300->fs.state; } +static INLINE void r300_mark_atom_dirty(struct r300_context *r300, + struct r300_atom *atom) +{ + atom->dirty = TRUE; + + if (!r300->first_dirty) { + r300->first_dirty = atom; + r300->last_dirty = atom+1; + } else { + if (atom < r300->first_dirty) + r300->first_dirty = atom; + else if (atom+1 > r300->last_dirty) + r300->last_dirty = atom+1; + } +} + struct pipe_context* r300_create_context(struct pipe_screen* screen, void *priv); @@ -647,10 +688,7 @@ void r300_init_state_functions(struct r300_context* r300); void r300_init_resource_functions(struct r300_context* r300); /* r300_blit.c */ -void r300_flush_depth_stencil(struct pipe_context *pipe, - struct pipe_resource *dst, - struct pipe_subresource subdst, - unsigned zslice); +void r300_flush_depth_textures(struct r300_context *r300); /* r300_query.c */ void r300_resume_query(struct r300_context *r300, @@ -658,7 +696,8 @@ void r300_resume_query(struct r300_context *r300, void r300_stop_query(struct r300_context *r300); /* r300_render_translate.c */ -void r300_begin_vertex_translate(struct r300_context *r300); +void r300_begin_vertex_translate(struct r300_context *r300, + int min_index, int max_index); void r300_end_vertex_translate(struct r300_context *r300); void r300_translate_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, @@ -670,20 +709,23 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300); /* r300_render.c */ void r300_draw_flush_vbuf(struct r300_context *r300); -boolean r500_index_bias_supported(struct r300_context *r300); void r500_emit_index_bias(struct r300_context *r300, int index_bias); /* r300_state.c */ enum r300_fb_state_change { R300_CHANGED_FB_STATE = 0, R300_CHANGED_CBZB_FLAG, - R300_CHANGED_ZCLEAR_FLAG + R300_CHANGED_ZCLEAR_FLAG, + R300_CHANGED_MULTIWRITE }; void r300_mark_fb_state_dirty(struct r300_context *r300, enum r300_fb_state_change 
change); void r300_mark_fs_code_dirty(struct r300_context *r300); +/* r300_state_derived.c */ +void r300_update_derived_state(struct r300_context* r300); + /* r300_debug.c */ void r500_dump_rs_block(struct r300_rs_block *rs); diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index c194d6a1b08..6726f100e1b 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -51,7 +51,7 @@ int cs_count = 0; (void) cs_count; (void) cs_winsys; #define BEGIN_CS(size) do { \ - assert(size <= (cs_copy->ndw - cs_copy->cdw)); \ + assert(size <= (R300_MAX_CMDBUF_DWORDS - cs_copy->cdw)); \ CS_DEBUG(cs_count = size;) \ } while (0) @@ -72,7 +72,7 @@ */ #define OUT_CS(value) do { \ - cs_copy->ptr[cs_copy->cdw++] = (value); \ + cs_copy->buf[cs_copy->cdw++] = (value); \ CS_DEBUG(cs_count--;) \ } while (0) @@ -96,7 +96,7 @@ OUT_CS(CP_PACKET3(op, count)) #define OUT_CS_TABLE(values, count) do { \ - memcpy(cs_copy->ptr + cs_copy->cdw, values, count * 4); \ + memcpy(cs_copy->buf + cs_copy->cdw, values, count * 4); \ cs_copy->cdw += count; \ CS_DEBUG(cs_count -= count;) \ } while (0) @@ -106,26 +106,26 @@ * Writing relocations. */ -#define OUT_CS_RELOC(bo, offset, rd, wd) do { \ +#define OUT_CS_RELOC(bo, offset) do { \ assert(bo); \ OUT_CS(offset); \ - cs_winsys->cs_write_reloc(cs_copy, bo, rd, wd); \ + cs_winsys->cs_write_reloc(cs_copy, bo); \ CS_DEBUG(cs_count -= 2;) \ } while (0) -#define OUT_CS_BUF_RELOC(bo, offset, rd, wd) do { \ +#define OUT_CS_BUF_RELOC(bo, offset) do { \ assert(bo); \ - OUT_CS_RELOC(r300_buffer(bo)->buf, offset, rd, wd); \ + OUT_CS_RELOC(r300_buffer(bo)->cs_buf, offset); \ } while (0) -#define OUT_CS_TEX_RELOC(tex, offset, rd, wd) do { \ +#define OUT_CS_TEX_RELOC(tex, offset) do { \ assert(tex); \ - OUT_CS_RELOC(tex->buffer, offset, rd, wd); \ + OUT_CS_RELOC(tex->cs_buffer, offset); \ } while (0) -#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd) do { \ +#define OUT_CS_BUF_RELOC_NO_OFFSET(bo) do { \ assert(bo); \ - cs_winsys->cs_write_reloc(cs_copy, r300_buffer(bo)->buf, rd, wd); \ + cs_winsys->cs_write_reloc(cs_copy, r300_buffer(bo)->cs_buf); \ CS_DEBUG(cs_count -= 2;) \ } while (0) @@ -136,7 +136,7 @@ #define WRITE_CS_TABLE(values, count) do { \ CS_DEBUG(assert(cs_count == 0);) \ - memcpy(cs_copy->ptr + cs_copy->cdw, (values), (count) * 4); \ + memcpy(cs_copy->buf + cs_copy->cdw, (values), (count) * 4); \ cs_copy->cdw += (count); \ } while (0) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index f78fe34790c..d6aa90bd053 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -40,8 +40,8 @@ static const struct debug_named_value debug_options[] = { { "rs", DBG_RS, "Log rasterizer" }, { "fb", DBG_FB, "Log framebuffer" }, { "cbzb", DBG_CBZB, "Log fast color clear info" }, - { "stats", DBG_STATS, "Log emission statistics" }, { "hyperz", DBG_HYPERZ, "Log HyperZ info" }, + { "upload", DBG_UPLOAD, "Log user buffer upload info" }, { "scissor", DBG_SCISSOR, "Log scissor info" }, { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries" }, { "anisohq", DBG_ANISOHQ, "Use high quality anisotropic filtering" }, diff --git a/src/gallium/drivers/r300/r300_defines.h b/src/gallium/drivers/r300/r300_defines.h index 896aeef395d..2d111f9158d 100644 --- a/src/gallium/drivers/r300/r300_defines.h +++ b/src/gallium/drivers/r300/r300_defines.h @@ -43,8 +43,8 @@ enum r300_buffer_tiling { }; enum r300_buffer_domain { /* bitfield */ - R300_DOMAIN_GTT = 1, - R300_DOMAIN_VRAM = 
2 + R300_DOMAIN_GTT = 2, + R300_DOMAIN_VRAM = 4 }; #endif diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 5b6f82cd891..d14cdcbbaf0 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -26,9 +26,9 @@ #include "util/u_format.h" #include "util/u_math.h" #include "util/u_mm.h" -#include "util/u_simple_list.h" #include "r300_context.h" +#include "r300_cb.h" #include "r300_cs.h" #include "r300_emit.h" #include "r300_fs.h" @@ -353,10 +353,10 @@ void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state) if (aa->dest) { OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1); - OUT_CS_RELOC(aa->dest->buffer, aa->dest->offset, 0, aa->dest->domain); + OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->offset); OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1); - OUT_CS_RELOC(aa->dest->buffer, aa->dest->pitch, 0, aa->dest->domain); + OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->pitch); } OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl); @@ -369,6 +369,8 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) struct r300_surface* surf; unsigned i; boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); + uint32_t rb3d_cctl = 0; + CS_LOCALS(r300); BEGIN_CS(size); @@ -376,21 +378,24 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers, which is not * what we usually want. */ if (r300->screen->caps.is_r500) { - OUT_CS_REG(R300_RB3D_CCTL, - R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE); - } else { - OUT_CS_REG(R300_RB3D_CCTL, 0); + rb3d_cctl = R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE; + } + if (fb->nr_cbufs && + r300_fragment_shader_writes_all(r300_fs(r300))) { + rb3d_cctl |= R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs); } + OUT_CS_REG(R300_RB3D_CCTL, rb3d_cctl); + /* Set up colorbuffers. */ for (i = 0; i < fb->nr_cbufs; i++) { surf = r300_surface(fb->cbufs[i]); OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); - OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain); + OUT_CS_RELOC(surf->cs_buffer, surf->offset); OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); - OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain); + OUT_CS_RELOC(surf->cs_buffer, surf->pitch); } /* Set up the ZB part of the CBZB clear. 
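In the r300_emit_fb_state() hunk above, the multiwrite decision is folded into a single rb3d_cctl value: NUM_MULTIWRITES replicates COLOR0 to every bound colorbuffer, so it is set only when the fragment shader is known to write all colorbuffers. A compact restatement of that decision logic; the bit positions below are illustrative stand-ins, the real encoding lives in r300_reg.h:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the R300_RB3D_CCTL_* macros. */
#define CCTL_INDEPENDENT_COLORFORMAT_ENABLE (1u << 0)
#define CCTL_NUM_MULTIWRITES(x)             ((uint32_t)(x) << 5)

static uint32_t compute_rb3d_cctl(bool is_r500, unsigned nr_cbufs,
                                  bool fs_writes_all_cbufs)
{
    uint32_t cctl = 0;

    /* R500 can use an independent format per colorbuffer. */
    if (is_r500)
        cctl |= CCTL_INDEPENDENT_COLORFORMAT_ENABLE;

    /* Replicate COLOR0 to every bound colorbuffer only when the
     * fragment shader actually asked for it (write_all). */
    if (nr_cbufs && fs_writes_all_cbufs)
        cctl |= CCTL_NUM_MULTIWRITES(nr_cbufs);

    return cctl;
}

int main(void)
{
    printf("0x%08x\n", (unsigned)compute_rb3d_cctl(true, 2, true));
    return 0;
}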
*/ @@ -400,10 +405,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format); OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(surf->buffer, surf->cbzb_midpoint_offset, 0, surf->domain); + OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_midpoint_offset); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain); + OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_pitch); DBG(r300, DBG_CBZB, "CBZB clearing cbuf %08x %08x\n", surf->cbzb_format, @@ -416,15 +421,15 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_FORMAT, surf->format); OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain); + OUT_CS_RELOC(surf->cs_buffer, surf->offset); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain); + OUT_CS_RELOC(surf->cs_buffer, surf->pitch); if (can_hyperz) { uint32_t surf_pitch; struct r300_texture *tex; - int level = surf->base.level; + int level = surf->base.u.tex.level; tex = r300_texture(surf->base.texture); surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; @@ -482,15 +487,21 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300, { struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - unsigned i; + unsigned i, num_cbufs = fb->nr_cbufs; CS_LOCALS(r300); + /* If we use the multiwrite feature, the colorbuffers 2,3,4 must be + * marked as UNUSED in the US block. */ + if (r300_fragment_shader_writes_all(r300_fs(r300))) { + num_cbufs = MIN2(num_cbufs, 1); + } + BEGIN_CS(size); /* Colorbuffer format in the US block. * (must be written after unpipelined regs) */ OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4); - for (i = 0; i < fb->nr_cbufs; i++) { + for (i = 0; i < num_cbufs; i++) { OUT_CS(r300_surface(fb->cbufs[i])->format); } for (; i < 4; i++) { @@ -559,7 +570,7 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, struct r300_query *query) { struct r300_capabilities* caps = &r300->screen->caps; - struct r300_winsys_buffer *buf = r300->query_current->buffer; + struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer; CS_LOCALS(r300); assert(caps->num_frag_pipes); @@ -578,28 +589,24 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, /* pipe 3 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 3) * 4, - 0, query->domain); + OUT_CS_RELOC(buf, (query->num_results + 3) * 4); case 3: /* pipe 2 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 2); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 2) * 4, - 0, query->domain); + OUT_CS_RELOC(buf, (query->num_results + 2) * 4); case 2: /* pipe 1 only */ /* As mentioned above, accomodate RV380 and older. */ OUT_CS_REG(R300_SU_REG_DEST, 1 << (caps->high_second_pipe ? 
3 : 1)); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 1) * 4, - 0, query->domain); + OUT_CS_RELOC(buf, (query->num_results + 1) * 4); case 1: /* pipe 0 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 0) * 4, - 0, query->domain); + OUT_CS_RELOC(buf, (query->num_results + 0) * 4); break; default: fprintf(stderr, "r300: Implementation error: Chipset reports %d" @@ -615,13 +622,13 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, static void rv530_emit_query_end_single_z(struct r300_context *r300, struct r300_query *query) { - struct r300_winsys_buffer *buf = r300->query_current->buffer; + struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer; CS_LOCALS(r300); BEGIN_CS(8); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain); + OUT_CS_RELOC(buf, query->num_results * 4); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -629,16 +636,16 @@ static void rv530_emit_query_end_single_z(struct r300_context *r300, static void rv530_emit_query_end_double_z(struct r300_context *r300, struct r300_query *query) { - struct r300_winsys_buffer *buf = r300->query_current->buffer; + struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer; CS_LOCALS(r300); BEGIN_CS(14); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 0) * 4, 0, query->domain); + OUT_CS_RELOC(buf, (query->num_results + 0) * 4); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain); + OUT_CS_RELOC(buf, (query->num_results + 1) * 4); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -799,56 +806,100 @@ void r300_emit_textures_state(struct r300_context *r300, OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format.format2); OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_CS_TEX_RELOC(tex, texstate->format.tile_config, tex->domain, - 0); + OUT_CS_TEX_RELOC(tex, texstate->format.tile_config); } } END_CS; } -void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed) +static void r300_update_vertex_arrays_cb(struct r300_context *r300, unsigned packet_size) { struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; - struct r300_buffer *buf; - int i; unsigned *hw_format_size = r300->velems->hw_format_size; - unsigned size1, size2, aos_count = r300->velems->count; - unsigned packet_size = (aos_count * 3 + 1) / 2; - CS_LOCALS(r300); - - BEGIN_CS(2 + packet_size + aos_count * 2); - OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); - OUT_CS(aos_count | (!indexed ? 
R300_VC_FORCE_PREFETCH : 0)); + unsigned size1, size2, vertex_array_count = r300->velems->count; + int i; + CB_LOCALS; - for (i = 0; i < aos_count - 1; i += 2) { + BEGIN_CB(r300->vertex_arrays_cb, packet_size); + for (i = 0; i < vertex_array_count - 1; i += 2) { vb1 = &vbuf[velem[i].vertex_buffer_index]; vb2 = &vbuf[velem[i+1].vertex_buffer_index]; size1 = hw_format_size[i]; size2 = hw_format_size[i+1]; - OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | + OUT_CB(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); - OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); - OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); + OUT_CB(vb1->buffer_offset + velem[i].src_offset); + OUT_CB(vb2->buffer_offset + velem[i+1].src_offset); } - if (aos_count & 1) { + if (vertex_array_count & 1) { vb1 = &vbuf[velem[i].vertex_buffer_index]; size1 = hw_format_size[i]; - OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); - OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); + OUT_CB(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); + OUT_CB(vb1->buffer_offset + velem[i].src_offset); } + END_CB; + + r300->vertex_arrays_dirty = FALSE; +} + +void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed) +{ + struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; + struct pipe_resource **valid_vbuf = r300->valid_vertex_buffer; + struct pipe_vertex_element *velem = r300->velems->velem; + struct r300_buffer *buf; + int i; + unsigned vertex_array_count = r300->velems->count; + unsigned packet_size = (vertex_array_count * 3 + 1) / 2; + CS_LOCALS(r300); - for (i = 0; i < aos_count; i++) { - buf = r300_buffer(vbuf[velem[i].vertex_buffer_index].buffer); - OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b, buf->domain, 0); + BEGIN_CS(2 + packet_size + vertex_array_count * 2); + OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); + OUT_CS(vertex_array_count | (!indexed ? 
R300_VC_FORCE_PREFETCH : 0)); + + if (!offset) { + if (r300->vertex_arrays_dirty) { + r300_update_vertex_arrays_cb(r300, packet_size); + } + OUT_CS_TABLE(r300->vertex_arrays_cb, packet_size); + } else { + struct pipe_vertex_buffer *vb1, *vb2; + unsigned *hw_format_size = r300->velems->hw_format_size; + unsigned size1, size2; + + for (i = 0; i < vertex_array_count - 1; i += 2) { + vb1 = &vbuf[velem[i].vertex_buffer_index]; + vb2 = &vbuf[velem[i+1].vertex_buffer_index]; + size1 = hw_format_size[i]; + size2 = hw_format_size[i+1]; + + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | + R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); + OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); + OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); + } + + if (vertex_array_count & 1) { + vb1 = &vbuf[velem[i].vertex_buffer_index]; + size1 = hw_format_size[i]; + + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); + OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); + } + } + + for (i = 0; i < vertex_array_count; i++) { + buf = r300_buffer(valid_vbuf[velem[i].vertex_buffer_index]); + OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b); } END_CS; } -void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed) +void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed) { CS_LOCALS(r300); @@ -868,7 +919,7 @@ void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed) OUT_CS(r300->vertex_info.size | (r300->vertex_info.size << 8)); OUT_CS(r300->draw_vbo_offset); - OUT_CS_BUF_RELOC(r300->vbo, 0, r300_buffer(r300->vbo)->domain, 0); + OUT_CS_BUF_RELOC(r300->vbo, 0); END_CS; } @@ -924,7 +975,6 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) struct r300_vertex_program_code* code = &vs->code; struct r300_screen* r300screen = r300->screen; unsigned instruction_count = code->length / 4; - unsigned i; unsigned vtx_mem_size = r300screen->caps.is_r500 ? 128 : 72; unsigned input_count = MAX2(util_bitcount(code->InputsRead), 1); @@ -935,10 +985,6 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) vtx_mem_size / output_count, 10); unsigned pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 5); - unsigned imm_first = vs->externals_count; - unsigned imm_end = vs->code.constants.Count; - unsigned imm_count = vs->immediates_count; - CS_LOCALS(r300); BEGIN_CS(size); @@ -947,12 +993,10 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) * R300_VAP_PVS_CONST_CNTL * R300_VAP_PVS_CODE_CNTL_1 * See the r5xx docs for instructions on how to use these. */ - OUT_CS_REG_SEQ(R300_VAP_PVS_CODE_CNTL_0, 3); - OUT_CS(R300_PVS_FIRST_INST(0) | - R300_PVS_XYZW_VALID_INST(instruction_count - 1) | - R300_PVS_LAST_INST(instruction_count - 1)); - OUT_CS(R300_PVS_MAX_CONST_ADDR(code->constants.Count - 1)); - OUT_CS(instruction_count - 1); + OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_0, R300_PVS_FIRST_INST(0) | + R300_PVS_XYZW_VALID_INST(instruction_count - 1) | + R300_PVS_LAST_INST(instruction_count - 1)); + OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_1, instruction_count - 1); OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->length); @@ -964,19 +1008,6 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) R300_PVS_VF_MAX_VTX_NUM(12) | (r300screen->caps.is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0)); - /* Emit immediates. 
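r300_emit_vertex_arrays() above now has two paths: for offset == 0 the PACKET3_3D_LOAD_VBPNTR payload is prebuilt once into vertex_arrays_cb and replayed with OUT_CS_TABLE until vertex_arrays_dirty is set again, while non-zero offsets still build the packet in place. A standalone sketch of that build-once/replay pattern with stand-in packing:

#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <stdio.h>

#define CB_DWORDS 25   /* (16 arrays * 3 + 1) / 2, as in r300_context */

struct emitter {
    uint32_t cached_cb[CB_DWORDS];  /* prebuilt packet payload */
    unsigned cached_len;
    bool     cache_dirty;           /* set when buffers or velems change */

    uint32_t cs[256];               /* pretend command stream */
    unsigned cdw;
};

/* Rebuild the cached payload.  The real driver packs VBPNTR size/stride
 * pairs and buffer offsets here; a counter stands in for that. */
static void rebuild_cache(struct emitter *e, unsigned dwords)
{
    unsigned i;
    for (i = 0; i < dwords; i++)
        e->cached_cb[i] = 0xb0000000u | i;
    e->cached_len = dwords;
    e->cache_dirty = false;
}

static void emit_vertex_arrays(struct emitter *e, unsigned dwords, int offset)
{
    if (offset == 0) {
        /* Fast path: replay the cached table, rebuilding it only when
         * the inputs actually changed. */
        if (e->cache_dirty || e->cached_len != dwords)
            rebuild_cache(e, dwords);
        memcpy(&e->cs[e->cdw], e->cached_cb, dwords * 4);
        e->cdw += dwords;
    } else {
        /* Slow path: the payload depends on 'offset', so build in place. */
        unsigned i;
        for (i = 0; i < dwords; i++)
            e->cs[e->cdw++] = 0xb0000000u | (uint32_t)(i + offset);
    }
}

int main(void)
{
    struct emitter e = { .cache_dirty = true };
    emit_vertex_arrays(&e, 7, 0);   /* builds the cache, then copies it */
    emit_vertex_arrays(&e, 7, 0);   /* pure memcpy this time */
    printf("emitted %u dwords\n", e.cdw);
    return 0;
}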
*/ - if (imm_count) { - OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, - (r300->screen->caps.is_r500 ? - R500_PVS_CONST_START : R300_PVS_CONST_START) + - imm_first); - OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, imm_count * 4); - for (i = imm_first; i < imm_end; i++) { - const float *data = vs->code.constants.Constants[i].u.Immediate; - OUT_CS_TABLE(data, 4); - } - } - /* Emit flow control instructions. */ if (code->num_fc_ops) { @@ -1001,24 +1032,43 @@ void r300_emit_vs_constants(struct r300_context* r300, unsigned count = ((struct r300_vertex_shader*)r300->vs_state.state)->externals_count; struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; + struct r300_vertex_shader *vs = (struct r300_vertex_shader*)r300->vs_state.state; unsigned i; + int imm_first = vs->externals_count; + int imm_end = vs->code.constants.Count; + int imm_count = vs->immediates_count; CS_LOCALS(r300); - if (!count) - return; - BEGIN_CS(size); - OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, - (r300->screen->caps.is_r500 ? - R500_PVS_CONST_START : R300_PVS_CONST_START)); - OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4); - if (buf->remap_table){ - for (i = 0; i < count; i++) { - uint32_t *data = &buf->ptr[buf->remap_table[i]*4]; + OUT_CS_REG(R300_VAP_PVS_CONST_CNTL, + R300_PVS_CONST_BASE_OFFSET(buf->buffer_base) | + R300_PVS_MAX_CONST_ADDR(MAX2(imm_end - 1, 0))); + if (vs->externals_count) { + OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, + (r300->screen->caps.is_r500 ? + R500_PVS_CONST_START : R300_PVS_CONST_START) + buf->buffer_base); + OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4); + if (buf->remap_table){ + for (i = 0; i < count; i++) { + uint32_t *data = &buf->ptr[buf->remap_table[i]*4]; + OUT_CS_TABLE(data, 4); + } + } else { + OUT_CS_TABLE(buf->ptr, count * 4); + } + } + + /* Emit immediates. */ + if (imm_count) { + OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, + (r300->screen->caps.is_r500 ? + R500_PVS_CONST_START : R300_PVS_CONST_START) + + buf->buffer_base + imm_first); + OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, imm_count * 4); + for (i = imm_first; i < imm_end; i++) { + const float *data = vs->code.constants.Constants[i].u.Immediate; OUT_CS_TABLE(data, 4); } - } else { - OUT_CS_TABLE(buf->ptr, count * 4); } END_CS; } @@ -1073,8 +1123,8 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) tex = r300_texture(fb->zsbuf->texture); - offset = tex->hiz_mem[fb->zsbuf->level]->ofs; - stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; + offset = tex->hiz_mem[fb->zsbuf->u.tex.level]->ofs; + stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level]; /* convert from pixels to 4x4 blocks */ stride = ALIGN_DIVUP(stride, 4); @@ -1096,7 +1146,7 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) z->current_func = -1; /* Mark the current zbuffer's hiz ram as in use. 
*/ - tex->hiz_in_use[fb->zsbuf->level] = TRUE; + tex->hiz_in_use[fb->zsbuf->u.tex.level] = TRUE; } void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state) @@ -1110,9 +1160,9 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state int mult, offset_shift; tex = r300_texture(fb->zsbuf->texture); - stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; + stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level]; - offset = tex->zmask_mem[fb->zsbuf->level]->ofs; + offset = tex->zmask_mem[fb->zsbuf->u.tex.level]->ofs; if (r300->z_compression == RV350_Z_COMPRESS_88) mult = 8; @@ -1138,7 +1188,7 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state } /* Mark the current zbuffer's zmask as in use. */ - tex->zmask_in_use[fb->zsbuf->level] = TRUE; + tex->zmask_in_use[fb->zsbuf->u.tex.level] = TRUE; } void r300_emit_ztop_state(struct r300_context* r300, @@ -1165,72 +1215,77 @@ boolean r300_emit_buffer_validate(struct r300_context *r300, boolean do_validate_vertex_buffers, struct pipe_resource *index_buffer) { - struct pipe_framebuffer_state* fb = + struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_textures_state *texstate = (struct r300_textures_state*)r300->textures_state.state; - struct r300_texture* tex; - struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; - struct pipe_vertex_element *velem = r300->velems->velem; - struct pipe_resource *pbuf; + struct r300_texture *tex; unsigned i; - - /* upload buffers first */ - if (r300->screen->caps.has_tcl && r300->any_user_vbs) { - r300_upload_user_buffers(r300); - r300->any_user_vbs = false; + boolean flushed = FALSE; + +validate: + if (r300->fb_state.dirty) { + /* Color buffers... */ + for (i = 0; i < fb->nr_cbufs; i++) { + tex = r300_texture(fb->cbufs[i]->texture); + assert(tex && tex->buffer && "cbuf is marked, but NULL!"); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, 0, + r300_surface(fb->cbufs[i])->domain); + } + /* ...depth buffer... */ + if (fb->zsbuf) { + tex = r300_texture(fb->zsbuf->texture); + assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, 0, + r300_surface(fb->zsbuf)->domain); + } } + if (r300->textures_state.dirty) { + /* ...textures... */ + for (i = 0; i < texstate->count; i++) { + if (!(texstate->tx_enable & (1 << i))) { + continue; + } - /* Clean out BOs. */ - r300->rws->cs_reset_buffers(r300->cs); - - /* Color buffers... */ - for (i = 0; i < fb->nr_cbufs; i++) { - tex = r300_texture(fb->cbufs[i]->texture); - assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - r300->rws->cs_add_buffer(r300->cs, tex->buffer, 0, - r300_surface(fb->cbufs[i])->domain); - } - /* ...depth buffer... */ - if (fb->zsbuf) { - tex = r300_texture(fb->zsbuf->texture); - assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - r300->rws->cs_add_buffer(r300->cs, tex->buffer, 0, - r300_surface(fb->zsbuf)->domain); - } - /* ...textures... */ - for (i = 0; i < texstate->count; i++) { - if (!(texstate->tx_enable & (1 << i))) { - continue; + tex = r300_texture(texstate->sampler_views[i]->base.texture); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, tex->domain, 0); } - - tex = r300_texture(texstate->sampler_views[i]->base.texture); - r300->rws->cs_add_buffer(r300->cs, tex->buffer, tex->domain, 0); } /* ...occlusion query buffer... 
*/ if (r300->query_current) - r300->rws->cs_add_buffer(r300->cs, r300->query_current->buffer, - 0, r300->query_current->domain); + r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buffer, + 0, r300->query_current->domain); /* ...vertex buffer for SWTCL path... */ if (r300->vbo) - r300->rws->cs_add_buffer(r300->cs, r300_buffer(r300->vbo)->buf, - r300_buffer(r300->vbo)->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_buffer(r300->vbo)->cs_buf, + r300_buffer(r300->vbo)->domain, 0); /* ...vertex buffers for HWTCL path... */ if (do_validate_vertex_buffers) { - for (i = 0; i < r300->velems->count; i++) { - pbuf = vbuf[velem[i].vertex_buffer_index].buffer; - - r300->rws->cs_add_buffer(r300->cs, r300_buffer(pbuf)->buf, - r300_buffer(pbuf)->domain, 0); + struct pipe_resource **buf = r300->valid_vertex_buffer; + struct pipe_resource **last = r300->valid_vertex_buffer + + r300->vertex_buffer_count; + for (; buf != last; buf++) { + if (!*buf) + continue; + + r300->rws->cs_add_reloc(r300->cs, r300_buffer(*buf)->cs_buf, + r300_buffer(*buf)->domain, 0); } } /* ...and index buffer for HWTCL path. */ if (index_buffer) - r300->rws->cs_add_buffer(r300->cs, r300_buffer(index_buffer)->buf, - r300_buffer(index_buffer)->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_buffer(index_buffer)->cs_buf, + r300_buffer(index_buffer)->domain, 0); + /* Now do the validation. */ if (!r300->rws->cs_validate(r300->cs)) { - return FALSE; + /* Ooops, an infinite loop, give up. */ + if (flushed) + return FALSE; + + r300->context.flush(&r300->context, 0, NULL); + flushed = TRUE; + goto validate; } return TRUE; @@ -1241,7 +1296,7 @@ unsigned r300_get_num_dirty_dwords(struct r300_context *r300) struct r300_atom* atom; unsigned dwords = 0; - foreach(atom, &r300->atom_list) { + foreach_dirty_atom(r300, atom) { if (atom->dirty) { dwords += atom->size; } @@ -1260,7 +1315,7 @@ unsigned r300_get_num_cs_end_dwords(struct r300_context *r300) /* Emitted in flush. */ dwords += 26; /* emit_query_end */ dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ - if (r500_index_bias_supported(r300)) + if (r300->screen->caps.index_bias_supported) dwords += 2; return dwords; @@ -1269,17 +1324,16 @@ unsigned r300_get_num_cs_end_dwords(struct r300_context *r300) /* Emit all dirty state. 
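The reworked r300_emit_buffer_validate() above adds relocations only for dirty state and, when the winsys rejects the set, flushes once and retries from the validate: label; a second failure gives up. The same control flow, reduced to a standalone sketch with a stubbed winsys:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the winsys: pretend validation fails until a flush
 * has freed the previously queued relocations. */
struct winsys {
    bool needs_flush;
};

static bool cs_validate(struct winsys *ws)
{
    return !ws->needs_flush;
}

static void flush(struct winsys *ws)
{
    ws->needs_flush = false;
}

/* Mirrors the shape of r300_emit_buffer_validate(): add relocations,
 * validate, and allow exactly one flush-and-retry before failing. */
static bool emit_buffer_validate(struct winsys *ws)
{
    bool flushed = false;

validate:
    /* ... cs_add_reloc() calls for framebuffer, textures, VBOs ... */

    if (!cs_validate(ws)) {
        if (flushed)
            return false;      /* still too big after a flush: give up */

        flush(ws);
        flushed = true;
        goto validate;
    }
    return true;
}

int main(void)
{
    struct winsys ws = { true };
    printf("validated: %d\n", (int)emit_buffer_validate(&ws));
    return 0;
}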
*/ void r300_emit_dirty_state(struct r300_context* r300) { - struct r300_atom* atom; + struct r300_atom *atom; - foreach(atom, &r300->atom_list) { + foreach_dirty_atom(r300, atom) { if (atom->dirty) { atom->emit(r300, atom->size, atom->state); - if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { - atom->counter++; - } atom->dirty = FALSE; } } + r300->first_dirty = NULL; + r300->last_dirty = NULL; r300->dirty_hw++; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 278dbcb4c7c..acea51d942f 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -31,7 +31,7 @@ struct r300_vertex_program_code; uint32_t pack_float24(float f); -void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed); +void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed); void r300_emit_blend_state(struct r300_context* r300, unsigned size, void* state); @@ -86,7 +86,7 @@ void r300_emit_scissor_state(struct r300_context* r300, void r300_emit_textures_state(struct r300_context *r300, unsigned size, void *state); -void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed); +void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed); void r300_emit_vap_invariant_state(struct r300_context *r300, unsigned size, void *state); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 1afd27f0938..b250532ba92 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -49,7 +49,7 @@ static void r300_flush(struct pipe_context* pipe, if (r300->dirty_hw) { r300_emit_hyperz_end(r300); r300_emit_query_end(r300); - if (r500_index_bias_supported(r300)) + if (r300->screen->caps.index_bias_supported) r500_emit_index_bias(r300, 0); r300->flush_counter++; @@ -57,9 +57,9 @@ static void r300_flush(struct pipe_context* pipe, r300->dirty_hw = 0; /* New kitchen sink, baby. */ - foreach(atom, &r300->atom_list) { + foreach_atom(r300, atom) { if (atom->state || atom->allow_null_state) { - atom->dirty = TRUE; + r300_mark_atom_dirty(r300, atom); } } @@ -68,6 +68,14 @@ static void r300_flush(struct pipe_context* pipe, r300->vs_state.dirty = FALSE; r300->vs_constants.dirty = FALSE; } + + r300->validate_buffers = TRUE; + r300->upload_vb_validated = FALSE; + r300->upload_ib_validated = FALSE; + } else { + /* Even if hw is not dirty, we should at least reset the CS in case + * the space checking failed for the first draw operation. */ + r300->rws->cs_flush(r300->cs); } /* reset flushed query */ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index c91532eb7b3..6d4091dc87d 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -390,12 +390,18 @@ static void r300_translate_fragment_shader( compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32; compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32; compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 
512 : 64; - compiler.Base.remove_unused_constants = TRUE; compiler.AllocateHwInputs = &allocate_hardware_inputs; compiler.UserData = &shader->inputs; find_output_registers(&compiler, shader); + shader->write_all = FALSE; + for (i = 0; i < shader->info.num_properties; i++) { + if (shader->info.properties[i].name == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { + shader->write_all = TRUE; + } + } + if (compiler.Base.Debug & RC_DBG_LOG) { DBG(r300, DBG_FP, "r300: Initial fragment program\n"); tgsi_dump(tokens, 0); @@ -408,6 +414,11 @@ static void r300_translate_fragment_shader( r300_tgsi_to_rc(&ttr, tokens); + if (!r300->screen->caps.is_r500 || + compiler.Base.Program.Constants.Count > 200) { + compiler.Base.remove_unused_constants = TRUE; + } + /** * Transform the program to support WPOS. * diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index 51bfa88c5ef..c86a90b85ae 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -54,6 +54,9 @@ struct r300_fragment_shader_code { uint32_t *cb_code; struct r300_fragment_shader_code* next; + + boolean write_all; + }; struct r300_fragment_shader { @@ -81,4 +84,10 @@ static INLINE boolean r300_fragment_shader_writes_depth(struct r300_fragment_sha return (fs->shader->code.writes_depth) ? TRUE : FALSE; } +static INLINE boolean r300_fragment_shader_writes_all(struct r300_fragment_shader *fs) +{ + if (!fs) + return FALSE; + return (fs->shader->write_all) ? TRUE : FALSE; +} #endif /* R300_FS_H */ diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index 79f7f8abe9b..c22e307c679 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -158,8 +158,8 @@ static void r300_update_hyperz(struct r300_context* r300) if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) return; - zmask_in_use = zstex->zmask_in_use[fb->zsbuf->level]; - hiz_in_use = zstex->hiz_in_use[fb->zsbuf->level]; + zmask_in_use = zstex->zmask_in_use[fb->zsbuf->u.tex.level]; + hiz_in_use = zstex->hiz_in_use[fb->zsbuf->u.tex.level]; /* Z fastfill. 
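The fragment shader hunk above derives write_all by scanning the shader's TGSI properties for TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS; this flag is what later gates the NUM_MULTIWRITES setup in the framebuffer atom. A hedged sketch of that scan over a simplified property array (in the driver the data comes from the TGSI shader info):

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for the TGSI shader info and its properties. */
enum property_name {
    PROP_GS_INPUT_PRIM,
    PROP_FS_COORD_ORIGIN,
    PROP_FS_COLOR0_WRITES_ALL_CBUFS,
};

struct shader_property { enum property_name name; unsigned data; };

struct shader_info {
    unsigned num_properties;
    struct shader_property properties[8];
};

/* Mirrors the loop added to r300_translate_fragment_shader(): the shader
 * is flagged as broadcasting COLOR0 if the property is present. */
static bool shader_writes_all_cbufs(const struct shader_info *info)
{
    unsigned i;
    for (i = 0; i < info->num_properties; i++) {
        if (info->properties[i].name == PROP_FS_COLOR0_WRITES_ALL_CBUFS)
            return true;
    }
    return false;
}

int main(void)
{
    struct shader_info info = {
        .num_properties = 1,
        .properties = { { PROP_FS_COLOR0_WRITES_ALL_CBUFS, 1 } },
    };
    printf("write_all = %d\n", (int)shader_writes_all_cbufs(&info));
    return 0;
}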
*/ if (zmask_in_use) { @@ -282,7 +282,7 @@ static void r300_update_ztop(struct r300_context* r300) ztop_state->z_buffer_top = R300_ZTOP_ENABLE; } if (ztop_state->z_buffer_top != old_ztop) - r300->ztop_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->ztop_state); } #define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) @@ -333,7 +333,7 @@ void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf) { struct r300_texture *tex; uint32_t zsize, ndw; - int level = surf->base.level; + int level = surf->base.u.tex.level; tex = r300_texture(surf->base.texture); @@ -352,7 +352,7 @@ void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf { int bsize = 256; uint32_t zsize, ndw; - int level = surf->base.level; + int level = surf->base.u.tex.level; struct r300_texture *tex; tex = r300_texture(surf->base.texture); diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 5f34fcb2744..6223e043210 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -60,6 +60,7 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, q->buffer = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096, PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM, q->domain); + q->cs_buffer = r300->rws->buffer_get_cs_handle(r300->rws, q->buffer); return (struct pipe_query*)q; } @@ -79,7 +80,7 @@ void r300_resume_query(struct r300_context *r300, struct r300_query *query) { r300->query_current = query; - r300->query_start.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->query_start); } static void r300_begin_query(struct pipe_context* pipe, diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 6bea783f697..d1154dee40a 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -427,7 +427,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_PVS_CONST_START 512 # define R500_PVS_CONST_START 1024 # define R300_MAX_PVS_CONST_VECS 256 -# define R500_MAX_PVS_CONST_VECS 1024 +# define R500_MAX_PVS_CONST_VECS 256 # define R300_PVS_UCP_START 1024 # define R500_PVS_UCP_START 1536 # define R300_POINT_VPORT_SCALE_OFFSET 1030 @@ -553,6 +553,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Addresses are relative to the vertex program parameters area. */ #define R300_VAP_PVS_CONST_CNTL 0x22D4 # define R300_PVS_CONST_BASE_OFFSET_SHIFT 0 +# define R300_PVS_CONST_BASE_OFFSET(x) (x) # define R300_PVS_MAX_CONST_ADDR_SHIFT 16 # define R300_PVS_MAX_CONST_ADDR(x) ((x) << 16) #define R300_VAP_PVS_CODE_CNTL_1 0x22D8 @@ -1520,11 +1521,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_TRI_PERF_3_8 (3<<15) # define R300_ANISO_THRESHOLD_MASK (7<<17) +# define R400_DXTC_SWIZZLE_ENABLE (1<<21) # define R500_MACRO_SWITCH (1<<22) # define R500_TX_MAX_ANISO(x) ((x) << 23) # define R500_TX_MAX_ANISO_MASK (63 << 23) # define R500_TX_ANISO_HIGH_QUALITY (1 << 30) - # define R500_BORDER_FIX (1<<31) #define R300_TX_FORMAT0_0 0x4480 @@ -2630,8 +2631,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define R300_ZB_BW_CNTL 0x4f1c # define R300_HIZ_DISABLE (0 << 0) # define R300_HIZ_ENABLE (1 << 0) -# define R300_HIZ_MIN (0 << 1) -# define R300_HIZ_MAX (1 << 1) +# define R300_HIZ_MAX (0 << 1) +# define R300_HIZ_MIN (1 << 1) # define R300_FAST_FILL_DISABLE (0 << 2) # define R300_FAST_FILL_ENABLE (1 << 2) # define R300_RD_COMP_DISABLE (0 << 3) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 60700cf3037..b35822c82f8 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -39,7 +39,6 @@ #include "r300_screen_buffer.h" #include "r300_emit.h" #include "r300_reg.h" -#include "r300_state_derived.h" #include <limits.h> @@ -118,12 +117,6 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, return color_control; } -boolean r500_index_bias_supported(struct r300_context *r300) -{ - return r300->screen->caps.is_r500 && - r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); -} - void r500_emit_index_bias(struct r300_context *r300, int index_bias) { CS_LOCALS(r300); @@ -136,7 +129,7 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias) /* This function splits the index bias value into two parts: * - buffer_offset: the value that can be safely added to buffer offsets - * in r300_emit_aos (it must yield a positive offset when added to + * in r300_emit_vertex_arrays (it must yield a positive offset when added to * a vertex buffer offset) * - index_offset: the value that must be manually subtracted from indices * in an index buffer to achieve negative offsets. */ @@ -172,8 +165,8 @@ static void r300_split_index_bias(struct r300_context *r300, int index_bias, enum r300_prepare_flags { PREP_FIRST_DRAW = (1 << 0), /* call emit_dirty_state and friends? */ PREP_VALIDATE_VBOS = (1 << 1), /* validate VBOs? */ - PREP_EMIT_AOS = (1 << 2), /* call emit_aos? */ - PREP_EMIT_AOS_SWTCL = (1 << 3), /* call emit_aos_swtcl? */ + PREP_EMIT_AOS = (1 << 2), /* call emit_vertex_arrays? */ + PREP_EMIT_AOS_SWTCL = (1 << 3), /* call emit_vertex_arrays_swtcl? */ PREP_INDEXED = (1 << 4) /* is this draw_elements? */ }; @@ -191,28 +184,27 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300, { boolean flushed = FALSE; boolean first_draw = flags & PREP_FIRST_DRAW; - boolean emit_aos = flags & PREP_EMIT_AOS; - boolean emit_aos_swtcl = flags & PREP_EMIT_AOS_SWTCL; - boolean hw_index_bias = r500_index_bias_supported(r300); + boolean emit_vertex_arrays = flags & PREP_EMIT_AOS; + boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_AOS_SWTCL; /* Add dirty state, index offset, and AOS. */ if (first_draw) { cs_dwords += r300_get_num_dirty_dwords(r300); - if (hw_index_bias) + if (r300->screen->caps.index_bias_supported) cs_dwords += 2; /* emit_index_offset */ - if (emit_aos) - cs_dwords += 55; /* emit_aos */ + if (emit_vertex_arrays) + cs_dwords += 55; /* emit_vertex_arrays */ - if (emit_aos_swtcl) - cs_dwords += 7; /* emit_aos_swtcl */ + if (emit_vertex_arrays_swtcl) + cs_dwords += 7; /* emit_vertex_arrays_swtcl */ } cs_dwords += r300_get_num_cs_end_dwords(r300); /* Reserve requested CS space. */ - if (cs_dwords > (r300->cs->ndw - r300->cs->cdw)) { + if (cs_dwords > (R300_MAX_CMDBUF_DWORDS - r300->cs->cdw)) { r300->context.flush(&r300->context, 0, NULL); flushed = TRUE; } @@ -225,44 +217,57 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300, * \param r300 The context. * \param flags See r300_prepare_flags. * \param index_buffer The index buffer to validate. 
The parameter may be NULL. - * \param aos_offset The offset passed to emit_aos. + * \param buffer_offset The offset passed to emit_vertex_arrays. * \param index_bias The index bias to emit. * \return TRUE if rendering should be skipped */ static boolean r300_emit_states(struct r300_context *r300, enum r300_prepare_flags flags, struct pipe_resource *index_buffer, - int aos_offset, + int buffer_offset, int index_bias) { boolean first_draw = flags & PREP_FIRST_DRAW; - boolean emit_aos = flags & PREP_EMIT_AOS; - boolean emit_aos_swtcl = flags & PREP_EMIT_AOS_SWTCL; + boolean emit_vertex_arrays = flags & PREP_EMIT_AOS; + boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_AOS_SWTCL; boolean indexed = flags & PREP_INDEXED; - boolean hw_index_bias = r500_index_bias_supported(r300); + boolean validate_vbos = flags & PREP_VALIDATE_VBOS; /* Validate buffers and emit dirty state if needed. */ if (first_draw) { - if (!r300_emit_buffer_validate(r300, flags & PREP_VALIDATE_VBOS, - index_buffer)) { - fprintf(stderr, "r300: CS space validation failed. " - "(not enough memory?) Skipping rendering.\n"); - return FALSE; + if (r300->validate_buffers) { + if (!r300_emit_buffer_validate(r300, validate_vbos, + index_buffer)) { + fprintf(stderr, "r300: CS space validation failed. " + "(not enough memory?) Skipping rendering.\n"); + return FALSE; + } + + /* Consider the validation done only if everything was validated. */ + if (validate_vbos) { + r300->validate_buffers = FALSE; + if (r300->any_user_vbs) + r300->upload_vb_validated = TRUE; + if (r300->index_buffer.buffer && + r300_is_user_buffer(r300->index_buffer.buffer)) { + r300->upload_ib_validated = TRUE; + } + } } r300_emit_dirty_state(r300); - if (hw_index_bias) { + if (r300->screen->caps.index_bias_supported) { if (r300->screen->caps.has_tcl) r500_emit_index_bias(r300, index_bias); else r500_emit_index_bias(r300, 0); } - if (emit_aos) - r300_emit_aos(r300, aos_offset, indexed); + if (emit_vertex_arrays) + r300_emit_vertex_arrays(r300, buffer_offset, indexed); - if (emit_aos_swtcl) - r300_emit_aos_swtcl(r300, indexed); + if (emit_vertex_arrays_swtcl) + r300_emit_vertex_arrays_swtcl(r300, indexed); } return TRUE; @@ -275,7 +280,7 @@ static boolean r300_emit_states(struct r300_context *r300, * \param flags See r300_prepare_flags. * \param index_buffer The index buffer to validate. The parameter may be NULL. * \param cs_dwords The number of dwords to reserve in CS. - * \param aos_offset The offset passed to emit_aos. + * \param buffer_offset The offset passed to emit_vertex_arrays. * \param index_bias The index bias to emit. 
* \return TRUE if rendering should be skipped */ @@ -283,20 +288,20 @@ static boolean r300_prepare_for_rendering(struct r300_context *r300, enum r300_prepare_flags flags, struct pipe_resource *index_buffer, unsigned cs_dwords, - int aos_offset, + int buffer_offset, int index_bias) { if (r300_reserve_cs_dwords(r300, flags, cs_dwords)) flags |= PREP_FIRST_DRAW; - return r300_emit_states(r300, flags, index_buffer, aos_offset, index_bias); + return r300_emit_states(r300, flags, index_buffer, buffer_offset, index_bias); } static boolean immd_is_good_idea(struct r300_context *r300, unsigned count) { struct pipe_vertex_element* velem; - struct pipe_vertex_buffer* vbuf; + struct pipe_resource *buf; boolean checked[PIPE_MAX_ATTRIBS] = {0}; unsigned vertex_element_count = r300->velems->count; unsigned i, vbi; @@ -320,14 +325,13 @@ static boolean immd_is_good_idea(struct r300_context *r300, vbi = velem->vertex_buffer_index; if (!checked[vbi]) { - vbuf = &r300->vertex_buffer[vbi]; + buf = r300->valid_vertex_buffer[vbi]; - if (!(r300_buffer(vbuf->buffer)->domain & R300_DOMAIN_GTT)) { + if (!(r300_buffer(buf)->domain & R300_DOMAIN_GTT)) { return FALSE; } - if (r300_buffer_is_referenced(&r300->context, - vbuf->buffer, + if (r300_buffer_is_referenced(&r300->context, buf, R300_REF_CS | R300_REF_HW)) { /* It's a very bad idea to map it... */ return FALSE; @@ -386,7 +390,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, /* Map the buffer. */ if (!transfer[vbi]) { map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context, - vbuf->buffer, + r300->valid_vertex_buffer[vbi], PIPE_TRANSFER_READ, &transfer[vbi]); map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start; @@ -418,8 +422,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, vbi = r300->velems->velem[i].vertex_buffer_index; if (transfer[vbi]) { - vbuf = &r300->vertex_buffer[vbi]; - pipe_buffer_unmap(&r300->context, vbuf->buffer, transfer[vbi]); + pipe_buffer_unmap(&r300->context, transfer[vbi]); transfer[vbi] = NULL; } } @@ -461,10 +464,10 @@ static void r300_emit_draw_elements(struct r300_context *r300, unsigned maxIndex, unsigned mode, unsigned start, - unsigned count) + unsigned count, + uint16_t *imm_indices3) { - uint32_t count_dwords; - uint32_t offset_dwords = indexSize * start / sizeof(uint32_t); + uint32_t count_dwords, offset_dwords; boolean alt_num_verts = count > 65535; CS_LOCALS(r300); @@ -474,20 +477,42 @@ static void r300_emit_draw_elements(struct r300_context *r300, return; } - maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index); - DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n", count, minIndex, maxIndex); - BEGIN_CS(13 + (alt_num_verts ? 2 : 0)); - if (alt_num_verts) { - OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); - } + BEGIN_CS(5); OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); OUT_CS(maxIndex); OUT_CS(minIndex); + END_CS; + + /* If start is odd, render the first triangle with indices embedded + * in the command stream. This will increase start by 3 and make it + * even. We can then proceed without a fallback. 
*/ + if (indexSize == 2 && (start & 1) && + mode == PIPE_PRIM_TRIANGLES) { + BEGIN_CS(4); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 2); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (3 << 16) | + R300_VAP_VF_CNTL__PRIM_TRIANGLES); + OUT_CS(imm_indices3[1] << 16 | imm_indices3[0]); + OUT_CS(imm_indices3[2]); + END_CS; + + start += 3; + count -= 3; + if (!count) + return; + } + + offset_dwords = indexSize * start / sizeof(uint32_t); + + BEGIN_CS(8 + (alt_num_verts ? 2 : 0)); + if (alt_num_verts) { + OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); + } OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); if (indexSize == 4) { count_dwords = count; @@ -511,16 +536,13 @@ static void r300_emit_draw_elements(struct r300_context *r300, OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | (0 << R300_INDX_BUFFER_SKIP_SHIFT)); OUT_CS(offset_dwords << 2); - OUT_CS_BUF_RELOC(indexBuffer, count_dwords, - r300_buffer(indexBuffer)->domain, 0); + OUT_CS_BUF_RELOC(indexBuffer, count_dwords); END_CS; } /* This is the fast-path drawing & emission for HW TCL. */ static void r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_resource* indexBuffer, - unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, @@ -529,41 +551,66 @@ static void r300_draw_range_elements(struct pipe_context* pipe, unsigned count) { struct r300_context* r300 = r300_context(pipe); + struct pipe_resource *indexBuffer = r300->index_buffer.buffer; + unsigned indexSize = r300->index_buffer.index_size; struct pipe_resource* orgIndexBuffer = indexBuffer; boolean alt_num_verts = r300->screen->caps.is_r500 && count > 65536 && r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); unsigned short_count; int buffer_offset = 0, index_offset = 0; /* for index bias emulation */ - unsigned new_offset; + uint16_t indices3[3]; - if (indexBias && !r500_index_bias_supported(r300)) { + if (indexBias && !r300->screen->caps.index_bias_supported) { r300_split_index_bias(r300, indexBias, &buffer_offset, &index_offset); } r300_translate_index_buffer(r300, &indexBuffer, &indexSize, index_offset, &start, count); - r300_update_derived_state(r300); - r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count, &new_offset); + /* Fallback for misaligned ushort indices. */ + if (indexSize == 2 && (start & 1) && + !r300_is_user_buffer(indexBuffer)) { + struct pipe_transfer *transfer; + struct pipe_resource *userbuf; - start = new_offset; + uint16_t *ptr = pipe_buffer_map(pipe, indexBuffer, + PIPE_TRANSFER_READ, &transfer); - /* 15 dwords for emit_draw_elements. Give up if the function fails. */ + if (mode == PIPE_PRIM_TRIANGLES) { + memcpy(indices3, ptr + start, 6); + } else { + /* Copy the mapped index buffer directly to the upload buffer. + * The start index will be aligned simply from the fact that + * every sub-buffer in u_upload_mgr is aligned. */ + userbuf = pipe->screen->user_buffer_create(pipe->screen, + ptr, 0, + PIPE_BIND_INDEX_BUFFER); + indexBuffer = userbuf; + r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, count); + pipe_resource_reference(&userbuf, NULL); + } + pipe_buffer_unmap(pipe, transfer); + } else { + if (r300_is_user_buffer(indexBuffer)) + r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, count); + } + + /* 19 dwords for emit_draw_elements. Give up if the function fails. 
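The DRAW_INDX_2 emission above sizes its payload in dwords, and the accounting differs by index size: 32-bit indices take one dword each while 16-bit indices are packed two per dword, which is also why the odd start case is handled just before. A small sketch of that dword accounting, assuming pairwise packing is the only rule involved:

#include <stdio.h>

/* Number of command-stream dwords needed for 'count' indices.
 * 16-bit indices are packed two per dword, so an odd count still
 * occupies a full trailing dword; 32-bit indices map 1:1. */
static unsigned index_count_dwords(unsigned count, unsigned index_size)
{
    if (index_size == 4)
        return count;
    return (count + 1) / 2;
}

int main(void)
{
    printf("%u\n", index_count_dwords(7, 2));   /* 4 dwords */
    printf("%u\n", index_count_dwords(7, 4));   /* 7 dwords */
    return 0;
}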
*/ if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | - PREP_INDEXED, indexBuffer, 15, buffer_offset, indexBias)) + PREP_INDEXED, indexBuffer, 19, buffer_offset, indexBias)) goto done; if (alt_num_verts || count <= 65535) { r300_emit_draw_elements(r300, indexBuffer, indexSize, - minIndex, maxIndex, mode, start, count); + minIndex, maxIndex, mode, start, count, indices3); } else { do { short_count = MIN2(count, 65534); r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, - mode, start, short_count); + mode, start, short_count, indices3); start += short_count; count -= short_count; @@ -572,7 +619,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe, if (count) { if (!r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, - indexBuffer, 15, buffer_offset, indexBias)) + indexBuffer, 19, buffer_offset, indexBias)) goto done; } } while (count); @@ -593,8 +640,6 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); unsigned short_count; - r300_update_derived_state(r300); - if (immd_is_good_idea(r300, count)) { r300_emit_draw_arrays_immediate(r300, mode, start, count); } else { @@ -633,7 +678,8 @@ static void r300_draw_vbo(struct pipe_context* pipe, unsigned count = info->count; boolean translate = FALSE; boolean indexed = info->indexed && r300->index_buffer.buffer; - unsigned start_indexed = 0; + unsigned min_index = 0; + unsigned max_index = r300->vertex_buffer_max_index; if (r300->skip_rendering) { return; @@ -643,43 +689,65 @@ static void r300_draw_vbo(struct pipe_context* pipe, return; } - /* Index buffer range checking. */ if (indexed) { - assert(r300->index_buffer.offset % r300->index_buffer.index_size == 0); - - /* Compute start for draw_elements, taking the offset into account. */ - start_indexed = + int real_min_index, real_max_index; + /* Compute the start for draw_elements, taking the offset into account. */ + unsigned start_indexed = info->start + (r300->index_buffer.offset / r300->index_buffer.index_size); + assert(r300->index_buffer.offset % r300->index_buffer.index_size == 0); + + /* Index buffer range checking. */ if ((start_indexed + count) * r300->index_buffer.index_size > r300->index_buffer.buffer->width0) { fprintf(stderr, "r300: Invalid index buffer range. Skipping rendering.\n"); return; } - } - /* Set up fallback for incompatible vertex layout if needed. */ - if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) { - r300_begin_vertex_translate(r300); - translate = TRUE; - } + min_index = MAX2(min_index, info->min_index); + max_index = MIN2(max_index, info->max_index); + real_min_index = (int)min_index - info->index_bias; + real_max_index = (int)max_index - info->index_bias; - if (indexed) { - r300_draw_range_elements(pipe, - r300->index_buffer.buffer, - r300->index_buffer.index_size, - info->index_bias, - info->min_index, - info->max_index, - info->mode, - start_indexed, - count); + if (max_index >= (1 << 24) - 1) { + fprintf(stderr, "r300: Invalid max_index: %i. Skipping rendering...\n", max_index); + return; + } + + r300_update_derived_state(r300); + + /* Set up the fallback for an incompatible vertex layout if needed. */ + if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) { + r300_begin_vertex_translate(r300, real_min_index, real_max_index); + translate = TRUE; + } + + /* Upload vertex buffers. 
*/ + if (r300->any_user_vbs) { + r300_upload_user_buffers(r300, real_min_index, real_max_index); + } + + r300_draw_range_elements(pipe, info->index_bias, min_index, max_index, + info->mode, start_indexed, count); } else { - r300_draw_arrays(pipe, - info->mode, - info->start, - count); + min_index = MAX2(min_index, info->start); + max_index = MIN2(max_index, info->start + count - 1); + + r300_update_derived_state(r300); + + /* Set up the fallback for an incompatible vertex layout if needed. */ + if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) { + r300_begin_vertex_translate(r300, min_index, max_index); + translate = TRUE; + } + + /* Upload vertex buffers. */ + if (r300->any_user_vbs) { + r300_upload_user_buffers(r300, min_index, max_index); + } + + r300_draw_arrays(pipe, info->mode, info->start, count); } if (translate) { @@ -744,14 +812,13 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, for (i = 0; i < r300->vertex_buffer_count; i++) { if (r300->vertex_buffer[i].buffer) { - pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, - vb_transfer[i]); + pipe_buffer_unmap(pipe, vb_transfer[i]); draw_set_mapped_vertex_buffer(r300->draw, i, NULL); } } if (indexed) { - pipe_buffer_unmap(pipe, r300->index_buffer.buffer, ib_transfer); + pipe_buffer_unmap(pipe, ib_transfer); draw_set_mapped_index_buffer(r300->draw, NULL); } } @@ -810,6 +877,7 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, R300_MAX_DRAW_VBO_SIZE); r300->draw_vbo_offset = 0; r300->draw_vbo_size = R300_MAX_DRAW_VBO_SIZE; + r300->validate_buffers = TRUE; } r300render->vertex_size = vertex_size; @@ -850,7 +918,7 @@ static void r300_render_unmap_vertices(struct vbuf_render* render, r300render->vbo_max_used = MAX2(r300render->vbo_max_used, r300render->vertex_size * (max + 1)); - pipe_buffer_unmap(context, r300->vbo, r300render->vbo_transfer); + pipe_buffer_unmap(context, r300render->vbo_transfer); r300render->vbo_transfer = NULL; } @@ -967,7 +1035,7 @@ static void r300_render_draw_elements(struct vbuf_render* render, end_cs_dwords = r300_get_num_cs_end_dwords(r300); while (count) { - free_dwords = r300->cs->ndw - r300->cs->cdw; + free_dwords = R300_MAX_CMDBUF_DWORDS - r300->cs->cdw; short_count = MIN2(count, (free_dwords - end_cs_dwords - 6) * 2); @@ -1015,8 +1083,7 @@ static struct vbuf_render* r300_render_create(struct r300_context* r300) r300render->r300 = r300; - /* XXX find real numbers plz */ - r300render->base.max_vertex_buffer_bytes = 128 * 1024; + r300render->base.max_vertex_buffer_bytes = 1024 * 1024; r300render->base.max_indices = 16 * 1024; r300render->base.get_vertex_info = r300_render_get_vertex_info; @@ -1088,6 +1155,8 @@ static void r300_blitter_draw_rectangle(struct blitter_context *blitter, const float zeros[4] = {0, 0, 0, 0}; CS_LOCALS(r300); + r300->context.set_vertex_buffers(&r300->context, 0, NULL); + if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) r300->sprite_coord_enable = 1; @@ -1144,37 +1213,45 @@ static void r300_blitter_draw_rectangle(struct blitter_context *blitter, done: /* Restore the state. 
*/ - r300->clip_state.dirty = TRUE; - r300->rs_state.dirty = TRUE; - r300->viewport_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->clip_state); + r300_mark_atom_dirty(r300, &r300->rs_state); + r300_mark_atom_dirty(r300, &r300->viewport_state); r300->sprite_coord_enable = last_sprite_coord_enable; } static void r300_resource_resolve(struct pipe_context* pipe, struct pipe_resource* dest, - struct pipe_subresource subdest, + unsigned dst_layer, struct pipe_resource* src, - struct pipe_subresource subsrc) + unsigned src_layer) { struct r300_context* r300 = r300_context(pipe); + struct pipe_surface* srcsurf, surf_tmpl; struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; - struct pipe_surface* srcsurf = src->screen->get_tex_surface(src->screen, - src, subsrc.face, subsrc.level, 0, 0); float color[] = {0, 0, 0, 0}; + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + surf_tmpl.format = src->format; + surf_tmpl.usage = 0; /* not really a surface hence no bind flags */ + surf_tmpl.u.tex.level = 0; /* msaa resources cannot have mipmaps */ + surf_tmpl.u.tex.first_layer = src_layer; + surf_tmpl.u.tex.last_layer = src_layer; + srcsurf = pipe->create_surface(pipe, src, &surf_tmpl); + surf_tmpl.format = dest->format; + surf_tmpl.u.tex.first_layer = dst_layer; + surf_tmpl.u.tex.last_layer = dst_layer; + DBG(r300, DBG_DRAW, "r300: Resolving resource...\n"); /* Enable AA resolve. */ - aa->dest = r300_surface( - dest->screen->get_tex_surface(dest->screen, dest, subdest.face, - subdest.level, 0, 0)); + aa->dest = r300_surface(pipe->create_surface(pipe, dest, &surf_tmpl)); aa->aaresolve_ctl = R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE | R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE; r300->aa_state.size = 12; - r300->aa_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->aa_state); /* Resolve the surface. */ r300->context.clear_render_target(pipe, @@ -1183,7 +1260,7 @@ static void r300_resource_resolve(struct pipe_context* pipe, /* Disable AA resolve. */ aa->aaresolve_ctl = 0; r300->aa_state.size = 4; - r300->aa_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->aa_state); pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL); pipe_surface_reference((struct pipe_surface**)&aa->dest, NULL); diff --git a/src/gallium/drivers/r300/r300_render_stencilref.c b/src/gallium/drivers/r300/r300_render_stencilref.c index 1f035d64a28..747594afaf2 100644 --- a/src/gallium/drivers/r300/r300_render_stencilref.c +++ b/src/gallium/drivers/r300/r300_render_stencilref.c @@ -66,7 +66,7 @@ static void r300_stencilref_begin(struct r300_context *r300) /* We *cull* pixels, therefore no need to mask out the bits. */ rs->cb_main[rs->cull_mode_index] |= R300_CULL_BACK; - r300->rs_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->rs_state); } /* Set drawing for back faces. */ @@ -80,8 +80,8 @@ static void r300_stencilref_switch_side(struct r300_context *r300) dsa->stencil_ref_mask = dsa->stencil_ref_bf; r300->stencil_ref.ref_value[0] = r300->stencil_ref.ref_value[1]; - r300->rs_state.dirty = TRUE; - r300->dsa_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->rs_state); + r300_mark_atom_dirty(r300, &r300->dsa_state); } /* Restore the original state. 
*/ @@ -96,8 +96,8 @@ static void r300_stencilref_end(struct r300_context *r300) dsa->stencil_ref_mask = sr->zb_stencilrefmask; r300->stencil_ref.ref_value[0] = sr->ref_value_front; - r300->rs_state.dirty = TRUE; - r300->dsa_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->rs_state); + r300_mark_atom_dirty(r300, &r300->dsa_state); } static void r300_stencilref_draw_vbo(struct pipe_context *pipe, diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c index 9247064508f..c48062c8084 100644 --- a/src/gallium/drivers/r300/r300_render_translate.c +++ b/src/gallium/drivers/r300/r300_render_translate.c @@ -31,7 +31,10 @@ #include "translate/translate.h" #include "util/u_index_modify.h" -void r300_begin_vertex_translate(struct r300_context *r300) +/* XXX Optimization: use min_index and translate only that range. */ +/* XXX Use the uploader. */ +void r300_begin_vertex_translate(struct r300_context *r300, + int min_index, int max_index) { struct pipe_context *pipe = &r300->context; struct translate_key key = {0}; @@ -44,6 +47,7 @@ void r300_begin_vertex_translate(struct r300_context *r300) struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; struct pipe_resource *out_buffer; unsigned i, num_verts; + unsigned slot; /* Initialize the translate key, i.e. the recipe how vertices should be * translated. */ @@ -108,12 +112,12 @@ void r300_begin_vertex_translate(struct r300_context *r300) vb_map[i] = pipe_buffer_map(pipe, vb->buffer, PIPE_TRANSFER_READ, &vb_transfer[i]); - tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index); + tr->set_buffer(tr, i, vb_map[i], vb->stride, max_index); } } /* Create and map the output buffer. */ - num_verts = r300->vertex_buffer_max_index + 1; + num_verts = max_index + 1; out_buffer = pipe_buffer_create(&r300->screen->screen, PIPE_BIND_VERTEX_BUFFER, @@ -128,26 +132,30 @@ void r300_begin_vertex_translate(struct r300_context *r300) /* Unmap all buffers. */ for (i = 0; i < r300->vertex_buffer_count; i++) { if (vb_translated[i]) { - pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, - vb_transfer[i]); + pipe_buffer_unmap(pipe, vb_transfer[i]); } } - pipe_buffer_unmap(pipe, out_buffer, out_transfer); + pipe_buffer_unmap(pipe, out_transfer); /* Setup the new vertex buffer in the first free slot. */ + slot = ~0; for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i]; if (!vb->buffer) { - pipe_resource_reference(&vb->buffer, out_buffer); + pipe_resource_reference(&r300->valid_vertex_buffer[i], out_buffer); vb->buffer_offset = 0; - vb->max_index = num_verts - 1; vb->stride = key.output_stride; - r300->tran.vb_slot = i; + slot = i; + /* XXX probably need to preserve the real count for u_blitter_save_*. */ + r300->vertex_buffer_count = MAX2(r300->vertex_buffer_count, i+1); + r300->validate_buffers = TRUE; break; } } + /* XXX This may fail. */ + assert(slot != ~0); /* Save and replace vertex elements. */ { @@ -161,7 +169,7 @@ void r300_begin_vertex_translate(struct r300_context *r300) new_velems[i].instance_divisor = ve->velem[i].instance_divisor; new_velems[i].src_format = te->output_format; new_velems[i].src_offset = te->output_offset; - new_velems[i].vertex_buffer_index = r300->tran.vb_slot; + new_velems[i].vertex_buffer_index = slot; } else { memcpy(&new_velems[i], &ve->velem[i], sizeof(struct pipe_vertex_element)); @@ -183,12 +191,9 @@ void r300_end_vertex_translate(struct r300_context *r300) /* Restore vertex elements. 
*/ pipe->bind_vertex_elements_state(pipe, r300->tran.saved_velems); pipe->delete_vertex_elements_state(pipe, r300->tran.new_velems); - - /* Delete the now-unused VBO. */ - pipe_resource_reference(&r300->vertex_buffer[r300->tran.vb_slot].buffer, - NULL); } +/* XXX Use the uploader. */ void r300_translate_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned *index_size, unsigned index_offset, @@ -199,12 +204,14 @@ void r300_translate_index_buffer(struct r300_context *r300, util_shorten_ubyte_elts(&r300->context, index_buffer, index_offset, *start, count); *index_size = 2; *start = 0; + r300->validate_buffers = TRUE; break; case 2: - if (*start % 2 != 0 || index_offset) { + if (index_offset) { util_rebuild_ushort_elts(&r300->context, index_buffer, index_offset, *start, count); *start = 0; + r300->validate_buffers = TRUE; } break; @@ -212,6 +219,7 @@ void r300_translate_index_buffer(struct r300_context *r300, if (index_offset) { util_rebuild_uint_elts(&r300->context, index_buffer, index_offset, *start, count); *start = 0; + r300->validate_buffers = TRUE; } break; } diff --git a/src/gallium/drivers/r300/r300_resource.c b/src/gallium/drivers/r300/r300_resource.c index f6f33028dc6..dd1df970594 100644 --- a/src/gallium/drivers/r300/r300_resource.c +++ b/src/gallium/drivers/r300/r300_resource.c @@ -58,6 +58,8 @@ void r300_init_resource_functions(struct r300_context *r300) r300->context.transfer_destroy = u_transfer_destroy_vtbl; r300->context.transfer_inline_write = u_transfer_inline_write_vtbl; r300->context.is_resource_referenced = u_is_resource_referenced_vtbl; + r300->context.create_surface = r300_create_surface; + r300->context.surface_destroy = r300_surface_destroy; } void r300_init_screen_resource_functions(struct r300_screen *r300screen) @@ -67,7 +69,4 @@ void r300_init_screen_resource_functions(struct r300_screen *r300screen) r300screen->screen.resource_get_handle = u_resource_get_handle_vtbl; r300screen->screen.resource_destroy = u_resource_destroy_vtbl; r300screen->screen.user_buffer_create = r300_user_buffer_create; - - r300screen->screen.get_tex_surface = r300_get_tex_surface; - r300screen->screen.tex_surface_destroy = r300_tex_surface_destroy; } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 5332866188f..c75aeaa10a7 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -34,6 +34,10 @@ #include "draw/draw_context.h" +#ifdef HAVE_LLVM +#include "gallivm/lp_bld_init.h" +#endif + /* Return the identifier behind whom the brave coders responsible for this * amalgamation of code, sweat, and duct tape, routinely obscure their names. * @@ -116,8 +120,9 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_BLEND_EQUATION_SEPARATE: - case PIPE_CAP_TEXTURE_SWIZZLE: return 1; + case PIPE_CAP_TEXTURE_SWIZZLE: + return util_format_s3tc_enabled ? r300screen->caps.dxtc_swizzle : 1; /* Unsupported features (boolean caps). 
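An aside on the r300_translate_index_buffer hunk above (editorial illustration, not part of this commit): the 8-bit case rewrites the index buffer as 16-bit indices (util_shorten_ubyte_elts, then *index_size = 2; the "shorten" in the name presumably refers to the ushort destination type), since the indexed-draw path above only handles 16- and 32-bit indices. The widening itself amounts to the hypothetical loop below.

    #include <stdint.h>

    static void widen_ubyte_indices(const uint8_t *in, uint16_t *out, unsigned count)
    {
        unsigned i;
        for (i = 0; i < count; i++)
            out[i] = in[i];   // each 8-bit index becomes a 16-bit index
    }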
*/ case PIPE_CAP_TIMER_QUERY: @@ -308,7 +313,9 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, unsigned usage, unsigned geom_flags) { + struct r300_winsys_screen *rws = r300_screen(screen)->rws; uint32_t retval = 0; + boolean drm_2_8_0 = rws->get_value(rws, R300_VID_DRM_2_8_0); boolean is_r500 = r300_screen(screen)->caps.is_r500; boolean is_r400 = r300_screen(screen)->caps.is_r400; boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM || @@ -362,7 +369,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) && /* 2101010 cannot be rendered to on non-r5xx. */ - (is_r500 || !is_color2101010) && + (!is_color2101010 || (is_r500 && drm_2_8_0)) && r300_is_colorbuffer_format_supported(format)) { retval |= usage & (PIPE_BIND_RENDER_TARGET | @@ -399,7 +406,7 @@ static void r300_destroy_screen(struct pipe_screen* pscreen) struct r300_screen* r300screen = r300_screen(pscreen); struct r300_winsys_screen *rws = r300_winsys_screen(pscreen); - util_mempool_destroy(&r300screen->pool_buffers); + util_slab_destroy(&r300screen->pool_buffers); if (rws) rws->destroy(rws); @@ -456,9 +463,13 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) r300_init_debug(r300screen); r300_parse_chipset(&r300screen->caps); - util_mempool_create(&r300screen->pool_buffers, - sizeof(struct r300_buffer), 64, - UTIL_MEMPOOL_SINGLETHREADED); + r300screen->caps.index_bias_supported = + r300screen->caps.is_r500 && + rws->get_value(rws, R300_VID_DRM_2_3_0); + + util_slab_create(&r300screen->pool_buffers, + sizeof(struct r300_buffer), 64, + UTIL_SLAB_SINGLETHREADED); r300screen->rws = rws; r300screen->screen.winsys = (struct pipe_winsys*)rws; @@ -479,5 +490,9 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) util_format_s3tc_init(); +#ifdef HAVE_LLVM + lp_build_init(); +#endif + return &r300screen->screen; } diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 8b7f1fab61b..752f53b7579 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -28,7 +28,7 @@ #include "r300_chipset.h" -#include "util/u_mempool.h" +#include "util/u_slab.h" #include <stdio.h> @@ -44,7 +44,7 @@ struct r300_screen { struct r300_capabilities caps; /* Memory pools. */ - struct util_mempool pool_buffers; + struct util_slab_mempool pool_buffers; /** Combination of DBG_xxx flags */ unsigned debug; @@ -93,6 +93,7 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_CBZB (1 << 11) #define DBG_HYPERZ (1 << 12) #define DBG_SCISSOR (1 << 13) +#define DBG_UPLOAD (1 << 14) /* Features. */ #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) @@ -101,7 +102,6 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_NO_OPT (1 << 20) #define DBG_NO_CBZB (1 << 21) /* Statistics. 
*/ -#define DBG_STATS (1 << 24) #define DBG_P_STAT (1 << 25) /*@}*/ diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 37a080ba48b..cc3c1d7687e 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -40,10 +40,10 @@ unsigned r300_buffer_is_referenced(struct pipe_context *context, struct r300_context *r300 = r300_context(context); struct r300_buffer *rbuf = r300_buffer(buf); - if (r300_buffer_is_user_buffer(buf)) + if (r300_is_user_buffer(buf)) return PIPE_UNREFERENCED; - if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->buf, domain)) + if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf, domain)) return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; return PIPE_UNREFERENCED; @@ -51,76 +51,86 @@ unsigned r300_buffer_is_referenced(struct pipe_context *context, static unsigned r300_buffer_is_referenced_by_cs(struct pipe_context *context, struct pipe_resource *buf, - unsigned face, unsigned level) + unsigned level, int layer) { return r300_buffer_is_referenced(context, buf, R300_REF_CS); } -/* External helper, not required to implent u_resource_vtbl: - */ -int r300_upload_index_buffer(struct r300_context *r300, - struct pipe_resource **index_buffer, - unsigned index_size, - unsigned start, - unsigned count, - unsigned *out_offset) +void r300_upload_index_buffer(struct r300_context *r300, + struct pipe_resource **index_buffer, + unsigned index_size, unsigned *start, + unsigned count) { - struct pipe_resource *upload_buffer = NULL; - unsigned index_offset = start * index_size; - int ret = 0; - - if (r300_buffer_is_user_buffer(*index_buffer)) { - ret = u_upload_buffer(r300->upload_ib, - index_offset, - count * index_size, - *index_buffer, - &index_offset, - &upload_buffer); - if (ret) { - goto done; - } - *index_buffer = upload_buffer; - *out_offset = index_offset / index_size; - } else - *out_offset = start; - - done: - // if (upload_buffer) - // pipe_resource_reference(&upload_buffer, NULL); - return ret; + unsigned index_offset; + uint8_t *ptr = r300_buffer(*index_buffer)->user_buffer; + boolean flushed; + + *index_buffer = NULL; + + u_upload_data(r300->upload_ib, + 0, count * index_size, + ptr + (*start * index_size), + &index_offset, + index_buffer, &flushed); + + *start = index_offset / index_size; + + if (flushed || !r300->upload_ib_validated) { + r300->upload_ib_validated = FALSE; + r300->validate_buffers = TRUE; + } } -/* External helper, not required to implement u_resource_vtbl: - */ -int r300_upload_user_buffers(struct r300_context *r300) +void r300_upload_user_buffers(struct r300_context *r300, + int min_index, int max_index) { - enum pipe_error ret = PIPE_OK; - int i, nr; - - nr = r300->velems->count; + int i, nr = r300->velems->count; + unsigned count = max_index + 1 - min_index; + boolean flushed; + boolean uploaded[16] = {0}; for (i = 0; i < nr; i++) { - struct pipe_vertex_buffer *vb = - &r300->vertex_buffer[r300->velems->velem[i].vertex_buffer_index]; - - if (r300_buffer_is_user_buffer(vb->buffer)) { - struct pipe_resource *upload_buffer = NULL; - unsigned offset = 0; /*vb->buffer_offset * 4;*/ - unsigned size = vb->buffer->width0; - unsigned upload_offset; - ret = u_upload_buffer(r300->upload_vb, - offset, size, - vb->buffer, - &upload_offset, &upload_buffer); - if (ret) - return ret; - - pipe_resource_reference(&vb->buffer, NULL); - vb->buffer = upload_buffer; - vb->buffer_offset = upload_offset; + unsigned index = 
r300->velems->velem[i].vertex_buffer_index; + struct pipe_vertex_buffer *vb = &r300->vertex_buffer[index]; + struct r300_buffer *userbuf = r300_buffer(vb->buffer); + + if (userbuf && userbuf->user_buffer && !uploaded[index]) { + unsigned first, size; + + if (vb->stride) { + first = vb->stride * min_index; + size = vb->stride * count; + } else { + first = 0; + size = r300->velems->hw_format_size[i]; + } + + DBG(r300, DBG_UPLOAD, + "Uploading %i bytes, index: %i, buffer: %p, userptr: %p " + "offset: %i, stride: %i.\n", + size, index, userbuf, userbuf->user_buffer, + vb->buffer_offset, vb->stride); + + u_upload_data(r300->upload_vb, first, size, + userbuf->user_buffer + first, + &vb->buffer_offset, + &r300->valid_vertex_buffer[index], + &flushed); + + vb->buffer_offset -= first; + + r300->vertex_arrays_dirty = TRUE; + + if (flushed || !r300->upload_vb_validated) { + r300->upload_vb_validated = FALSE; + r300->validate_buffers = TRUE; + } + uploaded[index] = TRUE; + } else { + assert(r300->valid_vertex_buffer[index]); } } - return ret; + DBG(r300, DBG_UPLOAD, "-------\n"); } static void r300_buffer_destroy(struct pipe_screen *screen, @@ -136,26 +146,26 @@ static void r300_buffer_destroy(struct pipe_screen *screen, if (rbuf->buf) rws->buffer_reference(rws, &rbuf->buf, NULL); - util_mempool_free(&r300screen->pool_buffers, rbuf); + util_slab_free(&r300screen->pool_buffers, rbuf); } static struct pipe_transfer* -r300_default_get_transfer(struct pipe_context *context, - struct pipe_resource *resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box) +r300_buffer_get_transfer(struct pipe_context *context, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box) { struct r300_context *r300 = r300_context(context); struct pipe_transfer *transfer = - util_mempool_malloc(&r300->pool_transfers); + util_slab_alloc(&r300->pool_transfers); transfer->resource = resource; - transfer->sr = sr; + transfer->level = level; transfer->usage = usage; transfer->box = *box; transfer->stride = 0; - transfer->slice_stride = 0; + transfer->layer_stride = 0; transfer->data = NULL; /* Note strides are zero, this is ok for buffers, but not for @@ -164,11 +174,11 @@ r300_default_get_transfer(struct pipe_context *context, return transfer; } -static void r300_default_transfer_destroy(struct pipe_context *pipe, - struct pipe_transfer *transfer) +static void r300_buffer_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) { struct r300_context *r300 = r300_context(pipe); - util_mempool_free(&r300->pool_transfers, transfer); + util_slab_free(&r300->pool_transfers, transfer); } static void * @@ -180,46 +190,17 @@ r300_buffer_transfer_map( struct pipe_context *pipe, struct r300_winsys_screen *rws = r300screen->rws; struct r300_buffer *rbuf = r300_buffer(transfer->resource); uint8_t *map; - boolean flush = FALSE; - unsigned i; if (rbuf->user_buffer) return (uint8_t *) rbuf->user_buffer + transfer->box.x; if (rbuf->constant_buffer) return (uint8_t *) rbuf->constant_buffer + transfer->box.x; - /* check if the mapping is to a range we already flushed */ - if (transfer->usage & PIPE_TRANSFER_DISCARD) { - for (i = 0; i < rbuf->num_ranges; i++) { - if ((transfer->box.x >= rbuf->ranges[i].start) && - (transfer->box.x < rbuf->ranges[i].end)) - flush = TRUE; - - if (flush) { - /* unreference this hw buffer and allocate a new one */ - rws->buffer_reference(rws, &rbuf->buf, NULL); - - rbuf->num_ranges = 0; - rbuf->buf = - 
r300screen->rws->buffer_create(r300screen->rws, - rbuf->b.b.width0, 16, - rbuf->b.b.bind, - rbuf->b.b.usage, - rbuf->domain); - break; - } - } - } - map = rws->buffer_map(rws, rbuf->buf, r300->cs, transfer->usage); if (map == NULL) return NULL; - /* map_buffer() returned a pointer to the beginning of the buffer, - * but transfers are expected to return a pointer to just the - * region specified in the box. - */ return map + transfer->box.x; } @@ -227,30 +208,7 @@ static void r300_buffer_transfer_flush_region( struct pipe_context *pipe, struct pipe_transfer *transfer, const struct pipe_box *box) { - struct r300_buffer *rbuf = r300_buffer(transfer->resource); - unsigned i; - unsigned offset = transfer->box.x + box->x; - unsigned length = box->width; - - assert(box->x + box->width <= transfer->box.width); - - if (rbuf->user_buffer) - return; - if (rbuf->constant_buffer) - return; - - /* mark the range as used */ - for(i = 0; i < rbuf->num_ranges; ++i) { - if(offset <= rbuf->ranges[i].end && rbuf->ranges[i].start <= (offset+box->width)) { - rbuf->ranges[i].start = MIN2(rbuf->ranges[i].start, offset); - rbuf->ranges[i].end = MAX2(rbuf->ranges[i].end, (offset+length)); - return; - } - } - - rbuf->ranges[rbuf->num_ranges].start = offset; - rbuf->ranges[rbuf->num_ranges].end = offset+length; - rbuf->num_ranges++; + /* no-op */ } static void r300_buffer_transfer_unmap( struct pipe_context *pipe, @@ -265,17 +223,45 @@ static void r300_buffer_transfer_unmap( struct pipe_context *pipe, } } -struct u_resource_vtbl r300_buffer_vtbl = +static void r300_buffer_transfer_inline_write(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) +{ + struct r300_context *r300 = r300_context(pipe); + struct r300_winsys_screen *rws = r300->screen->rws; + struct r300_buffer *rbuf = r300_buffer(resource); + uint8_t *map = NULL; + + if (rbuf->constant_buffer) { + memcpy(rbuf->constant_buffer + box->x, data, box->width); + return; + } + assert(rbuf->user_buffer == NULL); + + map = rws->buffer_map(rws, rbuf->buf, r300->cs, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage); + + memcpy(map + box->x, data, box->width); + + rws->buffer_unmap(rws, rbuf->buf); +} + +struct u_resource_vtbl r300_buffer_vtbl = { u_default_resource_get_handle, /* get_handle */ r300_buffer_destroy, /* resource_destroy */ r300_buffer_is_referenced_by_cs, /* is_buffer_referenced */ - r300_default_get_transfer, /* get_transfer */ - r300_default_transfer_destroy, /* transfer_destroy */ + r300_buffer_get_transfer, /* get_transfer */ + r300_buffer_transfer_destroy, /* transfer_destroy */ r300_buffer_transfer_map, /* transfer_map */ r300_buffer_transfer_flush_region, /* transfer_flush_region */ r300_buffer_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ + r300_buffer_transfer_inline_write /* transfer_inline_write */ }; struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, @@ -285,7 +271,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, struct r300_buffer *rbuf; unsigned alignment = 16; - rbuf = util_mempool_malloc(&r300screen->pool_buffers); + rbuf = util_slab_alloc(&r300screen->pool_buffers); rbuf->magic = R300_BUFFER_MAGIC; @@ -294,7 +280,6 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, pipe_reference_init(&rbuf->b.b.reference, 1); rbuf->b.b.screen = screen; rbuf->domain = R300_DOMAIN_GTT; - 
rbuf->num_ranges = 0; rbuf->buf = NULL; rbuf->constant_buffer = NULL; rbuf->user_buffer = NULL; @@ -310,9 +295,11 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, rbuf->b.b.width0, alignment, rbuf->b.b.bind, rbuf->b.b.usage, rbuf->domain); + rbuf->cs_buf = + r300screen->rws->buffer_get_cs_handle(r300screen->rws, rbuf->buf); if (!rbuf->buf) { - util_mempool_free(&r300screen->pool_buffers, rbuf); + util_slab_free(&r300screen->pool_buffers, rbuf); return NULL; } @@ -320,14 +307,13 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, } struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, - void *ptr, - unsigned bytes, + void *ptr, unsigned size, unsigned bind) { struct r300_screen *r300screen = r300_screen(screen); struct r300_buffer *rbuf; - rbuf = util_mempool_malloc(&r300screen->pool_buffers); + rbuf = util_slab_alloc(&r300screen->pool_buffers); rbuf->magic = R300_BUFFER_MAGIC; @@ -338,12 +324,12 @@ struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, rbuf->b.b.format = PIPE_FORMAT_R8_UNORM; rbuf->b.b.usage = PIPE_USAGE_IMMUTABLE; rbuf->b.b.bind = bind; - rbuf->b.b.width0 = bytes; + rbuf->b.b.width0 = ~0; rbuf->b.b.height0 = 1; rbuf->b.b.depth0 = 1; + rbuf->b.b.array_size = 1; rbuf->b.b.flags = 0; rbuf->domain = R300_DOMAIN_GTT; - rbuf->num_ranges = 0; rbuf->buf = NULL; rbuf->constant_buffer = NULL; rbuf->user_buffer = ptr; diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h index cafa9f96f20..58dec8539b6 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.h +++ b/src/gallium/drivers/r300/r300_screen_buffer.h @@ -51,32 +51,30 @@ struct r300_buffer uint32_t magic; struct r300_winsys_buffer *buf; + struct r300_winsys_cs_buffer *cs_buf; enum r300_buffer_domain domain; - void *user_buffer; - void *constant_buffer; - struct r300_buffer_range ranges[R300_BUFFER_MAX_RANGES]; - unsigned num_ranges; + uint8_t *user_buffer; + uint8_t *constant_buffer; }; /* Functions. */ -int r300_upload_user_buffers(struct r300_context *r300); +void r300_upload_user_buffers(struct r300_context *r300, + int min_index, int max_index); -int r300_upload_index_buffer(struct r300_context *r300, - struct pipe_resource **index_buffer, - unsigned index_size, - unsigned start, - unsigned count, unsigned *out_offset); +void r300_upload_index_buffer(struct r300_context *r300, + struct pipe_resource **index_buffer, + unsigned index_size, unsigned *start, + unsigned count); struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ); struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, - void *ptr, - unsigned bytes, - unsigned usage); + void *ptr, unsigned size, + unsigned bind); unsigned r300_buffer_is_referenced(struct pipe_context *context, struct pipe_resource *buf, @@ -86,14 +84,10 @@ unsigned r300_buffer_is_referenced(struct pipe_context *context, static INLINE struct r300_buffer *r300_buffer(struct pipe_resource *buffer) { - if (buffer) { - assert(((struct r300_buffer *)buffer)->magic == R300_BUFFER_MAGIC); - return (struct r300_buffer *)buffer; - } - return NULL; + return (struct r300_buffer *)buffer; } -static INLINE boolean r300_buffer_is_user_buffer(struct pipe_resource *buffer) +static INLINE boolean r300_is_user_buffer(struct pipe_resource *buffer) { return r300_buffer(buffer)->user_buffer ? 
true : false; } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 247c22216e1..3a97b76a4c8 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -52,7 +52,7 @@ #define UPDATE_STATE(cso, atom) \ if (cso != atom.state) { \ atom.state = cso; \ - atom.dirty = TRUE; \ + r300_mark_atom_dirty(r300, &(atom)); \ } static boolean blend_discard_if_src_alpha_0(unsigned srcRGB, unsigned srcA, @@ -417,7 +417,7 @@ static void r300_set_blend_color(struct pipe_context* pipe, END_CB; } - r300->blend_color_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->blend_color_state); } static void r300_set_clip_state(struct pipe_context* pipe, @@ -446,7 +446,7 @@ static void r300_set_clip_state(struct pipe_context* pipe, (state->depth_clamp ? R300_CLIP_DISABLE : 0)); END_CB; - r300->clip_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->clip_state); } else { draw_set_clip_state(r300->draw, state); } @@ -594,7 +594,7 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, UPDATE_STATE(state, r300->dsa_state); - r300->hyperz_state.dirty = TRUE; /* Will be updated before the emission. */ + r300_mark_atom_dirty(r300, &r300->hyperz_state); /* Will be updated before the emission. */ r300_dsa_inject_stencilref(r300); } @@ -613,7 +613,7 @@ static void r300_set_stencil_ref(struct pipe_context* pipe, r300->stencil_ref = *sr; r300_dsa_inject_stencilref(r300); - r300->dsa_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->dsa_state); } static void r300_tex_set_tiling_flags(struct r300_context *r300, @@ -626,7 +626,7 @@ static void r300_tex_set_tiling_flags(struct r300_context *r300, /* Tiling determines how DRM treats the buffer data. * We must flush CS when changing it if the buffer is referenced. */ if (r300->rws->cs_is_buffer_referenced(r300->cs, - tex->buffer, R300_REF_CS)) + tex->cs_buffer, R300_REF_CS)) r300->context.flush(&r300->context, 0, NULL); r300->rws->buffer_set_tiling(r300->rws, tex->buffer, @@ -647,12 +647,12 @@ static void r300_fb_set_tiling_flags(struct r300_context *r300, for (i = 0; i < state->nr_cbufs; i++) { r300_tex_set_tiling_flags(r300, r300_texture(state->cbufs[i]->texture), - state->cbufs[i]->level); + state->cbufs[i]->u.tex.level); } if (state->zsbuf) { r300_tex_set_tiling_flags(r300, r300_texture(state->zsbuf->texture), - state->zsbuf->level); + state->zsbuf->u.tex.level); } } @@ -663,14 +663,14 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index, struct r300_texture *rtex = r300_texture(tex); fprintf(stderr, - "r300: %s[%i] Dim: %ix%i, Offset: %i, ZSlice: %i, " - "Face: %i, Level: %i, Format: %s\n" + "r300: %s[%i] Dim: %ix%i, Firstlayer: %i, " + "Lastlayer: %i, Level: %i, Format: %s\n" "r300: TEX: Macro: %s, Micro: %s, Pitch: %i, " "Dim: %ix%ix%i, LastLevel: %i, Format: %s\n", - binding, index, surf->width, surf->height, surf->offset, - surf->zslice, surf->face, surf->level, + binding, index, surf->width, surf->height, + surf->u.tex.first_layer, surf->u.tex.last_layer, surf->u.tex.level, util_format_short_name(surf->format), rtex->desc.macrotile[0] ? "YES" : " NO", @@ -686,14 +686,23 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, struct pipe_framebuffer_state *state = r300->fb_state.state; boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); - /* What is marked as dirty depends on the enum r300_fb_state_change. 
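Throughout these hunks, direct r300->foo_state.dirty = TRUE writes are replaced by calls to r300_mark_atom_dirty(). The helper's body is not shown in this diff; at minimum it must be equivalent to the assignment it replaces, as in the hypothetical stand-in below (the real helper may do additional bookkeeping, such as remembering which atoms need re-emission).

    #include <stdbool.h>

    struct atom_sketch {                 // stand-in, not the real struct r300_atom
        bool dirty;
    };

    static inline void mark_atom_dirty_sketch(struct atom_sketch *atom)
    {
        atom->dirty = true;              // the real r300_mark_atom_dirty() may do more
    }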
*/ - r300->gpu_flush.dirty = TRUE; - r300->fb_state.dirty = TRUE; - r300->hyperz_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->gpu_flush); + r300_mark_atom_dirty(r300, &r300->fb_state); + /* What is marked as dirty depends on the enum r300_fb_state_change. */ if (change == R300_CHANGED_FB_STATE) { - r300->aa_state.dirty = TRUE; - r300->fb_state_pipelined.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->aa_state); + } + + if (change == R300_CHANGED_FB_STATE || + change == R300_CHANGED_CBZB_FLAG || + change == R300_CHANGED_ZCLEAR_FLAG) { + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } + + if (change == R300_CHANGED_FB_STATE || + change == R300_CHANGED_MULTIWRITE) { + r300_mark_atom_dirty(r300, &r300->fb_state_pipelined); } /* Now compute the fb_state atom size. */ @@ -738,11 +747,11 @@ static void /* If nr_cbufs is changed from zero to non-zero or vice versa... */ if (!!old_state->nr_cbufs != !!state->nr_cbufs) { - r300->blend_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->blend_state); } /* If zsbuf is set from NULL to non-NULL or vice versa.. */ if (!!old_state->zsbuf != !!state->zsbuf) { - r300->dsa_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->dsa_state); } /* The tiling flags are dependent on the surface miplevel, unfortunately. */ @@ -751,6 +760,7 @@ static void util_copy_framebuffer_state(r300->fb_state.state, state); r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); + r300->validate_buffers = TRUE; r300->z_compression = false; @@ -768,7 +778,7 @@ static void struct r300_surface *zs_surf = r300_surface(state->zsbuf); struct r300_texture *tex; int compress = r300->screen->caps.is_rv350 ? RV350_Z_COMPRESS_88 : R300_Z_COMPRESS_44; - int level = zs_surf->base.level; + int level = zs_surf->base.u.tex.level; tex = r300_texture(zs_surf->base.texture); @@ -795,7 +805,7 @@ static void r300->zbuffer_bpp = zbuffer_bpp; if (r300->polygon_offset_enabled) - r300->rs_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->rs_state); } } @@ -853,9 +863,9 @@ void r300_mark_fs_code_dirty(struct r300_context *r300) { struct r300_fragment_shader* fs = r300_fs(r300); - r300->fs.dirty = TRUE; - r300->fs_rc_constant_state.dirty = TRUE; - r300->fs_constants.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->fs); + r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state); + r300_mark_atom_dirty(r300, &r300->fs_constants); r300->fs.size = fs->shader->cb_code_size; if (r300->screen->caps.is_r500) { @@ -875,17 +885,26 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) { struct r300_context* r300 = r300_context(pipe); struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader; + struct pipe_framebuffer_state *fb = r300->fb_state.state; + boolean last_multi_write; if (fs == NULL) { r300->fs.state = NULL; return; } + last_multi_write = r300_fragment_shader_writes_all(r300_fs(r300)); + r300->fs.state = fs; r300_pick_fragment_shader(r300); r300_mark_fs_code_dirty(r300); - r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ + if (fb->nr_cbufs > 1 && + last_multi_write != r300_fragment_shader_writes_all(fs)) { + r300_mark_fb_state_dirty(r300, R300_CHANGED_MULTIWRITE); + } + + r300_mark_atom_dirty(r300, &r300->rs_block_state); /* Will be updated before the emission. */ } /* Delete fragment shader state. 
*/ @@ -1137,7 +1156,7 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) if (last_sprite_coord_enable != r300->sprite_coord_enable || last_two_sided_color != r300->two_sided_color) { - r300->rs_block_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->rs_block_state); } } @@ -1235,7 +1254,7 @@ static void r300_bind_sampler_states(struct pipe_context* pipe, memcpy(state->sampler_states, states, sizeof(void*) * count); state->sampler_state_count = count; - r300->textures_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->textures_state); } static void r300_lacks_vertex_textures(struct pipe_context* pipe, @@ -1297,29 +1316,27 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, } for (i = 0; i < count; i++) { - if (&state->sampler_views[i]->base != views[i]) { - pipe_sampler_view_reference( - (struct pipe_sampler_view**)&state->sampler_views[i], - views[i]); + pipe_sampler_view_reference( + (struct pipe_sampler_view**)&state->sampler_views[i], + views[i]); - if (!views[i]) { - continue; - } + if (!views[i]) { + continue; + } - /* A new sampler view (= texture)... */ - dirty_tex = TRUE; + /* A new sampler view (= texture)... */ + dirty_tex = TRUE; - /* Set the texrect factor in the fragment shader. + /* Set the texrect factor in the fragment shader. * Needed for RECT and NPOT fallback. */ - texture = r300_texture(views[i]->texture); - if (texture->desc.is_npot) { - r300->fs_rc_constant_state.dirty = TRUE; - } + texture = r300_texture(views[i]->texture); + if (texture->desc.is_npot) { + r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state); + } - state->sampler_views[i]->texcache_region = + state->sampler_views[i]->texcache_region = r300_assign_texture_cache_region(view_index, real_num_views); - view_index++; - } + view_index++; } for (i = count; i < tex_units; i++) { @@ -1332,10 +1349,11 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, state->sampler_view_count = count; - r300->textures_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->textures_state); + r300->validate_buffers = TRUE; if (dirty_tex) { - r300->texture_cache_inval.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->texture_cache_inval); } } @@ -1347,6 +1365,7 @@ r300_create_sampler_view(struct pipe_context *pipe, struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view); struct r300_texture *tex = r300_texture(texture); boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500; + boolean dxtc_swizzle = r300_screen(pipe->screen)->caps.dxtc_swizzle; if (view) { view->base = *templ; @@ -1363,7 +1382,8 @@ r300_create_sampler_view(struct pipe_context *pipe, view->format = tex->tx_format; view->format.format1 |= r300_translate_texformat(templ->format, view->swizzle, - is_r500); + is_r500, + dxtc_swizzle); if (is_r500) { view->format.format2 |= r500_tx_format_msb_bit(templ->format); } @@ -1388,7 +1408,7 @@ static void r300_set_scissor_state(struct pipe_context* pipe, memcpy(r300->scissor_state.state, state, sizeof(struct pipe_scissor_state)); - r300->scissor_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->scissor_state); } static void r300_set_viewport_state(struct pipe_context* pipe, @@ -1434,9 +1454,9 @@ static void r300_set_viewport_state(struct pipe_context* pipe, viewport->vte_control |= R300_VPORT_Z_OFFSET_ENA; } - r300->viewport_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->viewport_state); if (r300->fs.state && r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED) { - r300->fs_rc_constant_state.dirty = TRUE; + 
r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state); } } @@ -1445,15 +1465,15 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, const struct pipe_vertex_buffer* buffers) { struct r300_context* r300 = r300_context(pipe); - struct pipe_vertex_buffer *vbo; + const struct pipe_vertex_buffer *vbo; unsigned i, max_index = (1 << 24) - 1; boolean any_user_buffer = FALSE; + boolean any_nonuser_buffer = FALSE; struct pipe_vertex_buffer dummy_vb = {0}; /* There must be at least one vertex buffer set, otherwise it locks up. */ if (!count) { dummy_vb.buffer = r300->dummy_vb; - dummy_vb.max_index = r300->dummy_vb->width0 / 4; buffers = &dummy_vb; count = 1; } @@ -1480,34 +1500,41 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, } for (i = 0; i < count; i++) { - /* Why, yes, I AM casting away constness. How did you know? */ - vbo = (struct pipe_vertex_buffer*)&buffers[i]; + vbo = &buffers[i]; /* Skip NULL buffers */ - if (!buffers[i].buffer) { + if (!vbo->buffer) { continue; } - if (r300_buffer_is_user_buffer(vbo->buffer)) { + /* User buffers have no info about maximum index, + * we will have to compute it in draw_vbo. */ + if (r300_is_user_buffer(vbo->buffer)) { any_user_buffer = TRUE; + continue; } + any_nonuser_buffer = TRUE; - if (vbo->max_index == ~0) { - /* if no VBO stride then only one vertex value so max index is 1 */ - /* should think about converting to VS constants like svga does */ - if (!vbo->stride) - vbo->max_index = 1; - else - vbo->max_index = - (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; - } + /* The stride of zero means we will be fetching only the first + * vertex, so don't care about max_index. */ + if (!vbo->stride) + continue; - max_index = MIN2(vbo->max_index, max_index); + /* Update the maximum index. */ + { + unsigned vbo_max_index = + (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; + max_index = MIN2(max_index, vbo_max_index); + } } r300->any_user_vbs = any_user_buffer; r300->vertex_buffer_max_index = max_index; - + r300->vertex_arrays_dirty = TRUE; + if (any_nonuser_buffer) + r300->validate_buffers = TRUE; + if (!any_user_buffer) + r300->upload_vb_validated = FALSE; } else { /* SW TCL. */ draw_set_vertex_buffers(r300->draw, count, buffers); @@ -1515,16 +1542,25 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, /* Common code. */ for (i = 0; i < count; i++) { + vbo = &buffers[i]; + /* Reference our buffer. */ - pipe_resource_reference(&r300->vertex_buffer[i].buffer, buffers[i].buffer); + pipe_resource_reference(&r300->vertex_buffer[i].buffer, vbo->buffer); + if (vbo->buffer && r300_is_user_buffer(vbo->buffer)) { + pipe_resource_reference(&r300->valid_vertex_buffer[i], NULL); + } else { + pipe_resource_reference(&r300->valid_vertex_buffer[i], vbo->buffer); + } } for (; i < r300->vertex_buffer_count; i++) { /* Dereference any old buffers. 
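A worked example (editorial illustration, with made-up numbers) of the per-buffer index limit computed in the set_vertex_buffers hunk above: the limit is (width0 - buffer_offset) / stride, the minimum across all bound buffers becomes vertex_buffer_max_index, and buffers with stride 0 are skipped because only their first element is ever fetched.

    #include <assert.h>

    int main(void)
    {
        // mirrors the formula from the hunk above
        unsigned width0 = 1000, buffer_offset = 16, stride = 12;
        assert((width0 - buffer_offset) / stride == 82);
        return 0;
    }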
*/ pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL); + pipe_resource_reference(&r300->valid_vertex_buffer[i], NULL); } memcpy(r300->vertex_buffer, buffers, - sizeof(struct pipe_vertex_buffer) * count); + sizeof(struct pipe_vertex_buffer) * count); + r300->vertex_buffer_count = count; } @@ -1533,20 +1569,23 @@ static void r300_set_index_buffer(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - if (ib) { + if (ib && ib->buffer) { pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer); memcpy(&r300->index_buffer, ib, sizeof(r300->index_buffer)); + + if (r300->screen->caps.has_tcl && + !r300_is_user_buffer(ib->buffer)) { + r300->validate_buffers = TRUE; + r300->upload_ib_validated = FALSE; + } } else { pipe_resource_reference(&r300->index_buffer.buffer, NULL); memset(&r300->index_buffer, 0, sizeof(r300->index_buffer)); } - if (r300->screen->caps.has_tcl) { - /* TODO make this more like a state */ - } - else { - draw_set_index_buffer(r300->draw, ib); + if (!r300->screen->caps.has_tcl) { + draw_set_index_buffer(r300->draw, ib); } } @@ -1714,6 +1753,7 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe, UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state); r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2; + r300->vertex_arrays_dirty = TRUE; } static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *state) @@ -1756,27 +1796,25 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) r300->vs_state.state = vs; /* The majority of the RS block bits is dependent on the vertex shader. */ - r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ + r300_mark_atom_dirty(r300, &r300->rs_block_state); /* Will be updated before the emission. */ if (r300->screen->caps.has_tcl) { unsigned fc_op_dwords = r300->screen->caps.is_r500 ? 3 : 2; - r300->vs_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->vs_state); r300->vs_state.size = vs->code.length + 9 + - (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0) + (vs->code.num_fc_ops ? vs->code.num_fc_ops * fc_op_dwords + 4 : 0); - if (vs->externals_count) { - r300->vs_constants.dirty = TRUE; - r300->vs_constants.size = vs->externals_count * 4 + 3; - } else { - r300->vs_constants.size = 0; - } + r300_mark_atom_dirty(r300, &r300->vs_constants); + r300->vs_constants.size = + 2 + + (vs->externals_count ? vs->externals_count * 4 + 3 : 0) + + (vs->immediates_count ? 
vs->immediates_count * 4 + 3 : 0); ((struct r300_constant_buffer*)r300->vs_constants.state)->remap_table = vs->code.constants_remap_table; - r300->pvs_flush.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->pvs_flush); } else { draw_bind_vertex_shader(r300->draw, (struct draw_vertex_shader*)vs->draw_vs); @@ -1807,6 +1845,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, { struct r300_context* r300 = r300_context(pipe); struct r300_constant_buffer *cbuf; + struct r300_buffer *rbuf = r300_buffer(buf); uint32_t *mapped; switch (shader) { @@ -1817,33 +1856,48 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, cbuf = (struct r300_constant_buffer*)r300->fs_constants.state; break; default: - assert(0); return; } - if (buf == NULL || buf->width0 == 0 || - (mapped = r300_buffer(buf)->constant_buffer) == NULL) { + if (buf == NULL || buf->width0 == 0) + return; + + if (rbuf->user_buffer) + mapped = (uint32_t*)rbuf->user_buffer; + else if (rbuf->constant_buffer) + mapped = (uint32_t*)rbuf->constant_buffer; + else return; - } if (shader == PIPE_SHADER_FRAGMENT || (shader == PIPE_SHADER_VERTEX && r300->screen->caps.has_tcl)) { - assert((buf->width0 % (4 * sizeof(float))) == 0); - cbuf->ptr = mapped + index*4; + cbuf->ptr = mapped; } if (shader == PIPE_SHADER_VERTEX) { if (r300->screen->caps.has_tcl) { - if (r300->vs_constants.size) { - r300->vs_constants.dirty = TRUE; + struct r300_vertex_shader *vs = + (struct r300_vertex_shader*)r300->vs_state.state; + + if (!vs) { + cbuf->buffer_base = 0; + return; + } + + cbuf->buffer_base = r300->vs_const_base; + r300->vs_const_base += vs->code.constants.Count; + if (r300->vs_const_base > R500_MAX_PVS_CONST_VECS) { + r300->vs_const_base = vs->code.constants.Count; + cbuf->buffer_base = 0; + r300_mark_atom_dirty(r300, &r300->pvs_flush); } - r300->pvs_flush.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->vs_constants); } else if (r300->draw) { draw_set_mapped_constant_buffer(r300->draw, PIPE_SHADER_VERTEX, 0, mapped, buf->width0); } } else if (shader == PIPE_SHADER_FRAGMENT) { - r300->fs_constants.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->fs_constants); } } diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 1cff3483b50..95be7849f8f 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -32,7 +32,6 @@ #include "r300_hyperz.h" #include "r300_screen.h" #include "r300_shader_semantics.h" -#include "r300_state_derived.h" #include "r300_state_inlines.h" #include "r300_texture.h" #include "r300_vs.h" @@ -193,7 +192,7 @@ static void r300_swtcl_vertex_psc(struct r300_context *r300) (R300_LAST_VEC << (i & 1 ? 16 : 0)); vstream->count = (i >> 1) + 1; - r300->vertex_stream_state.dirty = TRUE; + r300_mark_atom_dirty(r300, &r300->vertex_stream_state); r300->vertex_stream_state.size = (1 + vstream->count) * 2; } @@ -592,7 +591,8 @@ static void r300_update_rs_block(struct r300_context *r300) } static uint32_t r300_get_border_color(enum pipe_format format, - const float border[4]) + const float border[4], + boolean is_r500) { const struct util_format_description *desc; float border_swizzled[4] = {0}; @@ -601,6 +601,24 @@ static uint32_t r300_get_border_color(enum pipe_format format, desc = util_format_description(format); + /* Do depth formats first. 
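The depth cases below hand the border value to util_pack_z(), i.e. the float is quantized to the format's fixed-point range (and, per the hunk, pre-r500 chips approximate the 24-bit formats with a 16-bit value shifted left by 16). A sketch of the unorm quantization involved, assuming simple clamp-and-round; the exact util_pack_z() rounding is not shown in this diff and the helper below is hypothetical.

    #include <assert.h>
    #include <stdint.h>

    static uint32_t pack_unorm(float z, unsigned bits)
    {
        uint32_t max = (1u << bits) - 1;
        if (z < 0.0f) z = 0.0f;          // clamp to [0, 1]
        if (z > 1.0f) z = 1.0f;
        return (uint32_t)(z * (float)max + 0.5f);
    }

    int main(void)
    {
        assert(pack_unorm(1.0f, 16) == 0xffff);   // Z16 border of 1.0
        assert(pack_unorm(0.0f, 24) == 0);        // Z24 border of 0.0
        assert(pack_unorm(0.5f, 16) == 0x8000);   // midpoint, rounded up
        return 0;
    }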
*/ + if (util_format_is_depth_or_stencil(format)) { + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return util_pack_z(PIPE_FORMAT_Z16_UNORM, border[0]); + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + if (is_r500) { + return util_pack_z(PIPE_FORMAT_X8Z24_UNORM, border[0]); + } else { + return util_pack_z(PIPE_FORMAT_Z16_UNORM, border[0]) << 16; + } + default: + assert(0); + return 0; + } + } + /* Apply inverse swizzle of the format. */ for (i = 0; i < 4; i++) { switch (desc->swizzle[i]) { @@ -619,7 +637,17 @@ static uint32_t r300_get_border_color(enum pipe_format format, } } + /* Compressed formats. */ + if (util_format_is_compressed(format)) { + util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); + return uc.ui; + } + switch (desc->channel[0].size) { + case 2: + util_pack_color(border_swizzled, PIPE_FORMAT_B2G3R3_UNORM, &uc); + break; + case 4: util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc); break; @@ -642,6 +670,15 @@ static uint32_t r300_get_border_color(enum pipe_format format, case 10: util_pack_color(border_swizzled, PIPE_FORMAT_B10G10R10A2_UNORM, &uc); break; + + case 16: + if (desc->nr_channels <= 2) { + border_swizzled[0] = border_swizzled[2]; + util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_UNORM, &uc); + } else { + util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + } + break; } return uc.ui; @@ -683,12 +720,13 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) /* Set the border color. */ texstate->border_color = r300_get_border_color(view->base.format, - sampler->state.border_color); + sampler->state.border_color, + r300->screen->caps.is_r500); /* determine min/max levels */ - max_level = MIN3(sampler->max_lod + view->base.first_level, - tex->desc.b.b.last_level, view->base.last_level); - min_level = MIN2(sampler->min_lod + view->base.first_level, + max_level = MIN3(sampler->max_lod + view->base.u.tex.first_level, + tex->desc.b.b.last_level, view->base.u.tex.last_level); + min_level = MIN2(sampler->min_lod + view->base.u.tex.first_level, max_level); if (tex->desc.is_npot && min_level > 0) { @@ -729,13 +767,18 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) if (sampler->state.compare_mode == PIPE_TEX_COMPARE_NONE) { texstate->format.format1 |= r300_get_swizzle_combined(depth_swizzle, - view->swizzle); + view->swizzle, FALSE); } else { texstate->format.format1 |= - r300_get_swizzle_combined(depth_swizzle, 0); + r300_get_swizzle_combined(depth_swizzle, 0, FALSE); } } + if (r300->screen->caps.dxtc_swizzle && + util_format_is_compressed(tex->desc.b.b.format)) { + texstate->filter1 |= R400_DXTC_SWIZZLE_ENABLE; + } + /* to emulate 1D textures through 2D ones correctly */ if (tex->desc.b.b.target == PIPE_TEXTURE_1D) { texstate->filter0 &= ~R300_TX_WRAP_T_MASK; @@ -819,40 +862,6 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } } -/* We can't use compressed zbuffers as samplers. 
*/ -static void r300_flush_depth_textures(struct r300_context *r300) -{ - struct r300_textures_state *state = - (struct r300_textures_state*)r300->textures_state.state; - unsigned i, level; - unsigned count = MIN2(state->sampler_view_count, - state->sampler_state_count); - - if (r300->z_decomp_rd) - return; - - for (i = 0; i < count; i++) - if (state->sampler_views[i] && state->sampler_states[i]) { - struct pipe_resource *tex = state->sampler_views[i]->base.texture; - - if (tex->target == PIPE_TEXTURE_3D || - tex->target == PIPE_TEXTURE_CUBE) - continue; - - /* Ignore non-depth textures. - * Also ignore reinterpreted depth textures, e.g. resource_copy. */ - if (!util_format_is_depth_or_stencil(tex->format)) - continue; - - for (level = 0; level <= tex->last_level; level++) - if (r300_texture(tex)->zmask_in_use[level]) { - /* We don't handle 3D textures and cubemaps yet. */ - r300_flush_depth_stencil(&r300->context, tex, - u_subresource(0, level), 0); - } - } -} - void r300_update_derived_state(struct r300_context* r300) { r300_flush_depth_textures(r300); diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h deleted file mode 100644 index 71a4a47b003..00000000000 --- a/src/gallium/drivers/r300/r300_state_derived.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R300_STATE_DERIVED_H -#define R300_STATE_DERIVED_H - -struct r300_context; - -void r300_update_derived_state(struct r300_context* r300); - -#endif /* R300_STATE_DERIVED_H */ diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index cee56bccdcd..6fdc504ed54 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -40,7 +40,8 @@ #include "pipe/p_screen.h" unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, - const unsigned char *swizzle_view) + const unsigned char *swizzle_view, + boolean dxtc_swizzle) { unsigned i; unsigned char swizzle[4]; @@ -51,10 +52,10 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, R300_TX_FORMAT_B_SHIFT, R300_TX_FORMAT_A_SHIFT }; - const uint32_t swizzle_bit[4] = { - R300_TX_FORMAT_X, + uint32_t swizzle_bit[4] = { + dxtc_swizzle ? R300_TX_FORMAT_Z : R300_TX_FORMAT_X, R300_TX_FORMAT_Y, - R300_TX_FORMAT_Z, + dxtc_swizzle ? 
R300_TX_FORMAT_X : R300_TX_FORMAT_Z, R300_TX_FORMAT_W }; @@ -107,7 +108,8 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, * makes available X, Y, Z, W, ZERO, and ONE for swizzling. */ uint32_t r300_translate_texformat(enum pipe_format format, const unsigned char *swizzle_view, - boolean is_r500) + boolean is_r500, + boolean dxtc_swizzle) { uint32_t result = 0; const struct util_format_description *desc; @@ -169,7 +171,8 @@ uint32_t r300_translate_texformat(enum pipe_format format, } } - result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view); + result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, + util_format_is_compressed(format) && dxtc_swizzle); /* S3TC formats. */ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { @@ -241,6 +244,11 @@ uint32_t r300_translate_texformat(enum pipe_format format, desc->channel[2].size == 6) { return R300_TX_FORMAT_Z6Y5X5 | result; } + if (desc->channel[0].size == 2 && + desc->channel[1].size == 3 && + desc->channel[2].size == 3) { + return R300_TX_FORMAT_Z3Y3X2 | result; + } return ~0; /* Unsupported/unknown. */ case 4: @@ -478,6 +486,8 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) } else { if (desc->channel[i].size == 16) { modifier |= R300_US_OUT_FMT_C4_16; + } else if (desc->channel[i].size == 10) { + modifier |= R300_US_OUT_FMT_C4_10; } else { /* C4_8 seems to be used for the formats whose pixel size * is <= 32 bits. */ @@ -571,7 +581,7 @@ boolean r300_is_zs_format_supported(enum pipe_format format) boolean r300_is_sampler_format_supported(enum pipe_format format) { - return r300_translate_texformat(format, 0, TRUE) != ~0; + return r300_translate_texformat(format, 0, TRUE, FALSE) != ~0; } void r300_texture_setup_format_state(struct r300_screen *screen, @@ -665,21 +675,21 @@ void r300_texture_reinterpret_format(struct pipe_screen *screen, } static unsigned r300_texture_is_referenced(struct pipe_context *context, - struct pipe_resource *texture, - unsigned face, unsigned level) + struct pipe_resource *texture, + unsigned level, int layer) { struct r300_context *r300 = r300_context(context); struct r300_texture *rtex = (struct r300_texture *)texture; if (r300->rws->cs_is_buffer_referenced(r300->cs, - rtex->buffer, R300_REF_CS)) + rtex->cs_buffer, R300_REF_CS)) return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; return PIPE_UNREFERENCED; } static void r300_texture_destroy(struct pipe_screen *screen, - struct pipe_resource* texture) + struct pipe_resource* texture) { struct r300_texture* tex = (struct r300_texture*)texture; struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys; @@ -774,6 +784,8 @@ r300_texture_create_object(struct r300_screen *rscreen, } } + tex->cs_buffer = rws->buffer_get_cs_handle(rws, tex->buffer); + rws->buffer_set_tiling(rws, tex->buffer, tex->desc.microtile, tex->desc.macrotile[0], tex->desc.stride_in_bytes[0]); @@ -848,38 +860,40 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, /* Not required to implement u_resource_vtbl, consider moving to another file: */ -struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, - struct pipe_resource* texture, - unsigned face, - unsigned level, - unsigned zslice, - unsigned flags) +struct pipe_surface* r300_create_surface(struct pipe_context * ctx, + struct pipe_resource* texture, + const struct pipe_surface *surf_tmpl) { struct r300_texture* tex = r300_texture(texture); struct r300_surface* surface = CALLOC_STRUCT(r300_surface); + unsigned level = 
surf_tmpl->u.tex.level; + + assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); if (surface) { uint32_t offset, tile_height; pipe_reference_init(&surface->base.reference, 1); pipe_resource_reference(&surface->base.texture, texture); - surface->base.format = texture->format; + surface->base.context = ctx; + surface->base.format = surf_tmpl->format; surface->base.width = u_minify(texture->width0, level); surface->base.height = u_minify(texture->height0, level); - surface->base.usage = flags; - surface->base.zslice = zslice; - surface->base.face = face; - surface->base.level = level; + surface->base.usage = surf_tmpl->usage; + surface->base.u.tex.level = level; + surface->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer; + surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; surface->buffer = tex->buffer; + surface->cs_buffer = tex->cs_buffer; /* Prefer VRAM if there are multiple domains to choose from. */ surface->domain = tex->domain; if (surface->domain & R300_DOMAIN_VRAM) surface->domain &= ~R300_DOMAIN_GTT; - surface->offset = r300_texture_get_offset(&tex->desc, - level, zslice, face); + surface->offset = r300_texture_get_offset(&tex->desc, level, + surf_tmpl->u.tex.first_layer); surface->pitch = tex->fb_state.pitch[level]; surface->format = tex->fb_state.format; @@ -892,7 +906,7 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, tex->desc.b.b.nr_samples, tex->desc.microtile, tex->desc.macrotile[level], - DIM_HEIGHT); + DIM_HEIGHT, 0); surface->cbzb_height = align((surface->base.height + 1) / 2, tile_height); @@ -910,13 +924,13 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, else surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z; - SCREEN_DBG(r300_screen(screen), DBG_CBZB, - "CBZB Allowed: %s, Dim: %ix%i, Misalignment: %i, Micro: %s, Macro: %s\n", - surface->cbzb_allowed ? "YES" : " NO", - surface->cbzb_width, surface->cbzb_height, - offset & 2047, - tex->desc.microtile ? "YES" : " NO", - tex->desc.macrotile[level] ? "YES" : " NO"); + DBG(r300_context(ctx), DBG_CBZB, + "CBZB Allowed: %s, Dim: %ix%i, Misalignment: %i, Micro: %s, Macro: %s\n", + surface->cbzb_allowed ? "YES" : " NO", + surface->cbzb_width, surface->cbzb_height, + offset & 2047, + tex->desc.microtile ? "YES" : " NO", + tex->desc.macrotile[level] ? 
"YES" : " NO"); } return &surface->base; @@ -924,7 +938,7 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, /* Not required to implement u_resource_vtbl, consider moving to another file: */ -void r300_tex_surface_destroy(struct pipe_surface* s) +void r300_surface_destroy(struct pipe_context *ctx, struct pipe_surface* s) { pipe_resource_reference(&s->texture, NULL); FREE(s); diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index c4588a0c90b..0ab22f747e4 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -27,6 +27,7 @@ #include "pipe/p_format.h" struct pipe_screen; +struct pipe_context; struct pipe_resource; struct winsys_handle; struct r300_texture_format_state; @@ -35,11 +36,13 @@ struct r300_texture; struct r300_screen; unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, - const unsigned char *swizzle_view); + const unsigned char *swizzle_view, + boolean dxtc_swizzle); uint32_t r300_translate_texformat(enum pipe_format format, const unsigned char *swizzle_view, - boolean is_r500); + boolean is_r500, + boolean dxtc_swizzle); uint32_t r500_tx_format_msb_bit(enum pipe_format format); @@ -68,13 +71,10 @@ r300_texture_create(struct pipe_screen* screen, const struct pipe_resource* templ); -struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, - struct pipe_resource* texture, - unsigned face, - unsigned level, - unsigned zslice, - unsigned flags); +struct pipe_surface* r300_create_surface(struct pipe_context *ctx, + struct pipe_resource* texture, + const struct pipe_surface *surf_tmpl); -void r300_tex_surface_destroy(struct pipe_surface* s); +void r300_surface_destroy(struct pipe_context *ctx, struct pipe_surface* s); #endif /* R300_TEXTURE_H */ diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 543d0fdc15b..7b1739142d4 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -34,7 +34,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, unsigned num_samples, enum r300_buffer_tiling microtile, enum r300_buffer_tiling macrotile, - enum r300_dim dim) + enum r300_dim dim, boolean is_rs690) { static const unsigned table[2][5][3][2] = { @@ -57,6 +57,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ } }; + static const unsigned aa_block[2] = {4, 8}; unsigned tile = 0; unsigned pixsize = util_format_get_blocksize(format); @@ -74,6 +75,14 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, } else { /* Standard alignment. 
*/ tile = table[macrotile][util_logbase2(pixsize)][microtile][dim]; + if (macrotile == 0 && is_rs690 && dim == DIM_WIDTH) { + int align; + int h_tile; + h_tile = table[macrotile][util_logbase2(pixsize)][microtile][DIM_HEIGHT]; + align = 64 / (pixsize * h_tile); + if (tile < align) + tile = align; + } } assert(tile); @@ -89,7 +98,7 @@ static boolean r300_texture_macro_switch(struct r300_texture_desc *desc, unsigned tile, texdim; tile = r300_get_pixel_alignment(desc->b.b.format, desc->b.b.nr_samples, - desc->microtile, R300_BUFFER_TILED, dim); + desc->microtile, R300_BUFFER_TILED, dim, 0); if (dim == DIM_WIDTH) { texdim = u_minify(desc->width0, level); } else { @@ -113,6 +122,9 @@ static unsigned r300_texture_get_stride(struct r300_screen *screen, unsigned level) { unsigned tile_width, width, stride; + boolean is_rs690 = (screen->caps.family == CHIP_FAMILY_RS600 || + screen->caps.family == CHIP_FAMILY_RS690 || + screen->caps.family == CHIP_FAMILY_RS740); if (desc->stride_in_bytes_override) return desc->stride_in_bytes_override; @@ -131,38 +143,14 @@ static unsigned r300_texture_get_stride(struct r300_screen *screen, desc->b.b.nr_samples, desc->microtile, desc->macrotile[level], - DIM_WIDTH); + DIM_WIDTH, is_rs690); width = align(width, tile_width); stride = util_format_get_stride(desc->b.b.format, width); - - /* Some IGPs need a minimum stride of 64 bytes, hmm... */ - if (!desc->macrotile[level] && - (screen->caps.family == CHIP_FAMILY_RS600 || - screen->caps.family == CHIP_FAMILY_RS690 || - screen->caps.family == CHIP_FAMILY_RS740)) { - unsigned min_stride; - - if (desc->microtile) { - unsigned tile_height = - r300_get_pixel_alignment(desc->b.b.format, - desc->b.b.nr_samples, - desc->microtile, - desc->macrotile[level], - DIM_HEIGHT); - - min_stride = 64 / tile_height; - } else { - min_stride = 64; - } - - return stride < min_stride ? min_stride : stride; - } - /* The alignment to 32 bytes is sort of implied by the layout... */ return stride; } else { - return align(util_format_get_stride(desc->b.b.format, width), 32); + return align(util_format_get_stride(desc->b.b.format, width), is_rs690 ? 64 : 32); } } @@ -179,7 +167,7 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, desc->b.b.nr_samples, desc->microtile, desc->macrotile[level], - DIM_HEIGHT); + DIM_HEIGHT, 0); height = align(height, tile_height); /* This is needed for the kernel checker, unfortunately. 
*/ @@ -474,22 +462,17 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, } unsigned r300_texture_get_offset(struct r300_texture_desc *desc, - unsigned level, unsigned zslice, - unsigned face) + unsigned level, unsigned layer) { unsigned offset = desc->offset_in_bytes[level]; switch (desc->b.b.target) { case PIPE_TEXTURE_3D: - assert(face == 0); - return offset + zslice * desc->layer_size_in_bytes[level]; - case PIPE_TEXTURE_CUBE: - assert(zslice == 0); - return offset + face * desc->layer_size_in_bytes[level]; + return offset + layer * desc->layer_size_in_bytes[level]; default: - assert(zslice == 0 && face == 0); + assert(layer == 0); return offset; } } diff --git a/src/gallium/drivers/r300/r300_texture_desc.h b/src/gallium/drivers/r300/r300_texture_desc.h index 3d7fe1fb473..121d215b4cb 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.h +++ b/src/gallium/drivers/r300/r300_texture_desc.h @@ -41,7 +41,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, unsigned num_samples, enum r300_buffer_tiling microtile, enum r300_buffer_tiling macrotile, - enum r300_dim dim); + enum r300_dim dim, boolean is_rs690); boolean r300_texture_desc_init(struct r300_screen *rscreen, struct r300_texture_desc *desc, @@ -52,7 +52,6 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, unsigned max_buffer_size); unsigned r300_texture_get_offset(struct r300_texture_desc *desc, - unsigned level, unsigned zslice, - unsigned face); + unsigned level, unsigned layer); #endif diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 33448bf0def..15a323989b2 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -57,7 +57,7 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */ /* gap */ case TGSI_OPCODE_FRC: return RC_OPCODE_FRC; - /* case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP; */ + case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP; case TGSI_OPCODE_FLR: return RC_OPCODE_FLR; /* case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; */ case TGSI_OPCODE_EX2: return RC_OPCODE_EX2; diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index e9333b35ef5..ae93fab554e 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -27,6 +27,7 @@ #include "util/u_memory.h" #include "util/u_format.h" +#include "util/u_box.h" struct r300_transfer { /* Parent class */ @@ -52,16 +53,10 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx, { struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer; struct pipe_resource *tex = transfer->resource; - struct pipe_subresource subdst; - subdst.face = 0; - subdst.level = 0; - - ctx->resource_copy_region(ctx, &r300transfer->linear_texture->desc.b.b, subdst, - 0, 0, 0, - tex, transfer->sr, - transfer->box.x, transfer->box.y, transfer->box.z, - transfer->box.width, transfer->box.height); + ctx->resource_copy_region(ctx, &r300transfer->linear_texture->desc.b.b, 0, + 0, 0, 0, + tex, transfer->level, &transfer->box); } /* Copy a detiled texture to a tiled one. 
*/ @@ -70,26 +65,22 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx, { struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer; struct pipe_resource *tex = transfer->resource; - struct pipe_subresource subsrc; - - subsrc.face = 0; - subsrc.level = 0; + struct pipe_box src_box; + u_box_origin_2d(transfer->box.width, transfer->box.height, &src_box); - ctx->resource_copy_region(ctx, tex, transfer->sr, - transfer->box.x, transfer->box.y, transfer->box.z, - &r300transfer->linear_texture->desc.b.b, subsrc, - 0, 0, 0, - transfer->box.width, transfer->box.height); + ctx->resource_copy_region(ctx, tex, transfer->level, + transfer->box.x, transfer->box.y, transfer->box.z, + &r300transfer->linear_texture->desc.b.b, 0, &src_box); ctx->flush(ctx, 0, NULL); } struct pipe_transfer* r300_texture_get_transfer(struct pipe_context *ctx, - struct pipe_resource *texture, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box) + struct pipe_resource *texture, + unsigned level, + unsigned usage, + const struct pipe_box *box) { struct r300_context *r300 = r300_context(ctx); struct r300_texture *tex = r300_texture(texture); @@ -99,13 +90,13 @@ r300_texture_get_transfer(struct pipe_context *ctx, referenced_cs = r300->rws->cs_is_buffer_referenced(r300->cs, - tex->buffer, R300_REF_CS); + tex->cs_buffer, R300_REF_CS); if (referenced_cs) { referenced_hw = TRUE; } else { referenced_hw = r300->rws->cs_is_buffer_referenced(r300->cs, - tex->buffer, R300_REF_HW); + tex->cs_buffer, R300_REF_HW); } blittable = ctx->screen->is_format_supported( @@ -116,25 +107,26 @@ r300_texture_get_transfer(struct pipe_context *ctx, if (trans) { /* Initialize the transfer object. */ pipe_resource_reference(&trans->transfer.resource, texture); - trans->transfer.sr = sr; + trans->transfer.level = level; trans->transfer.usage = usage; trans->transfer.box = *box; /* If the texture is tiled, we must create a temporary detiled texture * for this transfer. * Also make write transfers pipelined. */ - if (tex->desc.microtile || tex->desc.macrotile[sr.level] || + if (tex->desc.microtile || tex->desc.macrotile[level] || ((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) { base.target = PIPE_TEXTURE_2D; base.format = texture->format; base.width0 = box->width; base.height0 = box->height; - base.depth0 = 0; + base.depth0 = 1; + base.array_size = 1; base.last_level = 0; base.nr_samples = 0; base.usage = PIPE_USAGE_DYNAMIC; base.bind = 0; - base.flags = R300_RESOURCE_FLAG_TRANSFER; + base.flags = R300_RESOURCE_FLAG_TRANSFER; /* For texture reading, the temporary (detiled) texture is used as * a render target when blitting from a tiled texture. */ @@ -164,7 +156,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, if (!trans->linear_texture) { /* For linear textures, it's safe to fallback to * an unpipelined transfer. */ - if (!tex->desc.microtile && !tex->desc.macrotile[sr.level]) { + if (!tex->desc.microtile && !tex->desc.macrotile[level]) { goto unpipelined; } @@ -182,7 +174,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, /* Set the stride. * * Even though we are using an internal texture for this, - * the transfer sr, box and usage parameters still reflect + * the transfer level, box and usage parameters still reflect * the arguments received to get_transfer. We just do the * right thing internally. */ @@ -202,9 +194,8 @@ r300_texture_get_transfer(struct pipe_context *ctx, unpipelined: /* Unpipelined transfer. 
*/ - trans->transfer.stride = tex->desc.stride_in_bytes[sr.level]; - trans->offset = r300_texture_get_offset(&tex->desc, - sr.level, box->z, sr.face); + trans->transfer.stride = tex->desc.stride_in_bytes[level]; + trans->offset = r300_texture_get_offset(&tex->desc, level, box->z); if (referenced_cs) ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); diff --git a/src/gallium/drivers/r300/r300_transfer.h b/src/gallium/drivers/r300/r300_transfer.h index 0d32a68d1fa..7977ef516f2 100644 --- a/src/gallium/drivers/r300/r300_transfer.h +++ b/src/gallium/drivers/r300/r300_transfer.h @@ -30,22 +30,22 @@ struct r300_context; struct pipe_transfer* r300_texture_get_transfer(struct pipe_context *ctx, - struct pipe_resource *texture, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box); + struct pipe_resource *texture, + unsigned level, + unsigned usage, + const struct pipe_box *box); void r300_texture_transfer_destroy(struct pipe_context *ctx, - struct pipe_transfer *trans); + struct pipe_transfer *trans); void* r300_texture_transfer_map(struct pipe_context *ctx, - struct pipe_transfer *transfer); + struct pipe_transfer *transfer); void r300_texture_transfer_unmap(struct pipe_context *ctx, - struct pipe_transfer *transfer); + struct pipe_transfer *transfer); #endif diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 65696555ac3..78021e2c5d4 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -213,7 +213,6 @@ void r300_translate_vertex_shader(struct r300_context *r300, compiler.Base.max_temp_regs = 32; compiler.Base.max_constants = 256; compiler.Base.max_alu_insts = r300->screen->caps.is_r500 ? 1024 : 256; - compiler.Base.remove_unused_constants = TRUE; if (compiler.Base.Debug & RC_DBG_LOG) { DBG(r300, DBG_VP, "r300: Initial vertex program\n"); @@ -227,6 +226,10 @@ void r300_translate_vertex_shader(struct r300_context *r300, r300_tgsi_to_rc(&ttr, vs->state.tokens); + if (compiler.Base.Program.Constants.Count > 200) { + compiler.Base.remove_unused_constants = TRUE; + } + compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs + 1)); compiler.SetHwInputOutput = &set_vertex_inputs_outputs; diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 4597332399a..460da77a4fb 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -33,15 +33,17 @@ #include "r300_defines.h" +#define R300_MAX_CMDBUF_DWORDS (16 * 1024) + struct winsys_handle; struct r300_winsys_screen; -struct r300_winsys_buffer; +struct r300_winsys_buffer; /* for map/unmap etc. */ +struct r300_winsys_cs_buffer; /* for write_reloc etc. */ struct r300_winsys_cs { - uint32_t *ptr; /* Pointer to the beginning of the CS. */ - unsigned cdw; /* Number of used dwords. */ - unsigned ndw; /* Size of the CS in dwords. */ + unsigned cdw; /* Number of used dwords. */ + uint32_t buf[R300_MAX_CMDBUF_DWORDS]; /* The command buffer. 
*/ }; enum r300_value_id { @@ -49,8 +51,9 @@ enum r300_value_id { R300_VID_GB_PIPES, R300_VID_Z_PIPES, R300_VID_SQUARE_TILING_SUPPORT, - R300_VID_DRM_2_3_0, - R300_VID_DRM_2_6_0, + R300_VID_DRM_2_3_0, /* R500 VAP regs, MSPOS regs, fixed tex3D size checking */ + R300_VID_DRM_2_6_0, /* Hyper-Z, GB_Z_PEQ_CONFIG on rv350->r4xx, R500 FG_ALPHA_VALUE */ + R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer */ R300_CAN_HYPERZ, }; @@ -102,6 +105,10 @@ struct r300_winsys_screen { unsigned usage, enum r300_buffer_domain domain); + struct r300_winsys_cs_buffer *(*buffer_get_cs_handle)( + struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf); + /** * Reference a buffer object (assign with reference counting). * @@ -232,23 +239,22 @@ struct r300_winsys_screen { void (*cs_destroy)(struct r300_winsys_cs *cs); /** - * Add a buffer object to the list of buffers to validate. + * Add a new buffer relocation. Every relocation must first be added + * before it can be written. * - * \param cs A command stream to add buffer for validation against. - * \param buf A winsys buffer to validate. - * \param rd A read domain containing a bitmask - * of the R300_DOMAIN_* flags. - * \param wd A write domain containing a bitmask - * of the R300_DOMAIN_* flags. + * \param cs A command stream to add buffer for validation against. + * \param buf A winsys buffer to validate. + * \param rd A read domain containing a bitmask of the R300_DOMAIN_* flags. + * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags. */ - void (*cs_add_buffer)(struct r300_winsys_cs *cs, - struct r300_winsys_buffer *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd); + void (*cs_add_reloc)(struct r300_winsys_cs *cs, + struct r300_winsys_cs_buffer *buf, + enum r300_buffer_domain rd, + enum r300_buffer_domain wd); /** - * Revalidate all currently set up winsys buffers. - * Returns TRUE if a flush is required. + * Return TRUE if there is enough memory in VRAM and GTT for the relocs + * added so far. * * \param cs A command stream to validate. */ @@ -263,9 +269,7 @@ struct r300_winsys_screen { * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags. */ void (*cs_write_reloc)(struct r300_winsys_cs *cs, - struct r300_winsys_buffer *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd); + struct r300_winsys_cs_buffer *buf); /** * Flush a command stream. @@ -287,14 +291,6 @@ struct r300_winsys_screen { void *user); /** - * Reset the list of buffer objects to validate, usually called - * prior to adding buffer objects for validation. - * - * \param cs A command stream to reset buffers for. - */ - void (*cs_reset_buffers)(struct r300_winsys_cs *cs); - - /** * Return TRUE if a buffer is referenced by a command stream or by hardware * (i.e. is busy), based on the domain parameter. * @@ -303,7 +299,7 @@ struct r300_winsys_screen { * \param domain A bitmask of the R300_REF_* enums. 
*/ boolean (*cs_is_buffer_referenced)(struct r300_winsys_cs *cs, - struct r300_winsys_buffer *buf, + struct r300_winsys_cs_buffer *buf, enum r300_reference_domain domain); }; diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile index a484f38e9f1..b476b9af3b8 100644 --- a/src/gallium/drivers/r600/Makefile +++ b/src/gallium/drivers/r600/Makefile @@ -21,6 +21,7 @@ C_SOURCES = \ evergreen_state.c \ eg_asm.c \ r600_translate.c \ - r600_state_common.c + r600_state_common.c \ + r600_upload.c include ../../Makefile.template diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index 3fc1fa94c27..64980140963 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -28,6 +28,7 @@ r600 = env.ConvenienceLibrary( 'r600_state_common.c', 'r600_texture.c', 'r600_translate.c', + 'r600_upload.c', 'r700_asm.c', 'evergreen_state.c', 'eg_asm.c', diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 21d66fa9564..67d742b3760 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -27,6 +27,7 @@ #include "r600_asm.h" #include "eg_sq.h" #include "r600_opcodes.h" +#include "evergreend.h" int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) { @@ -34,15 +35,17 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) switch (cf->inst) { case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3): + case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3): case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | - S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) | - S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) | - S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank); + S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank); bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | - S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) | - S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) | - S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) | + S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) | S_SQ_CF_ALU_WORD1_BARRIER(1) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; @@ -89,3 +92,37 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) } return 0; } + +void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) +{ + struct r600_pipe_state *rstate; + unsigned i = 0; + + if (count > 8) { + bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(8 - 1); + bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(count - 8 - 1); + } else { + bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(count - 1); + } + bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | + S_SQ_CF_WORD1_BARRIER(1); + + rstate = &ve->rstate; + rstate->id = R600_PIPE_STATE_FETCH_SHADER; + rstate->nregs 
= 0; + r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS, + (r600_bo_offset(ve->fetch_shader)) >> 8, + 0xFFFFFFFF, ve->fetch_shader); +} diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index 698299ec134..ecea1db4f15 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -290,6 +290,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) switch (format) { /* 8-bit buffers. */ case PIPE_FORMAT_A8_UNORM: + return V_028C70_SWAP_ALT_REV; case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_R8_UNORM: @@ -312,6 +313,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_028C70_SWAP_STD; case PIPE_FORMAT_L8A8_UNORM: + return V_028C70_SWAP_ALT; case PIPE_FORMAT_R8G8_UNORM: return V_028C70_SWAP_STD; diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 0509522d813..94eef77945b 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -410,9 +410,9 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, word4 | S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) | S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) | - S_030010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL); + S_030010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, - S_030014_LAST_LEVEL(state->last_level) | + S_030014_LAST_LEVEL(state->u.tex.last_level) | S_030014_BASE_ARRAY(0) | S_030014_LAST_ARRAY(0), 0xffffffff, NULL); r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, 0x0, 0xFFFFFFFF, NULL); @@ -633,10 +633,11 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state struct r600_resource_texture *rtex; struct r600_resource *rbuffer; struct r600_surface *surf; - unsigned level = state->cbufs[cb]->level; + unsigned level = state->cbufs[cb]->u.tex.level; unsigned pitch, slice; unsigned color_info; unsigned format, swap, ntype; + unsigned offset; const struct util_format_description *desc; struct r600_bo *bo[3]; @@ -647,6 +648,9 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state bo[1] = rbuffer->bo; bo[2] = rbuffer->bo; + /* XXX quite sure for dx10+ hw don't need any offset hacks */ + offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture, + level, state->cbufs[cb]->u.tex.first_layer); pitch = rtex->pitch_in_pixels[level] / 8 - 1; slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; ntype = 0; @@ -666,7 +670,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state /* FIXME handle enabling of CB beyond BASE8 which has different offset */ r600_pipe_state_add_reg(rstate, R_028C60_CB_COLOR0_BASE + cb * 0x3C, - (state->cbufs[cb]->offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + (offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); r600_pipe_state_add_reg(rstate, R_028C78_CB_COLOR0_DIM + cb * 0x3C, 0x0, 0xFFFFFFFF, NULL); @@ -698,11 +702,12 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state struct r600_surface *surf; unsigned level; unsigned pitch, slice, format, stencil_format; + unsigned offset; if (state->zsbuf == 
NULL) return; - level = state->zsbuf->level; + level = state->zsbuf->u.tex.level; surf = (struct r600_surface *)state->zsbuf; rtex = (struct r600_resource_texture*)state->zsbuf->texture; @@ -712,24 +717,27 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state rtex->depth = 1; rbuffer = &rtex->resource; + /* XXX quite sure for dx10+ hw don't need any offset hacks */ + offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture, + level, state->zsbuf->u.tex.first_layer); pitch = rtex->pitch_in_pixels[level] / 8 - 1; slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE, - (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE, - (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); if (stencil_format) { uint32_t stencil_offset; stencil_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255; r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE, - (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + (offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE, - (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + (offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); } r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); @@ -762,8 +770,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, util_copy_framebuffer_state(&rctx->framebuffer, state); - rctx->pframebuffer = &rctx->framebuffer; - /* build states */ for (int i = 0; i < state->nr_cbufs; i++) { evergreen_cb(rctx, rstate, state, i); @@ -825,6 +831,10 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate; r600_context_pipe_state_set(&rctx->ctx, rstate); + + if (state->zsbuf) { + evergreen_polygon_offset_update(rctx); + } } static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, @@ -832,6 +842,7 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_resource *rbuffer = (struct r600_resource*)buffer; + uint32_t offset; /* Note that the state tracker can unbind constant buffers by * passing NULL here. 
@@ -840,6 +851,8 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, return; } + r600_upload_const_buffer(rctx, buffer, &offset); + switch (shader) { case PIPE_SHADER_VERTEX: rctx->vs_const_buffer.nregs = 0; @@ -849,7 +862,7 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&rctx->vs_const_buffer, R_028980_ALU_CONST_CACHE_VS_0, - (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); break; case PIPE_SHADER_FRAGMENT: @@ -860,7 +873,7 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&rctx->ps_const_buffer, R_028940_ALU_CONST_CACHE_PS_0, - (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); break; default: @@ -1057,12 +1070,76 @@ void evergreen_init_config(struct r600_pipe_context *rctx) num_hs_stack_entries = 42; num_ls_stack_entries = 42; break; + case CHIP_BARTS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; + case CHIP_TURKS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_CAICOS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 10; + num_gs_threads = 10; + num_es_threads = 10; + num_hs_threads = 10; + num_ls_threads = 10; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; } tmp = 0x00000000; switch (family) { case CHIP_CEDAR: case CHIP_PALM: + case CHIP_CAICOS: break; default: tmp |= S_008C00_VC_ENABLE(1); @@ -1194,29 +1271,101 @@ void evergreen_init_config(struct r600_pipe_context *rctx) r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL); -r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, - 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL); r600_context_pipe_state_set(&rctx->ctx, rstate); } -int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); -void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) +void evergreen_polygon_offset_update(struct r600_pipe_context *rctx) +{ + struct r600_pipe_state state; + + state.id = 
R600_PIPE_STATE_POLYGON_OFFSET; + state.nregs = 0; + if (rctx->rasterizer && rctx->framebuffer.zsbuf) { + float offset_units = rctx->rasterizer->offset_units; + unsigned offset_db_fmt_cntl = 0, depth; + + switch (rctx->framebuffer.zsbuf->texture->format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + depth = -24; + offset_units *= 2.0f; + break; + case PIPE_FORMAT_Z32_FLOAT: + depth = -23; + offset_units *= 1.0f; + offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); + break; + case PIPE_FORMAT_Z16_UNORM: + depth = -16; + offset_units *= 4.0f; + break; + default: + return; + } + /* FIXME some of those reg can be computed with cso */ + offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth); + r600_pipe_state_add_reg(&state, + R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&state, + R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, + fui(offset_units), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&state, + R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&state, + R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, + fui(offset_units), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&state, + R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, + offset_db_fmt_cntl, 0xFFFFFFFF, NULL); + r600_context_pipe_state_set(&rctx->ctx, &state); + } +} + +static void evergreen_spi_update(struct r600_pipe_context *rctx) +{ + struct r600_pipe_shader *shader = rctx->ps_shader; + struct r600_pipe_state rstate; + struct r600_shader *rshader = &shader->shader; + unsigned i, tmp; + + rstate.nregs = 0; + for (i = 0; i < rshader->ninput; i++) { + tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || + rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || + rshader->input[i].name == TGSI_SEMANTIC_POSITION) { + tmp |= S_028644_FLAT_SHADE(rctx->flatshade); + } + if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && + rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { + tmp |= S_028644_PT_SPRITE_TEX(1); + } + r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); + } + r600_context_pipe_state_set(&rctx->ctx, &rstate); +} + +void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx) { - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate; struct r600_resource *rbuffer; - unsigned i, j, offset, prim; - u32 vgt_dma_index_type, vgt_draw_initiator, mask; struct pipe_vertex_buffer *vertex_buffer; - struct r600_draw rdraw; - struct r600_pipe_state vgt; - struct r600_drawl draw; - boolean translate = FALSE; + unsigned i, offset; + + /* we don't update until we know vertex elements */ + if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer) + return; if (rctx->vertex_elements->incompatible_layout) { + /* translating rebinds new vertex elements, so + * return once translated + */ r600_begin_vertex_translate(rctx); - translate = TRUE; + return; } if (rctx->any_user_vbs) { @@ -1224,6 +1373,72 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) rctx->any_user_vbs = FALSE; } + + if (rctx->vertex_elements->vbuffer_need_offset) { + /* one resource per vertex elements */ + rctx->nvs_resource = rctx->vertex_elements->count; + } else { + /* bind vertex buffer once */ + rctx->nvs_resource = rctx->nvertex_buffer; + } + + for (i = 0 ; i < 
rctx->nvs_resource; i++) { + rstate = &rctx->vs_resource[i]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + + if (rctx->vertex_elements->vbuffer_need_offset) { + /* one resource per vertex elements */ + unsigned vbuffer_index; + vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index; + vertex_buffer = &rctx->vertex_buffer[vbuffer_index]; + rbuffer = (struct r600_resource*)vertex_buffer->buffer; + offset = rctx->vertex_elements->vbuffer_offset[i]; + } else { + /* bind vertex buffer once */ + vertex_buffer = &rctx->vertex_buffer[i]; + rbuffer = (struct r600_resource*)vertex_buffer->buffer; + offset = 0; + } + if (vertex_buffer == NULL || rbuffer == NULL) + continue; + offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); + + r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, + offset, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, + rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, + S_030008_STRIDE(vertex_buffer->stride), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, + S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | + S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | + S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | + S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, + 0xC0000000, 0xFFFFFFFF, NULL); + evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); + } +} + +int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); +void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_resource *rbuffer; + u32 vgt_dma_index_type, vgt_draw_initiator, mask; + struct r600_draw rdraw; + struct r600_pipe_state vgt; + struct r600_drawl draw; + unsigned prim; + memset(&draw, 0, sizeof(struct r600_drawl)); draw.ctx = ctx; draw.mode = info->mode; @@ -1272,48 +1487,23 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) } if (r600_conv_pipe_prim(draw.mode, &prim)) return; - - /* rebuild vertex shader if input format changed */ - if (r600_pipe_shader_update(&rctx->context, rctx->vs_shader)) + if (unlikely(rctx->ps_shader == NULL)) { + R600_ERR("missing pixel shader\n"); return; - if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader)) + } + if (unlikely(rctx->vs_shader == NULL)) { + R600_ERR("missing vertex shader\n"); + return; + } + /* there should be enough input */ + if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { + R600_ERR("%d resources provided, expecting %d\n", + rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); return; - - for (i = 0 ; i < rctx->vertex_elements->count; i++) { - uint32_t word3, word2; - uint32_t format; - rstate = &rctx->vs_resource[i]; - - rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - - j = rctx->vertex_elements->elements[i].vertex_buffer_index; - vertex_buffer = &rctx->vertex_buffer[j]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->elements[i].src_offset + - vertex_buffer->buffer_offset + - 
r600_bo_offset(rbuffer->bo); - - format = r600_translate_vertex_data_type(rctx->vertex_elements->hw_format[i]); - - word2 = format | S_030008_STRIDE(vertex_buffer->stride); - - word3 = S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | - S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | - S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | - S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W); - - r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, word2, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, word3, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, 0xC0000000, 0xFFFFFFFF, NULL); - evergreen_fs_resource_set(&rctx->ctx, rstate, i); } + evergreen_spi_update(rctx); + mask = 0; for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { mask |= (0xF << (i * 4)); @@ -1328,46 +1518,6 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.min_index, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); - - if (rctx->rasterizer && rctx->framebuffer.zsbuf) { - float offset_units = rctx->rasterizer->offset_units; - unsigned offset_db_fmt_cntl = 0, depth; - - switch (rctx->framebuffer.zsbuf->texture->format) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - depth = -24; - offset_units *= 2.0f; - break; - case PIPE_FORMAT_Z32_FLOAT: - depth = -23; - offset_units *= 1.0f; - offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); - break; - case PIPE_FORMAT_Z16_UNORM: - depth = -16; - offset_units *= 4.0f; - break; - default: - return; - } - offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth); - r600_pipe_state_add_reg(&vgt, - R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, - R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, - R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, - R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, - R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, - offset_db_fmt_cntl, 0xFFFFFFFF, NULL); - } r600_context_pipe_state_set(&rctx->ctx, &vgt); rdraw.vgt_num_indices = draw.count; @@ -1382,28 +1532,22 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) } evergreen_context_draw(&rctx->ctx, &rdraw); - if (translate) - r600_end_vertex_translate(rctx); - pipe_resource_reference(&draw.index_buffer, NULL); } void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) { - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; - unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, 
spi_ps_in_control_1; + unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; int pos_index = -1, face_index = -1; int ninterp = 0; boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE; unsigned spi_baryc_cntl; - /* clear previous register */ rstate->nregs = 0; for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); /* evergreen NUM_INTERP only contains values interpolated into the LDS, POSITION goes via GPRs from the SC so isn't counted */ if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) @@ -1421,16 +1565,6 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader if (rshader->input[i].centroid) have_centroid = TRUE; } - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); - } - if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && - rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); } for (i = 0; i < rshader->noutput; i++) { if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) @@ -1569,14 +1703,8 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader R_028864_SQ_PGM_RESOURCES_2_VS, 0x0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, - R_0288A8_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02885C_SQ_PGM_START_VS, (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); - r600_pipe_state_add_reg(rstate, - R_0288A4_SQ_PGM_START_FS, - (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo_fetch); r600_pipe_state_add_reg(rstate, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 2ab60f3086a..a852bef6156 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -35,7 +35,7 @@ #define RADEON_CTX_MAX_PM4 (64 * 1024 / 4) #define R600_ERR(fmt, args...) 
\ - fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args) + fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args) typedef uint64_t u64; typedef uint32_t u32; @@ -92,6 +92,9 @@ enum radeon_family { CHIP_CYPRESS, CHIP_HEMLOCK, CHIP_PALM, + CHIP_BARTS, + CHIP_TURKS, + CHIP_CAICOS, CHIP_LAST, }; @@ -245,10 +248,7 @@ struct r600_context { u32 *pm4; struct list_head query_list; unsigned num_query_running; - unsigned fence; struct list_head fenced_bo; - unsigned *cfence; - struct r600_bo *fence_bo; }; struct r600_draw { @@ -284,14 +284,12 @@ void r600_context_queries_resume(struct r600_context *ctx); int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon); void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw); -void evergreen_ps_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); -void evergreen_vs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); -void evergreen_fs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); - void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); +struct radeon *radeon_decref(struct radeon *radeon); + #endif diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index edadedff25f..beefd17ae2f 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -22,59 +22,122 @@ */ #include <stdio.h> #include <errno.h> +#include "util/u_format.h" #include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "r600_pipe.h" #include "r600_sq.h" #include "r600_opcodes.h" #include "r600_asm.h" +#include "r600_formats.h" +#include "r600d.h" -static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu) +#define NUM_OF_CYCLES 3 +#define NUM_OF_COMPONENTS 4 + +static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r600_bc_alu *alu) { if(alu->is_op3) return 3; - switch (alu->inst) { - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: - return 0; - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE: - return 2; - - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: - 
case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: - return 1; - default: R600_ERR( - "Need instruction operand number for 0x%x.\n", alu->inst); - }; + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + switch (alu->inst) { + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: + return 0; + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE: + return 2; + + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: + return 1; + default: R600_ERR( + "Need instruction operand number for 0x%x.\n", alu->inst); + } + break; + case CHIPREV_EVERGREEN: + switch (alu->inst) { + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: + return 0; + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE: + case 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW: + return 2; + + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: + return 1; + default: R600_ERR( + "Need instruction operand number for 0x%x.\n", alu->inst); + } + break; + } return 3; } @@ -101,7 +164,6 @@ static struct r600_bc_alu *r600_bc_alu(void) if (alu == NULL) return NULL; LIST_INITHEAD(&alu->list); - LIST_INITHEAD(&alu->bs_list); return alu; } @@ -152,6 +214,9 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family) case CHIP_CYPRESS: case CHIP_HEMLOCK: case CHIP_PALM: + case CHIP_BARTS: + case CHIP_TURKS: + case CHIP_CAICOS: bc->chiprev = CHIPREV_EVERGREEN; break; default: @@ -189,221 +254,813 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) return 0; } -const unsigned bank_swizzle_vec[8] = {SQ_ALU_VEC_210, //000 - SQ_ALU_VEC_120, //001 - SQ_ALU_VEC_102, //010 +/* alu instructions that can ony exits once per group */ +static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + return !alu->is_op3 && ( + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH || + alu->inst == 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT); + case CHIPREV_EVERGREEN: + default: + return !alu->is_op3 && ( + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT); + } +} - SQ_ALU_VEC_201, //011 - SQ_ALU_VEC_012, //100 - SQ_ALU_VEC_021, //101 +static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + return !alu->is_op3 && ( + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4); + case CHIPREV_EVERGREEN: + default: + return !alu->is_op3 && ( + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || + 
alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4); + } +} - SQ_ALU_VEC_012, //110 - SQ_ALU_VEC_012}; //111 +static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + return !alu->is_op3 && ( + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); + case CHIPREV_EVERGREEN: + default: + return !alu->is_op3 && ( + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); + } +} -const unsigned bank_swizzle_scl[8] = {SQ_ALU_SCL_210, //000 - SQ_ALU_SCL_122, //001 - SQ_ALU_SCL_122, //010 +/* alu instructions that can only execute on the vector unit */ +static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + return is_alu_reduction_inst(bc, alu) || + is_alu_mova_inst(bc, alu); +} - SQ_ALU_SCL_221, //011 - SQ_ALU_SCL_212, //100 - SQ_ALU_SCL_122, //101 +/* alu instructions that can only execute on the trans unit */ +static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + if (!alu->is_op3) + return alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE; + else + return alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT || + alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2 || + alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 || + alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4; + case CHIPREV_EVERGREEN: + default: + if (!alu->is_op3) + return alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT || + alu->inst == 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE; + else + return alu->inst == EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT; + } +} + +/* alu instructions that can execute on any unit */ +static int is_alu_any_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + return !is_alu_vec_unit_inst(bc, alu) && + !is_alu_trans_unit_inst(bc, alu); +} + +static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first, + struct r600_bc_alu *assignment[5]) +{ + struct r600_bc_alu *alu; + unsigned i, chan, trans; + + for (i = 0; i < 5; i++) + assignment[i] = NULL; + + for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) { + chan = alu->dst.chan; + if (is_alu_trans_unit_inst(bc, alu)) + trans = 1; + else if (is_alu_vec_unit_inst(bc, alu)) + trans = 0; + else if (assignment[chan]) + trans = 1; // assume ALU_INST_PREFER_VECTOR + else + trans = 0; - SQ_ALU_SCL_122, //110 - SQ_ALU_SCL_122}; //111 + if (trans) { + if (assignment[4]) { + assert(0); //ALU.Trans has already been allocated + return -1; + } + assignment[4] = alu; + } else { + if (assignment[chan]) { + assert(0); //ALU.chan has already been allocated + return -1; + } + assignment[chan] = alu; + } -static int init_gpr(struct r600_bc_alu *alu) + if (alu->last) + break; + } + return 0; +} + +struct alu_bank_swizzle { + int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; + int hw_cfile_addr[4]; + int hw_cfile_elem[4]; +}; + +const unsigned cycle_for_bank_swizzle_vec[][3] = { + [SQ_ALU_VEC_012] = { 0, 1, 2 }, + [SQ_ALU_VEC_021] = { 0, 2, 1 }, + [SQ_ALU_VEC_120] = { 1, 2, 0 }, + [SQ_ALU_VEC_102] = { 1, 0, 2 }, + [SQ_ALU_VEC_201] = { 2, 0, 1 }, + [SQ_ALU_VEC_210] = { 2, 1, 0 } +}; + +const unsigned cycle_for_bank_swizzle_scl[][3] = { + [SQ_ALU_SCL_210] = { 2, 1, 0 }, + [SQ_ALU_SCL_122] = { 1, 2, 2 }, + [SQ_ALU_SCL_212] = { 2, 1, 2 }, + [SQ_ALU_SCL_221] = { 2, 2, 1 } +}; + +static void init_bank_swizzle(struct alu_bank_swizzle *bs) { - int cycle, component; + int i, cycle, component; /* set up gpr use */ for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++) for (component = 0; component < NUM_OF_COMPONENTS; component++) - alu->hw_gpr[cycle][component] = -1; - return 0; + bs->hw_gpr[cycle][component] = -1; + for (i = 0; i < 4; i++) + bs->hw_cfile_addr[i] = -1; + for (i = 0; i < 4; i++) + bs->hw_cfile_elem[i] = -1; } - -#if 0 -static int reserve_gpr(struct r600_bc_alu *alu, unsigned sel, unsigned chan, unsigned cycle) + +static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle) { - if 
(alu->hw_gpr[cycle][chan] < 0) - alu->hw_gpr[cycle][chan] = sel; - else if (alu->hw_gpr[cycle][chan] != (int)sel) { - R600_ERR("Another scalar operation has already used GPR read port for channel\n"); + if (bs->hw_gpr[cycle][chan] == -1) + bs->hw_gpr[cycle][chan] = sel; + else if (bs->hw_gpr[cycle][chan] != (int)sel) { + // Another scalar operation has already used GPR read port for channel return -1; } return 0; } - -static int cycle_for_scalar_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle) -{ - int table[3]; - int ret = 0; - switch (swiz) { - case SQ_ALU_SCL_210: - table[0] = 2; table[1] = 1; table[2] = 0; - *p_cycle = table[sel]; - break; - case SQ_ALU_SCL_122: - table[0] = 1; table[1] = 2; table[2] = 2; - *p_cycle = table[sel]; - break; - case SQ_ALU_SCL_212: - table[0] = 2; table[1] = 1; table[2] = 2; - *p_cycle = table[sel]; - break; - case SQ_ALU_SCL_221: - table[0] = 2; table[1] = 2; table[2] = 1; - *p_cycle = table[sel]; - break; - break; - default: - R600_ERR("bad scalar bank swizzle value\n"); - ret = -1; - break; + +static int reserve_cfile(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) +{ + int res, resmatch = -1, resempty = -1; + for (res = 3; res >= 0; --res) { + if (bs->hw_cfile_addr[res] == -1) + resempty = res; + else if (bs->hw_cfile_addr[res] == sel && + bs->hw_cfile_elem[res] == chan) + resmatch = res; } - return ret; -} - -static int cycle_for_vector_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle) -{ - int table[3]; - int ret; - - switch (swiz) { - case SQ_ALU_VEC_012: - table[0] = 0; table[1] = 1; table[2] = 2; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_021: - table[0] = 0; table[1] = 2; table[2] = 1; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_120: - table[0] = 1; table[1] = 2; table[2] = 0; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_102: - table[0] = 1; table[1] = 0; table[2] = 2; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_201: - table[0] = 2; table[1] = 0; table[2] = 1; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_210: - table[0] = 2; table[1] = 1; table[2] = 0; - *p_cycle = table[sel]; - break; - default: - R600_ERR("bad vector bank swizzle value\n"); - ret = -1; - break; + if (resmatch != -1) + return 0; // Read for this scalar element already reserved, nothing to do here. + else if (resempty != -1) { + bs->hw_cfile_addr[resempty] = sel; + bs->hw_cfile_elem[resempty] = chan; + } else { + // All cfile read ports are used, cannot reference vector element + return -1; } - return ret; + return 0; +} + +static int is_gpr(unsigned sel) +{ + return (sel >= 0 && sel <= 127); } +/* CB constants start at 512, and get translated to a kcache index when ALU + * clauses are constructed. Note that we handle kcache constants the same way + * as (the now gone) cfile constants, is that really required? 
*/ +static int is_cfile(unsigned sel) +{ + return (sel > 255 && sel < 512) || + (sel > 511 && sel < 4607) || // Kcache before translate + (sel > 127 && sel < 192); // Kcache after translate +} + +static int is_const(int sel) +{ + return is_cfile(sel) || + (sel >= V_SQ_ALU_SRC_0 && + sel <= V_SQ_ALU_SRC_LITERAL); +} + +static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu, + struct alu_bank_swizzle *bs, int bank_swizzle) +{ + int r, src, num_src, sel, elem, cycle; + + num_src = r600_bc_get_num_operands(bc, alu); + for (src = 0; src < num_src; src++) { + sel = alu->src[src].sel; + elem = alu->src[src].chan; + if (is_gpr(sel)) { + cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src]; + if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan) + // Nothing to do; special-case optimization, + // second source uses first source’s reservation + continue; + else { + r = reserve_gpr(bs, sel, elem, cycle); + if (r) + return r; + } + } else if (is_cfile(sel)) { + r = reserve_cfile(bs, sel, elem); + if (r) + return r; + } + // No restrictions on PV, PS, literal or special constants + } + return 0; +} + +static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu, + struct alu_bank_swizzle *bs, int bank_swizzle) +{ + int r, src, num_src, const_count, sel, elem, cycle; + + num_src = r600_bc_get_num_operands(bc, alu); + for (const_count = 0, src = 0; src < num_src; ++src) { + sel = alu->src[src].sel; + elem = alu->src[src].chan; + if (is_const(sel)) { // Any constant, including literal and inline constants + if (const_count >= 2) + // More than two references to a constant in + // transcendental operation. + return -1; + else + const_count++; + } + if (is_cfile(sel)) { + r = reserve_cfile(bs, sel, elem); + if (r) + return r; + } + } + for (src = 0; src < num_src; ++src) { + sel = alu->src[src].sel; + elem = alu->src[src].chan; + if (is_gpr(sel)) { + cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src]; + if (cycle < const_count) + // Cycle for GPR load conflicts with + // constant load in transcendental operation. 
+ return -1; + r = reserve_gpr(bs, sel, elem, cycle); + if (r) + return r; + } + // Constants already processed + // No restrictions on PV, PS + } + return 0; +} - -static void update_chan_counter(struct r600_bc_alu *alu, int *chan_counter) +static int check_and_set_bank_swizzle(struct r600_bc *bc, + struct r600_bc_alu *slots[5]) { - int num_src; - int i; - int channel_swizzle; + struct alu_bank_swizzle bs; + int bank_swizzle[5]; + int i, r = 0, forced = 0; + + for (i = 0; i < 5; i++) + if (slots[i] && slots[i]->bank_swizzle_force) { + slots[i]->bank_swizzle = slots[i]->bank_swizzle_force; + forced = 1; + } - num_src = r600_bc_get_num_operands(alu); + if (forced) + return 0; - for (i = 0; i < num_src; i++) { - channel_swizzle = alu->src[i].chan; - if ((alu->src[i].sel > 0 && alu->src[i].sel < 128) && channel_swizzle <= 3) - chan_counter[channel_swizzle]++; + // just check every possible combination of bank swizzle + // not very efficent, but works on the first try in most of the cases + for (i = 0; i < 4; i++) + bank_swizzle[i] = SQ_ALU_VEC_012; + bank_swizzle[4] = SQ_ALU_SCL_210; + while(bank_swizzle[4] <= SQ_ALU_SCL_221) { + init_bank_swizzle(&bs); + for (i = 0; i < 4; i++) { + if (slots[i]) { + r = check_vector(bc, slots[i], &bs, bank_swizzle[i]); + if (r) + break; + } + } + if (!r && slots[4]) { + r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]); + } + if (!r) { + for (i = 0; i < 5; i++) { + if (slots[i]) + slots[i]->bank_swizzle = bank_swizzle[i]; + } + return 0; + } + + for (i = 0; i < 5; i++) { + bank_swizzle[i]++; + if (bank_swizzle[i] <= SQ_ALU_VEC_210) + break; + else + bank_swizzle[i] = SQ_ALU_VEC_012; + } } + + // couldn't find a working swizzle + return -1; } -/* we need something like this I think - but this is bogus */ -int check_read_slots(struct r600_bc *bc, struct r600_bc_alu *alu_first) +static int replace_gpr_with_pv_ps(struct r600_bc *bc, + struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev) { - struct r600_bc_alu *alu; - int chan_counter[4] = { 0 }; + struct r600_bc_alu *prev[5]; + int gpr[5], chan[5]; + int i, j, r, src, num_src; - update_chan_counter(alu_first, chan_counter); + r = assign_alu_units(bc, alu_prev, prev); + if (r) + return r; - LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) { - update_chan_counter(alu, chan_counter); + for (i = 0; i < 5; ++i) { + if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) { + gpr[i] = prev[i]->dst.sel; + if (is_alu_reduction_inst(bc, prev[i])) + chan[i] = 0; + else + chan[i] = prev[i]->dst.chan; + } else + gpr[i] = -1; } - if (chan_counter[0] > 3 || - chan_counter[1] > 3 || - chan_counter[2] > 3 || - chan_counter[3] > 3) { - R600_ERR("needed to split instruction for input ran out of banks %x %d %d %d %d\n", - alu_first->inst, chan_counter[0], chan_counter[1], chan_counter[2], chan_counter[3]); - return -1; + for (i = 0; i < 5; ++i) { + struct r600_bc_alu *alu = slots[i]; + if(!alu) + continue; + + num_src = r600_bc_get_num_operands(bc, alu); + for (src = 0; src < num_src; ++src) { + if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) + continue; + + if (alu->src[src].sel == gpr[4] && + alu->src[src].chan == chan[4]) { + alu->src[src].sel = V_SQ_ALU_SRC_PS; + alu->src[src].chan = 0; + continue; + } + + for (j = 0; j < 4; ++j) { + if (alu->src[src].sel == gpr[j] && + alu->src[src].chan == j) { + alu->src[src].sel = V_SQ_ALU_SRC_PV; + alu->src[src].chan = chan[j]; + break; + } + } + } } + return 0; } -#endif -static int is_const(int sel) +void r600_bc_special_constants(u32 value, unsigned *sel, unsigned 
*neg) { - if (sel > 255 && sel < 512) - return 1; - if (sel >= V_SQ_ALU_SRC_0 && sel <= V_SQ_ALU_SRC_LITERAL) - return 1; - return 0; + switch(value) { + case 0: + *sel = V_SQ_ALU_SRC_0; + break; + case 1: + *sel = V_SQ_ALU_SRC_1_INT; + break; + case -1: + *sel = V_SQ_ALU_SRC_M_1_INT; + break; + case 0x3F800000: // 1.0f + *sel = V_SQ_ALU_SRC_1; + break; + case 0x3F000000: // 0.5f + *sel = V_SQ_ALU_SRC_0_5; + break; + case 0xBF800000: // -1.0f + *sel = V_SQ_ALU_SRC_1; + *neg ^= 1; + break; + case 0xBF000000: // -0.5f + *sel = V_SQ_ALU_SRC_0_5; + *neg ^= 1; + break; + default: + *sel = V_SQ_ALU_SRC_LITERAL; + break; + } } -static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu) +/* compute how many literal are needed */ +static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu, + uint32_t literal[4], unsigned *nliteral) { - unsigned swizzle_key; - - if (alu->bank_swizzle_force) { - alu->bank_swizzle = alu->bank_swizzle_force; - return 0; + unsigned num_src = r600_bc_get_num_operands(bc, alu); + unsigned i, j; + + for (i = 0; i < num_src; ++i) { + if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { + uint32_t value = alu->src[i].value[alu->src[i].chan]; + unsigned found = 0; + for (j = 0; j < *nliteral; ++j) { + if (literal[j] == value) { + found = 1; + break; + } + } + if (!found) { + if (*nliteral >= 4) + return -EINVAL; + literal[(*nliteral)++] = value; + } + } } - swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + - (is_const(alu->src[1].sel) ? 2 : 0 ) + - (is_const(alu->src[2].sel) ? 1 : 0 ); - - alu->bank_swizzle = bank_swizzle_scl[swizzle_key]; return 0; } -static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu) +static void r600_bc_alu_adjust_literals(struct r600_bc *bc, + struct r600_bc_alu *alu, + uint32_t literal[4], unsigned nliteral) +{ + unsigned num_src = r600_bc_get_num_operands(bc, alu); + unsigned i, j; + + for (i = 0; i < num_src; ++i) { + if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { + uint32_t value = alu->src[i].value[alu->src[i].chan]; + for (j = 0; j < nliteral; ++j) { + if (literal[j] == value) { + alu->src[i].chan = j; + break; + } + } + } + } +} + +static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], + struct r600_bc_alu *alu_prev) { - unsigned swizzle_key; + struct r600_bc_alu *prev[5]; + struct r600_bc_alu *result[5] = { NULL }; + + uint32_t literal[4]; + unsigned nliteral = 0; + + int i, j, r, src, num_src; + int num_once_inst = 0; + + r = assign_alu_units(bc, alu_prev, prev); + if (r) + return r; - if (alu->bank_swizzle_force) { - alu->bank_swizzle = alu->bank_swizzle_force; + for (i = 0; i < 5; ++i) { + struct r600_bc_alu *alu; + + /* check number of literals */ + if (prev[i] && r600_bc_alu_nliterals(bc, prev[i], literal, &nliteral)) + return 0; + if (slots[i] && r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral)) + return 0; + + // let's check used slots + if (prev[i] && !slots[i]) { + result[i] = prev[i]; + num_once_inst += is_alu_once_inst(bc, prev[i]); + continue; + } else if (prev[i] && slots[i]) { + if (result[4] == NULL && prev[4] == NULL && slots[4] == NULL) { + // trans unit is still free try to use it + if (is_alu_any_unit_inst(bc, slots[i])) { + result[i] = prev[i]; + result[4] = slots[i]; + } else if (is_alu_any_unit_inst(bc, prev[i])) { + result[i] = slots[i]; + result[4] = prev[i]; + } else + return 0; + } else + return 0; + } else if(!slots[i]) { + continue; + } else + result[i] = slots[i]; + + // let's check source gprs + alu = slots[i]; + num_once_inst += 
is_alu_once_inst(bc, alu); + + num_src = r600_bc_get_num_operands(bc, alu); + for (src = 0; src < num_src; ++src) { + // constants doesn't matter + if (!is_gpr(alu->src[src].sel)) + continue; + + for (j = 0; j < 5; ++j) { + if (!prev[j] || !prev[j]->dst.write) + continue; + + // if it's relative then we can't determin which gpr is really used + if (prev[j]->dst.chan == alu->src[src].chan && + (prev[j]->dst.sel == alu->src[src].sel || + prev[j]->dst.rel || alu->src[src].rel)) + return 0; + } + } + } + + /* more than one PRED_ or KILL_ ? */ + if (num_once_inst > 1) return 0; + + /* check if the result can still be swizzlet */ + r = check_and_set_bank_swizzle(bc, result); + if (r) + return 0; + + /* looks like everything worked out right, apply the changes */ + + /* sort instructions */ + for (i = 0; i < 5; ++i) { + slots[i] = result[i]; + if (result[i]) { + LIST_DEL(&result[i]->list); + result[i]->last = 0; + LIST_ADDTAIL(&result[i]->list, &bc->cf_last->alu); + } + } + + /* determine new last instruction */ + LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1; + + /* determine new first instruction */ + for (i = 0; i < 5; ++i) { + if (result[i]) { + bc->cf_last->curr_bs_head = result[i]; + break; + } } - swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + - (is_const(alu->src[1].sel) ? 2 : 0 ) + - (is_const(alu->src[2].sel) ? 1 : 0 ); - alu->bank_swizzle = bank_swizzle_vec[swizzle_key]; + bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head; + bc->cf_last->prev2_bs_head = NULL; + return 0; } -static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *alu_first) +/* This code handles kcache lines as single blocks of 32 constants. We could + * probably do slightly better by recognizing that we actually have two + * consecutive lines of 16 constants, but the resulting code would also be + * somewhat more complicated. */ +static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *alu, int type) { - struct r600_bc_alu *alu = NULL; - int num_instr = 1; + struct r600_bc_kcache *kcache = bc->cf_last->kcache; + unsigned int required_lines; + unsigned int free_lines = 0; + unsigned int cache_line[3]; + unsigned int count = 0; + unsigned int i, j; + int r; + + /* Collect required cache lines. */ + for (i = 0; i < 3; ++i) { + bool found = false; + unsigned int line; + + if (alu->src[i].sel < 512) + continue; + + line = ((alu->src[i].sel - 512) / 32) * 2; + + for (j = 0; j < count; ++j) { + if (cache_line[j] == line) { + found = true; + break; + } + } + + if (!found) + cache_line[count++] = line; + } - init_gpr(alu_first); + /* This should never actually happen. */ + if (count >= 3) return -ENOMEM; - LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) { - num_instr++; + for (i = 0; i < 2; ++i) { + if (kcache[i].mode == V_SQ_CF_KCACHE_NOP) { + ++free_lines; + } } - if (num_instr == 1) { - check_scalar(bc, alu_first); - - } else { -/* check_read_slots(bc, bc->cf_last->curr_bs_head);*/ - check_vector(bc, alu_first); - LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) { - check_vector(bc, alu); + /* Filter lines pulled in by previous intructions. Note that this is + * only for the required_lines count, we can't remove these from the + * cache_line array since we may have to start a new ALU clause. 
*/ + for (i = 0, required_lines = count; i < count; ++i) { + for (j = 0; j < 2; ++j) { + if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 && + kcache[j].addr == cache_line[i]) { + --required_lines; + break; + } + } + } + + /* Start a new ALU clause if needed. */ + if (required_lines > free_lines) { + if ((r = r600_bc_add_cf(bc))) { + return r; + } + bc->cf_last->inst = (type << 3); + kcache = bc->cf_last->kcache; + } + + /* Setup the kcache lines. */ + for (i = 0; i < count; ++i) { + bool found = false; + + for (j = 0; j < 2; ++j) { + if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 && + kcache[j].addr == cache_line[i]) { + found = true; + break; + } + } + + if (found) continue; + + for (j = 0; j < 2; ++j) { + if (kcache[j].mode == V_SQ_CF_KCACHE_NOP) { + kcache[j].bank = 0; + kcache[j].addr = cache_line[i]; + kcache[j].mode = V_SQ_CF_KCACHE_LOCK_2; + break; + } + } + } + + /* Alter the src operands to refer to the kcache. */ + for (i = 0; i < 3; ++i) { + static const unsigned int base[] = {128, 160, 256, 288}; + unsigned int line; + + if (alu->src[i].sel < 512) + continue; + + alu->src[i].sel -= 512; + line = (alu->src[i].sel / 32) * 2; + + for (j = 0; j < 2; ++j) { + if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 && + kcache[j].addr == line) { + alu->src[i].sel &= 0x1f; + alu->src[i].sel += base[j]; + break; + } } } + return 0; } @@ -416,62 +1073,89 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int if (nalu == NULL) return -ENOMEM; memcpy(nalu, alu, sizeof(struct r600_bc_alu)); - nalu->nliteral = 0; + + if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) { + /* check if we could add it anyway */ + if (bc->cf_last->inst == (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) && + type == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE) { + LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) { + if (lalu->predicate) { + bc->force_add_cf = 1; + break; + } + } + } else + bc->force_add_cf = 1; + } /* cf can contains only alu or only vtx or only tex */ - if (bc->cf_last == NULL || bc->cf_last->inst != (type << 3) || - bc->force_add_cf) { + if (bc->cf_last == NULL || bc->force_add_cf) { r = r600_bc_add_cf(bc); if (r) { free(nalu); return r; } - bc->cf_last->inst = (type << 3); } + bc->cf_last->inst = (type << 3); + + /* Setup the kcache for this ALU instruction. This will start a new + * ALU clause if needed. 
*/ + if ((r = r600_bc_alloc_kcache_lines(bc, nalu, type))) { + free(nalu); + return r; + } + if (!bc->cf_last->curr_bs_head) { bc->cf_last->curr_bs_head = nalu; - LIST_INITHEAD(&nalu->bs_list); - } else { - LIST_ADDTAIL(&nalu->bs_list, &bc->cf_last->curr_bs_head->bs_list); } - /* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots) + /* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots) * worst case */ - if (alu->last && (bc->cf_last->ndw >> 1) >= 120) { + if (nalu->last && (bc->cf_last->ndw >> 1) >= 120) { bc->force_add_cf = 1; } /* number of gpr == the last gpr used in any alu */ for (i = 0; i < 3; i++) { - if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) { - bc->ngpr = alu->src[i].sel + 1; - } - /* compute how many literal are needed - * either 2 or 4 literals - */ - if (alu->src[i].sel == 253) { - if (((alu->src[i].chan + 2) & 0x6) > nalu->nliteral) { - nalu->nliteral = (alu->src[i].chan + 2) & 0x6; - } - } - } - if (!LIST_IS_EMPTY(&bc->cf_last->alu)) { - lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list); - if (!lalu->last && lalu->nliteral > nalu->nliteral) { - nalu->nliteral = lalu->nliteral; + if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) { + bc->ngpr = nalu->src[i].sel + 1; } + if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) + r600_bc_special_constants( + nalu->src[i].value[nalu->src[i].chan], + &nalu->src[i].sel, &nalu->src[i].neg); } - if (alu->dst.sel >= bc->ngpr) { - bc->ngpr = alu->dst.sel + 1; + if (nalu->dst.sel >= bc->ngpr) { + bc->ngpr = nalu->dst.sel + 1; } LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu); /* each alu use 2 dwords */ bc->cf_last->ndw += 2; bc->ndw += 2; - bc->cf_last->kcache0_mode = 2; - /* process cur ALU instructions for bank swizzle */ - if (alu->last) { - check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head); + if (nalu->last) { + struct r600_bc_alu *slots[5]; + r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots); + if (r) + return r; + + if (bc->cf_last->prev_bs_head) { + r = merge_inst_groups(bc, slots, bc->cf_last->prev_bs_head); + if (r) + return r; + } + + if (bc->cf_last->prev_bs_head) { + r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head); + if (r) + return r; + } + + r = check_and_set_bank_swizzle(bc, slots); + if (r) + return r; + + bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head; + bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head; bc->cf_last->curr_bs_head = NULL; } return 0; @@ -482,44 +1166,6 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); } -int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) -{ - struct r600_bc_alu *alu; - - if (bc->cf_last == NULL) { - return 0; - } - if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { - return 0; - } - /* all same on EG */ - if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) { - return 0; - } - /* same on EG */ - if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) && - (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) || - LIST_IS_EMPTY(&bc->cf_last->alu)) { - 
R600_ERR("last CF is not ALU (%p)\n", bc->cf_last); - return -EINVAL; - } - alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list); - if (!alu->last || !alu->nliteral || alu->literal_added) { - return 0; - } - memcpy(alu->value, value, 4 * 4); - bc->cf_last->ndw += alu->nliteral; - bc->ndw += alu->nliteral; - alu->literal_added = 1; - return 0; -} - int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) { struct r600_bc_vtx *nvtx = r600_bc_vtx(); @@ -545,7 +1191,7 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) /* each fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; - if ((bc->ndw / 4) > 7) + if ((bc->cf_last->ndw / 4) > 7) bc->force_add_cf = 1; return 0; } @@ -570,11 +1216,17 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) } bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_TEX; } + if (ntex->src_gpr >= bc->ngpr) { + bc->ngpr = ntex->src_gpr + 1; + } + if (ntex->dst_gpr >= bc->ngpr) { + bc->ngpr = ntex->dst_gpr + 1; + } LIST_ADDTAIL(&ntex->list, &bc->cf_last->tex); /* each texture fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; - if ((bc->ndw / 4) > 7) + if ((bc->cf_last->ndw / 4) > 7) bc->force_add_cf = 1; return 0; } @@ -670,8 +1322,6 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign /* r600 only, r700/eg bits in r700_asm.c */ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) { - unsigned i; - /* don't replace gpr by pv or ps for destination register */ bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | @@ -702,19 +1352,12 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | + S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) | S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) | S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) | S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate); } - if (alu->last) { - if (alu->nliteral && !alu->literal_added) { - R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst); - } - for (i = 0; i < alu->nliteral; i++) { - bc->bytecode[id++] = alu->value[i]; - } - } return 0; } @@ -726,15 +1369,17 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3): bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | - S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) | - S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) | - S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank); + S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank); bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | - S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) | - S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) | - S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) | + S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) | S_SQ_CF_ALU_WORD1_BARRIER(1) | 
S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); @@ -790,8 +1435,10 @@ int r600_bc_build(struct r600_bc *bc) struct r600_bc_alu *alu; struct r600_bc_vtx *vtx; struct r600_bc_tex *tex; + uint32_t literal[4]; + unsigned nliteral; unsigned addr; - int r; + int i, r; if (bc->callstack[0].max > 0) bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2; @@ -805,7 +1452,19 @@ int r600_bc_build(struct r600_bc *bc) LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3): case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): + nliteral = 0; + LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { + r = r600_bc_alu_nliterals(bc, alu, literal, &nliteral); + if (r) + return r; + if (alu->last) { + cf->ndw += align(nliteral, 2); + nliteral = 0; + } + } break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX: @@ -851,8 +1510,15 @@ int r600_bc_build(struct r600_bc *bc) return r; switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3): case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): + nliteral = 0; LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { + r = r600_bc_alu_nliterals(bc, alu, literal, &nliteral); + if (r) + return r; + r600_bc_alu_adjust_literals(bc, alu, literal, nliteral); switch(bc->chiprev) { case CHIPREV_R600: r = r600_bc_alu_build(bc, alu, addr); @@ -869,7 +1535,10 @@ int r600_bc_build(struct r600_bc *bc) return r; addr += 2; if (alu->last) { - addr += alu->nliteral; + for (i = 0; i < align(nliteral, 2); ++i) { + bc->bytecode[addr++] = literal[i]; + } + nliteral = 0; } } break; @@ -947,3 +1616,447 @@ void r600_bc_clear(struct r600_bc *bc) LIST_INITHEAD(&cf->list); } + +void r600_bc_dump(struct r600_bc *bc) +{ + struct r600_bc_cf *cf = NULL; + struct r600_bc_alu *alu = NULL; + struct r600_bc_vtx *vtx = NULL; + struct r600_bc_tex *tex = NULL; + + unsigned i, id; + uint32_t literal[4]; + unsigned nliteral; + char chip = '6'; + + switch (bc->chiprev) { + case 1: + chip = '7'; + break; + case 2: + chip = 'E'; + break; + case 0: + default: + chip = '6'; + break; + } + fprintf(stderr, "bytecode %d dw -- %d gprs ---------------------\n", bc->ndw, bc->ngpr); + fprintf(stderr, " %c\n", chip); + + LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { + id = cf->id; + + switch (cf->inst) { + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): + fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]); + fprintf(stderr, "ADDR:%d ", cf->addr); + fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache[0].mode); + fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache[0].bank); + fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache[1].bank); + id++; + fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]); + fprintf(stderr, "INST:%d ", cf->inst); + fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache[1].mode); + fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache[0].addr); + fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache[1].addr); + fprintf(stderr, "COUNT:%d\n", cf->ndw / 2); + break; + case V_SQ_CF_WORD1_SQ_CF_INST_TEX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: + 
fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]); + fprintf(stderr, "ADDR:%d\n", cf->addr); + id++; + fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]); + fprintf(stderr, "INST:%d ", cf->inst); + fprintf(stderr, "COUNT:%d\n", cf->ndw / 4); + break; + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]); + fprintf(stderr, "GPR:%X ", cf->output.gpr); + fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size); + fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base); + fprintf(stderr, "TYPE:%X\n", cf->output.type); + id++; + fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]); + fprintf(stderr, "SWIZ_X:%X ", cf->output.swizzle_x); + fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y); + fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z); + fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w); + fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w); + fprintf(stderr, "BARRIER:%X ", cf->output.barrier); + fprintf(stderr, "INST:%d ", cf->output.inst); + fprintf(stderr, "EOP:%X\n", cf->output.end_of_program); + break; + case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: + case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: + case V_SQ_CF_WORD1_SQ_CF_INST_POP: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: + case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: + case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: + fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]); + fprintf(stderr, "ADDR:%d\n", cf->cf_addr); + id++; + fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]); + fprintf(stderr, "INST:%d ", cf->inst); + fprintf(stderr, "COND:%X ", cf->cond); + fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count); + break; + } + + id = cf->addr; + nliteral = 0; + LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { + r600_bc_alu_nliterals(bc, alu, literal, &nliteral); + + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel); + fprintf(stderr, "REL:%d ", alu->src[0].rel); + fprintf(stderr, "CHAN:%d ", alu->src[0].chan); + fprintf(stderr, "NEG:%d) ", alu->src[0].neg); + fprintf(stderr, "SRC1(SEL:%d ", alu->src[1].sel); + fprintf(stderr, "REL:%d ", alu->src[1].rel); + fprintf(stderr, "CHAN:%d ", alu->src[1].chan); + fprintf(stderr, "NEG:%d) ", alu->src[1].neg); + fprintf(stderr, "LAST:%d)\n", alu->last); + id++; + fprintf(stderr, "%04d %08X %c ", id, bc->bytecode[id], alu->last ? 
'*' : ' '); + fprintf(stderr, "INST:%d ", alu->inst); + fprintf(stderr, "DST(SEL:%d ", alu->dst.sel); + fprintf(stderr, "CHAN:%d ", alu->dst.chan); + fprintf(stderr, "REL:%d ", alu->dst.rel); + fprintf(stderr, "CLAMP:%d) ", alu->dst.clamp); + fprintf(stderr, "BANK_SWIZZLE:%d ", alu->bank_swizzle); + if (alu->is_op3) { + fprintf(stderr, "SRC2(SEL:%d ", alu->src[2].sel); + fprintf(stderr, "REL:%d ", alu->src[2].rel); + fprintf(stderr, "CHAN:%d ", alu->src[2].chan); + fprintf(stderr, "NEG:%d)\n", alu->src[2].neg); + } else { + fprintf(stderr, "SRC0_ABS:%d ", alu->src[0].abs); + fprintf(stderr, "SRC1_ABS:%d ", alu->src[1].abs); + fprintf(stderr, "WRITE_MASK:%d ", alu->dst.write); + fprintf(stderr, "OMOD:%d ", alu->omod); + fprintf(stderr, "EXECUTE_MASK:%d ", alu->predicate); + fprintf(stderr, "UPDATE_PRED:%d\n", alu->predicate); + } + + id++; + if (alu->last) { + for (i = 0; i < nliteral; i++, id++) { + float *f = (float*)(bc->bytecode + id); + fprintf(stderr, "%04d %08X\t%f\n", id, bc->bytecode[id], *f); + } + id += nliteral & 1; + nliteral = 0; + } + } + + LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { + //TODO + } + + LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { + //TODO + } + } + + fprintf(stderr, "--------------------------------------\n"); +} + +void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) +{ + struct r600_pipe_state *rstate; + unsigned i = 0; + + if (count > 8) { + bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(8 - 1); + bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(count - 8 - 1); + } else { + bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(count - 1); + } + bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | + S_SQ_CF_WORD1_BARRIER(1); + + rstate = &ve->rstate; + rstate->id = R600_PIPE_STATE_FETCH_SHADER; + rstate->nregs = 0; + r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, + r600_bo_offset(ve->fetch_shader) >> 8, + 0xFFFFFFFF, ve->fetch_shader); +} + +void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) +{ + struct r600_pipe_state *rstate; + unsigned i = 0; + + if (count > 8) { + bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(8 - 1); + bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT((count - 8) - 1); + } else { + bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(count - 1); + } + bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | + S_SQ_CF_WORD1_BARRIER(1); + + rstate = &ve->rstate; + rstate->id = R600_PIPE_STATE_FETCH_SHADER; + rstate->nregs = 0; + r600_pipe_state_add_reg(rstate, 
R_0288A4_SQ_PGM_RESOURCES_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, + r600_bo_offset(ve->fetch_shader) >> 8, + 0xFFFFFFFF, ve->fetch_shader); +} + +static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, + unsigned *num_format, unsigned *format_comp) +{ + const struct util_format_description *desc; + unsigned i; + + *format = 0; + *num_format = 0; + *format_comp = 0; + + desc = util_format_description(pformat); + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { + goto out_unknown; + } + + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + switch (desc->channel[i].type) { + /* Half-floats, floats, doubles */ + case UTIL_FORMAT_TYPE_FLOAT: + switch (desc->channel[i].size) { + case 16: + switch (desc->nr_channels) { + case 1: + *format = FMT_16_FLOAT; + break; + case 2: + *format = FMT_16_16_FLOAT; + break; + case 3: + *format = FMT_16_16_16_FLOAT; + break; + case 4: + *format = FMT_16_16_16_16_FLOAT; + break; + } + break; + case 32: + switch (desc->nr_channels) { + case 1: + *format = FMT_32_FLOAT; + break; + case 2: + *format = FMT_32_32_FLOAT; + break; + case 3: + *format = FMT_32_32_32_FLOAT; + break; + case 4: + *format = FMT_32_32_32_32_FLOAT; + break; + } + break; + default: + goto out_unknown; + } + break; + /* Unsigned ints */ + case UTIL_FORMAT_TYPE_UNSIGNED: + /* Signed ints */ + case UTIL_FORMAT_TYPE_SIGNED: + switch (desc->channel[i].size) { + case 8: + switch (desc->nr_channels) { + case 1: + *format = FMT_8; + break; + case 2: + *format = FMT_8_8; + break; + case 3: + // *format = FMT_8_8_8; /* fails piglit draw-vertices test */ + // break; + case 4: + *format = FMT_8_8_8_8; + break; + } + break; + case 16: + switch (desc->nr_channels) { + case 1: + *format = FMT_16; + break; + case 2: + *format = FMT_16_16; + break; + case 3: + // *format = FMT_16_16_16; /* fails piglit draw-vertices test */ + // break; + case 4: + *format = FMT_16_16_16_16; + break; + } + break; + case 32: + switch (desc->nr_channels) { + case 1: + *format = FMT_32; + break; + case 2: + *format = FMT_32_32; + break; + case 3: + *format = FMT_32_32_32; + break; + case 4: + *format = FMT_32_32_32_32; + break; + } + break; + default: + goto out_unknown; + } + break; + default: + goto out_unknown; + } + + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { + *format_comp = 1; + } + if (desc->channel[i].normalized) { + *num_format = 0; + } else { + *num_format = 2; + } + return; +out_unknown: + R600_ERR("unsupported vertex format %s\n", util_format_name(pformat)); +} + +int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve) +{ + unsigned ndw, i; + u32 *bytecode; + unsigned fetch_resource_start = 0, format, num_format, format_comp; + struct pipe_vertex_element *elements = ve->elements; + const struct util_format_description *desc; + + /* 2 dwords for cf aligned to 4 + 4 dwords per input */ + ndw = 8 + ve->count * 4; + ve->fs_size = ndw * 4; + + /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ + ve->fetch_shader = r600_bo(rctx->radeon, ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0); + if (ve->fetch_shader == NULL) { + return -ENOMEM; + } + + bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL); + if (bytecode == NULL) { + r600_bo_reference(rctx->radeon, &ve->fetch_shader, 
NULL); + return -ENOMEM; + } + + if (rctx->family >= CHIP_CEDAR) { + eg_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4); + } else { + r600_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4); + fetch_resource_start = 160; + } + + /* vertex elements offset need special handling, if offset is bigger + * than what we can put in fetch instruction then we need to alterate + * the vertex resource offset. In such case in order to simplify code + * we will bound one resource per elements. It's a worst case scenario. + */ + for (i = 0; i < ve->count; i++) { + ve->vbuffer_offset[i] = C_SQ_VTX_WORD2_OFFSET & elements[i].src_offset; + if (ve->vbuffer_offset[i]) { + ve->vbuffer_need_offset = 1; + } + } + + for (i = 0; i < ve->count; i++) { + unsigned vbuffer_index; + r600_vertex_data_type(ve->hw_format[i], &format, &num_format, &format_comp); + desc = util_format_description(ve->hw_format[i]); + if (desc == NULL) { + R600_ERR("unknown format %d\n", ve->hw_format[i]); + r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); + return -EINVAL; + } + + /* see above for vbuffer_need_offset explanation */ + vbuffer_index = elements[i].vertex_buffer_index; + if (ve->vbuffer_need_offset) { + bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(i + fetch_resource_start); + } else { + bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(vbuffer_index + fetch_resource_start); + } + bytecode[8 + i * 4 + 0] |= S_SQ_VTX_WORD0_SRC_GPR(0) | + S_SQ_VTX_WORD0_SRC_SEL_X(0) | + S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F); + bytecode[8 + i * 4 + 1] = S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]) | + S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]) | + S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]) | + S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]) | + S_SQ_VTX_WORD1_USE_CONST_FIELDS(0) | + S_SQ_VTX_WORD1_DATA_FORMAT(format) | + S_SQ_VTX_WORD1_NUM_FORMAT_ALL(num_format) | + S_SQ_VTX_WORD1_FORMAT_COMP_ALL(format_comp) | + S_SQ_VTX_WORD1_SRF_MODE_ALL(1) | + S_SQ_VTX_WORD1_GPR_DST_GPR(i + 1); + bytecode[8 + i * 4 + 2] = S_SQ_VTX_WORD2_OFFSET(elements[i].src_offset) | + S_SQ_VTX_WORD2_MEGA_FETCH(1); + bytecode[8 + i * 4 + 3] = 0; + } + r600_bo_unmap(rctx->radeon, ve->fetch_shader); + return 0; +} diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index f2016af3e72..278b4466cb0 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -25,8 +25,8 @@ #include "util/u_double_list.h" -#define NUM_OF_CYCLES 3 -#define NUM_OF_COMPONENTS 4 +struct r600_vertex_element; +struct r600_pipe_context; struct r600_bc_alu_src { unsigned sel; @@ -34,6 +34,7 @@ struct r600_bc_alu_src { unsigned neg; unsigned abs; unsigned rel; + u32 *value; }; struct r600_bc_alu_dst { @@ -46,19 +47,15 @@ struct r600_bc_alu_dst { struct r600_bc_alu { struct list_head list; - struct list_head bs_list; /* bank swizzle list */ struct r600_bc_alu_src src[3]; struct r600_bc_alu_dst dst; unsigned inst; unsigned last; unsigned is_op3; unsigned predicate; - unsigned nliteral; - unsigned literal_added; unsigned bank_swizzle; unsigned bank_swizzle_force; - u32 value[4]; - int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; + unsigned omod; }; struct r600_bc_tex { @@ -122,6 +119,12 @@ struct r600_bc_output { unsigned barrier; }; +struct r600_bc_kcache { + unsigned bank; + unsigned mode; + unsigned addr; +}; + struct r600_bc_cf { struct list_head list; unsigned inst; @@ -131,18 +134,15 @@ struct r600_bc_cf { unsigned cond; unsigned pop_count; unsigned cf_addr; /* control flow addr */ - unsigned kcache0_mode; - unsigned kcache1_mode; - unsigned 
kcache0_addr; - unsigned kcache1_addr; - unsigned kcache0_bank; - unsigned kcache1_bank; + struct r600_bc_kcache kcache[2]; unsigned r6xx_uses_waterfall; struct list_head alu; struct list_head tex; struct list_head vtx; struct r600_bc_output output; struct r600_bc_alu *curr_bs_head; + struct r600_bc_alu *prev_bs_head; + struct r600_bc_alu *prev2_bs_head; }; #define FC_NONE 0 @@ -188,18 +188,24 @@ struct r600_bc { /* eg_asm.c */ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); +void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); /* r600_asm.c */ int r600_bc_init(struct r600_bc *bc, enum radeon_family family); void r600_bc_clear(struct r600_bc *bc); int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); -int r600_bc_add_literal(struct r600_bc *bc, const u32 *value); int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex); int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output); int r600_bc_build(struct r600_bc *bc); int r600_bc_add_cfinst(struct r600_bc *bc, int inst); int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type); +void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg); +void r600_bc_dump(struct r600_bc *bc); +void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); +void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); + +int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve); /* r700_asm.c */ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id); diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 74cf9687999..b9ec9592e35 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -78,19 +78,24 @@ static void r600_blitter_end(struct pipe_context *ctx) r600_context_queries_resume(&rctx->ctx); } -int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) +void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_surface *zsurf, *cbsurf; + struct pipe_surface *zsurf, *cbsurf, surf_tmpl; int level = 0; float depth = 1.0f; + surf_tmpl.format = texture->resource.base.b.format; + surf_tmpl.u.tex.level = level; + surf_tmpl.u.tex.first_layer = 0; + surf_tmpl.u.tex.last_layer = 0; + surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL; - zsurf = ctx->screen->get_tex_surface(ctx->screen, &texture->resource.base.b, 0, level, 0, - PIPE_BIND_DEPTH_STENCIL); + zsurf = ctx->create_surface(ctx, &texture->resource.base.b, &surf_tmpl); - cbsurf = ctx->screen->get_tex_surface(ctx->screen, - (struct pipe_resource*)texture->flushed_depth_texture, - 0, level, 0, PIPE_BIND_RENDER_TARGET); + surf_tmpl.format = ((struct pipe_resource*)texture->flushed_depth_texture)->format; + surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; + cbsurf = ctx->create_surface(ctx, + (struct pipe_resource*)texture->flushed_depth_texture, &surf_tmpl); if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 || rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635) @@ -102,9 +107,6 @@ int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_te pipe_surface_reference(&zsurf, NULL); pipe_surface_reference(&cbsurf, NULL); - - - return 0; } 
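[Annotation, not part of the patch: the kcache handling introduced above in r600_bc_alloc_kcache_lines() rewrites constant-buffer selectors into kcache-relative ones. Below is a minimal standalone sketch of that translation, assuming the two-entry kcache[] layout, the 32-constant LOCK_2 granularity, and the first two base windows (128, 160) shown in the diff; the example selector and the little program around it are illustrative only.]

#include <stdio.h>

int main(void)
{
	unsigned sel = 517;                         /* CB constant: selectors >= 512 index the constant buffer */
	unsigned line = ((sel - 512) / 32) * 2;     /* kcache addresses count 16-constant halves, locked 32 at a time */
	static const unsigned base[] = {128, 160};  /* windows for kcache[0] and kcache[1] after translation */
	unsigned slot = 0;                          /* assume the line was locked into kcache[0] */
	unsigned new_sel = ((sel - 512) & 0x1f) + base[slot];

	printf("CB constant %u -> kcache line %u, ALU src sel %u\n", sel, line, new_sel);
	return 0;
}

[The translated selector lands in the 128-191 range that is_cfile() treats as "Kcache after translate", which is why the bank-swizzle checks can handle kcache reads the same way as the old cfile constants.]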
static void r600_clear(struct pipe_context *ctx, unsigned buffers, @@ -154,41 +156,38 @@ static void r600_clear_depth_stencil(struct pipe_context *ctx, /* Copy a block of pixels from one surface to another using HW. */ static void r600_hw_copy_region(struct pipe_context *ctx, - struct pipe_resource *dst, - struct pipe_subresource subdst, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height) + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; r600_blitter_begin(ctx, R600_COPY); - util_blitter_copy_region(rctx->blitter, dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, width, height, - TRUE); + util_blitter_copy_region(rctx->blitter, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box, TRUE); r600_blitter_end(ctx); } static void r600_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, - struct pipe_subresource subdst, + unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, struct pipe_resource *src, - struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height) + unsigned src_level, + const struct pipe_box *src_box) { boolean is_depth; /* there is something wrong with depth resource copies at the moment so avoid them for now */ is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; if (is_depth) - util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, width, height); + util_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); else - r600_hw_copy_region(ctx, dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, width, height); + r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); } void r600_init_blit_functions(struct r600_pipe_context *rctx) diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index a432271b82d..469c8195fe9 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -29,7 +29,6 @@ #include <util/u_math.h> #include <util/u_inlines.h> #include <util/u_memory.h> -#include <util/u_upload_mgr.h> #include "state_tracker/drm_driver.h" #include <xf86drm.h> #include "radeon_drm.h" @@ -53,12 +52,13 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, rbuffer->magic = R600_BUFFER_MAGIC; rbuffer->user_buffer = NULL; - rbuffer->num_ranges = 0; rbuffer->r.base.b = *templ; pipe_reference_init(&rbuffer->r.base.b.reference, 1); rbuffer->r.base.b.screen = screen; rbuffer->r.base.vtbl = &r600_buffer_vtbl; rbuffer->r.size = rbuffer->r.base.b.width0; + rbuffer->r.bo_size = rbuffer->r.size; + rbuffer->uploaded = FALSE; bo = r600_bo((struct radeon*)screen->winsys, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind, rbuffer->r.base.b.usage); if (bo == NULL) { FREE(rbuffer); @@ -89,10 +89,12 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, rbuffer->r.base.b.width0 = bytes; rbuffer->r.base.b.height0 = 1; rbuffer->r.base.b.depth0 = 1; + rbuffer->r.base.b.array_size = 1; rbuffer->r.base.b.flags = 0; - rbuffer->num_ranges = 0; rbuffer->r.bo = NULL; + rbuffer->r.bo_size = 0; 
rbuffer->user_buffer = ptr; + rbuffer->uploaded = FALSE; return &rbuffer->r.base.b; } @@ -104,6 +106,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen, if (rbuffer->r.bo) { r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL); } + rbuffer->r.bo = NULL; FREE(rbuffer); } @@ -113,29 +116,10 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); int write = 0; uint8_t *data; - int i; - boolean flush = FALSE; if (rbuffer->user_buffer) return (uint8_t*)rbuffer->user_buffer + transfer->box.x; - if (transfer->usage & PIPE_TRANSFER_DISCARD) { - for (i = 0; i < rbuffer->num_ranges; i++) { - if ((transfer->box.x >= rbuffer->ranges[i].start) && - (transfer->box.x < rbuffer->ranges[i].end)) - flush = TRUE; - - if (flush) { - r600_bo_reference((struct radeon*)pipe->winsys, &rbuffer->r.bo, NULL); - rbuffer->num_ranges = 0; - rbuffer->r.bo = r600_bo((struct radeon*)pipe->winsys, - rbuffer->r.base.b.width0, 0, - rbuffer->r.base.b.bind, - rbuffer->r.base.b.usage); - break; - } - } - } if (transfer->usage & PIPE_TRANSFER_DONTBLOCK) { /* FIXME */ } @@ -154,41 +138,22 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, { struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); + if (rbuffer->user_buffer) + return; + if (rbuffer->r.bo) r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo); } static void r600_buffer_transfer_flush_region(struct pipe_context *pipe, - struct pipe_transfer *transfer, - const struct pipe_box *box) + struct pipe_transfer *transfer, + const struct pipe_box *box) { - struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); - unsigned i; - unsigned offset = transfer->box.x + box->x; - unsigned length = box->width; - - assert(box->x + box->width <= transfer->box.width); - - if (rbuffer->user_buffer) - return; - - /* mark the range as used */ - for(i = 0; i < rbuffer->num_ranges; ++i) { - if(offset <= rbuffer->ranges[i].end && rbuffer->ranges[i].start <= (offset+box->width)) { - rbuffer->ranges[i].start = MIN2(rbuffer->ranges[i].start, offset); - rbuffer->ranges[i].end = MAX2(rbuffer->ranges[i].end, (offset+length)); - return; - } - } - - rbuffer->ranges[rbuffer->num_ranges].start = offset; - rbuffer->ranges[rbuffer->num_ranges].end = offset+length; - rbuffer->num_ranges++; } unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, struct pipe_resource *buf, - unsigned face, unsigned level) + unsigned level, int layer) { /* FIXME */ return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; @@ -235,29 +200,25 @@ struct u_resource_vtbl r600_buffer_vtbl = int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw) { - struct pipe_resource *upload_buffer = NULL; - unsigned index_offset = draw->index_buffer_offset; - int ret = 0; - if (r600_buffer_is_user_buffer(draw->index_buffer)) { - ret = u_upload_buffer(rctx->upload_ib, - index_offset, - draw->count * draw->index_size, - draw->index_buffer, - &index_offset, - &upload_buffer); - if (ret) { - goto done; - } - draw->index_buffer_offset = index_offset; - - /* Transfer ownership. 
*/ - pipe_resource_reference(&draw->index_buffer, upload_buffer); - pipe_resource_reference(&upload_buffer, NULL); + struct r600_resource_buffer *rbuffer = r600_buffer(draw->index_buffer); + unsigned upload_offset; + int ret = 0; + + ret = r600_upload_buffer(rctx->rupload_vb, + draw->index_buffer_offset, + draw->count * draw->index_size, + rbuffer, + &upload_offset, + &rbuffer->r.bo_size, + &rbuffer->r.bo); + if (ret) + return ret; + rbuffer->uploaded = TRUE; + draw->index_buffer_offset = upload_offset; } -done: - return ret; + return 0; } int r600_upload_user_buffers(struct r600_pipe_context *rctx) @@ -266,27 +227,52 @@ int r600_upload_user_buffers(struct r600_pipe_context *rctx) int i, nr; nr = rctx->vertex_elements->count; + nr = rctx->nvertex_buffer; for (i = 0; i < nr; i++) { - struct pipe_vertex_buffer *vb = - &rctx->vertex_buffer[rctx->vertex_elements->elements[i].vertex_buffer_index]; + struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; if (r600_buffer_is_user_buffer(vb->buffer)) { - struct pipe_resource *upload_buffer = NULL; - unsigned offset = 0; /*vb->buffer_offset * 4;*/ - unsigned size = vb->buffer->width0; + struct r600_resource_buffer *rbuffer = r600_buffer(vb->buffer); unsigned upload_offset; - ret = u_upload_buffer(rctx->upload_vb, - offset, size, - vb->buffer, - &upload_offset, &upload_buffer); + + ret = r600_upload_buffer(rctx->rupload_vb, + 0, vb->buffer->width0, + rbuffer, + &upload_offset, + &rbuffer->r.bo_size, + &rbuffer->r.bo); if (ret) return ret; - - pipe_resource_reference(&vb->buffer, NULL); - vb->buffer = upload_buffer; + rbuffer->uploaded = TRUE; vb->buffer_offset = upload_offset; } } return ret; } + + +int r600_upload_const_buffer(struct r600_pipe_context *rctx, struct pipe_resource *cbuffer, + uint32_t *const_offset) +{ + if (r600_buffer_is_user_buffer(cbuffer)) { + struct r600_resource_buffer *rbuffer = r600_buffer(cbuffer); + unsigned upload_offset; + int ret = 0; + + ret = r600_upload_buffer(rctx->rupload_const, + 0, cbuffer->width0, + rbuffer, + &upload_offset, + &rbuffer->r.bo_size, + &rbuffer->r.bo); + if (ret) + return ret; + rbuffer->uploaded = TRUE; + *const_offset = upload_offset; + return 0; + } + + *const_offset = 0; + return 0; +} diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h index 2ee0c83e5d3..a85d0bbf1e1 100644 --- a/src/gallium/drivers/r600/r600_opcodes.h +++ b/src/gallium/drivers/r600/r600_opcodes.h @@ -330,10 +330,14 @@ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED_64 0x00000098 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_64 0x00000099 /* TODO Fill in more ALU */ +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT 0x0000009B +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT 0x0000009C +/* TODO Fill in more ALU */ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR 0x000000B1 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 0x000000BE #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE 0x000000BF #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE 0x000000C0 +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4 0x000000C1 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x000000CC #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY 0x000000D6 diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 511e52fbce1..20838e4d98f 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -35,7 +35,6 @@ #include <util/u_pack_color.h> #include <util/u_memory.h> #include <util/u_inlines.h> -#include 
<util/u_upload_mgr.h> #include <pipebuffer/pb_buffer.h> #include "r600.h" #include "r600d.h" @@ -59,9 +58,6 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, if (!rctx->ctx.pm4_cdwords) return; - u_upload_flush(rctx->upload_vb); - u_upload_flush(rctx->upload_ib); - #if 0 sprintf(dname, "gallium-%08d.bof", dc); if (dc < 20) { @@ -71,6 +67,9 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, dc++; #endif r600_context_flush(&rctx->ctx); + + r600_upload_flush(rctx->rupload_vb); + r600_upload_flush(rctx->rupload_const); } static void r600_destroy_context(struct pipe_context *context) @@ -79,6 +78,8 @@ static void r600_destroy_context(struct pipe_context *context) rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush); + r600_end_vertex_translate(rctx); + r600_context_fini(&rctx->ctx); util_blitter_destroy(rctx->blitter); @@ -87,8 +88,8 @@ static void r600_destroy_context(struct pipe_context *context) free(rctx->states[i]); } - u_upload_destroy(rctx->upload_vb); - u_upload_destroy(rctx->upload_ib); + r600_upload_destroy(rctx->rupload_vb); + r600_upload_destroy(rctx->rupload_const); if (rctx->tran.translate_cache) translate_cache_destroy(rctx->tran.translate_cache); @@ -120,6 +121,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void r600_init_blit_functions(rctx); r600_init_query_functions(rctx); r600_init_context_resource_functions(rctx); + r600_init_surface_functions(rctx); switch (r600_get_family(rctx->radeon)) { case CHIP_R600: @@ -148,6 +150,9 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void case CHIP_CYPRESS: case CHIP_HEMLOCK: case CHIP_PALM: + case CHIP_BARTS: + case CHIP_TURKS: + case CHIP_CAICOS: rctx->context.draw_vbo = evergreen_draw; evergreen_init_state_functions(rctx); if (evergreen_context_init(&rctx->ctx, rctx->radeon)) { @@ -162,16 +167,14 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } - rctx->upload_ib = u_upload_create(&rctx->context, 32 * 1024, 16, - PIPE_BIND_INDEX_BUFFER); - if (rctx->upload_ib == NULL) { + rctx->rupload_vb = r600_upload_create(rctx, 128 * 1024, 16); + if (rctx->rupload_vb == NULL) { r600_destroy_context(&rctx->context); return NULL; } - rctx->upload_vb = u_upload_create(&rctx->context, 128 * 1024, 16, - PIPE_BIND_VERTEX_BUFFER); - if (rctx->upload_vb == NULL) { + rctx->rupload_const = r600_upload_create(rctx, 128 * 1024, 256); + if (rctx->rupload_const == NULL) { r600_destroy_context(&rctx->context); return NULL; } @@ -206,8 +209,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void else rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); - r600_blit_uncompress_depth_ptr = r600_blit_uncompress_depth; - return &rctx->context; } @@ -240,6 +241,9 @@ static const char *r600_get_family_name(enum radeon_family family) case CHIP_CYPRESS: return "AMD CYPRESS"; case CHIP_HEMLOCK: return "AMD HEMLOCK"; case CHIP_PALM: return "AMD PALM"; + case CHIP_BARTS: return "AMD BARTS"; + case CHIP_TURKS: return "AMD TURKS"; + case CHIP_CAICOS: return "AMD CAICOS"; default: return "AMD unknown"; } } @@ -254,6 +258,9 @@ static const char* r600_get_name(struct pipe_screen* pscreen) static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) { + struct r600_screen *rscreen = (struct r600_screen *)pscreen; + enum radeon_family family = r600_get_family(rscreen->radeon); + switch (param) { /* Supported features (boolean caps). 
*/ case PIPE_CAP_NPOT_TEXTURES: @@ -286,7 +293,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 14; + if (family >= CHIP_CEDAR) + return 15; + else + return 14; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: /* FIXME allow this once infrastructure is there */ return 16; @@ -315,12 +325,18 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) { + struct r600_screen *rscreen = (struct r600_screen *)pscreen; + enum radeon_family family = r600_get_family(rscreen->radeon); + switch (param) { case PIPE_CAP_MAX_LINE_WIDTH: case PIPE_CAP_MAX_LINE_WIDTH_AA: case PIPE_CAP_MAX_POINT_WIDTH: case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 8192.0f; + if (family >= CHIP_CEDAR) + return 16384.0f; + else + return 8192.0f; case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: return 16.0f; case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: @@ -407,9 +423,9 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, } if ((usage & (PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) && + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) && r600_is_colorbuffer_format_supported(format)) { retval |= usage & (PIPE_BIND_RENDER_TARGET | @@ -467,7 +483,6 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon) rscreen->screen.get_paramf = r600_get_paramf; rscreen->screen.is_format_supported = r600_is_format_supported; rscreen->screen.context_create = r600_create_context; - r600_init_screen_texture_functions(&rscreen->screen); r600_init_screen_resource_functions(&rscreen->screen); rscreen->tiling_info = r600_get_tiling_info(radeon); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index ba9fedf0b6c..2112a40f696 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -53,6 +53,8 @@ enum r600_pipe_state_id { R600_PIPE_STATE_CONSTANT, R600_PIPE_STATE_SAMPLER, R600_PIPE_STATE_RESOURCE, + R600_PIPE_STATE_POLYGON_OFFSET, + R600_PIPE_STATE_FETCH_SHADER, R600_PIPE_NSTATES }; @@ -86,7 +88,15 @@ struct r600_vertex_element struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS]; enum pipe_format hw_format[PIPE_MAX_ATTRIBS]; unsigned hw_format_size[PIPE_MAX_ATTRIBS]; - boolean incompatible_layout; + boolean incompatible_layout; + struct r600_bo *fetch_shader; + unsigned fs_size; + struct r600_pipe_state rstate; + /* if offset is to big for fetch instructio we need to alterate + * offset of vertex buffer, record here the offset need to add + */ + unsigned vbuffer_need_offset; + unsigned vbuffer_offset[PIPE_MAX_ATTRIBS]; }; struct r600_pipe_shader { @@ -101,29 +111,31 @@ struct r600_pipe_shader { #define NUM_TEX_UNITS 16 struct r600_textures_info { - struct r600_pipe_sampler_view *views[NUM_TEX_UNITS]; - unsigned n_views; + struct r600_pipe_sampler_view *views[NUM_TEX_UNITS]; + unsigned n_views; void *samplers[NUM_TEX_UNITS]; - unsigned n_samplers; + unsigned n_samplers; }; +/* vertex buffer translation context, used to translate vertex input that + * hw doesn't natively support, so far only FLOAT64 is unsupported. + */ struct r600_translate_context { /* Translate cache for incompatible vertex offset/stride/format fallback. 
*/ - struct translate_cache *translate_cache; - + struct translate_cache *translate_cache; /* The vertex buffer slot containing the translated buffer. */ - unsigned vb_slot; - /* Saved and new vertex element state. */ - void *saved_velems, *new_velems; + unsigned vb_slot; + void *new_velems; }; #define R600_CONSTANT_ARRAY_SIZE 256 #define R600_RESOURCE_ARRAY_SIZE 160 +struct r600_upload; + struct r600_pipe_context { struct pipe_context context; struct blitter_context *blitter; - struct pipe_framebuffer_state *pframebuffer; unsigned family; void *custom_dsa_flush; struct r600_screen *screen; @@ -140,6 +152,7 @@ struct r600_pipe_context { struct pipe_stencil_ref stencil_ref; struct pipe_viewport_state viewport; struct pipe_clip_state clip; + unsigned nvs_resource; struct r600_pipe_state *vs_resource; struct r600_pipe_state *ps_resource; struct r600_pipe_state config; @@ -151,14 +164,12 @@ struct r600_pipe_context { /* shader information */ unsigned sprite_coord_enable; bool flatshade; - struct u_upload_mgr *upload_vb; - struct u_upload_mgr *upload_ib; + struct r600_upload *rupload_vb; unsigned any_user_vbs; - struct r600_textures_info ps_samplers; - - unsigned vb_max_index; - struct r600_translate_context tran; - + struct r600_textures_info ps_samplers; + unsigned vb_max_index; + struct r600_translate_context tran; + struct r600_upload *rupload_const; }; struct r600_drawl { @@ -181,10 +192,12 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader); void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx); +void evergreen_polygon_offset_update(struct r600_pipe_context *rctx); +void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx); /* r600_blit.c */ void r600_init_blit_functions(struct r600_pipe_context *rctx); -int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture); +void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture); /* r600_buffer.c */ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, @@ -194,7 +207,7 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, unsigned bind); unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, struct pipe_resource *buf, - unsigned face, unsigned level); + unsigned level, int layer); struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle); int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); @@ -207,7 +220,7 @@ void r600_init_query_functions(struct r600_pipe_context *rctx); void r600_init_context_resource_functions(struct r600_pipe_context *r600); /* r600_shader.c */ -int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader); +int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens); void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_find_vs_semantic_index(struct r600_shader *vs, @@ -218,14 +231,20 @@ void r600_init_state_functions(struct r600_pipe_context *rctx); void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); void r600_init_config(struct r600_pipe_context 
*rctx); void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx); +void r600_polygon_offset_update(struct r600_pipe_context *rctx); +void r600_vertex_buffer_update(struct r600_pipe_context *rctx); + /* r600_helper.h */ int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); /* r600_texture.c */ void r600_init_screen_texture_functions(struct pipe_screen *screen); +void r600_init_surface_functions(struct r600_pipe_context *r600); uint32_t r600_translate_texformat(enum pipe_format format, const unsigned char *swizzle_view, uint32_t *word4_p, uint32_t *yuv_format_p); +unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, + unsigned level, unsigned layer); /* r600_translate.c */ void r600_begin_vertex_translate(struct r600_pipe_context *rctx); @@ -252,13 +271,13 @@ void r600_sampler_view_destroy(struct pipe_context *ctx, void r600_bind_state(struct pipe_context *ctx, void *state); void r600_delete_state(struct pipe_context *ctx, void *state); void r600_bind_vertex_elements(struct pipe_context *ctx, void *state); - void *r600_create_shader_state(struct pipe_context *ctx, const struct pipe_shader_state *state); void r600_bind_ps_shader(struct pipe_context *ctx, void *state); void r600_bind_vs_shader(struct pipe_context *ctx, void *state); void r600_delete_ps_shader(struct pipe_context *ctx, void *state); void r600_delete_vs_shader(struct pipe_context *ctx, void *state); + /* * common helpers */ diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 7a2d1f44122..28b3e1e5e40 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -46,6 +46,7 @@ struct r600_resource { struct u_resource base; struct r600_bo *bo; u32 size; + unsigned bo_size; }; struct r600_resource_texture { @@ -61,7 +62,21 @@ struct r600_resource_texture { unsigned tile_type; unsigned depth; unsigned dirty; - struct r600_resource_texture *flushed_depth_texture; + struct r600_resource_texture *flushed_depth_texture; +}; + +#define R600_BUFFER_MAGIC 0xabcd1600 + +struct r600_resource_buffer { + struct r600_resource r; + uint32_t magic; + void *user_buffer; + bool uploaded; +}; + +struct r600_surface { + struct pipe_surface base; + unsigned aligned_height; }; void r600_init_screen_resource_functions(struct pipe_screen *screen); @@ -73,46 +88,29 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, const struct pipe_resource *base, struct winsys_handle *whandle); -#define R600_BUFFER_MAGIC 0xabcd1600 -#define R600_BUFFER_MAX_RANGES 32 - -struct r600_buffer_range { - uint32_t start; - uint32_t end; -}; - -struct r600_resource_buffer { - struct r600_resource r; - uint32_t magic; - void *user_buffer; - struct r600_buffer_range ranges[R600_BUFFER_MAX_RANGES]; - unsigned num_ranges; -}; - /* r600_buffer */ static INLINE struct r600_resource_buffer *r600_buffer(struct pipe_resource *buffer) { if (buffer) { assert(((struct r600_resource_buffer *)buffer)->magic == R600_BUFFER_MAGIC); return (struct r600_resource_buffer *)buffer; - } - return NULL; + } + return NULL; } static INLINE boolean r600_buffer_is_user_buffer(struct pipe_resource *buffer) { - return r600_buffer(buffer)->user_buffer ? TRUE : FALSE; + if (r600_buffer(buffer)->uploaded) + return FALSE; + return r600_buffer(buffer)->user_buffer ? 
TRUE : FALSE; } -int r600_texture_depth_flush(struct pipe_context *ctx, - struct pipe_resource *texture); - -extern int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture); +int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture); /* r600_texture.c texture transfer functions. */ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, struct pipe_resource *texture, - struct pipe_subresource sr, + unsigned level, unsigned usage, const struct pipe_box *box); void r600_texture_transfer_destroy(struct pipe_context *ctx, @@ -122,9 +120,16 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, void r600_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer); -struct r600_surface { - struct pipe_surface base; - unsigned aligned_height; -}; - +struct r600_pipe_context; +struct r600_upload *r600_upload_create(struct r600_pipe_context *rctx, + unsigned default_size, + unsigned alignment); +void r600_upload_flush(struct r600_upload *upload); +void r600_upload_destroy(struct r600_upload *upload); +int r600_upload_buffer(struct r600_upload *upload, unsigned offset, + unsigned size, struct r600_resource_buffer *in_buffer, + unsigned *out_offset, unsigned *out_size, + struct r600_bo **out_buffer); + +int r600_upload_const_buffer(struct r600_pipe_context *rctx, struct pipe_resource *cbuffer, uint32_t *offset); #endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 77b180984dd..e85e829badd 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -44,6 +44,9 @@ static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shade rstate->nregs = 0; /* so far never got proper semantic id from tgsi */ + /* FIXME better to move this in config things so they get emited + * only one time per cs + */ for (i = 0; i < 10; i++) { spi_vs_out_id[i] = 0; } @@ -67,20 +70,11 @@ static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shade S_028868_STACK_SIZE(rshader->bc.nstack), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, - R_0288A4_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288D0_SQ_PGM_CF_OFFSET_VS, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, - R_0288DC_SQ_PGM_CF_OFFSET_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028858_SQ_PGM_START_VS, r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); - r600_pipe_state_add_reg(rstate, - R_028894_SQ_PGM_START_FS, - r600_bo_offset(shader->bo_fetch) >> 8, 0xFFFFFFFF, shader->bo_fetch); r600_pipe_state_add_reg(rstate, R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, @@ -104,37 +98,20 @@ int r600_find_vs_semantic_index(struct r600_shader *vs, static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) { - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; - unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; + unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; int pos_index = -1, face_index = -1; - /* clear previous register */ rstate->nregs = 0; for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); - if (rshader->input[i].centroid) - tmp 
|= S_028644_SEL_CENTROID(1); - if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) - tmp |= S_028644_SEL_LINEAR(1); - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) pos_index = i; - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); - } if (rshader->input[i].name == TGSI_SEMANTIC_FACE) face_index = i; - if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && - rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); } + for (i = 0; i < rshader->noutput; i++) { if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) r600_pipe_state_add_reg(rstate, @@ -210,22 +187,13 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade 0xFFFFFFFF, NULL); } -static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) +int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_shader *rshader = &shader->shader; void *ptr; /* copy new shader */ - if (rshader->processor_type == TGSI_PROCESSOR_VERTEX && shader->bo_fetch == NULL) { - shader->bo_fetch = r600_bo(rctx->radeon, rshader->bc_fetch.ndw * 4, 4096, 0, 0); - if (shader->bo_fetch == NULL) { - return -ENOMEM; - } - ptr = r600_bo_map(rctx->radeon, shader->bo_fetch, 0, NULL); - memcpy(ptr, rshader->bc_fetch.bytecode, rshader->bc_fetch.ndw * 4); - r600_bo_unmap(rctx->radeon, shader->bo_fetch); - } if (shader->bo == NULL) { shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0); if (shader->bo == NULL) { @@ -236,7 +204,6 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s r600_bo_unmap(rctx->radeon, shader->bo); } /* build state */ - rshader->flat_shade = rctx->flatshade; switch (rshader->processor_type) { case TGSI_PROCESSOR_VERTEX: if (rshader->family >= CHIP_CEDAR) { @@ -255,116 +222,51 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s default: return -EINVAL; } - r600_context_pipe_state_set(&rctx->ctx, &shader->rstate); return 0; } -static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_shader *shader = &rshader->shader; - const struct util_format_description *desc; - enum pipe_format resource_format[160]; - unsigned i, nresources = 0; - struct r600_bc *bc = &shader->bc_fetch; - struct r600_bc_cf *cf; - struct r600_bc_vtx *vtx; - - if (shader->processor_type != TGSI_PROCESSOR_VERTEX) - return 0; - /* doing a full memcmp fell over the refcount */ - if ((rshader->vertex_elements.count == rctx->vertex_elements->count) && - (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements, - rctx->vertex_elements->count * sizeof(struct pipe_vertex_element)))) { - return 0; - } - rshader->vertex_elements = *rctx->vertex_elements; - for (i = 0; i < rctx->vertex_elements->count; i++) { - resource_format[nresources++] = rctx->vertex_elements->hw_format[i]; - } - r600_bo_reference(rctx->radeon, &rshader->bo_fetch, NULL); - LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { - switch (cf->inst) { - case V_SQ_CF_WORD1_SQ_CF_INST_VTX: - case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: - LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, 
list) { - desc = util_format_description(resource_format[vtx->buffer_id]); - if (desc == NULL) { - R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]); - return -EINVAL; - } - vtx->dst_sel_x = desc->swizzle[0]; - vtx->dst_sel_y = desc->swizzle[1]; - vtx->dst_sel_z = desc->swizzle[2]; - vtx->dst_sel_w = desc->swizzle[3]; - } - break; - default: - break; - } - } - return r600_bc_build(&shader->bc_fetch); -} - -int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - int r; - - if (shader == NULL) - return -EINVAL; - /* there should be enough input */ - if (rctx->vertex_elements->count < shader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, shader->shader.bc.nresource); - return -EINVAL; - } - r = r600_shader_update(ctx, shader); - if (r) - return r; - return r600_pipe_shader(ctx, shader); -} - -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); +int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals); int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) { + static int dump_shaders = -1; struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + u32 *literals; int r; -//fprintf(stderr, "--------------------------------------------------------------\n"); -//tgsi_dump(tokens, 0); + /* Would like some magic "get_bool_option_once" routine. + */ + if (dump_shaders == -1) + dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); + + if (dump_shaders) { + fprintf(stderr, "--------------------------------------------------------------\n"); + tgsi_dump(tokens, 0); + } shader->shader.family = r600_get_family(rctx->radeon); - r = r600_shader_from_tgsi(tokens, &shader->shader); + r = r600_shader_from_tgsi(tokens, &shader->shader, &literals); if (r) { R600_ERR("translation from TGSI failed !\n"); return r; } r = r600_bc_build(&shader->shader.bc); + free(literals); if (r) { R600_ERR("building bytecode failed !\n"); return r; } - if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) { - r = r600_bc_build(&shader->shader.bc_fetch); - if (r) { - R600_ERR("building bytecode failed !\n"); - return r; - } + if (dump_shaders) { + r600_bc_dump(&shader->shader.bc); + fprintf(stderr, "______________________________________________________________\n"); } -//fprintf(stderr, "______________________________________________________________\n"); - return 0; + return r600_pipe_shader(ctx, shader); } -void -r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) +void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; r600_bo_reference(rctx->radeon, &shader->bo, NULL); - r600_bc_clear(&shader->shader.bc); - - /* FIXME: is there more stuff to free? 
*/ } /* @@ -381,9 +283,7 @@ struct r600_shader_ctx { unsigned temp_reg; struct r600_shader_tgsi_instruction *inst_info; struct r600_bc *bc; - struct r600_bc *bc_fetch; struct r600_shader *shader; - u32 value[4]; u32 *literals; u32 nliterals; u32 max_driver_temp_used; @@ -501,9 +401,7 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) static int tgsi_declaration(struct r600_shader_ctx *ctx) { struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; - struct r600_bc_vtx vtx; unsigned i; - int r; switch (d->Declaration.File) { case TGSI_FILE_INPUT: @@ -513,26 +411,6 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->input[i].interpolate = d->Declaration.Interpolate; ctx->shader->input[i].centroid = d->Declaration.Centroid; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; - if (ctx->type == TGSI_PROCESSOR_VERTEX) { - /* turn input into fetch */ - memset(&vtx, 0, sizeof(struct r600_bc_vtx)); - vtx.inst = 0; - vtx.fetch_type = 0; - vtx.buffer_id = i; - /* register containing the index into the buffer */ - vtx.src_gpr = 0; - vtx.src_sel_x = 0; - vtx.mega_fetch_count = 0x1F; - vtx.dst_gpr = ctx->shader->input[i].gpr; - vtx.dst_sel_x = 0; - vtx.dst_sel_y = 1; - vtx.dst_sel_z = 2; - vtx.dst_sel_w = 3; - vtx.use_const_fields = 1; - r = r600_bc_add_vtx(ctx->bc_fetch, &vtx); - if (r) - return r; - } if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) { /* turn input into interpolate on EG */ if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { @@ -614,7 +492,7 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx) return ctx->num_interp_gpr; } -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) +int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals) { struct tgsi_full_immediate *immediate; struct r600_shader_ctx ctx; @@ -624,7 +502,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s int i, r = 0, pos0; ctx.bc = &shader->bc; - ctx.bc_fetch = &shader->bc_fetch; ctx.shader = shader; r = r600_bc_init(ctx.bc, shader->family); if (r) @@ -634,19 +511,15 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s tgsi_parse_init(&ctx.parse, tokens); ctx.type = ctx.parse.FullHeader.Processor.Processor; shader->processor_type = ctx.type; - if (shader->processor_type == TGSI_PROCESSOR_VERTEX) { - r = r600_bc_init(ctx.bc_fetch, shader->family); - if (r) - return r; - ctx.bc_fetch->type = -1; - } ctx.bc->type = shader->processor_type; /* register allocations */ /* Values [0,127] correspond to GPR[0..127]. * Values [128,159] correspond to constant buffer bank 0 * Values [160,191] correspond to constant buffer bank 1 - * Values [256,511] correspond to cfile constants c[0..255]. + * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) + * Values [256,287] correspond to constant buffer bank 2 (EG) + * Values [288,319] correspond to constant buffer bank 3 (EG) * Other special values are shown in the list below. * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) @@ -680,9 +553,11 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + ctx.info.file_count[TGSI_FILE_OUTPUT]; - ctx.file_offset[TGSI_FILE_CONSTANT] = 128; + /* Outside the GPR range. 
This will be translated to one of the + * kcache banks later. */ + ctx.file_offset[TGSI_FILE_CONSTANT] = 512; - ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; + ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + ctx.info.file_count[TGSI_FILE_TEMPORARY]; @@ -725,9 +600,8 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s r = ctx.inst_info->process(&ctx); if (r) goto out_err; - r = r600_bc_add_literal(ctx.bc, ctx.value); - if (r) - goto out_err; + break; + case TGSI_TOKEN_TYPE_PROPERTY: break; default: R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); @@ -840,21 +714,13 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); } } - /* add return to fetch shader */ - if (ctx.type == TGSI_PROCESSOR_VERTEX) { - if (ctx.bc->chiprev == CHIPREV_EVERGREEN) { - r600_bc_add_cfinst(ctx.bc_fetch, EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN); - } else { - r600_bc_add_cfinst(ctx.bc_fetch, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); - } - } /* add output to bytecode */ for (i = 0; i < noutput; i++) { r = r600_bc_add_output(ctx.bc, &output[i]); if (r) goto out_err; } - free(ctx.literals); + *literals = ctx.literals; tgsi_parse_free(&ctx.parse); return 0; out_err: @@ -878,22 +744,29 @@ static int tgsi_src(struct r600_shader_ctx *ctx, const struct tgsi_full_src_register *tgsi_src, struct r600_bc_alu_src *r600_src) { - int index; memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); - r600_src->sel = tgsi_src->Register.Index; - if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { - r600_src->sel = 0; - index = tgsi_src->Register.Index; - ctx->value[0] = ctx->literals[index * 4 + 0]; - ctx->value[1] = ctx->literals[index * 4 + 1]; - ctx->value[2] = ctx->literals[index * 4 + 2]; - ctx->value[3] = ctx->literals[index * 4 + 3]; - } - if (tgsi_src->Register.Indirect) - r600_src->rel = V_SQ_REL_RELATIVE; r600_src->neg = tgsi_src->Register.Negate; r600_src->abs = tgsi_src->Register.Absolute; - r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; + if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { + int index; + if((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && + (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && + (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { + + index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; + r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); + if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) + return 0; + } + index = tgsi_src->Register.Index; + r600_src->sel = V_SQ_ALU_SRC_LITERAL; + r600_src->value = ctx->literals + index * 4; + } else { + if (tgsi_src->Register.Indirect) + r600_src->rel = V_SQ_REL_RELATIVE; + r600_src->sel = tgsi_src->Register.Index; + r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; + } return 0; } @@ -981,18 +854,19 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_ int i, j, k, nliteral, r; for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { - if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { + if (r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) { nliteral++; } } for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { - if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { + if (j > 0 && r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) { int treg = r600_get_temp(ctx); for (k = 0; k < 4; 
k++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = r600_src[i].sel; alu.src[0].chan = k; + alu.src[0].value = r600_src[i].value; alu.dst.sel = treg; alu.dst.chan = k; alu.dst.write = 1; @@ -1002,9 +876,6 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_ if (r) return r; } - r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]); - if (r) - return r; r600_src[i].sel = treg; j--; } @@ -1012,19 +883,25 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_ return 0; } -static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) +static int tgsi_last_instruction(unsigned writemask) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; - struct r600_bc_alu alu; - int i, j, r; - int lasti = 0; + int i, lasti = 0; for (i = 0; i < 4; i++) { - if (inst->Dst[0].Register.WriteMask & (1 << i)) { + if (writemask & (1 << i)) { lasti = i; } } + return lasti; +} + +static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu_src r600_src[3]; + struct r600_bc_alu alu; + int i, j, r; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); r = tgsi_split_constant(ctx, r600_src); if (r) @@ -1093,12 +970,14 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx) static int tgsi_setup_trig(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) { + static float half_inv_pi = 1.0 /(3.1415926535 * 2); + static float double_pi = 3.1415926535 * 2; + static float neg_pi = -3.1415926535; + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; int r; - uint32_t lit_vals[4]; struct r600_bc_alu alu; - memset(lit_vals, 0, 4*4); r = tgsi_split_constant(ctx, r600_src); if (r) return r; @@ -1106,9 +985,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, if (r) return r; - lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); - lit_vals[1] = fui(0.5f); - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -1122,15 +998,13 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; - alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = (uint32_t *)&half_inv_pi; + alu.src[2].sel = V_SQ_ALU_SRC_0_5; alu.src[2].chan = 1; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, lit_vals); - if (r) - return r; memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); @@ -1146,14 +1020,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, if (r) return r; - if (ctx->bc->chiprev == CHIPREV_R600) { - lit_vals[0] = fui(3.1415926535897f * 2.0f); - lit_vals[1] = fui(-3.1415926535897f); - } else { - lit_vals[0] = fui(1.0f); - lit_vals[1] = fui(-0.5f); - } - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -1169,13 +1035,20 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.src[1].chan = 0; alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 1; + + if (ctx->bc->chiprev == CHIPREV_R600) { + alu.src[1].value = (uint32_t *)&double_pi; + alu.src[2].value = (uint32_t *)&neg_pi; + } else { + alu.src[1].sel = V_SQ_ALU_SRC_1; + alu.src[2].sel = 
V_SQ_ALU_SRC_0_5; + alu.src[2].neg = 1; + } + alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, lit_vals); - if (r) - return r; return 0; } @@ -1185,7 +1058,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, r; - int lasti = 0; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); r = tgsi_setup_trig(ctx, r600_src); if (r) @@ -1205,10 +1078,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) return r; /* replicate result */ - for (i = 0; i < 4; i++) { - if (inst->Dst[0].Register.WriteMask & (1 << i)) - lasti = i; - } for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; @@ -1295,10 +1164,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* dst.w = 1.0; */ @@ -1319,10 +1184,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } return 0; @@ -1358,9 +1219,6 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* kill must be last in ALU */ ctx->bc->force_add_cf = 1; @@ -1423,10 +1281,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - if (inst->Dst[0].Register.WriteMask & (1 << 2)) { int chan; @@ -1445,10 +1299,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - chan = alu.dst.chan; sel = alu.dst.sel; @@ -1471,9 +1321,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* dst.z = exp(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -1517,9 +1364,6 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* replicate result */ return tgsi_helper_tempx_replicate(ctx); } @@ -1568,9 +1412,6 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* replicate result */ return tgsi_helper_tempx_replicate(ctx); } @@ -1594,12 +1435,9 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc,ctx->value); - if (r) - return r; /* b * LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); if (r) return r; @@ -1611,9 +1449,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc,ctx->value); - if (r) - return r; /* POW(a,b) = EXP2(b * LOG2(a))*/ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -1624,9 +1459,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = 
r600_bc_add_literal(ctx->bc,ctx->value); - if (r) - return r; return tgsi_helper_tempx_replicate(ctx); } @@ -1666,9 +1498,6 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* dst = (-tmp > 0 ? -1 : tmp) */ for (i = 0; i < 4; i++) { @@ -1703,9 +1532,6 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru struct r600_bc_alu alu; int i, r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { @@ -1735,6 +1561,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); r = tgsi_split_constant(ctx, r600_src); if (r) @@ -1742,26 +1569,32 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) r = tgsi_split_literal_constant(ctx, r600_src); if (r) return r; - /* do it in 2 step as op3 doesn't support writemask */ - for (i = 0; i < 4; i++) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { alu.src[j] = r600_src[j]; alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } - alu.dst.sel = ctx->temp_reg; + + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; - if (i == 3) { + if (i == lasti) { alu.last = 1; } r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; } - return tgsi_helper_copy(ctx, inst); + return 0; } static int tgsi_dp(struct r600_shader_ctx *ctx) @@ -1784,9 +1617,13 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) alu.src[j] = r600_src[j]; alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } - alu.dst.sel = ctx->temp_reg; + + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.dst.chan = i; - alu.dst.write = 1; + alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { case TGSI_OPCODE_DP2: @@ -1818,19 +1655,21 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) if (r) return r; } - return tgsi_helper_copy(ctx, inst); + return 0; } static int tgsi_tex(struct r600_shader_ctx *ctx) { + static float one_point_five = 1.5f; struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_tex tex; struct r600_bc_alu alu; unsigned src_gpr; int r, i; int opcode; - boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY; - uint32_t lit_vals[4]; + boolean src_not_temp = + inst->Src[0].Register.File != TGSI_FILE_TEMPORARY && + inst->Src[0].Register.File != TGSI_FILE_INPUT; src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; @@ -1959,6 +1798,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; + alu.src[2].value = (u32*)&one_point_five; alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -1979,6 +1819,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; + alu.src[2].value = (u32*)&one_point_five; alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; @@ -1989,11 +1830,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (r) return r; - lit_vals[0] = fui(1.5f); - - r = 
r600_bc_add_literal(ctx->bc, lit_vals); - if (r) - return r; src_not_temp = FALSE; src_gpr = ctx->temp_reg; } @@ -2066,6 +1902,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); unsigned i; int r; @@ -2075,8 +1912,40 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) r = tgsi_split_literal_constant(ctx, r600_src); if (r) return r; + + /* optimize if it's just an equal balance */ + if(r600_src[0].sel == V_SQ_ALU_SRC_0_5) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); + alu.src[0] = r600_src[1]; + alu.src[0].chan = tgsi_chan(&inst->Src[1], i); + alu.src[1] = r600_src[2]; + alu.src[1].chan = tgsi_chan(&inst->Src[2], i); + alu.omod = 3; + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + + alu.dst.chan = i; + if (i == lasti) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; + } + /* 1 - src0 */ - for (i = 0; i < 4; i++) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); alu.src[0].sel = V_SQ_ALU_SRC_1; @@ -2086,7 +1955,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.src[1].neg = 1; alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; - if (i == 3) { + if (i == lasti) { alu.last = 1; } alu.dst.write = 1; @@ -2094,12 +1963,12 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* (1 - src0) * src2 */ - for (i = 0; i < 4; i++) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; @@ -2108,7 +1977,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.src[1].chan = tgsi_chan(&inst->Src[2], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; - if (i == 3) { + if (i == lasti) { alu.last = 1; } alu.dst.write = 1; @@ -2116,12 +1985,12 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* src0 * src1 + (1 - src0) * src2 */ - for (i = 0; i < 4; i++) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -2131,16 +2000,20 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.src[1].chan = tgsi_chan(&inst->Src[1], i); alu.src[2].sel = ctx->temp_reg; alu.src[2].chan = i; - alu.dst.sel = ctx->temp_reg; + + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.dst.chan = i; - if (i == 3) { + if (i == lasti) { alu.last = 1; } r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; } - return tgsi_helper_copy(ctx, inst); + return 0; } static int tgsi_cmp(struct r600_shader_ctx *ctx) @@ -2148,8 +2021,8 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu_src r600_src[3]; struct 
r600_bc_alu alu; - int use_temp = 0; int i, r; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); r = tgsi_split_constant(ctx, r600_src); if (r) @@ -2158,10 +2031,10 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) if (r) return r; - if (inst->Dst[0].Register.WriteMask != 0xf) - use_temp = 1; + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; - for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); alu.src[0] = r600_src[0]; @@ -2173,24 +2046,19 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) alu.src[2] = r600_src[1]; alu.src[2].chan = tgsi_chan(&inst->Src[1], i); - if (use_temp) - alu.dst.sel = ctx->temp_reg; - else { - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - } + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; - if (i == 3) + if (i == lasti) alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; } - if (use_temp) - return tgsi_helper_copy(ctx, inst); return 0; } @@ -2257,10 +2125,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } for (i = 0; i < 4; i++) { @@ -2318,10 +2182,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } if (use_temp) return tgsi_helper_copy(ctx, inst); @@ -2354,10 +2214,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -2369,10 +2225,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.y = tmp - floor(tmp); */ @@ -2398,9 +2250,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.z = RoughApprox2ToX(tmp);*/ @@ -2421,9 +2270,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.w = 1.0;*/ @@ -2441,9 +2287,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } return tgsi_helper_copy(ctx, inst); } @@ -2473,10 +2316,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -2489,10 +2328,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.y = src.x / (2 ^ floor(log2(src.x))); */ @@ -2515,10 +2350,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); @@ -2534,10 +2365,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -2553,10 +2380,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); @@ -2572,10 +2395,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); @@ -2597,10 +2416,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.z = log2(src);*/ @@ -2622,10 +2437,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.w = 1.0; */ @@ -2644,10 +2455,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } return tgsi_helper_copy(ctx, inst); @@ -2802,8 +2609,25 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) static int pops(struct r600_shader_ctx *ctx, int pops) { - r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); - ctx->bc->cf_last->pop_count = pops; + int alu_pop = 3; + if (ctx->bc->cf_last) { + if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) + alu_pop = 0; + else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) + alu_pop = 1; + } + alu_pop += pops; + if (alu_pop == 1) { + ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; + ctx->bc->force_add_cf = 1; + } else if (alu_pop == 2) { + ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; + ctx->bc->force_add_cf = 1; + } else { + r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); + ctx->bc->cf_last->pop_count = pops; + ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; + } return 0; } diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index cd108da4915..935dd6fe3ab 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -38,7 +38,6 @@ struct r600_shader_io { struct r600_shader { unsigned processor_type; struct r600_bc bc; - boolean flat_shade; unsigned ninput; unsigned noutput; unsigned nlds; @@ -46,9 +45,8 @@ struct r600_shader { struct r600_shader_io output[32]; enum radeon_family family; boolean uses_kill; - struct r600_bc bc_fetch; }; -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); +int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals); #endif diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h index 0573e63dc82..56ed35e8b32 100644 --- a/src/gallium/drivers/r600/r600_sq.h +++ b/src/gallium/drivers/r600/r600_sq.h @@ -74,6 +74,10 @@ #define S_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) & 
0x3) << 30) #define G_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) >> 30) & 0x3) #define C_SQ_CF_ALU_WORD0_KCACHE_MODE0 0x3FFFFFFF +#define V_SQ_CF_KCACHE_NOP 0x00000000 +#define V_SQ_CF_KCACHE_LOCK_1 0x00000001 +#define V_SQ_CF_KCACHE_LOCK_2 0x00000002 +#define V_SQ_CF_KCACHE_LOCK_LOOP_INDEX 0x00000003 #define P_SQ_CF_ALU_WORD1 #define S_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) & 0x3) << 0) #define G_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) >> 0) & 0x3) @@ -187,6 +191,8 @@ #define V_SQ_ALU_SRC_M_1_INT 0x000000FB #define V_SQ_ALU_SRC_0_5 0x000000FC #define V_SQ_ALU_SRC_LITERAL 0x000000FD +#define V_SQ_ALU_SRC_PV 0x000000FE +#define V_SQ_ALU_SRC_PS 0x000000FF #define V_SQ_ALU_SRC_PARAM_BASE 0x000001C0 #define S_SQ_ALU_WORD0_SRC0_REL(x) (((x) & 0x1) << 9) #define G_SQ_ALU_WORD0_SRC0_REL(x) (((x) >> 9) & 0x1) diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index bf4ca057d28..96b02d72b94 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -36,7 +36,6 @@ #include <util/u_pack_color.h> #include <util/u_memory.h> #include <util/u_inlines.h> -#include <util/u_upload_mgr.h> #include <util/u_framebuffer.h> #include <pipebuffer/pb_buffer.h> #include "r600.h" @@ -46,14 +45,164 @@ #include "r600_pipe.h" #include "r600_state_inlines.h" +void r600_polygon_offset_update(struct r600_pipe_context *rctx) +{ + struct r600_pipe_state state; + + state.id = R600_PIPE_STATE_POLYGON_OFFSET; + state.nregs = 0; + if (rctx->rasterizer && rctx->framebuffer.zsbuf) { + float offset_units = rctx->rasterizer->offset_units; + unsigned offset_db_fmt_cntl = 0, depth; + + switch (rctx->framebuffer.zsbuf->texture->format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + depth = -24; + offset_units *= 2.0f; + break; + case PIPE_FORMAT_Z32_FLOAT: + depth = -23; + offset_units *= 1.0f; + offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); + break; + case PIPE_FORMAT_Z16_UNORM: + depth = -16; + offset_units *= 4.0f; + break; + default: + return; + } + /* FIXME some of those reg can be computed with cso */ + offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); + r600_pipe_state_add_reg(&state, + R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&state, + R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET, + fui(offset_units), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&state, + R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE, + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&state, + R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET, + fui(offset_units), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&state, + R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, + offset_db_fmt_cntl, 0xFFFFFFFF, NULL); + r600_context_pipe_state_set(&rctx->ctx, &state); + } +} + +/* FIXME optimize away spi update when it's not needed */ +static void r600_spi_update(struct r600_pipe_context *rctx) +{ + struct r600_pipe_shader *shader = rctx->ps_shader; + struct r600_pipe_state rstate; + struct r600_shader *rshader = &shader->shader; + unsigned i, tmp; + + rstate.nregs = 0; + for (i = 0; i < rshader->ninput; i++) { + tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + if (rshader->input[i].centroid) + tmp |= S_028644_SEL_CENTROID(1); + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + tmp |= S_028644_SEL_LINEAR(1); + + if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || + rshader->input[i].name == 
TGSI_SEMANTIC_BCOLOR || + rshader->input[i].name == TGSI_SEMANTIC_POSITION) { + tmp |= S_028644_FLAT_SHADE(rctx->flatshade); + } + if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && + rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { + tmp |= S_028644_PT_SPRITE_TEX(1); + } + r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); + } + r600_context_pipe_state_set(&rctx->ctx, &rstate); +} + +void r600_vertex_buffer_update(struct r600_pipe_context *rctx) +{ + struct r600_pipe_state *rstate; + struct r600_resource *rbuffer; + struct pipe_vertex_buffer *vertex_buffer; + unsigned i, offset; + + /* we don't update until we know vertex elements */ + if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer) + return; + + if (rctx->vertex_elements->incompatible_layout) { + /* translate rebind new vertex elements so + * return once translated + */ + r600_begin_vertex_translate(rctx); + return; + } + + if (rctx->any_user_vbs) { + r600_upload_user_buffers(rctx); + rctx->any_user_vbs = FALSE; + } + + if (rctx->vertex_elements->vbuffer_need_offset) { + /* one resource per vertex elements */ + rctx->nvs_resource = rctx->vertex_elements->count; + } else { + /* bind vertex buffer once */ + rctx->nvs_resource = rctx->nvertex_buffer; + } + + for (i = 0 ; i < rctx->nvs_resource; i++) { + rstate = &rctx->vs_resource[i]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + + if (rctx->vertex_elements->vbuffer_need_offset) { + /* one resource per vertex elements */ + unsigned vbuffer_index; + vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index; + vertex_buffer = &rctx->vertex_buffer[vbuffer_index]; + rbuffer = (struct r600_resource*)vertex_buffer->buffer; + offset = rctx->vertex_elements->vbuffer_offset[i]; + } else { + /* bind vertex buffer once */ + vertex_buffer = &rctx->vertex_buffer[i]; + rbuffer = (struct r600_resource*)vertex_buffer->buffer; + offset = 0; + } + if (vertex_buffer == NULL || rbuffer == NULL) + continue; + offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); + + r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, + offset, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, + rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, + S_038008_STRIDE(vertex_buffer->stride), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, + 0xC0000000, 0xFFFFFFFF, NULL); + r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); + } +} + static void r600_draw_common(struct r600_drawl *draw) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx; - struct r600_pipe_state *rstate; struct r600_resource *rbuffer; - unsigned i, j, offset, prim; + unsigned prim; u32 vgt_dma_index_type, vgt_draw_initiator, mask; - struct pipe_vertex_buffer *vertex_buffer; struct r600_draw rdraw; struct r600_pipe_state vgt; @@ -76,42 +225,23 @@ static void r600_draw_common(struct r600_drawl *draw) } if (r600_conv_pipe_prim(draw->mode, &prim)) return; - - - /* rebuild vertex shader if input format changed */ - if (r600_pipe_shader_update(&rctx->context, rctx->vs_shader)) + if (unlikely(rctx->ps_shader == NULL)) { 
+ R600_ERR("missing vertex shader\n"); return; - if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader)) + } + if (unlikely(rctx->vs_shader == NULL)) { + R600_ERR("missing vertex shader\n"); + return; + } + /* there should be enough input */ + if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { + R600_ERR("%d resources provided, expecting %d\n", + rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); return; - - for (i = 0 ; i < rctx->vertex_elements->count; i++) { - uint32_t word2, format; - - rstate = &rctx->vs_resource[i]; - rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - - j = rctx->vertex_elements->elements[i].vertex_buffer_index; - vertex_buffer = &rctx->vertex_buffer[j]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->elements[i].src_offset + - vertex_buffer->buffer_offset + - r600_bo_offset(rbuffer->bo); - - format = r600_translate_vertex_data_type(rctx->vertex_elements->hw_format[i]); - - word2 = format | S_038008_STRIDE(vertex_buffer->stride); - - r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, word2, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, 0xC0000000, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); } + r600_spi_update(rctx); + mask = 0; for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { mask |= (0xF << (i * 4)); @@ -126,46 +256,6 @@ static void r600_draw_common(struct r600_drawl *draw) r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); - /* build late state */ - if (rctx->rasterizer && rctx->framebuffer.zsbuf) { - float offset_units = rctx->rasterizer->offset_units; - unsigned offset_db_fmt_cntl = 0, depth; - - switch (rctx->framebuffer.zsbuf->texture->format) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - depth = -24; - offset_units *= 2.0f; - break; - case PIPE_FORMAT_Z32_FLOAT: - depth = -23; - offset_units *= 1.0f; - offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); - break; - case PIPE_FORMAT_Z16_UNORM: - depth = -16; - offset_units *= 4.0f; - break; - default: - return; - } - offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); - r600_pipe_state_add_reg(&vgt, - R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, - R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, - R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, - R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, - R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, - offset_db_fmt_cntl, 0xFFFFFFFF, NULL); - } 
r600_context_pipe_state_set(&rctx->ctx, &vgt); rdraw.vgt_num_indices = draw->count; @@ -185,17 +275,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_drawl draw; - boolean translate = FALSE; - if (rctx->vertex_elements->incompatible_layout) { - r600_begin_vertex_translate(rctx); - translate = TRUE; - } - - if (rctx->any_user_vbs) { - r600_upload_user_buffers(rctx); - rctx->any_user_vbs = FALSE; - } memset(&draw, 0, sizeof(struct r600_drawl)); draw.ctx = ctx; draw.mode = info->mode; @@ -226,9 +306,6 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) } r600_draw_common(&draw); - if (translate) - r600_end_vertex_translate(rctx); - pipe_resource_reference(&draw.index_buffer, NULL); } @@ -603,9 +680,9 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | S_038010_REQUEST_SIZE(1) | - S_038010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL); + S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, - S_038014_LAST_LEVEL(state->last_level) | + S_038014_LAST_LEVEL(state->u.tex.last_level) | S_038014_BASE_ARRAY(0) | S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, @@ -824,10 +901,11 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta struct r600_resource_texture *rtex; struct r600_resource *rbuffer; struct r600_surface *surf; - unsigned level = state->cbufs[cb]->level; + unsigned level = state->cbufs[cb]->u.tex.level; unsigned pitch, slice; unsigned color_info; unsigned format, swap, ntype; + unsigned offset; const struct util_format_description *desc; struct r600_bo *bo[3]; @@ -838,6 +916,9 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta bo[1] = rbuffer->bo; bo[2] = rbuffer->bo; + /* XXX quite sure for dx10+ hw don't need any offset hacks */ + offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture, + level, state->cbufs[cb]->u.tex.first_layer); pitch = rtex->pitch_in_pixels[level] / 8 - 1; slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; ntype = 0; @@ -857,7 +938,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta r600_pipe_state_add_reg(rstate, R_028040_CB_COLOR0_BASE + cb * 4, - (state->cbufs[cb]->offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + (offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); r600_pipe_state_add_reg(rstate, R_0280A0_CB_COLOR0_INFO + cb * 4, color_info, 0xFFFFFFFF, bo[0]); @@ -888,11 +969,12 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta struct r600_surface *surf; unsigned level; unsigned pitch, slice, format; + unsigned offset; if (state->zsbuf == NULL) return; - level = state->zsbuf->level; + level = state->zsbuf->u.tex.level; surf = (struct r600_surface *)state->zsbuf; rtex = (struct r600_resource_texture*)state->zsbuf->texture; @@ -902,12 +984,15 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta rtex->depth = 1; rbuffer = &rtex->resource; + /* XXX quite sure for dx10+ hw don't need any offset hacks */ + offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture, + level, 
state->zsbuf->u.tex.first_layer); pitch = rtex->pitch_in_pixels[level] / 8 - 1; slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE, - (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028000_DB_DEPTH_SIZE, S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice), 0xFFFFFFFF, NULL); @@ -934,8 +1019,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, util_copy_framebuffer_state(&rctx->framebuffer, state); - rctx->pframebuffer = &rctx->framebuffer; - /* build states */ for (int i = 0; i < state->nr_cbufs; i++) { r600_cb(rctx, rstate, state, i); @@ -1015,6 +1098,10 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate; r600_context_pipe_state_set(&rctx->ctx, rstate); + + if (state->zsbuf) { + r600_polygon_offset_update(rctx); + } } static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, @@ -1022,6 +1109,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_resource *rbuffer = (struct r600_resource*)buffer; + uint32_t offset; /* Note that the state tracker can unbind constant buffers by * passing NULL here. @@ -1030,6 +1118,8 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint return; } + r600_upload_const_buffer(rctx, buffer, &offset); + switch (shader) { case PIPE_SHADER_VERTEX: rctx->vs_const_buffer.nregs = 0; @@ -1039,7 +1129,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&rctx->vs_const_buffer, R_028980_ALU_CONST_CACHE_VS_0, - r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo); + (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); break; case PIPE_SHADER_FRAGMENT: @@ -1050,7 +1140,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&rctx->ps_const_buffer, R_028940_ALU_CONST_CACHE_PS_0, - r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo); + (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); break; default: diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 55bc5d0d22b..3603376f738 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -58,6 +58,12 @@ void r600_bind_rs_state(struct pipe_context *ctx, void *state) rctx->states[rs->rstate.id] = &rs->rstate; r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); + + if (rctx->family >= CHIP_CEDAR) { + evergreen_polygon_offset_update(rctx); + } else { + r600_polygon_offset_update(rctx); + } } void r600_delete_rs_state(struct pipe_context *ctx, void *state) @@ -113,8 +119,23 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_vertex_element *v = (struct r600_vertex_element*)state; + /* delete previous translated 
vertex elements */ + if (rctx->tran.new_velems) { + r600_end_vertex_translate(rctx); + } + rctx->vertex_elements = v; if (v) { + rctx->states[v->rstate.id] = &v->rstate; + r600_context_pipe_state_set(&rctx->ctx, &v->rstate); + if (rctx->family >= CHIP_CEDAR) { + evergreen_vertex_buffer_update(rctx); + } else { + r600_vertex_buffer_update(rctx); + } + } + + if (v) { // rctx->vs_rebuild = TRUE; } } @@ -122,11 +143,16 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) void r600_delete_vertex_element(struct pipe_context *ctx, void *state) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_vertex_element *v = (struct r600_vertex_element*)state; - FREE(state); - + if (rctx->states[v->rstate.id] == &v->rstate) { + rctx->states[v->rstate.id] = NULL; + } if (rctx->vertex_elements == state) rctx->vertex_elements = NULL; + + r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL); + FREE(state); } @@ -153,8 +179,16 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, struct pipe_vertex_buffer *vbo; unsigned max_index = (unsigned)-1; - for (int i = 0; i < rctx->nvertex_buffer; i++) { - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); + if (rctx->family >= CHIP_CEDAR) { + for (int i = 0; i < rctx->nvertex_buffer; i++) { + pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); + evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); + } + } else { + for (int i = 0; i < rctx->nvertex_buffer; i++) { + pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); + r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); + } } memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); @@ -162,20 +196,29 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, vbo = (struct pipe_vertex_buffer*)&buffers[i]; rctx->vertex_buffer[i].buffer = NULL; + if (buffers[i].buffer == NULL) + continue; if (r600_buffer_is_user_buffer(buffers[i].buffer)) rctx->any_user_vbs = TRUE; pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); + /* The stride of zero means we will be fetching only the first + * vertex, so don't care about max_index. 
*/ + if (!vbo->stride) + continue; + if (vbo->max_index == ~0) { - if (!vbo->stride) - vbo->max_index = 1; - else - vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; + vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; } max_index = MIN2(vbo->max_index, max_index); } rctx->nvertex_buffer = count; rctx->vb_max_index = max_index; + if (rctx->family >= CHIP_CEDAR) { + evergreen_vertex_buffer_update(rctx); + } else { + r600_vertex_buffer_update(rctx); + } } @@ -186,9 +229,10 @@ void *r600_create_vertex_elements(struct pipe_context *ctx, unsigned count, const struct pipe_vertex_element *elements) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); - int i; enum pipe_format *format; + int i; assert(count < 32); if (!v) @@ -210,12 +254,16 @@ void *r600_create_vertex_elements(struct pipe_context *ctx, } v->incompatible_layout = v->incompatible_layout || - v->elements[i].src_format != v->hw_format[i] || - v->elements[i].src_offset % 4 != 0; + v->elements[i].src_format != v->hw_format[i]; v->hw_format_size[i] = align(util_format_get_blocksize(v->hw_format[i]), 4); } + if (r600_vertex_elements_build_fetch_shader(rctx, v)) { + FREE(v); + return NULL; + } + return v; } @@ -238,6 +286,9 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state) /* TODO delete old shader */ rctx->ps_shader = (struct r600_pipe_shader *)state; + if (state) { + r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_shader->rstate); + } } void r600_bind_vs_shader(struct pipe_context *ctx, void *state) @@ -246,6 +297,9 @@ void r600_bind_vs_shader(struct pipe_context *ctx, void *state) /* TODO delete old shader */ rctx->vs_shader = (struct r600_pipe_shader *)state; + if (state) { + r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_shader->rstate); + } } void r600_delete_ps_shader(struct pipe_context *ctx, void *state) diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 781612af570..d994196e19d 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -305,6 +305,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_0280A0_SWAP_STD; case PIPE_FORMAT_L8A8_UNORM: + return V_0280A0_SWAP_ALT; case PIPE_FORMAT_R8G8_UNORM: return V_0280A0_SWAP_STD; diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 8ecd434a43a..e2745624575 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -45,14 +45,10 @@ static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_t { struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; struct pipe_resource *texture = transfer->resource; - struct pipe_subresource subdst; - subdst.face = 0; - subdst.level = 0; ctx->resource_copy_region(ctx, rtransfer->staging_texture, - subdst, 0, 0, 0, texture, transfer->sr, - transfer->box.x, transfer->box.y, transfer->box.z, - transfer->box.width, transfer->box.height); + 0, 0, 0, 0, texture, transfer->level, + &transfer->box); } @@ -61,34 +57,32 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600 { struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; struct pipe_resource *texture = transfer->resource; - struct pipe_subresource subsrc; - - subsrc.face = 0; - subsrc.level = 0; - ctx->resource_copy_region(ctx, texture, 
transfer->sr, + struct pipe_box sbox; + + sbox.x = sbox.y = sbox.z = 0; + sbox.width = transfer->box.width; + sbox.height = transfer->box.height; + /* XXX that might be wrong */ + sbox.depth = 1; + ctx->resource_copy_region(ctx, texture, transfer->level, transfer->box.x, transfer->box.y, transfer->box.z, - rtransfer->staging_texture, subsrc, - 0, 0, 0, - transfer->box.width, transfer->box.height); + rtransfer->staging_texture, + 0, &sbox); ctx->flush(ctx, 0, NULL); } -static unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, - unsigned level, unsigned zslice, - unsigned face) +unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, + unsigned level, unsigned layer) { unsigned offset = rtex->offset[level]; switch (rtex->resource.base.b.target) { case PIPE_TEXTURE_3D: - assert(face == 0); - return offset + zslice * rtex->layer_size[level]; case PIPE_TEXTURE_CUBE: - assert(zslice == 0); - return offset + face * rtex->layer_size[level]; + return offset + layer * rtex->layer_size[level]; default: - assert(zslice == 0 && face == 0); + assert(layer == 0); return offset; } } @@ -175,10 +169,7 @@ static unsigned r600_texture_get_stride(struct pipe_screen *screen, struct r600_resource_texture *rtex, unsigned level) { - struct r600_screen* rscreen = (struct r600_screen *)screen; struct pipe_resource *ptex = &rtex->resource.base.b; - struct radeon *radeon = (struct radeon *)screen->winsys; - enum chip_class chipc = r600_get_family_class(radeon); unsigned width, stride, tile_width; if (rtex->pitch_override) @@ -212,11 +203,10 @@ static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen, } /* Get a width in pixels from a stride in bytes. */ -static unsigned pitch_to_width(enum pipe_format format, - unsigned pitch_in_bytes) +static unsigned pitch_to_width(enum pipe_format format, unsigned pitch_in_bytes) { - return (pitch_in_bytes / util_format_get_blocksize(format)) * - util_format_get_blockwidth(format); + return (pitch_in_bytes / util_format_get_blocksize(format)) * + util_format_get_blockwidth(format); } static void r600_texture_set_array_mode(struct pipe_screen *screen, @@ -335,12 +325,12 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, const struct pipe_resource *templ) { unsigned array_mode = 0; - static int force_tiling = -1; + static int force_tiling = -1; - /* Would like some magic "get_bool_option_once" routine. + /* Would like some magic "get_bool_option_once" routine. 
*/ if (force_tiling == -1) - force_tiling = debug_get_bool_option("R600_FORCE_TILING", FALSE); + force_tiling = debug_get_bool_option("R600_FORCE_TILING", FALSE); if (force_tiling) { if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && @@ -371,8 +361,8 @@ static void r600_texture_destroy(struct pipe_screen *screen, } static boolean r600_texture_get_handle(struct pipe_screen* screen, - struct pipe_resource *ptex, - struct winsys_handle *whandle) + struct pipe_resource *ptex, + struct winsys_handle *whandle) { struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; struct r600_resource *resource = &rtex->resource; @@ -382,36 +372,39 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, rtex->pitch_in_bytes[0], whandle); } -static struct pipe_surface *r600_get_tex_surface(struct pipe_screen *screen, +static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, struct pipe_resource *texture, - unsigned face, unsigned level, - unsigned zslice, unsigned flags) + const struct pipe_surface *surf_tmpl) { struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; struct r600_surface *surface = CALLOC_STRUCT(r600_surface); - unsigned offset, tile_height; + unsigned tile_height; + unsigned level = surf_tmpl->u.tex.level; + assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); if (surface == NULL) return NULL; - offset = r600_texture_get_offset(rtex, level, zslice, face); + /* XXX no offset */ +/* offset = r600_texture_get_offset(rtex, level, surf_tmpl->u.tex.first_layer);*/ pipe_reference_init(&surface->base.reference, 1); pipe_resource_reference(&surface->base.texture, texture); - surface->base.format = texture->format; + surface->base.context = pipe; + surface->base.format = surf_tmpl->format; surface->base.width = mip_minify(texture->width0, level); surface->base.height = mip_minify(texture->height0, level); - surface->base.offset = offset; - surface->base.usage = flags; - surface->base.zslice = zslice; + surface->base.usage = surf_tmpl->usage; surface->base.texture = texture; - surface->base.face = face; - surface->base.level = level; + surface->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer; + surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; + surface->base.u.tex.level = level; - tile_height = r600_get_height_alignment(screen, rtex->array_mode[level]); + tile_height = r600_get_height_alignment(pipe->screen, rtex->array_mode[level]); surface->aligned_height = align(surface->base.height, tile_height); return &surface->base; } -static void r600_tex_surface_destroy(struct pipe_surface *surface) +static void r600_surface_destroy(struct pipe_context *pipe, + struct pipe_surface *surface) { pipe_resource_reference(&surface->texture, NULL); FREE(surface); @@ -444,14 +437,12 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, static unsigned int r600_texture_is_referenced(struct pipe_context *context, struct pipe_resource *texture, - unsigned face, unsigned level) + unsigned level, int layer) { /* FIXME */ return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; } -int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture); - int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture) { @@ -483,7 +474,7 @@ int r600_texture_depth_flush(struct pipe_context *ctx, out: /* XXX: only do this if the depth texture has actually changed: */ - r600_blit_uncompress_depth_ptr(ctx, rtex); + r600_blit_uncompress_depth(ctx, 
rtex); return 0; } @@ -491,7 +482,7 @@ out: */ static INLINE unsigned u_box_volume( const struct pipe_box *box ) { - return box->width * box->depth * box->height; + return box->width * box->depth * box->height; }; @@ -499,44 +490,44 @@ static INLINE unsigned u_box_volume( const struct pipe_box *box ) * If so, don't use a staging resource. */ static boolean permit_hardware_blit(struct pipe_screen *screen, - struct pipe_resource *res) + struct pipe_resource *res) { - unsigned bind; + unsigned bind; - if (util_format_is_depth_or_stencil(res->format)) - bind = PIPE_BIND_DEPTH_STENCIL; - else - bind = PIPE_BIND_RENDER_TARGET; + if (util_format_is_depth_or_stencil(res->format)) + bind = PIPE_BIND_DEPTH_STENCIL; + else + bind = PIPE_BIND_RENDER_TARGET; /* See r600_resource_copy_region: there is something wrong - * with depth resource copies at the moment so avoid them for - * now. - */ + * with depth resource copies at the moment so avoid them for + * now. + */ if (util_format_get_component_bits(res->format, - UTIL_FORMAT_COLORSPACE_ZS, - 0) != 0) - return FALSE; - - if (!screen->is_format_supported(screen, - res->format, - res->target, - res->nr_samples, - bind, 0)) - return FALSE; - - if (!screen->is_format_supported(screen, - res->format, - res->target, - res->nr_samples, - PIPE_BIND_SAMPLER_VIEW, 0)) - return FALSE; - - return TRUE; + UTIL_FORMAT_COLORSPACE_ZS, + 0) != 0) + return FALSE; + + if (!screen->is_format_supported(screen, + res->format, + res->target, + res->nr_samples, + bind, 0)) + return FALSE; + + if (!screen->is_format_supported(screen, + res->format, + res->target, + res->nr_samples, + PIPE_BIND_SAMPLER_VIEW, 0)) + return FALSE; + + return TRUE; } struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, struct pipe_resource *texture, - struct pipe_subresource sr, + unsigned level, unsigned usage, const struct pipe_box *box) { @@ -556,37 +547,36 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, if (rtex->tiled) use_staging_texture = TRUE; - if ((usage & PIPE_TRANSFER_READ) && - u_box_volume(box) > 1024) - use_staging_texture = TRUE; - - /* XXX: Use a staging texture for uploads if the underlying BO - * is busy. No interface for checking that currently? so do - * it eagerly whenever the transfer doesn't require a readback - * and might block. - */ - if ((usage & PIPE_TRANSFER_WRITE) && - !(usage & (PIPE_TRANSFER_READ | - PIPE_TRANSFER_DONTBLOCK | - PIPE_TRANSFER_UNSYNCHRONIZED))) - use_staging_texture = TRUE; - - if (!permit_hardware_blit(ctx->screen, texture) || - (texture->flags & R600_RESOURCE_FLAG_TRANSFER)) - use_staging_texture = FALSE; + if ((usage & PIPE_TRANSFER_READ) && u_box_volume(box) > 1024) + use_staging_texture = TRUE; + + /* XXX: Use a staging texture for uploads if the underlying BO + * is busy. No interface for checking that currently? so do + * it eagerly whenever the transfer doesn't require a readback + * and might block. 
+ */ + if ((usage & PIPE_TRANSFER_WRITE) && + !(usage & (PIPE_TRANSFER_READ | + PIPE_TRANSFER_DONTBLOCK | + PIPE_TRANSFER_UNSYNCHRONIZED))) + use_staging_texture = TRUE; + + if (!permit_hardware_blit(ctx->screen, texture) || + (texture->flags & R600_RESOURCE_FLAG_TRANSFER)) + use_staging_texture = FALSE; trans = CALLOC_STRUCT(r600_transfer); if (trans == NULL) return NULL; pipe_resource_reference(&trans->transfer.resource, texture); - trans->transfer.sr = sr; + trans->transfer.level = level; trans->transfer.usage = usage; trans->transfer.box = *box; if (rtex->depth) { - /* XXX: only readback the rectangle which is being mapped? - */ - /* XXX: when discard is true, no need to read back from depth texture - */ + /* XXX: only readback the rectangle which is being mapped? + */ + /* XXX: when discard is true, no need to read back from depth texture + */ r = r600_texture_depth_flush(ctx, texture); if (r < 0) { R600_ERR("failed to create temporary texture to hold untiled copy\n"); @@ -600,6 +590,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, resource.width0 = box->width; resource.height0 = box->height; resource.depth0 = 1; + resource.array_size = 1; resource.last_level = 0; resource.nr_samples = 0; resource.usage = PIPE_USAGE_STAGING; @@ -625,7 +616,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, } trans->transfer.stride = - ((struct r600_resource_texture *)trans->staging_texture)->pitch_in_bytes[0]; + ((struct r600_resource_texture *)trans->staging_texture)->pitch_in_bytes[0]; if (usage & PIPE_TRANSFER_READ) { r600_copy_to_staging_texture(ctx, trans); /* Always referenced in the blit. */ @@ -633,8 +624,8 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, } return &trans->transfer; } - trans->transfer.stride = rtex->pitch_in_bytes[sr.level]; - trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face); + trans->transfer.stride = rtex->pitch_in_bytes[level]; + trans->offset = r600_texture_get_offset(rtex, level, box->z); return &trans->transfer; } @@ -747,10 +738,10 @@ struct u_resource_vtbl r600_texture_vtbl = u_default_transfer_inline_write /* transfer_inline_write */ }; -void r600_init_screen_texture_functions(struct pipe_screen *screen) +void r600_init_surface_functions(struct r600_pipe_context *r600) { - screen->get_tex_surface = r600_get_tex_surface; - screen->tex_surface_destroy = r600_tex_surface_destroy; + r600->context.create_surface = r600_create_surface; + r600->context.surface_destroy = r600_surface_destroy; } static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, @@ -851,8 +842,8 @@ uint32_t r600_translate_texformat(enum pipe_format format, case UTIL_FORMAT_COLORSPACE_YUV: yuv_format |= (1 << 30); switch (format) { - case PIPE_FORMAT_UYVY: - case PIPE_FORMAT_YUYV: + case PIPE_FORMAT_UYVY: + case PIPE_FORMAT_YUYV: default: break; } @@ -870,29 +861,29 @@ uint32_t r600_translate_texformat(enum pipe_format format, /* S3TC formats. 
TODO */ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { - static int r600_enable_s3tc = -1; + static int r600_enable_s3tc = -1; - if (r600_enable_s3tc == -1) - r600_enable_s3tc = - debug_get_bool_option("R600_ENABLE_S3TC", FALSE); + if (r600_enable_s3tc == -1) + r600_enable_s3tc = + debug_get_bool_option("R600_ENABLE_S3TC", FALSE); - if (!r600_enable_s3tc) - goto out_unknown; + if (!r600_enable_s3tc) + goto out_unknown; switch (format) { case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: - result = FMT_BC1; - goto out_word4; + result = FMT_BC1; + goto out_word4; case PIPE_FORMAT_DXT3_RGBA: - result = FMT_BC2; - goto out_word4; + result = FMT_BC2; + goto out_word4; case PIPE_FORMAT_DXT5_RGBA: - result = FMT_BC3; - goto out_word4; - default: - goto out_unknown; - } + result = FMT_BC3; + goto out_word4; + default: + goto out_unknown; + } } diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c index 9a07cf2073f..f80fa7af941 100644 --- a/src/gallium/drivers/r600/r600_translate.c +++ b/src/gallium/drivers/r600/r600_translate.c @@ -41,6 +41,8 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx) struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; struct pipe_resource *out_buffer; unsigned i, num_verts; + struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; + void *tmp; /* Initialize the translate key, i.e. the recipe how vertices should be * translated. */ @@ -51,9 +53,7 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx) unsigned output_format_size = ve->hw_format_size[i]; /* Check for support. */ - if (ve->elements[i].src_format == ve->hw_format[i] && - (vb->buffer_offset + ve->elements[i].src_offset) % 4 == 0 && - vb->stride % 4 == 0) { + if (ve->elements[i].src_format == ve->hw_format[i]) { continue; } @@ -125,12 +125,11 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx) /* Unmap all buffers. */ for (i = 0; i < rctx->nvertex_buffer; i++) { if (vb_translated[i]) { - pipe_buffer_unmap(pipe, rctx->vertex_buffer[i].buffer, - vb_transfer[i]); + pipe_buffer_unmap(pipe, vb_transfer[i]); } } - pipe_buffer_unmap(pipe, out_buffer, out_transfer); + pipe_buffer_unmap(pipe, out_transfer); /* Setup the new vertex buffer in the first free slot. */ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { @@ -147,29 +146,23 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx) } /* Save and replace vertex elements. 
*/ - { - struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; - - rctx->tran.saved_velems = rctx->vertex_elements; - - for (i = 0; i < ve->count; i++) { - if (vb_translated[ve->elements[i].vertex_buffer_index]) { - te = &key.element[tr_elem_index[i]]; - new_velems[i].instance_divisor = ve->elements[i].instance_divisor; - new_velems[i].src_format = te->output_format; - new_velems[i].src_offset = te->output_offset; - new_velems[i].vertex_buffer_index = rctx->tran.vb_slot; - } else { - memcpy(&new_velems[i], &ve->elements[i], - sizeof(struct pipe_vertex_element)); - } + for (i = 0; i < ve->count; i++) { + if (vb_translated[ve->elements[i].vertex_buffer_index]) { + te = &key.element[tr_elem_index[i]]; + new_velems[i].instance_divisor = ve->elements[i].instance_divisor; + new_velems[i].src_format = te->output_format; + new_velems[i].src_offset = te->output_offset; + new_velems[i].vertex_buffer_index = rctx->tran.vb_slot; + } else { + memcpy(&new_velems[i], &ve->elements[i], + sizeof(struct pipe_vertex_element)); } - - rctx->tran.new_velems = - pipe->create_vertex_elements_state(pipe, ve->count, new_velems); - pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems); } + tmp = pipe->create_vertex_elements_state(pipe, ve->count, new_velems); + pipe->bind_vertex_elements_state(pipe, tmp); + rctx->tran.new_velems = tmp; + pipe_resource_reference(&out_buffer, NULL); } @@ -177,13 +170,15 @@ void r600_end_vertex_translate(struct r600_pipe_context *rctx) { struct pipe_context *pipe = &rctx->context; + if (rctx->tran.new_velems == NULL) { + return; + } /* Restore vertex elements. */ - pipe->bind_vertex_elements_state(pipe, rctx->tran.saved_velems); pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems); + rctx->tran.new_velems = NULL; /* Delete the now-unused VBO. */ - pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer, - NULL); + pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer, NULL); } void r600_translate_index_buffer(struct r600_pipe_context *r600, @@ -197,14 +192,7 @@ void r600_translate_index_buffer(struct r600_pipe_context *r600, *index_size = 2; *start = 0; break; - case 2: - if (*start % 2 != 0) { - util_rebuild_ushort_elts(&r600->context, index_buffer, 0, *start, count); - *start = 0; - } - break; - case 4: break; } diff --git a/src/gallium/drivers/r600/r600_upload.c b/src/gallium/drivers/r600/r600_upload.c new file mode 100644 index 00000000000..44102ff55b6 --- /dev/null +++ b/src/gallium/drivers/r600/r600_upload.c @@ -0,0 +1,114 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse <[email protected]> + */ +#include <errno.h> +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "r600.h" +#include "r600_pipe.h" +#include "r600_resource.h" + +struct r600_upload { + struct r600_pipe_context *rctx; + struct r600_bo *buffer; + char *ptr; + unsigned size; + unsigned default_size; + unsigned total_alloc_size; + unsigned offset; + unsigned alignment; +}; + +struct r600_upload *r600_upload_create(struct r600_pipe_context *rctx, + unsigned default_size, + unsigned alignment) +{ + struct r600_upload *upload = CALLOC_STRUCT(r600_upload); + + if (upload == NULL) + return NULL; + + upload->rctx = rctx; + upload->size = 0; + upload->default_size = default_size; + upload->alignment = alignment; + upload->ptr = NULL; + upload->buffer = NULL; + upload->total_alloc_size = 0; + + return upload; +} + +void r600_upload_flush(struct r600_upload *upload) +{ + if (upload->buffer) { + r600_bo_reference(upload->rctx->radeon, &upload->buffer, NULL); + } + upload->default_size = MAX2(upload->total_alloc_size, upload->default_size); + upload->total_alloc_size = 0; + upload->size = 0; + upload->offset = 0; + upload->ptr = NULL; + upload->buffer = NULL; +} + +void r600_upload_destroy(struct r600_upload *upload) +{ + r600_upload_flush(upload); + FREE(upload); +} + +int r600_upload_buffer(struct r600_upload *upload, unsigned offset, + unsigned size, struct r600_resource_buffer *in_buffer, + unsigned *out_offset, unsigned *out_size, + struct r600_bo **out_buffer) +{ + unsigned alloc_size = align(size, upload->alignment); + const void *in_ptr = NULL; + + if (upload->offset + alloc_size > upload->size) { + if (upload->size) { + r600_bo_reference(upload->rctx->radeon, &upload->buffer, NULL); + } + upload->size = align(MAX2(upload->default_size, alloc_size), 4096); + upload->total_alloc_size += upload->size; + upload->offset = 0; + upload->buffer = r600_bo(upload->rctx->radeon, upload->size, 4096, PIPE_BIND_VERTEX_BUFFER, 0); + if (upload->buffer == NULL) { + return -ENOMEM; + } + upload->ptr = r600_bo_map(upload->rctx->radeon, upload->buffer, 0, NULL); + } + + in_ptr = in_buffer->user_buffer; + memcpy(upload->ptr + upload->offset, (uint8_t *) in_ptr + offset, size); + *out_offset = upload->offset; + *out_size = upload->size; + *out_buffer = NULL; + r600_bo_reference(upload->rctx->radeon, out_buffer, upload->buffer); + upload->offset += alloc_size; + + return 0; +} diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index 892dee86baf..a7f2f54736e 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -29,8 +29,6 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) { - unsigned i; - bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | @@ -61,18 +59,11 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | + S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) | S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | 
S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) | S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) | S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate); } - if (alu->last) { - if (alu->nliteral && !alu->literal_added) { - R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst); - } - for (i = 0; i < alu->nliteral; i++) { - bc->bytecode[id++] = alu->value[i]; - } - } return 0; } diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index 413da59e559..94e57e40f86 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -707,17 +707,13 @@ rbug_set_sample_mask(struct pipe_context *_pipe, static void rbug_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *_dst, - struct pipe_subresource subdst, + unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, struct pipe_resource *_src, - struct pipe_subresource subsrc, - unsigned srcx, - unsigned srcy, - unsigned srcz, - unsigned width, - unsigned height) + unsigned src_level, + const struct pipe_box *src_box) { struct rbug_context *rb_pipe = rbug_context(_pipe); struct rbug_resource *rb_resource_dst = rbug_resource(_dst); @@ -728,17 +724,13 @@ rbug_resource_copy_region(struct pipe_context *_pipe, pipe->resource_copy_region(pipe, dst, - subdst, + dst_level, dstx, dsty, dstz, src, - subsrc, - srcx, - srcy, - srcz, - width, - height); + src_level, + src_box); } static void @@ -820,8 +812,8 @@ rbug_flush(struct pipe_context *_pipe, static unsigned int rbug_is_resource_referenced(struct pipe_context *_pipe, struct pipe_resource *_resource, - unsigned face, - unsigned level) + unsigned level, + int layer) { struct rbug_context *rb_pipe = rbug_context(_pipe); struct rbug_resource *rb_resource = rbug_resource(_resource); @@ -830,8 +822,8 @@ rbug_is_resource_referenced(struct pipe_context *_pipe, return pipe->is_resource_referenced(pipe, resource, - face, - level); + level, + layer); } static struct pipe_sampler_view * @@ -862,10 +854,40 @@ rbug_context_sampler_view_destroy(struct pipe_context *_pipe, rbug_sampler_view(_view)); } +static struct pipe_surface * +rbug_context_create_surface(struct pipe_context *_pipe, + struct pipe_resource *_resource, + const struct pipe_surface *surf_tmpl) +{ + struct rbug_context *rb_pipe = rbug_context(_pipe); + struct rbug_resource *rb_resource = rbug_resource(_resource); + struct pipe_context *pipe = rb_pipe->pipe; + struct pipe_resource *resource = rb_resource->resource; + struct pipe_surface *result; + + result = pipe->create_surface(pipe, + resource, + surf_tmpl); + + if (result) + return rbug_surface_create(rb_pipe, rb_resource, result); + return NULL; +} + +static void +rbug_context_surface_destroy(struct pipe_context *_pipe, + struct pipe_surface *_surface) +{ + rbug_surface_destroy(rbug_context(_pipe), + rbug_surface(_surface)); +} + + + static struct pipe_transfer * rbug_context_get_transfer(struct pipe_context *_context, struct pipe_resource *_resource, - struct pipe_subresource sr, + unsigned level, unsigned usage, const struct pipe_box *box) { @@ -877,7 +899,7 @@ rbug_context_get_transfer(struct pipe_context *_context, result = context->get_transfer(context, resource, - sr, + level, usage, box); @@ -942,12 +964,12 @@ rbug_context_transfer_unmap(struct pipe_context *_context, static void rbug_context_transfer_inline_write(struct pipe_context *_context, struct pipe_resource *_resource, - struct pipe_subresource sr, + unsigned level, unsigned usage, const 
struct pipe_box *box, const void *data, unsigned stride, - unsigned slice_stride) + unsigned layer_stride) { struct rbug_context *rb_pipe = rbug_context(_context); struct rbug_resource *rb_resource = rbug_resource(_resource); @@ -956,12 +978,12 @@ rbug_context_transfer_inline_write(struct pipe_context *_context, context->transfer_inline_write(context, resource, - sr, + level, usage, box, data, stride, - slice_stride); + layer_stride); } @@ -1042,6 +1064,8 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) rb_pipe->base.is_resource_referenced = rbug_is_resource_referenced; rb_pipe->base.create_sampler_view = rbug_context_create_sampler_view; rb_pipe->base.sampler_view_destroy = rbug_context_sampler_view_destroy; + rb_pipe->base.create_surface = rbug_context_create_surface; + rb_pipe->base.surface_destroy = rbug_context_surface_destroy; rb_pipe->base.get_transfer = rbug_context_get_transfer; rb_pipe->base.transfer_destroy = rbug_context_transfer_destroy; rb_pipe->base.transfer_map = rbug_context_transfer_map; diff --git a/src/gallium/drivers/rbug/rbug_core.c b/src/gallium/drivers/rbug/rbug_core.c index 9dc663b079a..eb772d19d05 100644 --- a/src/gallium/drivers/rbug/rbug_core.c +++ b/src/gallium/drivers/rbug/rbug_core.c @@ -266,9 +266,9 @@ rbug_texture_read(struct rbug_rbug *tr_rbug, struct rbug_header *header, uint32_ tex = tr_tex->resource; t = pipe_get_transfer(context, tex, - gptr->face, gptr->level, gptr->zslice, - PIPE_TRANSFER_READ, - gptr->x, gptr->y, gptr->w, gptr->h); + gptr->level, gptr->face + gptr->zslice, + PIPE_TRANSFER_READ, + gptr->x, gptr->y, gptr->w, gptr->h); map = context->transfer_map(context, t); @@ -279,7 +279,7 @@ rbug_texture_read(struct rbug_rbug *tr_rbug, struct rbug_header *header, uint32_ util_format_get_blocksize(t->resource->format), (uint8_t*)map, t->stride * util_format_get_nblocksy(t->resource->format, - t->box.height), + t->box.height), t->stride, NULL); diff --git a/src/gallium/drivers/rbug/rbug_objects.c b/src/gallium/drivers/rbug/rbug_objects.c index 0979fcff957..7d7cc482ae6 100644 --- a/src/gallium/drivers/rbug/rbug_objects.c +++ b/src/gallium/drivers/rbug/rbug_objects.c @@ -79,7 +79,8 @@ rbug_resource_destroy(struct rbug_resource *rb_resource) struct pipe_surface * -rbug_surface_create(struct rbug_resource *rb_resource, +rbug_surface_create(struct rbug_context *rb_context, + struct rbug_resource *rb_resource, struct pipe_surface *surface) { struct rbug_surface *rb_surface; @@ -108,10 +109,12 @@ error: } void -rbug_surface_destroy(struct rbug_surface *rb_surface) +rbug_surface_destroy(struct rbug_context *rb_context, + struct rbug_surface *rb_surface) { pipe_resource_reference(&rb_surface->base.texture, NULL); - pipe_surface_reference(&rb_surface->surface, NULL); + rb_context->pipe->surface_destroy(rb_context->pipe, + rb_surface->surface); FREE(rb_surface); } diff --git a/src/gallium/drivers/rbug/rbug_objects.h b/src/gallium/drivers/rbug/rbug_objects.h index 49c128d3d1a..3fba3334228 100644 --- a/src/gallium/drivers/rbug/rbug_objects.h +++ b/src/gallium/drivers/rbug/rbug_objects.h @@ -189,11 +189,13 @@ void rbug_resource_destroy(struct rbug_resource *rb_resource); struct pipe_surface * -rbug_surface_create(struct rbug_resource *rb_resource, +rbug_surface_create(struct rbug_context *rb_context, + struct rbug_resource *rb_resource, struct pipe_surface *surface); void -rbug_surface_destroy(struct rbug_surface *rb_surface); +rbug_surface_destroy(struct rbug_context *rb_context, + struct rbug_surface *rb_surface); struct 
pipe_sampler_view * rbug_sampler_view_create(struct rbug_context *rb_context, diff --git a/src/gallium/drivers/rbug/rbug_screen.c b/src/gallium/drivers/rbug/rbug_screen.c index 961df482c29..d635ce575c0 100644 --- a/src/gallium/drivers/rbug/rbug_screen.c +++ b/src/gallium/drivers/rbug/rbug_screen.c @@ -188,40 +188,6 @@ rbug_screen_resource_destroy(struct pipe_screen *screen, rbug_resource_destroy(rbug_resource(_resource)); } -static struct pipe_surface * -rbug_screen_get_tex_surface(struct pipe_screen *_screen, - struct pipe_resource *_resource, - unsigned face, - unsigned level, - unsigned zslice, - unsigned usage) -{ - struct rbug_screen *rb_screen = rbug_screen(_screen); - struct rbug_resource *rb_resource = rbug_resource(_resource); - struct pipe_screen *screen = rb_screen->screen; - struct pipe_resource *resource = rb_resource->resource; - struct pipe_surface *result; - - result = screen->get_tex_surface(screen, - resource, - face, - level, - zslice, - usage); - - if (result) - return rbug_surface_create(rb_resource, result); - return NULL; -} - -static void -rbug_screen_tex_surface_destroy(struct pipe_surface *_surface) -{ - rbug_surface_destroy(rbug_surface(_surface)); -} - - - static struct pipe_resource * rbug_screen_user_buffer_create(struct pipe_screen *_screen, void *ptr, @@ -246,16 +212,18 @@ rbug_screen_user_buffer_create(struct pipe_screen *_screen, static void rbug_screen_flush_frontbuffer(struct pipe_screen *_screen, - struct pipe_surface *_surface, + struct pipe_resource *_resource, + unsigned level, unsigned layer, void *context_private) { struct rbug_screen *rb_screen = rbug_screen(_screen); - struct rbug_surface *rb_surface = rbug_surface(_surface); + struct rbug_resource *rb_resource = rbug_resource(_resource); struct pipe_screen *screen = rb_screen->screen; - struct pipe_surface *surface = rb_surface->surface; + struct pipe_resource *resource = rb_resource->resource; screen->flush_frontbuffer(screen, - surface, + resource, + level, layer, context_private); } @@ -336,8 +304,6 @@ rbug_screen_create(struct pipe_screen *screen) rb_screen->base.resource_from_handle = rbug_screen_resource_from_handle; rb_screen->base.resource_get_handle = rbug_screen_resource_get_handle; rb_screen->base.resource_destroy = rbug_screen_resource_destroy; - rb_screen->base.get_tex_surface = rbug_screen_get_tex_surface; - rb_screen->base.tex_surface_destroy = rbug_screen_tex_surface_destroy; rb_screen->base.user_buffer_create = rbug_screen_user_buffer_create; rb_screen->base.flush_frontbuffer = rbug_screen_flush_frontbuffer; rb_screen->base.fence_reference = rbug_screen_fence_reference; diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index b5d30bc6fc9..f3489c1c793 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -129,6 +129,10 @@ softpipe_destroy( struct pipe_context *pipe ) } } + for (i = 0; i < softpipe->num_vertex_buffers; i++) { + pipe_resource_reference(&softpipe->vertex_buffer[i].buffer, NULL); + } + tgsi_exec_machine_destroy(softpipe->fs_machine); FREE( softpipe ); @@ -145,15 +149,15 @@ softpipe_destroy( struct pipe_context *pipe ) */ static unsigned int softpipe_is_resource_referenced( struct pipe_context *pipe, - struct pipe_resource *texture, - unsigned face, unsigned level) + struct pipe_resource *texture, + unsigned level, int layer) { struct softpipe_context *softpipe = softpipe_context( pipe ); unsigned i; if (texture->target == PIPE_BUFFER) return PIPE_UNREFERENCED; - + /* 
check if any of the bound drawing surfaces are this texture */ if (softpipe->dirty_render_cache) { for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 9361a3df09e..903574b7e19 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -154,9 +154,9 @@ struct softpipe_context { /** TGSI exec things */ struct { - struct sp_sampler_varient *geom_samplers_list[PIPE_MAX_GEOMETRY_SAMPLERS]; - struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_VERTEX_SAMPLERS]; - struct sp_sampler_varient *frag_samplers_list[PIPE_MAX_SAMPLERS]; + struct sp_sampler_variant *geom_samplers_list[PIPE_MAX_GEOMETRY_SAMPLERS]; + struct sp_sampler_variant *vert_samplers_list[PIPE_MAX_VERTEX_SAMPLERS]; + struct sp_sampler_variant *frag_samplers_list[PIPE_MAX_SAMPLERS]; } tgsi; struct tgsi_exec_machine *fs_machine; @@ -192,7 +192,7 @@ softpipe_context( struct pipe_context *pipe ) } void -softpipe_reset_sampler_varients(struct softpipe_context *softpipe); +softpipe_reset_sampler_variants(struct softpipe_context *softpipe); struct pipe_context * softpipe_create_context( struct pipe_screen *, void *priv ); diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 1071011db0e..4258395063b 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -120,8 +120,8 @@ softpipe_flush( struct pipe_context *pipe, boolean softpipe_flush_resource(struct pipe_context *pipe, struct pipe_resource *texture, - unsigned face, unsigned level, + int layer, unsigned flush_flags, boolean read_only, boolean cpu_access, @@ -129,7 +129,7 @@ softpipe_flush_resource(struct pipe_context *pipe, { unsigned referenced; - referenced = pipe->is_resource_referenced(pipe, texture, face, level); + referenced = pipe->is_resource_referenced(pipe, texture, level, layer); if ((referenced & PIPE_REFERENCED_FOR_WRITE) || ((referenced & PIPE_REFERENCED_FOR_READ) && !read_only)) { diff --git a/src/gallium/drivers/softpipe/sp_flush.h b/src/gallium/drivers/softpipe/sp_flush.h index cb97482a71b..22a5ceeb9ec 100644 --- a/src/gallium/drivers/softpipe/sp_flush.h +++ b/src/gallium/drivers/softpipe/sp_flush.h @@ -40,8 +40,8 @@ softpipe_flush(struct pipe_context *pipe, unsigned flags, boolean softpipe_flush_resource(struct pipe_context *pipe, struct pipe_resource *texture, - unsigned face, unsigned level, + int layer, unsigned flush_flags, boolean read_only, boolean cpu_access, diff --git a/src/gallium/targets/dri-noop/swrast_drm_api.c b/src/gallium/drivers/softpipe/sp_limits.h index a99779f1837..a7a24c98d57 100644 --- a/src/gallium/targets/dri-noop/swrast_drm_api.c +++ b/src/gallium/drivers/softpipe/sp_limits.h @@ -1,9 +1,8 @@ /************************************************************************** - * - * Copyright 2009, VMware, Inc. + * + * Copyright 2010 VMware, Inc. * All Rights Reserved. 
- * Copyright 2010 George Sapountzis <[email protected]> - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -11,53 +10,33 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ -#include "pipe/p_compiler.h" -#include "util/u_memory.h" -#include "dri_sw_winsys.h" -#include "noop/noop_public.h" +#ifndef SP_LIMITS_H +#define SP_LIMITS_H -#include "target-helpers/inline_debug_helper.h" -#include "target-helpers/inline_sw_helper.h" -struct pipe_screen * -drisw_create_screen(struct drisw_loader_funcs *lf) -{ - struct sw_winsys *winsys = NULL; - struct pipe_screen *screen = NULL; +#define SP_MAX_TEXTURE_2D_LEVELS 15 /* 16K x 16K */ +#define SP_MAX_TEXTURE_3D_LEVELS 9 /* 512 x 512 x 512 */ - winsys = dri_create_sw_winsys(lf); - if (winsys == NULL) - return NULL; - screen = noop_screen_create(winsys); - if (!screen) - goto fail; +/** Max surface size */ +#define MAX_WIDTH (1 << (SP_MAX_TEXTURE_2D_LEVELS - 1)) +#define MAX_HEIGHT (1 << (SP_MAX_TEXTURE_2D_LEVELS - 1)) - screen = debug_screen_wrap(screen); - return screen; - -fail: - if (winsys) - winsys->destroy(winsys); - - return NULL; -} - -/* vim: set sw=3 ts=8 sts=3 expandtab: */ +#endif /* SP_LIMITS_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 6af1b2d0618..76cfc0bf51c 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -35,6 +35,7 @@ #include "util/u_memory.h" #include "util/u_format.h" #include "sp_context.h" +#include "sp_state.h" #include "sp_quad.h" #include "sp_tile_cache.h" #include "sp_quad_pipe.h" @@ -794,6 +795,9 @@ blend_fallback(struct quad_stage *qs, struct softpipe_context *softpipe = qs->softpipe; const struct pipe_blend_state *blend = softpipe->blend; unsigned cbuf; + boolean write_all; + + write_all = softpipe->fs->color0_writes_all_cbufs; for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { @@ -806,15 +810,19 @@ blend_fallback(struct quad_stage *qs, quads[0]->input.y0); boolean has_dst_alpha = util_format_has_alpha(softpipe->framebuffer.cbufs[cbuf]->format); - uint q, i, j; + uint q, i, j, qbuf; + + qbuf = write_all ? 
0 : cbuf; for (q = 0; q < nr; q++) { struct quad_header *quad = quads[q]; - float (*quadColor)[4] = quad->output.color[cbuf]; + float (*quadColor)[4]; const int itx = (quad->input.x0 & (TILE_SIZE-1)); const int ity = (quad->input.y0 & (TILE_SIZE-1)); - /* get/swizzle dest colors + quadColor = quad->output.color[qbuf]; + + /* get/swizzle dest colors */ for (j = 0; j < QUAD_SIZE; j++) { int x = itx + (j & 1); diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 9b54babdfb7..e19f2e6fc7d 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -268,12 +268,13 @@ softpipe_destroy_screen( struct pipe_screen *screen ) */ static void softpipe_flush_frontbuffer(struct pipe_screen *_screen, - struct pipe_surface *surface, + struct pipe_resource *resource, + unsigned level, unsigned layer, void *context_private) { struct softpipe_screen *screen = softpipe_screen(_screen); struct sw_winsys *winsys = screen->winsys; - struct softpipe_resource *texture = softpipe_resource(surface->texture); + struct softpipe_resource *texture = softpipe_resource(resource); assert(texture->dt); if (texture->dt) diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 525bf23734a..bb19f8cff20 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -74,7 +74,7 @@ struct sp_fragment_shader { boolean origin_lower_left; /**< fragment shader uses lower left position origin? */ boolean pixel_center_integer; /**< fragment shader uses integer pixel center? */ - + boolean color0_writes_all_cbufs; /**< fragment shader writes color0 to all bound cbufs */ void (*prepare)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, struct tgsi_sampler **samplers); diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 3ba4d934fd2..bf4c12701af 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -197,7 +197,7 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) { unsigned i; - softpipe_reset_sampler_varients( softpipe ); + softpipe_reset_sampler_variants( softpipe ); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { struct softpipe_tex_tile_cache *tc = softpipe->tex_cache[i]; diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index b59fbc33ed6..cfa211b60a0 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -43,8 +43,8 @@ struct sp_sampler { struct pipe_sampler_state base; - struct sp_sampler_varient *varients; - struct sp_sampler_varient *current; + struct sp_sampler_variant *variants; + struct sp_sampler_variant *current; }; static struct sp_sampler *sp_sampler( struct pipe_sampler_state *sampler ) @@ -60,7 +60,7 @@ softpipe_create_sampler_state(struct pipe_context *pipe, struct sp_sampler *sp_sampler = CALLOC_STRUCT(sp_sampler); sp_sampler->base = *sampler; - sp_sampler->varients = NULL; + sp_sampler->variants = NULL; return (void *)sp_sampler; } @@ -277,23 +277,24 @@ softpipe_set_geometry_sampler_views(struct pipe_context *pipe, /** - * Find/create an sp_sampler_varient object for sampling the given texture, + * Find/create an sp_sampler_variant object for sampling the given texture, * sampler and tex unit. * * Note that the tex unit is significant. 
We can't re-use a sampler - * varient for multiple texture units because the sampler varient contains + * variant for multiple texture units because the sampler variant contains * the texture object pointer. If the texture object pointer were stored - * somewhere outside the sampler varient, we could re-use samplers for + * somewhere outside the sampler variant, we could re-use samplers for * multiple texture units. */ -static struct sp_sampler_varient * -get_sampler_varient( unsigned unit, +static struct sp_sampler_variant * +get_sampler_variant( unsigned unit, struct sp_sampler *sampler, + struct pipe_sampler_view *view, struct pipe_resource *resource, unsigned processor ) { struct softpipe_resource *sp_texture = softpipe_resource(resource); - struct sp_sampler_varient *v = NULL; + struct sp_sampler_variant *v = NULL; union sp_sampler_key key; /* if this fails, widen the key.unit field and update this assertion */ @@ -303,6 +304,10 @@ get_sampler_varient( unsigned unit, key.bits.is_pot = sp_texture->pot; key.bits.processor = processor; key.bits.unit = unit; + key.bits.swizzle_r = view->swizzle_r; + key.bits.swizzle_g = view->swizzle_g; + key.bits.swizzle_b = view->swizzle_b; + key.bits.swizzle_a = view->swizzle_a; key.bits.pad = 0; if (sampler->current && @@ -311,14 +316,14 @@ get_sampler_varient( unsigned unit, } if (v == NULL) { - for (v = sampler->varients; v; v = v->next) + for (v = sampler->variants; v; v = v->next) if (v->key.value == key.value) break; if (v == NULL) { - v = sp_create_sampler_varient( &sampler->base, key ); - v->next = sampler->varients; - sampler->varients = v; + v = sp_create_sampler_variant( &sampler->base, key ); + v->next = sampler->variants; + sampler->variants = v; } } @@ -328,7 +333,7 @@ get_sampler_varient( unsigned unit, void -softpipe_reset_sampler_varients(struct softpipe_context *softpipe) +softpipe_reset_sampler_variants(struct softpipe_context *softpipe) { int i; @@ -345,12 +350,13 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe) } softpipe->tgsi.vert_samplers_list[i] = - get_sampler_varient( i, + get_sampler_variant( i, sp_sampler(softpipe->vertex_samplers[i]), + softpipe->vertex_sampler_views[i], texture, TGSI_PROCESSOR_VERTEX ); - sp_sampler_varient_bind_texture( softpipe->tgsi.vert_samplers_list[i], + sp_sampler_variant_bind_texture( softpipe->tgsi.vert_samplers_list[i], softpipe->vertex_tex_cache[i], texture ); } @@ -366,13 +372,14 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe) } softpipe->tgsi.geom_samplers_list[i] = - get_sampler_varient( + get_sampler_variant( i, sp_sampler(softpipe->geometry_samplers[i]), + softpipe->geometry_sampler_views[i], texture, TGSI_PROCESSOR_GEOMETRY ); - sp_sampler_varient_bind_texture( + sp_sampler_variant_bind_texture( softpipe->tgsi.geom_samplers_list[i], softpipe->geometry_tex_cache[i], texture ); @@ -389,12 +396,13 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe) } softpipe->tgsi.frag_samplers_list[i] = - get_sampler_varient( i, + get_sampler_variant( i, sp_sampler(softpipe->sampler[i]), + softpipe->sampler_views[i], texture, TGSI_PROCESSOR_FRAGMENT ); - sp_sampler_varient_bind_texture( softpipe->tgsi.frag_samplers_list[i], + sp_sampler_variant_bind_texture( softpipe->tgsi.frag_samplers_list[i], softpipe->tex_cache[i], texture ); } @@ -406,11 +414,11 @@ softpipe_delete_sampler_state(struct pipe_context *pipe, void *sampler) { struct sp_sampler *sp_sampler = (struct sp_sampler *)sampler; - struct sp_sampler_varient *v, *tmp; + struct 
sp_sampler_variant *v, *tmp; - for (v = sp_sampler->varients; v; v = tmp) { + for (v = sp_sampler->variants; v; v = tmp) { tmp = v->next; - sp_sampler_varient_destroy(v); + sp_sampler_variant_destroy(v); } FREE( sampler ); diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c index 7fff338ccea..66ddc565722 100644 --- a/src/gallium/drivers/softpipe/sp_state_shader.c +++ b/src/gallium/drivers/softpipe/sp_state_shader.c @@ -78,6 +78,8 @@ softpipe_create_fs_state(struct pipe_context *pipe, state->origin_lower_left = state->info.properties[i].data[0]; else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COORD_PIXEL_CENTER) state->pixel_center_integer = state->info.properties[i].data[0]; + else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) + state->color0_writes_all_cbufs = state->info.properties[i].data[0]; } return state; diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index 7d8055f2baf..5f4d661abde 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -33,6 +33,7 @@ #include "sp_state.h" #include "util/u_memory.h" +#include "util/u_inlines.h" #include "draw/draw_context.h" @@ -84,8 +85,9 @@ softpipe_set_vertex_buffers(struct pipe_context *pipe, assert(count <= PIPE_MAX_ATTRIBS); - memcpy(softpipe->vertex_buffer, buffers, count * sizeof(buffers[0])); - softpipe->num_vertex_buffers = count; + util_copy_vertex_buffers(softpipe->vertex_buffer, + &softpipe->num_vertex_buffers, + buffers, count); softpipe->dirty |= SP_NEW_VERTEX; diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 2eac4c7a82b..cbc40d4b446 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -545,7 +545,7 @@ wrap_linear_unorm_clamp_to_edge(const float s[4], unsigned size, * derivatives w.r.t X and Y, then compute lambda (level of detail). */ static float -compute_lambda_1d(const struct sp_sampler_varient *samp, +compute_lambda_1d(const struct sp_sampler_variant *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE]) @@ -560,7 +560,7 @@ compute_lambda_1d(const struct sp_sampler_varient *samp, static float -compute_lambda_2d(const struct sp_sampler_varient *samp, +compute_lambda_2d(const struct sp_sampler_variant *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE]) @@ -579,7 +579,7 @@ compute_lambda_2d(const struct sp_sampler_varient *samp, static float -compute_lambda_3d(const struct sp_sampler_varient *samp, +compute_lambda_3d(const struct sp_sampler_variant *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE]) @@ -608,7 +608,7 @@ compute_lambda_3d(const struct sp_sampler_varient *samp, * Since there aren't derivatives to use, just return 0. 
*/ static float -compute_lambda_vert(const struct sp_sampler_varient *samp, +compute_lambda_vert(const struct sp_sampler_variant *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE]) @@ -634,7 +634,7 @@ compute_lambda_vert(const struct sp_sampler_varient *samp, static INLINE const float * -get_texel_2d_no_border(const struct sp_sampler_varient *samp, +get_texel_2d_no_border(const struct sp_sampler_variant *samp, union tex_tile_address addr, int x, int y) { const struct softpipe_tex_cached_tile *tile; @@ -651,7 +651,7 @@ get_texel_2d_no_border(const struct sp_sampler_varient *samp, static INLINE const float * -get_texel_2d(const struct sp_sampler_varient *samp, +get_texel_2d(const struct sp_sampler_variant *samp, union tex_tile_address addr, int x, int y) { const struct pipe_resource *texture = samp->texture; @@ -659,8 +659,7 @@ get_texel_2d(const struct sp_sampler_varient *samp, if (x < 0 || x >= (int) u_minify(texture->width0, level) || y < 0 || y >= (int) u_minify(texture->height0, level)) { - return sp_tex_tile_cache_border_color(samp->cache, - samp->sampler->border_color); + return samp->sampler->border_color; } else { return get_texel_2d_no_border( samp, addr, x, y ); @@ -671,7 +670,7 @@ get_texel_2d(const struct sp_sampler_varient *samp, /* Gather a quad of adjacent texels within a tile: */ static INLINE void -get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_varient *samp, +get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_variant *samp, union tex_tile_address addr, unsigned x, unsigned y, const float *out[4]) @@ -695,7 +694,7 @@ get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_varient *samp, /* Gather a quad of potentially non-adjacent texels: */ static INLINE void -get_texel_quad_2d_no_border(const struct sp_sampler_varient *samp, +get_texel_quad_2d_no_border(const struct sp_sampler_variant *samp, union tex_tile_address addr, int x0, int y0, int x1, int y1, @@ -710,7 +709,7 @@ get_texel_quad_2d_no_border(const struct sp_sampler_varient *samp, /* Can involve a lot of unnecessary checks for border color: */ static INLINE void -get_texel_quad_2d(const struct sp_sampler_varient *samp, +get_texel_quad_2d(const struct sp_sampler_variant *samp, union tex_tile_address addr, int x0, int y0, int x1, int y1, @@ -724,10 +723,10 @@ get_texel_quad_2d(const struct sp_sampler_varient *samp, -/* 3d varients: +/* 3d variants: */ static INLINE const float * -get_texel_3d_no_border(const struct sp_sampler_varient *samp, +get_texel_3d_no_border(const struct sp_sampler_variant *samp, union tex_tile_address addr, int x, int y, int z) { const struct softpipe_tex_cached_tile *tile; @@ -745,7 +744,7 @@ get_texel_3d_no_border(const struct sp_sampler_varient *samp, static INLINE const float * -get_texel_3d(const struct sp_sampler_varient *samp, +get_texel_3d(const struct sp_sampler_variant *samp, union tex_tile_address addr, int x, int y, int z) { const struct pipe_resource *texture = samp->texture; @@ -754,8 +753,7 @@ get_texel_3d(const struct sp_sampler_varient *samp, if (x < 0 || x >= (int) u_minify(texture->width0, level) || y < 0 || y >= (int) u_minify(texture->height0, level) || z < 0 || z >= (int) u_minify(texture->depth0, level)) { - return sp_tex_tile_cache_border_color(samp->cache, - samp->sampler->border_color); + return samp->sampler->border_color; } else { return get_texel_3d_no_border( samp, addr, x, y, z ); @@ -800,7 +798,7 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control 
control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); unsigned j; unsigned level = samp->level; unsigned xpot = pot_level_size(samp->xpot, level); @@ -863,7 +861,7 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); unsigned j; unsigned level = samp->level; unsigned xpot = pot_level_size(samp->xpot, level); @@ -907,7 +905,7 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); unsigned j; unsigned level = samp->level; unsigned xpot = pot_level_size(samp->xpot, level); @@ -960,7 +958,7 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); const struct pipe_resource *texture = samp->texture; unsigned level0, j; int width; @@ -1000,7 +998,7 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); const struct pipe_resource *texture = samp->texture; unsigned level0, j; int width, height; @@ -1052,7 +1050,7 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); const struct pipe_resource *texture = samp->texture; const unsigned *faces = samp->faces; /* zero when not cube-mapping */ unsigned level0, j; @@ -1096,7 +1094,7 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); const struct pipe_resource *texture = samp->texture; unsigned level0, j; int width, height, depth; @@ -1138,7 +1136,7 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); const struct pipe_resource *texture = samp->texture; unsigned level0, j; int width; @@ -1178,7 +1176,7 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); const struct pipe_resource *texture = samp->texture; unsigned level0, j; int width, height; @@ -1225,7 +1223,7 @@ 
img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); const struct pipe_resource *texture = samp->texture; const unsigned *faces = samp->faces; /* zero when not cube-mapping */ unsigned level0, j; @@ -1274,7 +1272,7 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); const struct pipe_resource *texture = samp->texture; unsigned level0, j; int width, height, depth; @@ -1350,7 +1348,7 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); const struct pipe_resource *texture = samp->texture; int level0; float lambda; @@ -1417,7 +1415,7 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); const struct pipe_resource *texture = samp->texture; float lambda; float lod[QUAD_SIZE]; @@ -1460,7 +1458,7 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); float lambda; float lod[QUAD_SIZE]; @@ -1501,7 +1499,7 @@ mip_filter_linear_2d_linear_repeat_POT( enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); const struct pipe_resource *texture = samp->texture; int level0; float lambda; @@ -1569,7 +1567,7 @@ sample_compare(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); const struct pipe_sampler_state *sampler = samp->sampler; int j, k0, k1, k2, k3; float val; @@ -1656,7 +1654,7 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); unsigned j; float ssss[4], tttt[4]; @@ -1731,6 +1729,86 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, } +static void +sample_swizzle(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + float rgba_temp[NUM_CHANNELS][QUAD_SIZE]; + const unsigned swizzle_r = samp->key.bits.swizzle_r; + const unsigned swizzle_g = samp->key.bits.swizzle_g; + const unsigned swizzle_b = 
samp->key.bits.swizzle_b; + const unsigned swizzle_a = samp->key.bits.swizzle_a; + unsigned j; + + samp->sample_target(tgsi_sampler, s, t, p, c0, control, rgba_temp); + + switch (swizzle_r) { + case PIPE_SWIZZLE_ZERO: + for (j = 0; j < 4; j++) + rgba[0][j] = 0.0f; + break; + case PIPE_SWIZZLE_ONE: + for (j = 0; j < 4; j++) + rgba[0][j] = 1.0f; + break; + default: + assert(swizzle_r < 4); + for (j = 0; j < 4; j++) + rgba[0][j] = rgba_temp[swizzle_r][j]; + } + + switch (swizzle_g) { + case PIPE_SWIZZLE_ZERO: + for (j = 0; j < 4; j++) + rgba[1][j] = 0.0f; + break; + case PIPE_SWIZZLE_ONE: + for (j = 0; j < 4; j++) + rgba[1][j] = 1.0f; + break; + default: + assert(swizzle_g < 4); + for (j = 0; j < 4; j++) + rgba[1][j] = rgba_temp[swizzle_g][j]; + } + + switch (swizzle_b) { + case PIPE_SWIZZLE_ZERO: + for (j = 0; j < 4; j++) + rgba[2][j] = 0.0f; + break; + case PIPE_SWIZZLE_ONE: + for (j = 0; j < 4; j++) + rgba[2][j] = 1.0f; + break; + default: + assert(swizzle_b < 4); + for (j = 0; j < 4; j++) + rgba[2][j] = rgba_temp[swizzle_b][j]; + } + + switch (swizzle_a) { + case PIPE_SWIZZLE_ZERO: + for (j = 0; j < 4; j++) + rgba[3][j] = 0.0f; + break; + case PIPE_SWIZZLE_ONE: + for (j = 0; j < 4; j++) + rgba[3][j] = 1.0f; + break; + default: + assert(swizzle_a < 4); + for (j = 0; j < 4; j++) + rgba[3][j] = rgba_temp[swizzle_a][j]; + } +} + static wrap_nearest_func get_nearest_unorm_wrap(unsigned mode) @@ -1909,10 +1987,10 @@ get_img_filter(const union sp_sampler_key key, /** - * Bind the given texture object and texture cache to the sampler varient. + * Bind the given texture object and texture cache to the sampler variant. */ void -sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp, +sp_sampler_variant_bind_texture( struct sp_sampler_variant *samp, struct softpipe_tex_tile_cache *tex_cache, const struct pipe_resource *texture ) { @@ -1927,20 +2005,20 @@ sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp, void -sp_sampler_varient_destroy( struct sp_sampler_varient *samp ) +sp_sampler_variant_destroy( struct sp_sampler_variant *samp ) { FREE(samp); } /** - * Create a sampler varient for a given set of non-orthogonal state. + * Create a sampler variant for a given set of non-orthogonal state. 
*/ -struct sp_sampler_varient * -sp_create_sampler_varient( const struct pipe_sampler_state *sampler, +struct sp_sampler_variant * +sp_create_sampler_variant( const struct pipe_sampler_state *sampler, const union sp_sampler_key key ) { - struct sp_sampler_varient *samp = CALLOC_STRUCT(sp_sampler_varient); + struct sp_sampler_variant *samp = CALLOC_STRUCT(sp_sampler_variant); if (!samp) return NULL; @@ -2015,7 +2093,7 @@ sp_create_sampler_varient( const struct pipe_sampler_state *sampler, } if (key.bits.target == PIPE_TEXTURE_CUBE) { - samp->base.get_samples = sample_cube; + samp->sample_target = sample_cube; } else { samp->faces[0] = 0; @@ -2026,7 +2104,17 @@ sp_create_sampler_varient( const struct pipe_sampler_state *sampler, /* Skip cube face determination by promoting the compare * function pointer: */ - samp->base.get_samples = samp->compare; + samp->sample_target = samp->compare; + } + + if (key.bits.swizzle_r != PIPE_SWIZZLE_RED || + key.bits.swizzle_g != PIPE_SWIZZLE_GREEN || + key.bits.swizzle_b != PIPE_SWIZZLE_BLUE || + key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA) { + samp->base.get_samples = sample_swizzle; + } + else { + samp->base.get_samples = samp->sample_target; } return samp; diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index 6114acf7371..ed99006ab02 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -32,7 +32,7 @@ #include "tgsi/tgsi_exec.h" -struct sp_sampler_varient; +struct sp_sampler_variant; typedef void (*wrap_nearest_func)(const float s[4], unsigned size, @@ -44,7 +44,7 @@ typedef void (*wrap_linear_func)(const float s[4], int icoord1[4], float w[4]); -typedef float (*compute_lambda_func)(const struct sp_sampler_varient *sampler, +typedef float (*compute_lambda_func)(const struct sp_sampler_variant *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE]); @@ -64,7 +64,11 @@ union sp_sampler_key { unsigned is_pot:1; unsigned processor:2; unsigned unit:4; - unsigned pad:22; + unsigned swizzle_r:3; + unsigned swizzle_g:3; + unsigned swizzle_b:3; + unsigned swizzle_a:3; + unsigned pad:10; } bits; unsigned value; }; @@ -72,7 +76,7 @@ union sp_sampler_key { /** * Subclass of tgsi_sampler */ -struct sp_sampler_varient +struct sp_sampler_variant { struct tgsi_sampler base; /**< base class */ @@ -113,32 +117,33 @@ struct sp_sampler_varient filter_func mip_filter; filter_func compare; + filter_func sample_target; /* Linked list: */ - struct sp_sampler_varient *next; + struct sp_sampler_variant *next; }; struct sp_sampler; -/* Create a sampler varient for a given set of non-orthogonal state. Currently the +/* Create a sampler variant for a given set of non-orthogonal state. 
Currently the */ -struct sp_sampler_varient * -sp_create_sampler_varient( const struct pipe_sampler_state *sampler, +struct sp_sampler_variant * +sp_create_sampler_variant( const struct pipe_sampler_state *sampler, const union sp_sampler_key key ); -void sp_sampler_varient_bind_texture( struct sp_sampler_varient *varient, +void sp_sampler_variant_bind_texture( struct sp_sampler_variant *variant, struct softpipe_tex_tile_cache *tex_cache, const struct pipe_resource *tex ); -void sp_sampler_varient_destroy( struct sp_sampler_varient * ); +void sp_sampler_variant_destroy( struct sp_sampler_variant * ); -static INLINE struct sp_sampler_varient * -sp_sampler_varient(const struct tgsi_sampler *sampler) +static INLINE struct sp_sampler_variant * +sp_sampler_variant(const struct tgsi_sampler *sampler) { - return (struct sp_sampler_varient *) sampler; + return (struct sp_sampler_variant *) sampler; } extern void diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index e817c0c8cf5..e42015ad498 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -48,6 +48,9 @@ sp_create_tex_tile_cache( struct pipe_context *pipe ) struct softpipe_tex_tile_cache *tc; uint pos; + /* make sure max texture size works */ + assert((TILE_SIZE << TEX_ADDR_BITS) >= (1 << (SP_MAX_TEXTURE_2D_LEVELS-1))); + tc = CALLOC_STRUCT( softpipe_tex_tile_cache ); if (tc) { tc->pipe = pipe; @@ -260,15 +263,14 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, } tc->tex_trans = - pipe_get_transfer(tc->pipe, tc->texture, - addr.bits.face, - addr.bits.level, - addr.bits.z, - PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED, - 0, 0, - u_minify(tc->texture->width0, addr.bits.level), - u_minify(tc->texture->height0, addr.bits.level)); - + pipe_get_transfer(tc->pipe, tc->texture, + addr.bits.level, + addr.bits.face + addr.bits.z, + PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED, + 0, 0, + u_minify(tc->texture->width0, addr.bits.level), + u_minify(tc->texture->height0, addr.bits.level)); + tc->tex_trans_map = tc->pipe->transfer_map(tc->pipe, tc->tex_trans); tc->tex_face = addr.bits.face; @@ -276,45 +278,26 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, tc->tex_z = addr.bits.z; } - /* get tile from the transfer (view into texture) */ + /* get tile from the transfer (view into texture) + * Note we're using the swizzle version of this fuction only because + * we need to pass the texture cache's format explicitly. + */ pipe_get_tile_swizzle(tc->pipe, tc->tex_trans, addr.bits.x * TILE_SIZE, addr.bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, - tc->swizzle_r, - tc->swizzle_g, - tc->swizzle_b, - tc->swizzle_a, + PIPE_SWIZZLE_RED, + PIPE_SWIZZLE_GREEN, + PIPE_SWIZZLE_BLUE, + PIPE_SWIZZLE_ALPHA, tc->format, (float *) tile->data.color); + tile->addr = addr; } tc->last_tile = tile; return tile; } - - - -/** - * Return the swizzled border color. 
- */ -const float * -sp_tex_tile_cache_border_color(struct softpipe_tex_tile_cache *tc, - const float border_color[4]) -{ - float rgba01[6]; - - COPY_4V(rgba01, border_color); - rgba01[PIPE_SWIZZLE_ZERO] = 0.0f; - rgba01[PIPE_SWIZZLE_ONE] = 1.0f; - - tc->swz_border_color[0] = rgba01[tc->swizzle_r]; - tc->swz_border_color[1] = rgba01[tc->swizzle_g]; - tc->swz_border_color[2] = rgba01[tc->swizzle_b]; - tc->swz_border_color[3] = rgba01[tc->swizzle_a]; - - return tc->swz_border_color; -} diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h index 05f25133daa..2220955b715 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h @@ -30,6 +30,7 @@ #include "pipe/p_compiler.h" +#include "sp_limits.h" struct softpipe_context; @@ -39,22 +40,26 @@ struct softpipe_tex_tile_cache; /** * Cache tile size (width and height). This needs to be a power of two. */ -#define TILE_SIZE 64 +#define TILE_SIZE_LOG2 6 +#define TILE_SIZE (1 << TILE_SIZE_LOG2) -/* If we need to support > 4096, just expand this to be a 64 bit - * union, or consider tiling in Z as well. +#define TEX_ADDR_BITS (SP_MAX_TEXTURE_2D_LEVELS - 1 - TILE_SIZE_LOG2) +#define TEX_Z_BITS (SP_MAX_TEXTURE_2D_LEVELS - 1) + +/** + * Texture tile address as a union for fast compares. */ union tex_tile_address { struct { - unsigned x:6; /* 4096 / TILE_SIZE */ - unsigned y:6; /* 4096 / TILE_SIZE */ - unsigned z:12; /* 4096 -- z not tiled */ + unsigned x:TEX_ADDR_BITS; /* 16K / TILE_SIZE */ + unsigned y:TEX_ADDR_BITS; /* 16K / TILE_SIZE */ + unsigned z:TEX_Z_BITS; /* 16K -- z not tiled */ unsigned face:3; unsigned level:4; unsigned invalid:1; } bits; - unsigned value; + uint64_t value; }; @@ -90,8 +95,6 @@ struct softpipe_tex_tile_cache unsigned format; struct softpipe_tex_cached_tile *last_tile; /**< most recently retrieved tile */ - - float swz_border_color[4]; /**< swizzled border color */ }; @@ -126,10 +129,10 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, static INLINE union tex_tile_address tex_tile_address( unsigned x, - unsigned y, - unsigned z, - unsigned face, - unsigned level ) + unsigned y, + unsigned z, + unsigned face, + unsigned level ) { union tex_tile_address addr; @@ -139,7 +142,7 @@ tex_tile_address( unsigned x, addr.bits.z = z; addr.bits.face = face; addr.bits.level = level; - + return addr; } @@ -156,10 +159,5 @@ sp_get_cached_tile_tex(struct softpipe_tex_tile_cache *tc, } -const float * -sp_tex_tile_cache_border_color(struct softpipe_tex_tile_cache *tc, - const float border_color[4]); - - #endif /* SP_TEX_TILE_CACHE_H */ diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 4e6123fbd07..509d9982b17 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -220,23 +220,18 @@ softpipe_resource_get_handle(struct pipe_screen *screen, */ static unsigned sp_get_tex_image_offset(const struct softpipe_resource *spr, - unsigned level, unsigned face, unsigned zslice) + unsigned level, unsigned layer) { const unsigned hgt = u_minify(spr->base.height0, level); const unsigned nblocksy = util_format_get_nblocksy(spr->base.format, hgt); unsigned offset = spr->level_offset[level]; - if (spr->base.target == PIPE_TEXTURE_CUBE) { - assert(zslice == 0); - offset += face * nblocksy * spr->stride[level]; - } - else if (spr->base.target == PIPE_TEXTURE_3D) { - assert(face == 0); - offset += zslice * nblocksy * 
spr->stride[level]; + if (spr->base.target == PIPE_TEXTURE_CUBE || + spr->base.target == PIPE_TEXTURE_3D) { + offset += layer * nblocksy * spr->stride[level]; } else { - assert(face == 0); - assert(zslice == 0); + assert(layer == 0); } return offset; @@ -247,39 +242,40 @@ sp_get_tex_image_offset(const struct softpipe_resource *spr, * Get a pipe_surface "view" into a texture resource. */ static struct pipe_surface * -softpipe_get_tex_surface(struct pipe_screen *screen, - struct pipe_resource *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned usage) +softpipe_create_surface(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *surf_tmpl) { - struct softpipe_resource *spr = softpipe_resource(pt); struct pipe_surface *ps; + unsigned level = surf_tmpl->u.tex.level; assert(level <= pt->last_level); + assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); ps = CALLOC_STRUCT(pipe_surface); if (ps) { pipe_reference_init(&ps->reference, 1); pipe_resource_reference(&ps->texture, pt); - ps->format = pt->format; + ps->context = pipe; + ps->format = surf_tmpl->format; ps->width = u_minify(pt->width0, level); ps->height = u_minify(pt->height0, level); - ps->offset = sp_get_tex_image_offset(spr, level, face, zslice); - ps->usage = usage; + ps->usage = surf_tmpl->usage; - ps->face = face; - ps->level = level; - ps->zslice = zslice; + ps->u.tex.level = level; + ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer; } return ps; } /** - * Free a pipe_surface which was created with softpipe_get_tex_surface(). + * Free a pipe_surface which was created with softpipe_create_surface(). */ static void -softpipe_tex_surface_destroy(struct pipe_surface *surf) +softpipe_surface_destroy(struct pipe_context *pipe, + struct pipe_surface *surf) { /* Effectively do the texture_update work here - if texture images * needed post-processing to put them into hardware layout, this is @@ -302,21 +298,21 @@ softpipe_tex_surface_destroy(struct pipe_surface *surf) */ static struct pipe_transfer * softpipe_get_transfer(struct pipe_context *pipe, - struct pipe_resource *resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box) + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box) { struct softpipe_resource *spr = softpipe_resource(resource); struct softpipe_transfer *spt; assert(resource); - assert(sr.level <= resource->last_level); + assert(level <= resource->last_level); /* make sure the requested region is in the image bounds */ - assert(box->x + box->width <= u_minify(resource->width0, sr.level)); - assert(box->y + box->height <= u_minify(resource->height0, sr.level)); - assert(box->z + box->depth <= u_minify(resource->depth0, sr.level)); + assert(box->x + box->width <= u_minify(resource->width0, level)); + assert(box->y + box->height <= u_minify(resource->height0, level)); + assert(box->z + box->depth <= (u_minify(resource->depth0, level) + resource->array_size - 1)); /* * Transfers, like other pipe operations, must happen in order, so flush the @@ -326,7 +322,7 @@ softpipe_get_transfer(struct pipe_context *pipe, boolean read_only = !(usage & PIPE_TRANSFER_WRITE); boolean do_not_block = !!(usage & PIPE_TRANSFER_DONTBLOCK); if (!softpipe_flush_resource(pipe, resource, - sr.face, sr.level, + level, box->depth > 1 ? 
-1 : box->z, 0, /* flush_flags */ read_only, TRUE, /* cpu_access */ @@ -343,21 +339,21 @@ softpipe_get_transfer(struct pipe_context *pipe, if (spt) { struct pipe_transfer *pt = &spt->base; enum pipe_format format = resource->format; - const unsigned hgt = u_minify(spr->base.height0, sr.level); + const unsigned hgt = u_minify(spr->base.height0, level); const unsigned nblocksy = util_format_get_nblocksy(format, hgt); pipe_resource_reference(&pt->resource, resource); - pt->sr = sr; + pt->level = level; pt->usage = usage; pt->box = *box; - pt->stride = spr->stride[sr.level]; - pt->slice_stride = pt->stride * nblocksy; + pt->stride = spr->stride[level]; + pt->layer_stride = pt->stride * nblocksy; - spt->offset = sp_get_tex_image_offset(spr, sr.level, sr.face, box->z); + spt->offset = sp_get_tex_image_offset(spr, level, box->z); spt->offset += - box->y / util_format_get_blockheight(format) * spt->base.stride + - box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); + box->y / util_format_get_blockheight(format) * spt->base.stride + + box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); return pt; } @@ -454,6 +450,7 @@ softpipe_user_buffer_create(struct pipe_screen *screen, spr->base.width0 = bytes; spr->base.height0 = 1; spr->base.depth0 = 1; + spr->base.array_size = 1; spr->userBuffer = TRUE; spr->data = ptr; @@ -471,6 +468,9 @@ softpipe_init_texture_funcs(struct pipe_context *pipe) pipe->transfer_flush_region = u_default_transfer_flush_region; pipe->transfer_inline_write = u_default_transfer_inline_write; + + pipe->create_surface = softpipe_create_surface; + pipe->surface_destroy = softpipe_surface_destroy; } @@ -483,6 +483,4 @@ softpipe_init_screen_texture_funcs(struct pipe_screen *screen) screen->resource_get_handle = softpipe_resource_get_handle; screen->user_buffer_create = softpipe_user_buffer_create; - screen->get_tex_surface = softpipe_get_tex_surface; - screen->tex_surface_destroy = softpipe_tex_surface_destroy; } diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 6b205dc5329..5603110eeb3 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -30,10 +30,7 @@ #include "pipe/p_state.h" - - -#define SP_MAX_TEXTURE_2D_LEVELS 13 /* 4K x 4K */ -#define SP_MAX_TEXTURE_3D_LEVELS 9 /* 512 x 512 x 512 */ +#include "sp_limits.h" struct pipe_context; diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index aa76b8aa1ec..480860af63b 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -92,6 +92,10 @@ sp_create_tile_cache( struct pipe_context *pipe ) maxTexSize = 1 << (maxLevels - 1); assert(MAX_WIDTH >= maxTexSize); + assert(sizeof(union tile_address) == 4); + + assert((TILE_SIZE << TILE_ADDR_BITS) >= MAX_WIDTH); + tc = CALLOC_STRUCT( softpipe_tile_cache ); if (tc) { tc->pipe = pipe; @@ -170,11 +174,11 @@ sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, tc->surface = ps; if (ps) { - tc->transfer = pipe_get_transfer(pipe, ps->texture, ps->face, - ps->level, ps->zslice, - PIPE_TRANSFER_READ_WRITE | - PIPE_TRANSFER_UNSYNCHRONIZED, - 0, 0, ps->width, ps->height); + tc->transfer = pipe_get_transfer(pipe, ps->texture, + ps->u.tex.level, ps->u.tex.first_layer, + PIPE_TRANSFER_READ_WRITE | + PIPE_TRANSFER_UNSYNCHRONIZED, + 0, 0, ps->width, ps->height); tc->depth_stencil = (ps->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || 
ps->format == PIPE_FORMAT_Z24X8_UNORM || diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index 4151a47c323..68140b1d2f9 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -30,6 +30,7 @@ #include "pipe/p_compiler.h" +#include "sp_texture.h" struct softpipe_tile_cache; @@ -38,18 +39,22 @@ struct softpipe_tile_cache; /** * Cache tile size (width and height). This needs to be a power of two. */ -#define TILE_SIZE 64 +#define TILE_SIZE_LOG2 6 +#define TILE_SIZE (1 << TILE_SIZE_LOG2) -/* If we need to support > 4096, just expand this to be a 64 bit - * union, or consider tiling in Z as well. +#define TILE_ADDR_BITS (SP_MAX_TEXTURE_2D_LEVELS - 1 - TILE_SIZE_LOG2) + + +/** + * Surface tile address as a union for fast compares. */ union tile_address { struct { - unsigned x:6; /* 4096 / TILE_SIZE */ - unsigned y:6; /* 4096 / TILE_SIZE */ + unsigned x:TILE_ADDR_BITS; /* 16K / TILE_SIZE */ + unsigned y:TILE_ADDR_BITS; /* 16K / TILE_SIZE */ unsigned invalid:1; - unsigned pad:19; + unsigned pad:15; } bits; unsigned value; }; @@ -70,11 +75,6 @@ struct softpipe_cached_tile #define NUM_ENTRIES 50 -/** XXX move these */ -#define MAX_WIDTH 4096 -#define MAX_HEIGHT 4096 - - struct softpipe_tile_cache { struct pipe_context *pipe; diff --git a/src/gallium/drivers/svga/SConscript b/src/gallium/drivers/svga/SConscript index 12ce4732d15..5455ed07573 100644 --- a/src/gallium/drivers/svga/SConscript +++ b/src/gallium/drivers/svga/SConscript @@ -73,4 +73,6 @@ svga = env.ConvenienceLibrary( source = sources, ) +env.Alias('svga', svga) + Export('svga') diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c index e975f3b02fa..05eab8a517d 100644 --- a/src/gallium/drivers/svga/svga_cmd.c +++ b/src/gallium/drivers/svga/svga_cmd.c @@ -455,8 +455,8 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc, cmd->guest.pitch = st->base.stride; swc->surface_relocation(swc, &cmd->host.sid, texture->handle, surface_flags); - cmd->host.face = st->base.sr.face; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */ - cmd->host.mipmap = st->base.sr.level; + cmd->host.face = st->face; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */ + cmd->host.mipmap = st->base.level; cmd->transfer = transfer; diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index cd3f6b89825..1e513f1039f 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -109,6 +109,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, svga_init_vertex_functions(svga); svga_init_constbuffer_functions(svga); svga_init_query_functions(svga); + svga_init_surface_functions(svga); /* debug */ diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 1fb5a04887f..d4970908b1e 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -288,6 +288,11 @@ struct svga_sw_state boolean need_swvfetch; boolean need_pipeline; boolean need_swtnl; + + /* Flag to make sure that need sw is on while + * updating state within a swtnl call. 
+ */ + boolean in_swtnl_draw; }; @@ -422,6 +427,7 @@ void svga_init_vertex_functions( struct svga_context *svga ); void svga_init_constbuffer_functions( struct svga_context *svga ); void svga_init_draw_functions( struct svga_context *svga ); void svga_init_query_functions( struct svga_context *svga ); +void svga_init_surface_functions(struct svga_context *svga); void svga_cleanup_vertex_state( struct svga_context *svga ); void svga_cleanup_tss_binding( struct svga_context *svga ); diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 81dd4778d0a..97cbac447d6 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -315,7 +315,6 @@ enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl, break; } - assert(!stride || width <= stride); if (max_index != ~0) { assert(offset + (index_bias + max_index) * stride + width <= size); } diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c index da33fae62f1..be0e7abe21b 100644 --- a/src/gallium/drivers/svga/svga_draw_arrays.c +++ b/src/gallium/drivers/svga/svga_draw_arrays.c @@ -65,14 +65,14 @@ static enum pipe_error generate_indices( struct svga_hwtnl *hwtnl, generate( nr, dst_map ); - pipe_buffer_unmap( pipe, dst, transfer ); + pipe_buffer_unmap( pipe, transfer ); *out_buf = dst; return PIPE_OK; fail: if (dst_map) - pipe_buffer_unmap( pipe, dst, transfer ); + pipe_buffer_unmap( pipe, transfer ); if (dst) pipe->screen->resource_destroy( pipe->screen, dst ); diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c index c4579177b77..83527c6ef49 100644 --- a/src/gallium/drivers/svga/svga_draw_elements.c +++ b/src/gallium/drivers/svga/svga_draw_elements.c @@ -72,18 +72,18 @@ translate_indices( struct svga_hwtnl *hwtnl, nr, dst_map ); - pipe_buffer_unmap( pipe, src, src_transfer ); - pipe_buffer_unmap( pipe, dst, dst_transfer ); + pipe_buffer_unmap( pipe, src_transfer ); + pipe_buffer_unmap( pipe, dst_transfer ); *out_buf = dst; return PIPE_OK; fail: if (src_map) - pipe_buffer_unmap( pipe, src, src_transfer ); + pipe_buffer_unmap( pipe, src_transfer ); if (dst_map) - pipe_buffer_unmap( pipe, dst, dst_transfer ); + pipe_buffer_unmap( pipe, dst_transfer ); if (dst) pipe->screen->resource_destroy( pipe->screen, dst ); @@ -120,14 +120,17 @@ svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl, if (index_buffer && svga_buffer_is_user_buffer(index_buffer)) { + boolean flushed; assert( index_buffer->width0 >= index_offset + count * index_size ); ret = u_upload_buffer( hwtnl->upload_ib, + 0, index_offset, count * index_size, index_buffer, &index_offset, - &upload_buffer ); + &upload_buffer, + &flushed ); if (ret) goto done; diff --git a/src/gallium/drivers/svga/svga_pipe_blit.c b/src/gallium/drivers/svga/svga_pipe_blit.c index ca036a6463b..426698806c8 100644 --- a/src/gallium/drivers/svga/svga_pipe_blit.c +++ b/src/gallium/drivers/svga/svga_pipe_blit.c @@ -32,37 +32,40 @@ #define FILE_DEBUG_FLAG DEBUG_BLIT -/* XXX I got my doubts about this, should maybe use svga_texture_copy_handle directly? */ +/* XXX still have doubts about this... 
*/ static void svga_surface_copy(struct pipe_context *pipe, struct pipe_resource* dst_tex, - struct pipe_subresource subdst, + unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, struct pipe_resource* src_tex, - struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height) -{ + unsigned src_level, + const struct pipe_box *src_box) + { struct svga_context *svga = svga_context(pipe); - struct pipe_screen *screen = pipe->screen; + struct svga_texture *stex = svga_texture(src_tex); + struct svga_texture *dtex = svga_texture(dst_tex); +/* struct pipe_screen *screen = pipe->screen; SVGA3dCopyBox *box; enum pipe_error ret; - struct pipe_surface *srcsurf, *dstsurf; + struct pipe_surface *srcsurf, *dstsurf;*/ + unsigned dst_face, dst_z, src_face, src_z; svga_hwtnl_flush_retry( svga ); +#if 0 srcsurf = screen->get_tex_surface(screen, src_tex, - subsrc.face, subsrc.level, srcz, + src_level, src_box->z, src_box->z, PIPE_BIND_SAMPLER_VIEW); dstsurf = screen->get_tex_surface(screen, dst_tex, - subdst.face, subdst.level, dstz, + dst_level, dst_box->z, dst_box->z, PIPE_BIND_RENDER_TARGET); SVGA_DBG(DEBUG_DMA, "blit to sid %p (%d,%d), from sid %p (%d,%d) sz %dx%d\n", svga_surface(dstsurf)->handle, dstx, dsty, svga_surface(srcsurf)->handle, - srcx, srcy, + src_box->x, src_box->y, width, height); ret = SVGA3D_BeginSurfaceCopy(svga->swc, @@ -88,8 +91,8 @@ static void svga_surface_copy(struct pipe_context *pipe, box->w = width; box->h = height; box->d = 1; - box->srcx = srcx; - box->srcy = srcy; + box->srcx = src_box->x; + box->srcy = src_box->y; box->srcz = 0; SVGA_FIFOCommitAll(svga->swc); @@ -100,6 +103,37 @@ static void svga_surface_copy(struct pipe_context *pipe, pipe_surface_reference(&srcsurf, NULL); pipe_surface_reference(&dstsurf, NULL); +#else + if (src_tex->target == PIPE_TEXTURE_CUBE) { + src_face = src_box->z; + src_z = 0; + assert(src_box->depth == 1); + } + else { + src_face = 0; + src_z = src_box->z; + } + /* different src/dst type???*/ + if (dst_tex->target == PIPE_TEXTURE_CUBE) { + dst_face = dstz; + dst_z = 0; + assert(src_box->depth == 1); + } + else { + dst_face = 0; + dst_z = dstz; + } + svga_texture_copy_handle(svga, + stex->handle, + src_box->x, src_box->y, src_z, + src_level, src_face, + dtex->handle, + dstx, dsty, dst_z, + dst_level, dst_face, + src_box->width, src_box->height, src_box->depth); + +#endif + } diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c index 660eb0757a6..e97b4e57415 100644 --- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c +++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -68,7 +68,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, /* need this for draw module. */ rast->templ = *templ; - /* light_twoside - XXX: need fragment shader varient */ + /* light_twoside - XXX: need fragment shader variant */ /* poly_smooth - XXX: no fallback available */ /* poly_stipple_enable - draw module */ /* sprite_coord_enable - ? 
*/ diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c index 198d4013328..f12e2b68627 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.c +++ b/src/gallium/drivers/svga/svga_resource_buffer.c @@ -53,8 +53,8 @@ svga_buffer_needs_hw_storage(unsigned usage) static unsigned int svga_buffer_is_referenced( struct pipe_context *pipe, - struct pipe_resource *buf, - unsigned face, unsigned level) + struct pipe_resource *buf, + unsigned level, int layer) { struct svga_screen *ss = svga_screen(pipe->screen); struct svga_buffer *sbuf = svga_buffer(buf); @@ -337,6 +337,7 @@ svga_user_buffer_create(struct pipe_screen *screen, sbuf->b.b.width0 = bytes; sbuf->b.b.height0 = 1; sbuf->b.b.depth0 = 1; + sbuf->b.b.array_size = 1; sbuf->swbuf = ptr; sbuf->user = TRUE; diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c index 26eb03a895a..7c9e600b9f4 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.c +++ b/src/gallium/drivers/svga/svga_resource_texture.c @@ -50,8 +50,8 @@ static unsigned int svga_texture_is_referenced( struct pipe_context *pipe, - struct pipe_resource *texture, - unsigned face, unsigned level) + struct pipe_resource *texture, + unsigned level, int layer) { struct svga_texture *tex = svga_texture(texture); struct svga_screen *ss = svga_screen(pipe->screen); @@ -171,20 +171,7 @@ svga_transfer_dma_band(struct svga_context *svga, struct svga_texture *texture = svga_texture(st->base.resource); SVGA3dCopyBox box; enum pipe_error ret; - - SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - (%u, %u, %u), %ubpp\n", - transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from", - texture->handle, - st->base.sr.face, - st->base.box.x, - y, - st->base.box.z, - st->base.box.x + st->base.box.width, - y + h, - st->base.box.z + 1, - util_format_get_blocksize(texture->b.b.format) * 8 / - (util_format_get_blockwidth(texture->b.b.format)*util_format_get_blockheight(texture->b.b.format))); - + box.x = st->base.box.x; box.y = y; box.z = st->base.box.z; @@ -195,6 +182,26 @@ svga_transfer_dma_band(struct svga_context *svga, box.srcy = srcy; box.srcz = 0; + if (st->base.resource->target == PIPE_TEXTURE_CUBE) { + st->face = st->base.box.z; + box.z = 0; + } + else + st->face = 0; + + SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - (%u, %u, %u), %ubpp\n", + transfer == SVGA3D_WRITE_HOST_VRAM ? 
"to" : "from", + texture->handle, + st->face, + st->base.box.x, + y, + box.z, + st->base.box.x + st->base.box.width, + y + h, + box.z + 1, + util_format_get_blocksize(texture->b.b.format) * 8 / + (util_format_get_blockwidth(texture->b.b.format)*util_format_get_blockheight(texture->b.b.format))); + ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1); if(ret != PIPE_OK) { svga->swc->flush(svga->swc, NULL); @@ -213,7 +220,7 @@ svga_transfer_dma(struct svga_context *svga, struct svga_screen *screen = svga_screen(texture->b.b.screen); struct svga_winsys_screen *sws = screen->sws; struct pipe_fence_handle *fence = NULL; - + if (transfer == SVGA3D_READ_HOST_VRAM) { SVGA_DBG(DEBUG_PERF, "%s: readback transfer\n", __FUNCTION__); } @@ -221,7 +228,7 @@ svga_transfer_dma(struct svga_context *svga, if(!st->swbuf) { /* Do the DMA transfer in a single go */ - + svga_transfer_dma_band(svga, st, transfer, st->base.box.y, st->base.box.height, 0); if(transfer == SVGA3D_READ_HOST_VRAM) { @@ -245,12 +252,12 @@ svga_transfer_dma(struct svga_context *svga, /* Transfer band must be aligned to pixel block boundaries */ assert(y % blockheight == 0); assert(h % blockheight == 0); - + offset = y * st->base.stride / blockheight; length = h * st->base.stride / blockheight; sw = (uint8_t *)st->swbuf + offset; - + if(transfer == SVGA3D_WRITE_HOST_VRAM) { /* Wait for the previous DMAs to complete */ /* TODO: keep one DMA (at half the size) in the background */ @@ -267,9 +274,9 @@ svga_transfer_dma(struct svga_context *svga, sws->buffer_unmap(sws, st->hwbuf); } } - + svga_transfer_dma_band(svga, st, transfer, y, h, srcy); - + if(transfer == SVGA3D_READ_HOST_VRAM) { svga_context_flush(svga, &fence); sws->fence_finish(sws, fence, 0); @@ -336,10 +343,10 @@ svga_texture_destroy(struct pipe_screen *screen, */ static struct pipe_transfer * svga_texture_get_transfer(struct pipe_context *pipe, - struct pipe_resource *texture, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box) + struct pipe_resource *texture, + unsigned level, + unsigned usage, + const struct pipe_box *box) { struct svga_context *svga = svga_context(pipe); struct svga_screen *ss = svga_screen(pipe->screen); @@ -352,19 +359,20 @@ svga_texture_get_transfer(struct pipe_context *pipe, if (usage & PIPE_TRANSFER_MAP_DIRECTLY) return NULL; + assert(box->depth == 1); st = CALLOC_STRUCT(svga_transfer); if (!st) return NULL; - + pipe_resource_reference(&st->base.resource, texture); - st->base.sr = sr; + st->base.level = level; st->base.usage = usage; st->base.box = *box; st->base.stride = nblocksx*util_format_get_blocksize(texture->format); - st->base.slice_stride = 0; + st->base.layer_stride = 0; st->hw_nblocksy = nblocksy; - + st->hwbuf = svga_winsys_buffer_create(svga, 1, 0, @@ -391,7 +399,7 @@ svga_texture_get_transfer(struct pipe_context *pipe, if(!st->swbuf) goto no_swbuf; } - + if (usage & PIPE_TRANSFER_READ) svga_transfer_dma(svga, st, SVGA3D_READ_HOST_VRAM); @@ -454,8 +462,11 @@ svga_texture_transfer_destroy(struct pipe_context *pipe, if (st->base.usage & PIPE_TRANSFER_WRITE) { svga_transfer_dma(svga, st, SVGA3D_WRITE_HOST_VRAM); ss->texture_timestamp++; - tex->view_age[transfer->sr.level] = ++(tex->age); - tex->defined[transfer->sr.face][transfer->sr.level] = TRUE; + tex->view_age[transfer->level] = ++(tex->age); + if (transfer->resource->target == PIPE_TEXTURE_CUBE) + tex->defined[transfer->box.z][transfer->level] = TRUE; + else + tex->defined[0][transfer->level] = TRUE; } pipe_resource_reference(&st->base.resource, NULL); @@ 
-490,7 +501,7 @@ svga_texture_create(struct pipe_screen *screen, { struct svga_screen *svgascreen = svga_screen(screen); struct svga_texture *tex = CALLOC_STRUCT(svga_texture); - + if (!tex) goto error1; @@ -507,7 +518,7 @@ svga_texture_create(struct pipe_screen *screen, tex->key.size.width = template->width0; tex->key.size.height = template->height0; tex->key.size.depth = template->depth0; - + if(template->target == PIPE_TEXTURE_CUBE) { tex->key.flags |= SVGA3D_SURFACE_CUBEMAP; tex->key.numFaces = 6; diff --git a/src/gallium/drivers/svga/svga_resource_texture.h b/src/gallium/drivers/svga/svga_resource_texture.h index 631937f2eb0..9a2911c2a95 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.h +++ b/src/gallium/drivers/svga/svga_resource_texture.h @@ -85,6 +85,8 @@ struct svga_transfer { struct pipe_transfer base; + unsigned face; + struct svga_winsys_buffer *hwbuf; /* Height of the hardware buffer in pixel blocks */ diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 666b498d145..d0f42c614c9 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -35,7 +35,6 @@ #include "svga_resource_texture.h" #include "svga_resource.h" #include "svga_debug.h" -#include "svga_surface.h" #include "svga3d_shaderdefs.h" @@ -499,7 +498,6 @@ svga_screen_create(struct svga_winsys_screen *sws) screen->fence_finish = svga_fence_finish; svgascreen->sws = sws; - svga_screen_init_surface_functions(screen); svga_init_screen_resource_functions(svgascreen); svgascreen->use_ps30 = diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c index 97c818cd379..daf1024fd02 100644 --- a/src/gallium/drivers/svga/svga_state_constants.c +++ b/src/gallium/drivers/svga/svga_state_constants.c @@ -110,7 +110,7 @@ static int emit_consts( struct svga_context *svga, done: if (data) - pipe_buffer_unmap(&svga->pipe, svga->curr.cb[unit], transfer); + pipe_buffer_unmap(&svga->pipe, transfer); return ret; } diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c index ad6f2947137..9c04adec8ee 100644 --- a/src/gallium/drivers/svga/svga_state_fs.c +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -136,7 +136,7 @@ static int make_fs_key( const struct svga_context *svga, /* The blend workaround for simulating logicop xor behaviour * requires that the incoming fragment color be white. 
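The pipe_buffer_unmap() calls being touched above lose their resource argument; only the transfer is needed. A minimal map/modify/unmap round trip under the new helper signature, with hypothetical locals and assuming the u_inlines mapping helper keeps its usual shape; a sketch, not the driver's code:

#include "util/u_inlines.h"
#include "pipe/p_defines.h"

static void
write_first_float(struct pipe_context *pipe, struct pipe_resource *buf, float value)
{
   struct pipe_transfer *transfer;
   float *map;

   map = pipe_buffer_map(pipe, buf, PIPE_TRANSFER_WRITE, &transfer);
   if (!map)
      return;

   map[0] = value;

   /* the resource is no longer passed here -- the transfer knows it */
   pipe_buffer_unmap(pipe, transfer);
}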
This change - * achieves that by creating a varient of the current fragment + * achieves that by creating a variant of the current fragment * shader that overrides all output colors with 1,1,1,1 * * This will work for most shaders, including those containing diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c index d34d68f5350..8ba5ac8cdb4 100644 --- a/src/gallium/drivers/svga/svga_state_need_swtnl.c +++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -114,7 +114,7 @@ static int update_need_pipeline( struct svga_context *svga, /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE */ if (svga->curr.rast->need_pipeline & (1 << svga->curr.reduced_prim)) { - SVGA_DBG(DEBUG_SWTNL, "%s: rast need_pipeline (%d) & prim (%x)\n", + SVGA_DBG(DEBUG_SWTNL, "%s: rast need_pipeline (0x%x) & prim (0x%x)\n", __FUNCTION__, svga->curr.rast->need_pipeline, (1 << svga->curr.reduced_prim) ); @@ -175,9 +175,17 @@ static int update_need_swtnl( struct svga_context *svga, need_swtnl = 1; } + /* + * Some state changes the draw module does makes us belive we + * we don't need swtnl. This causes the vdecl code to pickup + * the wrong buffers and vertex formats. Try trivial/line-wide. + */ + if (svga->state.sw.in_swtnl_draw) + need_swtnl = 1; + if (need_swtnl != svga->state.sw.need_swtnl) { SVGA_DBG(DEBUG_SWTNL|DEBUG_PERF, - "%s need_swvfetch: %s, need_pipeline %s\n", + "%s: need_swvfetch %s, need_pipeline %s\n", __FUNCTION__, svga->state.sw.need_swvfetch ? "true" : "false", svga->state.sw.need_pipeline ? "true" : "false"); diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c index 4a50b19474c..f8b269a101e 100644 --- a/src/gallium/drivers/svga/svga_state_tss.c +++ b/src/gallium/drivers/svga/svga_state_tss.c @@ -238,7 +238,6 @@ update_tss(struct svga_context *svga, // TEXCOORDINDEX -- hopefully not needed if (svga->curr.tex_flags.flag_1d & (1 << i)) { - debug_printf("wrap 1d tex %d\n", i); EMIT_TS(svga, i, SVGA3D_TEX_ADDRESS_WRAP, ADDRESSV, fail); } else diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c index 3af7bf2b358..958d00393f2 100644 --- a/src/gallium/drivers/svga/svga_state_vdecl.c +++ b/src/gallium/drivers/svga/svga_state_vdecl.c @@ -57,12 +57,14 @@ upload_user_buffers( struct svga_context *svga ) struct svga_buffer *buffer = svga_buffer(svga->curr.vb[i].buffer); if (!buffer->uploaded.buffer) { + boolean flushed; ret = u_upload_buffer( svga->upload_vb, - 0, + 0, 0, buffer->b.b.width0, &buffer->b.b, &buffer->uploaded.offset, - &buffer->uploaded.buffer ); + &buffer->uploaded.buffer, + &flushed); if (ret) return ret; diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 5133c70593c..6682a1efe66 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -233,9 +233,7 @@ static int update_zero_stride( struct svga_context *svga, translate->run(translate, 0, 1, 0, svga->curr.zero_stride_constants); - pipe_buffer_unmap(&svga->pipe, - vbuffer->buffer, - transfer); + pipe_buffer_unmap(&svga->pipe, transfer); translate->release(translate); } diff --git a/src/gallium/drivers/svga/svga_surface.c b/src/gallium/drivers/svga/svga_surface.c index b21dc5fd9af..3e4bed76c05 100644 --- a/src/gallium/drivers/svga/svga_surface.c +++ b/src/gallium/drivers/svga/svga_surface.c @@ -179,36 +179,50 @@ svga_texture_view_surface(struct pipe_context *pipe, static struct pipe_surface * 
-svga_get_tex_surface(struct pipe_screen *screen, - struct pipe_resource *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned flags) +svga_create_surface(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *surf_tmpl) { struct svga_texture *tex = svga_texture(pt); + struct pipe_screen *screen = pipe->screen; struct svga_surface *s; - boolean render = (flags & (PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DEPTH_STENCIL)) ? TRUE : FALSE; + unsigned face, zslice; + /* XXX surfaces should only be used for rendering purposes nowadays */ + boolean render = (surf_tmpl->usage & (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DEPTH_STENCIL)) ? TRUE : FALSE; boolean view = FALSE; SVGA3dSurfaceFormat format; + assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); + s = CALLOC_STRUCT(svga_surface); if (!s) return NULL; + if (pt->target == PIPE_TEXTURE_CUBE) { + face = surf_tmpl->u.tex.first_layer; + zslice = 0; + } + else { + face = 0; + zslice = surf_tmpl->u.tex.first_layer; + } + pipe_reference_init(&s->base.reference, 1); pipe_resource_reference(&s->base.texture, pt); - s->base.format = pt->format; - s->base.width = u_minify(pt->width0, level); - s->base.height = u_minify(pt->height0, level); - s->base.usage = flags; - s->base.level = level; - s->base.face = face; - s->base.zslice = zslice; + s->base.context = pipe; + s->base.format = surf_tmpl->format; + s->base.width = u_minify(pt->width0, surf_tmpl->u.tex.level); + s->base.height = u_minify(pt->height0, surf_tmpl->u.tex.level); + s->base.usage = surf_tmpl->usage; + s->base.u.tex.level = surf_tmpl->u.tex.level; + s->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer; + s->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; if (!render) - format = svga_translate_format(pt->format); + format = svga_translate_format(surf_tmpl->format); else - format = svga_translate_format_render(pt->format); + format = svga_translate_format_render(surf_tmpl->format); assert(format != SVGA3D_FORMAT_INVALID); @@ -217,11 +231,11 @@ svga_get_tex_surface(struct pipe_screen *screen, /* Currently only used for compressed textures */ if (render && - format != svga_translate_format(pt->format)) { + format != svga_translate_format(surf_tmpl->format)) { view = TRUE; } - if (level != 0 && + if (surf_tmpl->u.tex.level != 0 && svga_screen(screen)->debug.force_level_surface_view) view = TRUE; @@ -233,22 +247,22 @@ svga_get_tex_surface(struct pipe_screen *screen, if (view) { SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n", - pt, level, face, zslice, s); + pt, surf_tmpl->u.tex.level, face, zslice, s); - s->handle = svga_texture_view_surface(NULL, tex, format, level, 1, face, zslice, - &s->key); + s->handle = svga_texture_view_surface(NULL, tex, format, surf_tmpl->u.tex.level, + 1, face, zslice, &s->key); s->real_face = 0; s->real_level = 0; s->real_zslice = 0; } else { SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n", - pt, level, face, zslice, s); + pt, surf_tmpl->u.tex.level, face, zslice, s); memset(&s->key, 0, sizeof s->key); s->handle = tex->handle; s->real_face = face; - s->real_level = level; s->real_zslice = zslice; + s->real_level = surf_tmpl->u.tex.level; } return &s->base; @@ -256,7 +270,8 @@ svga_get_tex_surface(struct pipe_screen *screen, static void -svga_tex_surface_destroy(struct pipe_surface *surf) +svga_surface_destroy(struct pipe_context *pipe, + struct pipe_surface *surf) { struct svga_surface *s = svga_surface(surf); struct svga_texture *t = 
svga_texture(surf->texture); @@ -282,8 +297,13 @@ svga_mark_surface_dirty(struct pipe_surface *surf) s->dirty = TRUE; - if (s->handle == tex->handle) - tex->defined[surf->face][surf->level] = TRUE; + if (s->handle == tex->handle) { + /* hmm so 3d textures always have all their slices marked ? */ + if (surf->texture->target == PIPE_TEXTURE_CUBE) + tex->defined[surf->u.tex.first_layer][surf->u.tex.level] = TRUE; + else + tex->defined[0][surf->u.tex.level] = TRUE; + } else { /* this will happen later in svga_propagate_surface */ } @@ -314,22 +334,32 @@ svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf) struct svga_surface *s = svga_surface(surf); struct svga_texture *tex = svga_texture(surf->texture); struct svga_screen *ss = svga_screen(surf->texture->screen); + unsigned zslice, face; if (!s->dirty) return; + if (surf->texture->target == PIPE_TEXTURE_CUBE) { + zslice = 0; + face = surf->u.tex.first_layer; + } + else { + zslice = surf->u.tex.first_layer; + face = 0; + } + s->dirty = FALSE; ss->texture_timestamp++; - tex->view_age[surf->level] = ++(tex->age); + tex->view_age[surf->u.tex.level] = ++(tex->age); if (s->handle != tex->handle) { - SVGA_DBG(DEBUG_VIEWS, "svga: Surface propagate: tex %p, level %u, from %p\n", tex, surf->level, surf); + SVGA_DBG(DEBUG_VIEWS, "svga: Surface propagate: tex %p, level %u, from %p\n", tex, surf->u.tex.level, surf); svga_texture_copy_handle(svga_context(pipe), s->handle, 0, 0, 0, s->real_level, s->real_face, - tex->handle, 0, 0, surf->zslice, surf->level, surf->face, - u_minify(tex->b.b.width0, surf->level), - u_minify(tex->b.b.height0, surf->level), 1); - tex->defined[surf->face][surf->level] = TRUE; + tex->handle, 0, 0, zslice, surf->u.tex.level, face, + u_minify(tex->b.b.width0, surf->u.tex.level), + u_minify(tex->b.b.height0, surf->u.tex.level), 1); + tex->defined[face][surf->u.tex.level] = TRUE; } } @@ -351,9 +381,9 @@ svga_surface_needs_propagation(struct pipe_surface *surf) void -svga_screen_init_surface_functions(struct pipe_screen *screen) +svga_init_surface_functions(struct svga_context *svga) { - screen->get_tex_surface = svga_get_tex_surface; - screen->tex_surface_destroy = svga_tex_surface_destroy; + svga->pipe.create_surface = svga_create_surface; + svga->pipe.surface_destroy = svga_surface_destroy; } diff --git a/src/gallium/drivers/svga/svga_surface.h b/src/gallium/drivers/svga/svga_surface.h index 13bd5b19b61..afb8326e1f3 100644 --- a/src/gallium/drivers/svga/svga_surface.h +++ b/src/gallium/drivers/svga/svga_surface.h @@ -90,7 +90,4 @@ svga_surface(struct pipe_surface *surface) return (struct svga_surface *)surface; } -void -svga_screen_init_surface_functions(struct pipe_screen *screen); - #endif diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c index ff3da842729..d5db6bf641a 100644 --- a/src/gallium/drivers/svga/svga_swtnl_backend.c +++ b/src/gallium/drivers/svga/svga_swtnl_backend.c @@ -141,7 +141,7 @@ svga_vbuf_render_unmap_vertices( struct vbuf_render *render, pipe_buffer_flush_mapped_range(&svga->pipe, svga_render->vbuf_transfer, offset, length); - pipe_buffer_unmap(&svga->pipe, svga_render->vbuf, svga_render->vbuf_transfer); + pipe_buffer_unmap(&svga->pipe, svga_render->vbuf_transfer); svga_render->min_index = min_index; svga_render->max_index = max_index; svga_render->vbuf_used = MAX2(svga_render->vbuf_used, used); @@ -158,7 +158,7 @@ svga_vbuf_render_set_primitive( struct vbuf_render *render, } static void -svga_vbuf_sumbit_state( struct 
svga_vbuf_render *svga_render ) +svga_vbuf_submit_state( struct svga_vbuf_render *svga_render ) { struct svga_context *svga = svga_render->svga; SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS]; @@ -221,7 +221,8 @@ svga_vbuf_render_draw_arrays( struct vbuf_render *render, unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size; enum pipe_error ret = 0; - svga_vbuf_sumbit_state(svga_render); + /* off to hardware */ + svga_vbuf_submit_state(svga_render); /* Need to call update_state() again as the draw module may have * altered some of our state behind our backs. Testcase: @@ -267,9 +268,8 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render, pipe_buffer_write_nooverlap(&svga->pipe, svga_render->ibuf, svga_render->ibuf_offset, 2 * nr_indices, indices); - /* off to hardware */ - svga_vbuf_sumbit_state(svga_render); + svga_vbuf_submit_state(svga_render); /* Need to call update_state() again as the draw module may have * altered some of our state behind our backs. Testcase: diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c index 814e8edd70f..05d86e1fb16 100644 --- a/src/gallium/drivers/svga/svga_swtnl_draw.c +++ b/src/gallium/drivers/svga/svga_swtnl_draw.c @@ -51,6 +51,9 @@ svga_swtnl_draw_vbo(struct svga_context *svga, assert(svga->state.sw.need_swtnl); assert(draw); + /* Make sure that the need_swtnl flag does not go away */ + svga->state.sw.in_swtnl_draw = TRUE; + ret = svga_update_state(svga, SVGA_STATE_SWTNL_DRAW); if (ret) { svga_context_flush(svga, NULL); @@ -106,22 +109,22 @@ svga_swtnl_draw_vbo(struct svga_context *svga, * unmap vertex/index buffers */ for (i = 0; i < svga->curr.num_vertex_buffers; i++) { - pipe_buffer_unmap(&svga->pipe, svga->curr.vb[i].buffer, - vb_transfer[i]); + pipe_buffer_unmap(&svga->pipe, vb_transfer[i]); draw_set_mapped_vertex_buffer(draw, i, NULL); } if (ib_transfer) { - pipe_buffer_unmap(&svga->pipe, svga->curr.ib.buffer, ib_transfer); + pipe_buffer_unmap(&svga->pipe, ib_transfer); draw_set_mapped_index_buffer(draw, NULL); } if (svga->curr.cb[PIPE_SHADER_VERTEX]) { - pipe_buffer_unmap(&svga->pipe, - svga->curr.cb[PIPE_SHADER_VERTEX], - cb_transfer); + pipe_buffer_unmap(&svga->pipe, cb_transfer); } + /* Now safe to remove the need_swtnl flag in any update_state call */ + svga->state.sw.in_swtnl_draw = FALSE; + return ret; } diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 04f30f82c3d..eaabae8ce42 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -314,6 +314,9 @@ trace_context_bind_vertex_sampler_states(struct pipe_context *_pipe, struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; + if (!pipe->bind_vertex_sampler_states) + return; + trace_dump_call_begin("pipe_context", "bind_vertex_sampler_states"); trace_dump_arg(ptr, pipe); @@ -885,6 +888,60 @@ trace_sampler_view_destroy(struct pipe_context *_pipe, FREE(_view); } +/******************************************************************** + * surface + */ + + +static struct pipe_surface * +trace_create_surface(struct pipe_context *_pipe, + struct pipe_resource *_texture, + const struct pipe_surface *surf_tmpl) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_resource *tr_tex = trace_resource(_texture); + struct pipe_context *pipe = tr_ctx->pipe; + struct pipe_resource *texture = tr_tex->resource; + struct pipe_surface *result = NULL; + + 
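Surfaces are now created through the context with a pipe_surface template instead of screen->get_tex_surface(), and the level/layer selection lives in the u.tex union. A hedged helper sketch mirroring what the svga and d3d1x hunks do (names are hypothetical):

#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include <string.h>

static struct pipe_surface *
create_rt_surface(struct pipe_context *pipe, struct pipe_resource *tex,
                  unsigned level, unsigned layer)
{
   struct pipe_surface templat;

   memset(&templat, 0, sizeof templat);
   templat.format = tex->format;
   templat.usage = PIPE_BIND_RENDER_TARGET;
   templat.u.tex.level = level;
   templat.u.tex.first_layer = layer;   /* cube face, array layer or 3D slice */
   templat.u.tex.last_layer = layer;

   return pipe->create_surface(pipe, tex, &templat);
}

/* ...and, when done: pipe->surface_destroy(pipe, surf); */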
trace_dump_call_begin("pipe_context", "create_surface"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, texture); + /* hmm some values unitialized there */ + trace_dump_arg(surface, surf_tmpl); + + result = pipe->create_surface(pipe, texture, surf_tmpl); + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + result = trace_surf_create(tr_tex, result); + + return result; +} + + +static void +trace_surface_destroy(struct pipe_context *_pipe, + struct pipe_surface *_surface) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + struct trace_surface *tr_surf = trace_surface(_surface); + struct pipe_surface *surface = tr_surf->surface; + + trace_dump_call_begin("pipe_context", "surface_destroy"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, surface); + + trace_dump_call_end(); + + trace_surf_destroy(tr_surf); +} + static INLINE void trace_context_set_fragment_sampler_views(struct pipe_context *_pipe, @@ -926,6 +983,9 @@ trace_context_set_vertex_sampler_views(struct pipe_context *_pipe, struct pipe_sampler_view *unwrapped_views[PIPE_MAX_VERTEX_SAMPLERS]; unsigned i; + if (!pipe->set_vertex_sampler_views) + return; + for(i = 0; i < num; ++i) { tr_view = trace_sampler_view(views[i]); unwrapped_views[i] = tr_view ? tr_view->sampler_view : NULL; @@ -1004,12 +1064,11 @@ trace_context_set_index_buffer(struct pipe_context *_pipe, static INLINE void trace_context_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *dst, - struct pipe_subresource subdst, + unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, struct pipe_resource *src, - struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height) + unsigned src_level, + const struct pipe_box *src_box) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; @@ -1021,21 +1080,17 @@ trace_context_resource_copy_region(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, dst); - trace_dump_arg_struct(subresource, subdst); + trace_dump_arg(uint, dst_level); trace_dump_arg(uint, dstx); trace_dump_arg(uint, dsty); trace_dump_arg(uint, dstz); trace_dump_arg(ptr, src); - trace_dump_arg_struct(subresource, subsrc); - trace_dump_arg(uint, srcx); - trace_dump_arg(uint, srcy); - trace_dump_arg(uint, srcz); - trace_dump_arg(uint, width); - trace_dump_arg(uint, height); + trace_dump_arg(uint, src_level); + trace_dump_arg(box, src_box); pipe->resource_copy_region(pipe, - dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, width, height); + dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); trace_dump_call_end(); } @@ -1166,8 +1221,8 @@ trace_context_destroy(struct pipe_context *_pipe) static unsigned int trace_is_resource_referenced( struct pipe_context *_pipe, - struct pipe_resource *_resource, - unsigned face, unsigned level) + struct pipe_resource *_resource, + unsigned level, int layer) { struct trace_context *tr_ctx = trace_context(_pipe); struct trace_resource *tr_tex = trace_resource(_resource); @@ -1178,10 +1233,10 @@ trace_is_resource_referenced( struct pipe_context *_pipe, trace_dump_call_begin("pipe_context", "is_resource_referenced"); trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, texture); - trace_dump_arg(uint, face); trace_dump_arg(uint, level); + trace_dump_arg(int, layer); - referenced = pipe->is_resource_referenced(pipe, texture, face, level); + referenced = pipe->is_resource_referenced(pipe, texture, level, layer); 
trace_dump_ret(uint, referenced); trace_dump_call_end(); @@ -1197,10 +1252,10 @@ trace_is_resource_referenced( struct pipe_context *_pipe, static struct pipe_transfer * trace_context_get_transfer(struct pipe_context *_context, - struct pipe_resource *_resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box) + struct pipe_resource *_resource, + unsigned level, + unsigned usage, + const struct pipe_box *box) { struct trace_context *tr_context = trace_context(_context); struct trace_resource *tr_tex = trace_resource(_resource); @@ -1215,7 +1270,7 @@ trace_context_get_transfer(struct pipe_context *_context, * to transfer_inline_write and ignore read transfers. */ - result = context->get_transfer(context, texture, sr, usage, box); + result = context->get_transfer(context, texture, level, usage, box); if (result) result = trace_transfer_create(tr_context, tr_tex, result); @@ -1226,7 +1281,7 @@ trace_context_get_transfer(struct pipe_context *_context, static void trace_context_transfer_destroy(struct pipe_context *_context, - struct pipe_transfer *_transfer) + struct pipe_transfer *_transfer) { struct trace_context *tr_context = trace_context(_context); struct trace_transfer *tr_trans = trace_transfer(_transfer); @@ -1274,7 +1329,7 @@ trace_context_transfer_flush_region( struct pipe_context *_context, static void trace_context_transfer_unmap(struct pipe_context *_context, - struct pipe_transfer *_transfer) + struct pipe_transfer *_transfer) { struct trace_context *tr_ctx = trace_context(_context); struct trace_transfer *tr_trans = trace_transfer(_transfer); @@ -1287,17 +1342,17 @@ trace_context_transfer_unmap(struct pipe_context *_context, */ struct pipe_resource *resource = transfer->resource; - struct pipe_subresource sr = transfer->sr; + unsigned level = transfer->level; unsigned usage = transfer->usage; const struct pipe_box *box = &transfer->box; unsigned stride = transfer->stride; - unsigned slice_stride = transfer->slice_stride; + unsigned layer_stride = transfer->layer_stride; trace_dump_call_begin("pipe_context", "transfer_inline_write"); trace_dump_arg(ptr, context); trace_dump_arg(ptr, resource); - trace_dump_arg_struct(subresource, sr); + trace_dump_arg(uint, level); trace_dump_arg(uint, usage); trace_dump_arg(box, box); @@ -1306,11 +1361,11 @@ trace_context_transfer_unmap(struct pipe_context *_context, resource->format, box, stride, - slice_stride); + layer_stride); trace_dump_arg_end(); trace_dump_arg(uint, stride); - trace_dump_arg(uint, slice_stride); + trace_dump_arg(uint, layer_stride); trace_dump_call_end(); @@ -1323,13 +1378,13 @@ trace_context_transfer_unmap(struct pipe_context *_context, static void trace_context_transfer_inline_write(struct pipe_context *_context, - struct pipe_resource *_resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box, - const void *data, - unsigned stride, - unsigned slice_stride) + struct pipe_resource *_resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) { struct trace_context *tr_context = trace_context(_context); struct trace_resource *tr_tex = trace_resource(_resource); @@ -1342,7 +1397,7 @@ trace_context_transfer_inline_write(struct pipe_context *_context, trace_dump_arg(ptr, context); trace_dump_arg(ptr, resource); - trace_dump_arg_struct(subresource, sr); + trace_dump_arg(uint, level); trace_dump_arg(uint, usage); trace_dump_arg(box, box); @@ -1351,16 +1406,16 @@ 
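For the transfer hunks above: get_transfer() now addresses a mip level plus a pipe_box, and the per-layer pitch is reported as layer_stride. A rough read-back sketch for a single uncompressed 2D level, assuming hypothetical locals and dst_stride >= the transfer stride:

#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "util/u_math.h"
#include <stdint.h>
#include <string.h>

static void
read_back_level(struct pipe_context *pipe, struct pipe_resource *tex,
                unsigned level, void *dst, unsigned dst_stride)
{
   struct pipe_box box;
   struct pipe_transfer *xfer;
   const uint8_t *map;
   unsigned y, h;

   memset(&box, 0, sizeof box);
   box.width = u_minify(tex->width0, level);
   box.height = h = u_minify(tex->height0, level);
   box.depth = 1;

   xfer = pipe->get_transfer(pipe, tex, level, PIPE_TRANSFER_READ, &box);
   if (!xfer)
      return;

   map = pipe->transfer_map(pipe, xfer);
   for (y = 0; y < h; y++)
      memcpy((uint8_t *)dst + y * dst_stride,
             map + y * xfer->stride,
             xfer->stride);   /* xfer->layer_stride would step between layers */

   pipe->transfer_unmap(pipe, xfer);
   pipe->transfer_destroy(pipe, xfer);
}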
trace_context_transfer_inline_write(struct pipe_context *_context, resource->format, box, stride, - slice_stride); + layer_stride); trace_dump_arg_end(); trace_dump_arg(uint, stride); - trace_dump_arg(uint, slice_stride); + trace_dump_arg(uint, layer_stride); trace_dump_call_end(); context->transfer_inline_write(context, resource, - sr, usage, box, data, stride, slice_stride); + level, usage, box, data, stride, layer_stride); } @@ -1434,6 +1489,8 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.set_vertex_sampler_views = trace_context_set_vertex_sampler_views; tr_ctx->base.create_sampler_view = trace_create_sampler_view; tr_ctx->base.sampler_view_destroy = trace_sampler_view_destroy; + tr_ctx->base.create_surface = trace_create_surface; + tr_ctx->base.surface_destroy = trace_surface_destroy; tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers; tr_ctx->base.set_index_buffer = trace_context_set_index_buffer; tr_ctx->base.resource_copy_region = trace_context_resource_copy_region; diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 8f816060324..155c869fbd9 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -71,6 +71,10 @@ void trace_dump_resource_template(const struct pipe_resource *templat) trace_dump_uint(templat->depth0); trace_dump_member_end(); + trace_dump_member_begin("array_size"); + trace_dump_uint(templat->array_size); + trace_dump_member_end(); + trace_dump_member(uint, templat, last_level); trace_dump_member(uint, templat, usage); trace_dump_member(uint, templat, bind); @@ -80,25 +84,6 @@ void trace_dump_resource_template(const struct pipe_resource *templat) } -void trace_dump_subresource(const struct pipe_subresource *subresource) -{ - if (!trace_dumping_enabled_locked()) - return; - - if(!subresource) { - trace_dump_null(); - return; - } - - trace_dump_struct_begin("pipe_subresource"); - - trace_dump_member(uint, subresource, face); - trace_dump_member(uint, subresource, level); - - trace_dump_struct_end(); -} - - void trace_dump_box(const struct pipe_box *box) { if (!trace_dumping_enabled_locked()) @@ -445,8 +430,13 @@ void trace_dump_sampler_view_template(const struct pipe_sampler_view *state) trace_dump_struct_begin("pipe_sampler_view"); trace_dump_member(format, state, format); - trace_dump_member(uint, state, first_level); - trace_dump_member(uint, state, last_level); + /* XXX */ + trace_dump_member(uint, state, u.tex.first_level); + trace_dump_member(uint, state, u.tex.last_level); + trace_dump_member(uint, state, u.tex.first_layer); + trace_dump_member(uint, state, u.tex.last_layer); + trace_dump_member(uint, state, u.buf.first_element); + trace_dump_member(uint, state, u.buf.last_element); trace_dump_member(uint, state, swizzle_r); trace_dump_member(uint, state, swizzle_g); trace_dump_member(uint, state, swizzle_b); @@ -472,14 +462,14 @@ void trace_dump_surface(const struct pipe_surface *state) trace_dump_member(uint, state, width); trace_dump_member(uint, state, height); - trace_dump_member(uint, state, layout); - trace_dump_member(uint, state, offset); trace_dump_member(uint, state, usage); trace_dump_member(ptr, state, texture); - trace_dump_member(uint, state, face); - trace_dump_member(uint, state, level); - trace_dump_member(uint, state, zslice); + trace_dump_member(uint, state, u.tex.level); + trace_dump_member(uint, state, u.tex.first_layer); + trace_dump_member(uint, state, u.tex.last_layer); + trace_dump_member(uint, state, 
u.buf.first_element); + trace_dump_member(uint, state, u.buf.last_element); trace_dump_struct_end(); } @@ -497,16 +487,18 @@ void trace_dump_transfer(const struct pipe_transfer *state) trace_dump_struct_begin("pipe_transfer"); + trace_dump_member(uint, state, box.x); + trace_dump_member(uint, state, box.y); + trace_dump_member(uint, state, box.z); trace_dump_member(uint, state, box.width); trace_dump_member(uint, state, box.height); + trace_dump_member(uint, state, box.depth); trace_dump_member(uint, state, stride); + trace_dump_member(uint, state, layer_stride); trace_dump_member(uint, state, usage); trace_dump_member(ptr, state, resource); - trace_dump_member(uint, state, sr.face); - trace_dump_member(uint, state, sr.level); - trace_dump_member(uint, state, box.z); trace_dump_struct_end(); } diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h index 078d2086109..fe8ece78d43 100644 --- a/src/gallium/drivers/trace/tr_dump_state.h +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -37,8 +37,6 @@ void trace_dump_format(enum pipe_format format); void trace_dump_resource_template(const struct pipe_resource *templat); -void trace_dump_subresource(const struct pipe_subresource *subresource); - void trace_dump_box(const struct pipe_box *box); void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state); diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 935831071e6..c2de2daa883 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -210,23 +210,26 @@ trace_screen_context_create(struct pipe_screen *_screen, void *priv) static void trace_screen_flush_frontbuffer(struct pipe_screen *_screen, - struct pipe_surface *_surface, + struct pipe_resource *_resource, + unsigned level, unsigned layer, void *context_private) { struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_surface *tr_surf = trace_surface(_surface); + struct trace_resource *tr_res = trace_resource(_resource); struct pipe_screen *screen = tr_scr->screen; - struct pipe_surface *surface = tr_surf->surface; + struct pipe_resource *resource = tr_res->resource; trace_dump_call_begin("pipe_screen", "flush_frontbuffer"); trace_dump_arg(ptr, screen); - trace_dump_arg(ptr, surface); + trace_dump_arg(ptr, resource); + trace_dump_arg(uint, level); + trace_dump_arg(uint, layer); /* XXX: hide, as there is nothing we can do with this trace_dump_arg(ptr, context_private); */ - screen->flush_frontbuffer(screen, surface, context_private); + screen->flush_frontbuffer(screen, resource, level, layer, context_private); trace_dump_call_end(); } @@ -318,68 +321,6 @@ trace_screen_resource_destroy(struct pipe_screen *_screen, } -/******************************************************************** - * surface - */ - - -static struct pipe_surface * -trace_screen_get_tex_surface(struct pipe_screen *_screen, - struct pipe_resource *_texture, - unsigned face, unsigned level, - unsigned zslice, - unsigned usage) -{ - struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_resource *tr_tex = trace_resource(_texture); - struct pipe_screen *screen = tr_scr->screen; - struct pipe_resource *texture = tr_tex->resource; - struct pipe_surface *result = NULL; - - assert(texture->screen == screen); - - trace_dump_call_begin("pipe_screen", "get_tex_surface"); - - trace_dump_arg(ptr, screen); - trace_dump_arg(ptr, texture); - trace_dump_arg(uint, face); - trace_dump_arg(uint, level); - trace_dump_arg(uint, 
zslice); - trace_dump_arg(uint, usage); - - result = screen->get_tex_surface(screen, texture, face, level, zslice, usage); - - trace_dump_ret(ptr, result); - - trace_dump_call_end(); - - result = trace_surface_create(tr_tex, result); - - return result; -} - - -static void -trace_screen_tex_surface_destroy(struct pipe_surface *_surface) -{ - struct trace_screen *tr_scr = trace_screen(_surface->texture->screen); - struct trace_surface *tr_surf = trace_surface(_surface); - struct pipe_screen *screen = tr_scr->screen; - struct pipe_surface *surface = tr_surf->surface; - - trace_dump_call_begin("pipe_screen", "tex_surface_destroy"); - - trace_dump_arg(ptr, screen); - trace_dump_arg(ptr, surface); - - trace_dump_call_end(); - - trace_surface_destroy(tr_surf); -} - - - - /******************************************************************** * buffer @@ -580,8 +521,6 @@ trace_screen_create(struct pipe_screen *screen) tr_scr->base.resource_from_handle = trace_screen_resource_from_handle; tr_scr->base.resource_get_handle = trace_screen_resource_get_handle; tr_scr->base.resource_destroy = trace_screen_resource_destroy; - tr_scr->base.get_tex_surface = trace_screen_get_tex_surface; - tr_scr->base.tex_surface_destroy = trace_screen_tex_surface_destroy; tr_scr->base.user_buffer_create = trace_screen_user_buffer_create; tr_scr->base.fence_reference = trace_screen_fence_reference; tr_scr->base.fence_signalled = trace_screen_fence_signalled; diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c index 9914b98b39c..27997346471 100644 --- a/src/gallium/drivers/trace/tr_texture.c +++ b/src/gallium/drivers/trace/tr_texture.c @@ -74,8 +74,8 @@ trace_resource_destroy(struct trace_screen *tr_scr, struct pipe_surface * -trace_surface_create(struct trace_resource *tr_tex, - struct pipe_surface *surface) +trace_surf_create(struct trace_resource *tr_tex, + struct pipe_surface *surface) { struct trace_surface *tr_surf; @@ -104,7 +104,7 @@ error: void -trace_surface_destroy(struct trace_surface *tr_surf) +trace_surf_destroy(struct trace_surface *tr_surf) { pipe_resource_reference(&tr_surf->base.texture, NULL); pipe_surface_reference(&tr_surf->surface, NULL); diff --git a/src/gallium/drivers/trace/tr_texture.h b/src/gallium/drivers/trace/tr_texture.h index 6513995d505..3352c96e59a 100644 --- a/src/gallium/drivers/trace/tr_texture.h +++ b/src/gallium/drivers/trace/tr_texture.h @@ -125,11 +125,11 @@ trace_resource_destroy(struct trace_screen *tr_scr, struct trace_resource *tr_tex); struct pipe_surface * -trace_surface_create(struct trace_resource *tr_tex, +trace_surf_create(struct trace_resource *tr_tex, struct pipe_surface *surface); void -trace_surface_destroy(struct trace_surface *tr_surf); +trace_surf_destroy(struct trace_surface *tr_surf); struct pipe_transfer * trace_transfer_create(struct trace_context *tr_ctx, diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 0e53aef6d2e..589cac2ddd3 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -54,7 +54,6 @@ struct pipe_scissor_state; struct pipe_shader_state; struct pipe_stencil_ref; struct pipe_stream_output_state; -struct pipe_subresource; struct pipe_surface; struct pipe_vertex_buffer; struct pipe_vertex_element; @@ -256,12 +255,11 @@ struct pipe_context { */ void (*resource_copy_region)(struct pipe_context *pipe, struct pipe_resource *dst, - struct pipe_subresource subdst, + unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, 
struct pipe_resource *src, - struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height); + unsigned src_level, + const struct pipe_box *src_box); /** * Resolve a multisampled resource into a non-multisampled one. @@ -269,9 +267,9 @@ struct pipe_context { */ void (*resource_resolve)(struct pipe_context *pipe, struct pipe_resource *dst, - struct pipe_subresource subdst, + unsigned dst_layer, struct pipe_resource *src, - struct pipe_subresource subsrc); + unsigned src_layer); /*@}*/ @@ -328,13 +326,13 @@ struct pipe_context { * PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE. * \param pipe context whose unflushed hw commands will be checked. * \param texture texture to check. - * \param face cubemap face. Use 0 for non-cubemap texture. * \param level mipmap level. + * \param layer cubemap face, 2d array or 3d slice, 0 otherwise. Use -1 for any layer. * \return mask of PIPE_REFERENCED_FOR_READ/WRITE or PIPE_UNREFERENCED */ unsigned int (*is_resource_referenced)(struct pipe_context *pipe, - struct pipe_resource *texture, - unsigned face, unsigned level); + struct pipe_resource *texture, + unsigned level, int layer); /** * Create a view on a texture to be used by a shader stage. @@ -348,20 +346,32 @@ struct pipe_context { /** + * Get a surface which is a "view" into a resource, used by + * render target / depth stencil stages. + * \param usage bitmaks of PIPE_BIND_* flags + */ + struct pipe_surface *(*create_surface)(struct pipe_context *ctx, + struct pipe_resource *resource, + const struct pipe_surface *templat); + + void (*surface_destroy)(struct pipe_context *ctx, + struct pipe_surface *); + + /** * Get a transfer object for transferring data to/from a texture. * * Transfers are (by default) context-private and allow uploads to be * interleaved with */ struct pipe_transfer *(*get_transfer)(struct pipe_context *, - struct pipe_resource *resource, - struct pipe_subresource, - unsigned usage, /* a combination of PIPE_TRANSFER_x */ - const struct pipe_box *); + struct pipe_resource *resource, + unsigned level, + unsigned usage, /* a combination of PIPE_TRANSFER_x */ + const struct pipe_box *); void (*transfer_destroy)(struct pipe_context *, - struct pipe_transfer *); - + struct pipe_transfer *); + void *(*transfer_map)( struct pipe_context *, struct pipe_transfer *transfer ); @@ -381,13 +391,13 @@ struct pipe_context { * pointer. XXX: strides?? */ void (*transfer_inline_write)( struct pipe_context *, - struct pipe_resource *, - struct pipe_subresource, - unsigned usage, /* a combination of PIPE_TRANSFER_x */ - const struct pipe_box *, - const void *data, - unsigned stride, - unsigned slice_stride); + struct pipe_resource *, + unsigned level, + unsigned usage, /* a combination of PIPE_TRANSFER_x */ + const struct pipe_box *, + const void *data, + unsigned stride, + unsigned layer_stride); }; diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 9f2b081e33a..2135c19dcfc 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -138,12 +138,14 @@ enum pipe_error { /** Texture types. 
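transfer_inline_write() keeps its shape but now takes a mip level and a layer_stride. A hedged upload sketch for one layer of one level, with hypothetical locals and a tightly packed source; not the canonical implementation:

#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "util/u_math.h"
#include <string.h>

static void
upload_layer(struct pipe_context *pipe, struct pipe_resource *tex,
             unsigned level, unsigned layer,
             const void *data, unsigned stride)
{
   struct pipe_box box;

   memset(&box, 0, sizeof box);
   box.z = layer;                        /* face / array layer / 3D slice */
   box.width = u_minify(tex->width0, level);
   box.height = u_minify(tex->height0, level);
   box.depth = 1;

   pipe->transfer_inline_write(pipe, tex, level, PIPE_TRANSFER_WRITE,
                               &box, data, stride,
                               stride * box.height /* nominal layer_stride, depth is 1 */);
}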
* See the documentation for info on PIPE_TEXTURE_RECT vs PIPE_TEXTURE_2D */ enum pipe_texture_target { - PIPE_BUFFER = 0, - PIPE_TEXTURE_1D = 1, - PIPE_TEXTURE_2D = 2, - PIPE_TEXTURE_3D = 3, - PIPE_TEXTURE_CUBE = 4, - PIPE_TEXTURE_RECT = 5, + PIPE_BUFFER = 0, + PIPE_TEXTURE_1D = 1, + PIPE_TEXTURE_2D = 2, + PIPE_TEXTURE_3D = 3, + PIPE_TEXTURE_CUBE = 4, + PIPE_TEXTURE_RECT = 5, + PIPE_TEXTURE_1D_ARRAY = 6, + PIPE_TEXTURE_2D_ARRAY = 7, PIPE_MAX_TEXTURE_TYPES }; @@ -178,14 +180,6 @@ enum pipe_texture_target { #define PIPE_TEX_COMPARE_NONE 0 #define PIPE_TEX_COMPARE_R_TO_TEXTURE 1 - -/** - * Surface layout -- a hint? Or some driver-internal poking out into - * the interface? - */ -#define PIPE_SURFACE_LAYOUT_LINEAR 0 - - /** * Clear buffer bits */ @@ -281,9 +275,9 @@ enum pipe_transfer_usage { * Resource binding flags -- state tracker must specify in advance all * the ways a resource might be used. */ -#define PIPE_BIND_DEPTH_STENCIL (1 << 0) /* get_tex_surface */ -#define PIPE_BIND_RENDER_TARGET (1 << 1) /* get_tex_surface */ -#define PIPE_BIND_SAMPLER_VIEW (1 << 2) /* get_sampler_view */ +#define PIPE_BIND_DEPTH_STENCIL (1 << 0) /* create_surface */ +#define PIPE_BIND_RENDER_TARGET (1 << 1) /* create_surface */ +#define PIPE_BIND_SAMPLER_VIEW (1 << 2) /* create_sampler_view */ #define PIPE_BIND_VERTEX_BUFFER (1 << 3) /* set_vertex_buffers */ #define PIPE_BIND_INDEX_BUFFER (1 << 4) /* draw_elements */ #define PIPE_BIND_CONSTANT_BUFFER (1 << 5) /* set_constant_buffer */ @@ -461,6 +455,7 @@ enum pipe_cap { /** different blend funcs per rendertarget */ PIPE_CAP_INDEP_BLEND_FUNC, PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE, + PIPE_CAP_ARRAY_TEXTURES, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER, diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index 22cc7aa18a2..74a9939df73 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -190,6 +190,12 @@ enum pipe_format { PIPE_FORMAT_X24S8_USCALED = 136, PIPE_FORMAT_S8X24_USCALED = 137, PIPE_FORMAT_X32_S8X24_USCALED = 138, + + PIPE_FORMAT_B2G3R3_UNORM = 139, + PIPE_FORMAT_L16A16_UNORM = 140, + PIPE_FORMAT_A16_UNORM = 141, + PIPE_FORMAT_I16_UNORM = 142, + PIPE_FORMAT_COUNT }; diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index 912631242f5..850eb84a3c8 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -117,7 +117,7 @@ struct pipe_screen { /** * Create a texture from a winsys_handle. The handle is often created in * another process by first creating a pipe texture and then calling - * texture_get_handle. + * resource_get_handle. */ struct pipe_resource * (*resource_from_handle)(struct pipe_screen *, const struct pipe_resource *templat, @@ -136,18 +136,6 @@ struct pipe_screen { void (*resource_destroy)(struct pipe_screen *, struct pipe_resource *pt); - /** Get a 2D surface which is a "view" into a texture - * \param usage bitmaks of PIPE_BIND_* flags - */ - struct pipe_surface *(*get_tex_surface)(struct pipe_screen *, - struct pipe_resource *resource, - unsigned face, unsigned level, - unsigned zslice, - unsigned usage ); - - void (*tex_surface_destroy)(struct pipe_surface *); - - /** * Create a buffer that wraps user-space data. 
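With the new PIPE_TEXTURE_1D/2D_ARRAY targets, pipe_resource::array_size and PIPE_CAP_ARRAY_TEXTURES, a state tracker could probe and allocate an array texture roughly as below. This is a sketch with hypothetical parameters, not code from this commit, and assumes the driver advertises the cap:

#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include <string.h>

static struct pipe_resource *
create_2d_array(struct pipe_screen *screen, enum pipe_format format,
                unsigned width, unsigned height, unsigned layers)
{
   struct pipe_resource templat;

   if (!screen->get_param(screen, PIPE_CAP_ARRAY_TEXTURES))
      return NULL;                       /* driver has not opted in */

   memset(&templat, 0, sizeof templat);
   templat.target = PIPE_TEXTURE_2D_ARRAY;
   templat.format = format;
   templat.width0 = width;
   templat.height0 = height;
   templat.depth0 = 1;                   /* depth stays 1; layers go in array_size */
   templat.array_size = layers;
   templat.last_level = 0;
   templat.bind = PIPE_BIND_SAMPLER_VIEW;

   return screen->resource_create(screen, &templat);
}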
@@ -182,7 +170,8 @@ struct pipe_screen { * gets out-of-band */ void (*flush_frontbuffer)( struct pipe_screen *screen, - struct pipe_surface *surf, + struct pipe_resource *resource, + unsigned level, unsigned layer, void *winsys_drawable_handle ); diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index ba433b2bd2a..0a9e14154d7 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -177,7 +177,8 @@ union tgsi_immediate_data #define TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES 2 #define TGSI_PROPERTY_FS_COORD_ORIGIN 3 #define TGSI_PROPERTY_FS_COORD_PIXEL_CENTER 4 -#define TGSI_PROPERTY_COUNT 5 +#define TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS 5 +#define TGSI_PROPERTY_COUNT 6 struct tgsi_property { unsigned Type : 4; /**< TGSI_TOKEN_TYPE_PROPERTY */ diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index fc6dba346da..226ae8667b7 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -271,25 +271,33 @@ struct pipe_sampler_state /** - * 2D surface. This is basically a view into a memory buffer. - * May be a renderbuffer, texture mipmap level, etc. + * A view into a texture that can be bound to a color render target / + * depth stencil attachment point. */ struct pipe_surface { struct pipe_reference reference; struct pipe_resource *texture; /**< resource into which this is a view */ + struct pipe_context *context; /**< context this view belongs to */ enum pipe_format format; + /* XXX width/height should be removed */ unsigned width; /**< logical width in pixels */ unsigned height; /**< logical height in pixels */ - unsigned layout; /**< PIPE_SURFACE_LAYOUT_x */ - unsigned offset; /**< offset from start of buffer, in bytes */ unsigned usage; /**< bitmask of PIPE_BIND_x */ - unsigned zslice; - unsigned face; - unsigned level; + union { + struct { + unsigned level; + unsigned first_layer:16; + unsigned last_layer:16; + } tex; + struct { + unsigned first_element; + unsigned last_element; + } buf; + } u; }; @@ -302,8 +310,18 @@ struct pipe_sampler_view enum pipe_format format; /**< typed PIPE_FORMAT_x */ struct pipe_resource *texture; /**< texture into which this is a view */ struct pipe_context *context; /**< context this view belongs to */ - unsigned first_level:8; /**< first mipmap level */ - unsigned last_level:8; /**< last mipmap level */ + union { + struct { + unsigned first_layer:16; /**< first layer to use for array textures */ + unsigned last_layer:16; /**< last layer to use for array textures */ + unsigned first_level:8; /**< first mipmap level to use */ + unsigned last_level:8; /**< last mipmap level to use */ + } tex; + struct { + unsigned first_element; + unsigned last_element; + } buf; + } u; unsigned swizzle_r:3; /**< PIPE_SWIZZLE_x for red component */ unsigned swizzle_g:3; /**< PIPE_SWIZZLE_x for green component */ unsigned swizzle_b:3; /**< PIPE_SWIZZLE_x for blue component */ @@ -338,13 +356,14 @@ struct pipe_resource unsigned width0; unsigned height0; unsigned depth0; + unsigned array_size; unsigned last_level:8; /**< Index of last mipmap level present/defined */ unsigned nr_samples:8; /**< for multisampled surfaces, nr of samples */ unsigned usage:8; /**< PIPE_USAGE_x (not a bitmask) */ - unsigned bind; /**< bitmask of PIPE_BIND_x */ - unsigned flags; /**< bitmask of PIPE_RESOURCE_FLAG_x */ + unsigned bind; /**< bitmask of PIPE_BIND_x */ + unsigned flags; /**< bitmask of PIPE_RESOURCE_FLAG_x */ }; struct 
pipe_stream_output_state @@ -363,15 +382,6 @@ struct pipe_stream_output_state unsigned stride; }; -/** - * Extra indexing info for (cube) texture resources. - */ -struct pipe_subresource -{ - unsigned face:16; - unsigned level:16; -}; - /** * Transfer object. For data transfer to/from a resource. @@ -379,11 +389,11 @@ struct pipe_subresource struct pipe_transfer { struct pipe_resource *resource; /**< resource to transfer to/from */ - struct pipe_subresource sr; + unsigned level; enum pipe_transfer_usage usage; struct pipe_box box; unsigned stride; - unsigned slice_stride; + unsigned layer_stride; void *data; }; diff --git a/src/gallium/include/state_tracker/st_api.h b/src/gallium/include/state_tracker/st_api.h index 565a09614a4..1c2148b78f2 100644 --- a/src/gallium/include/state_tracker/st_api.h +++ b/src/gallium/include/state_tracker/st_api.h @@ -164,9 +164,8 @@ struct st_egl_image /* this is owned by the caller */ struct pipe_resource *texture; - unsigned face; unsigned level; - unsigned zslice; + unsigned layer; }; /** diff --git a/src/gallium/include/state_tracker/xlib_sw_winsys.h b/src/gallium/include/state_tracker/xlib_sw_winsys.h index f22c22bb620..4e7ccc13ac8 100644 --- a/src/gallium/include/state_tracker/xlib_sw_winsys.h +++ b/src/gallium/include/state_tracker/xlib_sw_winsys.h @@ -6,7 +6,7 @@ struct pipe_screen; -struct pipe_surface; +struct pipe_resource; /* This is what the xlib software winsys expects to find in the * "private" field of flush_frontbuffers(). diff --git a/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp b/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp index c246fc5ef76..a54324a04f2 100644 --- a/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp +++ b/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp @@ -1159,8 +1159,15 @@ struct GalliumDXGISwapChain : public GalliumDXGIObject<IDXGISwapChain, GalliumDX unsigned blit_x, blit_y, blit_w, blit_h; float black[4] = {0, 0, 0, 0}; - if(!formats_compatible || src->width0 != dst_w || src->height0 != dst_h) - dst_surface = pipe->screen->get_tex_surface(pipe->screen, dst, 0, 0, 0, PIPE_BIND_RENDER_TARGET); + if(!formats_compatible || src->width0 != dst_w || src->height0 != dst_h) { + struct pipe_surface templat; + templat.usage = PIPE_BIND_RENDER_TARGET; + templat.format = dst->format; + templat.u.tex.level = 0; + templat.u.tex.first_layer = 0; + templat.u.tex.last_layer = 0; + dst_surface = pipe->create_surface(pipe, dst, &templat); + } if(preserve_aspect_ratio) { @@ -1199,10 +1206,12 @@ struct GalliumDXGISwapChain : public GalliumDXGIObject<IDXGISwapChain, GalliumDX if(formats_compatible && blit_w == src->width0 && blit_h == src->height0) { - pipe_subresource sr; - sr.face = 0; - sr.level = 0; - pipe->resource_copy_region(pipe, dst, sr, rect.left, rect.top, 0, src, sr, 0, 0, 0, blit_w, blit_h); + pipe_box box; + box.x = box.y = box.z; + box.width = blit_w; + box.height = blit_h; + box.z = 1; + pipe->resource_copy_region(pipe, dst, 0, rect.left, rect.top, 0, src, 0, &box); } else { @@ -1218,7 +1227,7 @@ struct GalliumDXGISwapChain : public GalliumDXGIObject<IDXGISwapChain, GalliumDX } if(dst_surface) - pipe->screen->tex_surface_destroy(dst_surface); + pipe->surface_destroy(pipe, dst_surface); pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, 0); diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h index 36110595c20..e1ba6c184fd 100644 --- a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h +++ 
b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h @@ -1416,23 +1416,33 @@ changed: *out_predicate_value = render_predicate_value; } - static pipe_subresource d3d11_to_pipe_subresource(struct pipe_resource* resource, unsigned subresource) + static unsigned d3d11_subresource_to_level(struct pipe_resource* resource, unsigned subresource) { - pipe_subresource sr; if(subresource <= resource->last_level) { - sr.level = subresource; - sr.face = 0; + return subresource; } else { unsigned levels = resource->last_level + 1; - sr.level = subresource % levels; - sr.face = subresource / levels; + return subresource % levels; } - return sr; } + static unsigned d3d11_subresource_to_face(struct pipe_resource* resource, unsigned subresource) + { + if(subresource <= resource->last_level) + { + return 0; + } + else + { + unsigned levels = resource->last_level + 1; + return subresource / levels; + } + } + + /* TODO: deferred contexts will need a different implementation of this, * because we can't put the transfer info into the resource itself. * Also, there are very different restrictions, for obvious reasons. @@ -1448,8 +1458,10 @@ changed: GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource; if(resource->transfers.count(subresource)) return E_FAIL; - pipe_subresource sr = d3d11_to_pipe_subresource(resource->resource, subresource); - pipe_box box = d3d11_to_pipe_box(resource->resource, sr.level, 0); + unsigned level = d3d11_subresource_to_level(resource->resource, subresource); + unsigned face = d3d11_subresource_to_face(resource->resource, subresource); + pipe_box box = d3d11_to_pipe_box(resource->resource, level, 0); + /* XXX the translation from subresource to level/face(zslice/array layer) isn't quite right */ unsigned usage = 0; if(map_type == D3D11_MAP_READ) usage = PIPE_TRANSFER_READ; @@ -1465,7 +1477,7 @@ changed: return E_INVALIDARG; if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT) usage |= PIPE_TRANSFER_DONTBLOCK; - struct pipe_transfer* transfer = pipe->get_transfer(pipe, resource->resource, sr, usage, &box); + struct pipe_transfer* transfer = pipe->get_transfer(pipe, resource->resource, level, usage, &box); if(!transfer) { if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT) return DXGI_ERROR_WAS_STILL_DRAWING; @@ -1475,7 +1487,7 @@ changed: resource->transfers[subresource] = transfer; mapped_resource->pData = pipe->transfer_map(pipe, transfer); mapped_resource->RowPitch = transfer->stride; - mapped_resource->DepthPitch = transfer->slice_stride; + mapped_resource->DepthPitch = transfer->layer_stride; return S_OK; } @@ -1507,15 +1519,16 @@ changed: SYNCHRONIZED; GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource; GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource; - pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, dst_subresource); - pipe_subresource subsrc = d3d11_to_pipe_subresource(src->resource, src_subresource); - pipe_box box = d3d11_to_pipe_box(src->resource, subsrc.level, src_box); - for(unsigned i = 0; i < box.depth; ++i) + unsigned dst_level = d3d11_subresource_to_level(dst->resource, dst_subresource); + unsigned dst_face = d3d11_subresource_to_face(dst->resource, dst_subresource); + unsigned src_level = d3d11_subresource_to_level(src->resource, src_subresource); + unsigned src_face = d3d11_subresource_to_face(src->resource, src_subresource); + /* XXX the translation from subresource to level/face(zslice/array layer) isn't quite right */ + pipe_box box = d3d11_to_pipe_box(src->resource, src_level, src_box); { 
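The two helpers introduced above replace d3d11_to_pipe_subresource(): a D3D subresource index is split into a mip level and a face/array layer, with levels varying fastest. Restated as a hedged standalone sketch:

/* D3D10/11 subresource index = layer * num_levels + level. */
static void
split_subresource(unsigned subresource, unsigned last_level,
                  unsigned *level, unsigned *layer)
{
   unsigned num_levels = last_level + 1;

   *level = subresource % num_levels;
   *layer = subresource / num_levels;
}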
pipe->resource_copy_region(pipe, - dst->resource, subdst, dst_x, dst_y, dst_z + i, - src->resource, subsrc, box.x, box.y, box.z + i, - box.width, box.height); + dst->resource, dst_level, dst_x, dst_y, dst_z, + src->resource, src_level, &box); } } @@ -1526,24 +1539,23 @@ changed: SYNCHRONIZED; GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource; GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource; - pipe_subresource sr; - unsigned faces = dst->resource->target == PIPE_TEXTURE_CUBE ? 6 : 1; - - for(sr.face = 0; sr.face < faces; ++sr.face) + unsigned level; + for(level = 0; level <= dst->resource->last_level; ++level) { - for(sr.level = 0; sr.level <= dst->resource->last_level; ++sr.level) - { - unsigned w = u_minify(dst->resource->width0, sr.level); - unsigned h = u_minify(dst->resource->height0, sr.level); - unsigned d = u_minify(dst->resource->depth0, sr.level); - for(unsigned i = 0; i < d; ++i) - { - pipe->resource_copy_region(pipe, - dst->resource, sr, 0, 0, i, - src->resource, sr, 0, 0, i, - w, h); - } - } + unsigned layers = 1; + pipe_box box; + if (dst->resource->target == PIPE_TEXTURE_CUBE) + layers = 6; + else if (dst->resource->target == PIPE_TEXTURE_3D) + layers = u_minify(dst->resource->depth0, level); + /* else layers = dst->resource->array_size; */ + box.x = box.y = box.z = 0; + box.width = u_minify(dst->resource->width0, level); + box.height = u_minify(dst->resource->height0, level); + box.depth = layers; + pipe->resource_copy_region(pipe, + dst->resource, level, 0, 0, 0, + src->resource, level, &box); } } @@ -1557,9 +1569,10 @@ changed: { SYNCHRONIZED; GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource; - pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, dst_subresource); - pipe_box box = d3d11_to_pipe_box(dst->resource, subdst.level, pDstBox); - pipe->transfer_inline_write(pipe, dst->resource, subdst, PIPE_TRANSFER_WRITE, &box, pSrcData, src_row_pitch, src_depth_pitch); + unsigned dst_level = d3d11_subresource_to_level(dst->resource, dst_subresource); + /* XXX the translation from subresource to level/face(zslice/array layer) isn't quite right */ + pipe_box box = d3d11_to_pipe_box(dst->resource, dst_level, pDstBox); + pipe->transfer_inline_write(pipe, dst->resource, dst_level, PIPE_TRANSFER_WRITE, &box, pSrcData, src_row_pitch, src_depth_pitch); } #if API >= 11 @@ -1714,9 +1727,9 @@ changed: SYNCHRONIZED; GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource; GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource; - pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, dst_subresource); - pipe_subresource subsrc = d3d11_to_pipe_subresource(src->resource, src_subresource); - pipe->resource_resolve(pipe, dst->resource, subdst, src->resource, subsrc); + unsigned dst_layer = d3d11_subresource_to_face(dst->resource, dst_subresource); + unsigned src_layer = d3d11_subresource_to_face(src->resource, src_subresource); + pipe->resource_resolve(pipe, dst->resource, dst_layer, src->resource, src_layer); } #if API >= 11 diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_screen.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_screen.h index 95ea4e00fc1..9cfdc837d8e 100644 --- a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_screen.h +++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_screen.h @@ -718,15 +718,13 @@ struct GalliumD3D11ScreenImpl : public GalliumD3D11Screen { for(unsigned level = 0; level <= templat.last_level; ++level) { - struct pipe_subresource sr; - sr.level 
= level; - sr.face = slice; struct pipe_box box; - box.x = box.y = box.z = 0; + box.x = box.y = 0; + box.z = slice; box.width = u_minify(width, level); box.height = u_minify(height, level); - box.depth = u_minify(depth, level); - immediate_pipe->transfer_inline_write(immediate_pipe, resource, sr, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | PIPE_TRANSFER_UNSYNCHRONIZED, &box, initial_data->pSysMem, initial_data->SysMemPitch, initial_data->SysMemSlicePitch); + box.depth = 1; + immediate_pipe->transfer_inline_write(immediate_pipe, resource, level, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | PIPE_TRANSFER_UNSYNCHRONIZED, &box, initial_data->pSysMem, initial_data->SysMemPitch, initial_data->SysMemSlicePitch); ++initial_data; } } @@ -978,8 +976,8 @@ struct GalliumD3D11ScreenImpl : public GalliumD3D11Screen case D3D11_SRV_DIMENSION_TEXTURE1DARRAY: case D3D11_SRV_DIMENSION_TEXTURE2DARRAY: /* yes, this works for all of these types (but TODO: texture arrays) */ - templat.first_level = desc->Texture1D.MostDetailedMip; - templat.last_level = templat.first_level + desc->Texture1D.MipLevels - 1; + templat.u.tex.first_level = desc->Texture1D.MostDetailedMip; + templat.u.tex.last_level = templat.u.tex.first_level + desc->Texture1D.MipLevels - 1; break; case D3D11_SRV_DIMENSION_BUFFER: case D3D11_SRV_DIMENSION_TEXTURE2DMS: @@ -1054,30 +1052,34 @@ struct GalliumD3D11ScreenImpl : public GalliumD3D11Screen desc = &def_desc; } - unsigned zslice = 0; - unsigned face = 0; - unsigned level; - enum pipe_format format; + struct pipe_surface templat; + memset(&templat, 0, sizeof(templat)); if(invalid(desc->format >= DXGI_FORMAT_COUNT)) return E_INVALIDARG; - format = dxgi_to_pipe_format[desc->Format]; - if(!format) + templat.format = dxgi_to_pipe_format[desc->Format]; + if(!templat.format) return E_NOTIMPL; + templat.usage = PIPE_BIND_RENDER_TARGET; + templat.texture = ((GalliumD3D11Resource<>*)iresource)->resource; switch(desc->ViewDimension) { case D3D11_RTV_DIMENSION_TEXTURE1D: case D3D11_RTV_DIMENSION_TEXTURE2D: - level = desc->Texture1D.MipSlice; + templat.u.tex.level = desc->Texture1D.MipSlice; break; case D3D11_RTV_DIMENSION_TEXTURE3D: - level = desc->Texture3D.MipSlice; - zslice = desc->Texture3D.FirstWSlice; + templat.u.tex.level = desc->Texture3D.MipSlice; + templat.u.tex.first_layer = desc->Texture3D.FirstWSlice; + /* XXX FIXME */ + templat.u.tex.last_layer = desc->Texture3D.FirstWSlice; break; case D3D11_RTV_DIMENSION_TEXTURE1DARRAY: case D3D11_RTV_DIMENSION_TEXTURE2DARRAY: - level = desc->Texture1DArray.MipSlice; - face = desc->Texture1DArray.FirstArraySlice; + templat.u.tex.level = desc->Texture1DArray.MipSlice; + templat.u.tex.first_layer = desc->Texture1DArray.FirstArraySlice; + /* XXX FIXME */ + templat.u.tex.last_layer = desc->Texture1DArray.FirstArraySlice; break; case D3D11_RTV_DIMENSION_BUFFER: case D3D11_RTV_DIMENSION_TEXTURE2DMS: @@ -1090,13 +1092,9 @@ struct GalliumD3D11ScreenImpl : public GalliumD3D11Screen if(!out_rtv) return S_FALSE; - struct pipe_surface* surface = screen->get_tex_surface(screen, - ((GalliumD3D11Resource<>*)iresource)->resource, - face, level, zslice, PIPE_BIND_RENDER_TARGET); + struct pipe_surface* surface = immediate_pipe->create_surface(immediate_pipe, templat.texture, &templat); if(!surface) return E_FAIL; - /* muhahahahaha, let's hope this actually works */ - surface->format = format; *out_rtv = new GalliumD3D11RenderTargetView(this, (GalliumD3D11Resource<>*)iresource, surface, *desc); return S_OK; } @@ -1134,26 +1132,28 @@ struct GalliumD3D11ScreenImpl : public 
GalliumD3D11Screen desc = &def_desc; } - unsigned zslice = 0; - unsigned face = 0; - unsigned level; - enum pipe_format format; + struct pipe_surface templat; + memset(&templat, 0, sizeof(templat)); if(invalid(desc->format >= DXGI_FORMAT_COUNT)) return E_INVALIDARG; - format = dxgi_to_pipe_format[desc->Format]; - if(!format) + templat.format = dxgi_to_pipe_format[desc->Format]; + if(!templat.format) return E_NOTIMPL; + templat.usage = PIPE_BIND_DEPTH_STENCIL; + templat.texture = ((GalliumD3D11Resource<>*)iresource)->resource; switch(desc->ViewDimension) { case D3D11_DSV_DIMENSION_TEXTURE1D: case D3D11_DSV_DIMENSION_TEXTURE2D: - level = desc->Texture1D.MipSlice; + templat.u.tex.level = desc->Texture1D.MipSlice; break; case D3D11_DSV_DIMENSION_TEXTURE1DARRAY: case D3D11_DSV_DIMENSION_TEXTURE2DARRAY: - level = desc->Texture1DArray.MipSlice; - face = desc->Texture1DArray.FirstArraySlice; + templat.u.tex.level = desc->Texture1DArray.MipSlice; + templat.u.tex.first_layer = desc->Texture1DArray.FirstArraySlice; + /* XXX FIXME */ + templat.u.tex.last_layer = desc->Texture1DArray.FirstArraySlice; break; case D3D11_DSV_DIMENSION_TEXTURE2DMS: case D3D11_DSV_DIMENSION_TEXTURE2DMSARRAY: @@ -1165,13 +1165,9 @@ struct GalliumD3D11ScreenImpl : public GalliumD3D11Screen if(!out_depth_stencil_view) return S_FALSE; - struct pipe_surface* surface = screen->get_tex_surface(screen, - ((GalliumD3D11Resource<>*)iresource)->resource, - face, level, zslice, PIPE_BIND_DEPTH_STENCIL); + struct pipe_surface* surface = immediate_pipe->create_surface(immediate_pipe, templat.texture, &templat); if(!surface) return E_FAIL; - /* muhahahahaha, let's hope this actually works */ - surface->format = format; *out_depth_stencil_view = new GalliumD3D11DepthStencilView(this, (GalliumD3D11Resource<>*)iresource, surface, *desc); return S_OK; } diff --git a/src/gallium/state_trackers/dri/common/dri_screen.c b/src/gallium/state_trackers/dri/common/dri_screen.c index 1302e9bc013..f6e22c74b4e 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.c +++ b/src/gallium/state_trackers/dri/common/dri_screen.c @@ -304,9 +304,8 @@ dri_get_egl_image(struct st_manager *smapi, stimg->texture = NULL; pipe_resource_reference(&stimg->texture, img->texture); - stimg->face = img->face; stimg->level = img->level; - stimg->zslice = img->zslice; + stimg->layer = img->layer; return TRUE; } diff --git a/src/gallium/state_trackers/dri/common/dri_screen.h b/src/gallium/state_trackers/dri/common/dri_screen.h index 0da9b5510fc..8cb0a102c8d 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.h +++ b/src/gallium/state_trackers/dri/common/dri_screen.h @@ -83,9 +83,8 @@ dri_screen(__DRIscreen * sPriv) struct __DRIimageRec { struct pipe_resource *texture; - unsigned face; unsigned level; - unsigned zslice; + unsigned layer; void *loader_private; }; diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index 3c5b0756174..a9d05a80fbd 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -200,6 +200,7 @@ dri2_drawable_process_buffers(struct dri_drawable *drawable, templ.width0 = dri_drawable->w; templ.height0 = dri_drawable->h; templ.depth0 = 1; + templ.array_size = 1; memset(&whandle, 0, sizeof(whandle)); @@ -348,6 +349,7 @@ dri2_create_image_from_name(__DRIscreen *_screen, templ.width0 = width; templ.height0 = height; templ.depth0 = 1; + templ.array_size = 1; memset(&whandle, 0, sizeof(whandle)); whandle.handle = name; @@ -360,9 +362,8 @@ 
dri2_create_image_from_name(__DRIscreen *_screen, return NULL; } - img->face = 0; img->level = 0; - img->zslice = 0; + img->layer = 0; img->loader_private = loaderPrivate; return img; @@ -430,9 +431,8 @@ dri2_create_image(__DRIscreen *_screen, return NULL; } - img->face = 0; img->level = 0; - img->zslice = 0; + img->layer = 0; img->loader_private = loaderPrivate; return img; diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c index c48cc440367..30088b09685 100644 --- a/src/gallium/state_trackers/dri/sw/drisw.c +++ b/src/gallium/state_trackers/dri/sw/drisw.c @@ -87,40 +87,17 @@ drisw_put_image(struct dri_drawable *drawable, put_image(dPriv, data, width, height); } -static struct pipe_surface * -drisw_get_pipe_surface(struct dri_drawable *drawable, struct pipe_resource *ptex) -{ - struct pipe_screen *pipe_screen = dri_screen(drawable->sPriv)->base.screen; - struct pipe_surface *psurf = drawable->drisw_surface; - - if (!psurf || psurf->texture != ptex) { - pipe_surface_reference(&drawable->drisw_surface, NULL); - - drawable->drisw_surface = pipe_screen->get_tex_surface(pipe_screen, - ptex, 0, 0, 0, 0/* no bind flag???*/); - - psurf = drawable->drisw_surface; - } - - return psurf; -} - static INLINE void drisw_present_texture(__DRIdrawable *dPriv, struct pipe_resource *ptex) { struct dri_drawable *drawable = dri_drawable(dPriv); struct dri_screen *screen = dri_screen(drawable->sPriv); - struct pipe_surface *psurf; if (swrast_no_present) return; - psurf = drisw_get_pipe_surface(drawable, ptex); - if (!psurf) - return; - - screen->base.screen->flush_frontbuffer(screen->base.screen, psurf, drawable); + screen->base.screen->flush_frontbuffer(screen->base.screen, ptex, 0, 0, drawable); } static INLINE void @@ -220,6 +197,7 @@ drisw_allocate_textures(struct dri_drawable *drawable, templ.width0 = width; templ.height0 = height; templ.depth0 = 1; + templ.array_size = 1; templ.last_level = 0; for (i = 0; i < count; i++) { diff --git a/src/gallium/state_trackers/egl/SConscript b/src/gallium/state_trackers/egl/SConscript index 50c76819954..9ade76ecbb2 100644 --- a/src/gallium/state_trackers/egl/SConscript +++ b/src/gallium/state_trackers/egl/SConscript @@ -10,11 +10,8 @@ env.Append(CPPPATH = [ '#/src/gallium/winsys/sw', '.', ]) -env.Append(CPPDEFINES = [ - 'HAVE_GDI_BACKEND', -]) -common_sources = [ +sources = [ 'common/egl_g3d.c', 'common/egl_g3d_api.c', 'common/egl_g3d_image.c', @@ -23,12 +20,31 @@ common_sources = [ 'common/native_helper.c', ] -gdi_sources = common_sources + [ - 'gdi/native_gdi.c', -] +if env['platform'] == 'windows': + env.Append(CPPDEFINES = ['HAVE_GDI_BACKEND']) + sources.append('gdi/native_gdi.c') +else: + if env['x11']: + env.Append(CPPDEFINES = ['HAVE_X11_BACKEND']) + env.Prepend(CPPPATH = [ + '#/src/glx', + '#/src/mapi', + ]) + sources.append([ + 'x11/native_x11.c', + 'x11/native_dri2.c', + 'x11/native_ximage.c', + 'x11/x11_screen.c', + 'x11/glxinit.c']) + if env['dri']: + env.Append(CPPDEFINES = ['GLX_DIRECT_RENDERING']) + sources.append(['#/src/glx/dri2.c']) + if env['drm']: + env.Append(CPPDEFINES = ['HAVE_DRM_BACKEND']) + sources.append(['drm/native_drm.c', 'drm/modeset.c']) -st_egl_gdi = env.ConvenienceLibrary( - target = 'st_egl_gdi', - source = gdi_sources, +st_egl = env.ConvenienceLibrary( + target = 'st_egl', + source = sources, ) -Export('st_egl_gdi') +Export('st_egl') diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c index a3750ac56fb..9024f945b8c 100644 --- 
a/src/gallium/state_trackers/egl/common/egl_g3d.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d.c @@ -38,6 +38,46 @@ #include "egl_g3d_loader.h" #include "native.h" +static void +egl_g3d_invalid_surface(struct native_display *ndpy, + struct native_surface *nsurf, + unsigned int seq_num) +{ + /* XXX not thread safe? */ + struct egl_g3d_surface *gsurf = egl_g3d_surface(nsurf->user_data); + struct egl_g3d_context *gctx; + + /* + * Some functions such as egl_g3d_copy_buffers create a temporary native + * surface. There is no gsurf associated with it. + */ + gctx = (gsurf) ? egl_g3d_context(gsurf->base.CurrentContext) : NULL; + if (gctx) + gctx->stctxi->notify_invalid_framebuffer(gctx->stctxi, gsurf->stfbi); +} + +static struct pipe_screen * +egl_g3d_new_drm_screen(struct native_display *ndpy, const char *name, int fd) +{ + _EGLDisplay *dpy = (_EGLDisplay *) ndpy->user_data; + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + return gdpy->loader->create_drm_screen(name, fd); +} + +static struct pipe_screen * +egl_g3d_new_sw_screen(struct native_display *ndpy, struct sw_winsys *ws) +{ + _EGLDisplay *dpy = (_EGLDisplay *) ndpy->user_data; + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + return gdpy->loader->create_sw_screen(ws); +} + +static struct native_event_handler egl_g3d_native_event_handler = { + egl_g3d_invalid_surface, + egl_g3d_new_drm_screen, + egl_g3d_new_sw_screen +}; + /** * Get the native platform. */ @@ -79,7 +119,9 @@ egl_g3d_get_platform(_EGLDriver *drv, _EGLPlatformType plat) break; } - if (!nplat) + if (nplat) + nplat->set_event_handler(&egl_g3d_native_event_handler); + else _eglLog(_EGL_WARNING, "unsupported platform %s", plat_name); gdrv->platforms[plat] = nplat; @@ -183,17 +225,21 @@ init_config_attributes(_EGLConfig *conf, const struct native_config *nconf, } surface_type = 0x0; - if (nconf->window_bit) - surface_type |= EGL_WINDOW_BIT; - if (nconf->pixmap_bit) - surface_type |= EGL_PIXMAP_BIT; + /* pixmap surfaces should be EGL_SINGLE_BUFFER */ + if (nconf->buffer_mask & (1 << NATIVE_ATTACHMENT_FRONT_LEFT)) { + if (nconf->pixmap_bit) + surface_type |= EGL_PIXMAP_BIT; + } + /* the others surfaces should be EGL_BACK_BUFFER (or settable) */ + if (nconf->buffer_mask & (1 << NATIVE_ATTACHMENT_BACK_LEFT)) { + if (nconf->window_bit) + surface_type |= EGL_WINDOW_BIT; #ifdef EGL_MESA_screen_surface - if (nconf->scanout_bit) - surface_type |= EGL_SCREEN_BIT_MESA; + if (nconf->scanout_bit) + surface_type |= EGL_SCREEN_BIT_MESA; #endif - - if (nconf->buffer_mask & (1 << NATIVE_ATTACHMENT_BACK_LEFT)) surface_type |= EGL_PBUFFER_BIT; + } conf->Conformant = api_mask; conf->RenderableType = api_mask; @@ -226,11 +272,6 @@ init_config_attributes(_EGLConfig *conf, const struct native_config *nconf, } conf->Level = nconf->level; - conf->Samples = nconf->samples; - conf->SampleBuffers = 0; - - if (nconf->slow_config) - conf->ConfigCaveat = EGL_SLOW_CONFIG; if (nconf->transparent_rgb) { conf->TransparentType = EGL_TRANSPARENT_RGB; @@ -257,13 +298,9 @@ egl_g3d_init_config(_EGLDriver *drv, _EGLDisplay *dpy, int preserve_buffer, int max_swap_interval) { struct egl_g3d_config *gconf = egl_g3d_config(conf); - EGLint buffer_mask, api_mask; + EGLint buffer_mask; EGLBoolean valid; - /* skip single-buffered configs */ - if (!(nconf->buffer_mask & (1 << NATIVE_ATTACHMENT_BACK_LEFT))) - return EGL_FALSE; - buffer_mask = 0x0; if (nconf->buffer_mask & (1 << NATIVE_ATTACHMENT_FRONT_LEFT)) buffer_mask |= ST_ATTACHMENT_FRONT_LEFT_MASK; @@ -278,24 +315,14 @@ egl_g3d_init_config(_EGLDriver 
*drv, _EGLDisplay *dpy, gconf->stvis.color_format = nconf->color_format; gconf->stvis.depth_stencil_format = depth_stencil_format; gconf->stvis.accum_format = PIPE_FORMAT_NONE; - gconf->stvis.samples = nconf->samples; + gconf->stvis.samples = 0; + /* will be overridden per surface */ gconf->stvis.render_buffer = (buffer_mask & ST_ATTACHMENT_BACK_LEFT_MASK) ? ST_ATTACHMENT_BACK_LEFT : ST_ATTACHMENT_FRONT_LEFT; - api_mask = dpy->ClientAPIsMask; - /* this is required by EGL, not by OpenGL ES */ - if (nconf->window_bit && - gconf->stvis.render_buffer != ST_ATTACHMENT_BACK_LEFT) - api_mask &= ~(EGL_OPENGL_ES_BIT | EGL_OPENGL_ES2_BIT); - - if (!api_mask) { - _eglLog(_EGL_DEBUG, "no state tracker supports config 0x%x", - nconf->native_visual_id); - } - valid = init_config_attributes(&gconf->base, - nconf, api_mask, depth_stencil_format, + nconf, dpy->ClientAPIs, depth_stencil_format, preserve_buffer, max_swap_interval); if (!valid) { _eglLog(_EGL_DEBUG, "skip invalid config 0x%x", nconf->native_visual_id); @@ -399,46 +426,6 @@ egl_g3d_add_configs(_EGLDriver *drv, _EGLDisplay *dpy, EGLint id) } static void -egl_g3d_invalid_surface(struct native_display *ndpy, - struct native_surface *nsurf, - unsigned int seq_num) -{ - /* XXX not thread safe? */ - struct egl_g3d_surface *gsurf = egl_g3d_surface(nsurf->user_data); - struct egl_g3d_context *gctx; - - /* - * Some functions such as egl_g3d_copy_buffers create a temporary native - * surface. There is no gsurf associated with it. - */ - gctx = (gsurf) ? egl_g3d_context(gsurf->base.CurrentContext) : NULL; - if (gctx) - gctx->stctxi->notify_invalid_framebuffer(gctx->stctxi, gsurf->stfbi); -} - -static struct pipe_screen * -egl_g3d_new_drm_screen(struct native_display *ndpy, const char *name, int fd) -{ - _EGLDisplay *dpy = (_EGLDisplay *) ndpy->user_data; - struct egl_g3d_display *gdpy = egl_g3d_display(dpy); - return gdpy->loader->create_drm_screen(name, fd); -} - -static struct pipe_screen * -egl_g3d_new_sw_screen(struct native_display *ndpy, struct sw_winsys *ws) -{ - _EGLDisplay *dpy = (_EGLDisplay *) ndpy->user_data; - struct egl_g3d_display *gdpy = egl_g3d_display(dpy); - return gdpy->loader->create_sw_screen(ws); -} - -static struct native_event_handler egl_g3d_native_event_handler = { - egl_g3d_invalid_surface, - egl_g3d_new_drm_screen, - egl_g3d_new_sw_screen -}; - -static void egl_g3d_free_config(void *conf) { struct egl_g3d_config *gconf = egl_g3d_config((_EGLConfig *) conf); @@ -489,8 +476,7 @@ egl_g3d_terminate(_EGLDriver *drv, _EGLDisplay *dpy) } static EGLBoolean -egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy, - EGLint *major, EGLint *minor) +egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy) { struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); struct egl_g3d_display *gdpy; @@ -500,6 +486,9 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy, if (!nplat) return EGL_FALSE; + if (dpy->Options.TestOnly) + return EGL_TRUE; + gdpy = CALLOC_STRUCT(egl_g3d_display); if (!gdpy) { _eglError(EGL_BAD_ALLOC, "eglInitialize"); @@ -510,20 +499,20 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy, _eglLog(_EGL_INFO, "use %s for display %p", nplat->name, dpy->PlatformDisplay); gdpy->native = nplat->create_display(dpy->PlatformDisplay, - &egl_g3d_native_event_handler, (void *) dpy); + dpy->Options.UseFallback, (void *) dpy); if (!gdpy->native) { _eglError(EGL_NOT_INITIALIZED, "eglInitialize(no usable display)"); goto fail; } if (gdpy->loader->profile_masks[ST_API_OPENGL] & ST_PROFILE_DEFAULT_MASK) - dpy->ClientAPIsMask |= EGL_OPENGL_BIT; 
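The initialize hunk above now calls nplat->create_display() with a use-fallback flag instead of passing the event handler per display; the handler is installed once per platform through set_event_handler(). A minimal sketch of the backend-side pattern that the native_drm, fbdev, gdi and x11 hunks further down all follow; the example_* names are placeholders and the include path is assumed:

#include "common/native.h"   /* struct native_platform / native_event_handler; path assumed */

static struct native_event_handler *example_event_handler;

/* called once by the EGL state tracker before any display is created */
static void
example_set_event_handler(struct native_event_handler *event_handler)
{
   example_event_handler = event_handler;
}

/* create_display() now receives a software flag instead of the handler */
static struct native_display *
example_create_display(void *dpy, boolean use_sw, void *user_data)
{
   /* a real backend would build its display here, keeping
    * example_event_handler around for later callbacks */
   (void) dpy; (void) use_sw; (void) user_data;
   return NULL;
}

static const struct native_platform example_platform = {
   "EXAMPLE",                   /* name */
   example_set_event_handler,
   example_create_display
};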
+ dpy->ClientAPIs |= EGL_OPENGL_BIT; if (gdpy->loader->profile_masks[ST_API_OPENGL] & ST_PROFILE_OPENGL_ES1_MASK) - dpy->ClientAPIsMask |= EGL_OPENGL_ES_BIT; + dpy->ClientAPIs |= EGL_OPENGL_ES_BIT; if (gdpy->loader->profile_masks[ST_API_OPENGL] & ST_PROFILE_OPENGL_ES2_MASK) - dpy->ClientAPIsMask |= EGL_OPENGL_ES2_BIT; + dpy->ClientAPIs |= EGL_OPENGL_ES2_BIT; if (gdpy->loader->profile_masks[ST_API_OPENVG] & ST_PROFILE_DEFAULT_MASK) - dpy->ClientAPIsMask |= EGL_OPENVG_BIT; + dpy->ClientAPIs |= EGL_OPENVG_BIT; gdpy->smapi = egl_g3d_create_st_manager(dpy); if (!gdpy->smapi) { @@ -562,8 +551,8 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy, goto fail; } - *major = 1; - *minor = 4; + dpy->VersionMajor = 1; + dpy->VersionMinor = 4; return EGL_TRUE; @@ -588,12 +577,6 @@ egl_g3d_get_proc_address(_EGLDriver *drv, const char *procname) stapi->get_proc_address(stapi, procname) : NULL); } -static EGLint -egl_g3d_probe(_EGLDriver *drv, _EGLDisplay *dpy) -{ - return (egl_g3d_get_platform(drv, dpy->Platform)) ? 90 : 0; -} - _EGLDriver * egl_g3d_create_driver(const struct egl_g3d_loader *loader) { @@ -610,8 +593,6 @@ egl_g3d_create_driver(const struct egl_g3d_loader *loader) gdrv->base.API.Terminate = egl_g3d_terminate; gdrv->base.API.GetProcAddress = egl_g3d_get_proc_address; - gdrv->base.Probe = egl_g3d_probe; - /* to be filled by the caller */ gdrv->base.Name = NULL; gdrv->base.Unload = NULL; diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.h b/src/gallium/state_trackers/egl/common/egl_g3d.h index 72c14f0ac49..9873fee6ec2 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d.h +++ b/src/gallium/state_trackers/egl/common/egl_g3d.h @@ -92,9 +92,8 @@ struct egl_g3d_config { struct egl_g3d_image { _EGLImage base; struct pipe_resource *texture; - unsigned face; unsigned level; - unsigned zslice; + unsigned layer; }; /* standard typecasts */ diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_api.c b/src/gallium/state_trackers/egl/common/egl_g3d_api.c index fd7dc8f8149..c9f94a3c498 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_api.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_api.c @@ -30,6 +30,7 @@ #include "pipe/p_screen.h" #include "util/u_memory.h" #include "util/u_inlines.h" +#include "util/u_box.h" #include "egl_g3d.h" #include "egl_g3d_api.h" @@ -140,23 +141,34 @@ egl_g3d_choose_config(_EGLDriver *drv, _EGLDisplay *dpy, const EGLint *attribs, if (!_eglParseConfigAttribList(&criteria, dpy, attribs)) return _eglError(EGL_BAD_ATTRIBUTE, "eglChooseConfig"); - tmp_configs = (_EGLConfig **) _eglFilterArray(dpy->Configs, &tmp_size, + /* get the number of matched configs */ + tmp_size = _eglFilterArray(dpy->Configs, NULL, 0, (_EGLArrayForEach) egl_g3d_match_config, (void *) &criteria); + if (!tmp_size) { + *num_configs = tmp_size; + return EGL_TRUE; + } + + tmp_configs = MALLOC(sizeof(tmp_configs[0]) * tmp_size); if (!tmp_configs) return _eglError(EGL_BAD_ALLOC, "eglChooseConfig(out of memory)"); + /* get the matched configs */ + _eglFilterArray(dpy->Configs, (void **) tmp_configs, tmp_size, + (_EGLArrayForEach) egl_g3d_match_config, (void *) &criteria); + /* perform sorting of configs */ - if (tmp_configs && tmp_size) { + if (configs && tmp_size) { _eglSortConfigs((const _EGLConfig **) tmp_configs, tmp_size, egl_g3d_compare_config, (void *) &criteria); - size = MIN2(tmp_size, size); - for (i = 0; i < size; i++) + tmp_size = MIN2(tmp_size, size); + for (i = 0; i < tmp_size; i++) configs[i] = _eglGetConfigHandle(tmp_configs[i]); } - free(tmp_configs); + 
FREE(tmp_configs); - *num_configs = size; + *num_configs = tmp_size; return EGL_TRUE; } @@ -312,7 +324,8 @@ egl_g3d_create_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, } gsurf->stvis = gconf->stvis; - if (gsurf->base.RenderBuffer == EGL_SINGLE_BUFFER) + if (gsurf->base.RenderBuffer == EGL_SINGLE_BUFFER && + gconf->stvis.buffer_mask & ST_ATTACHMENT_FRONT_LEFT_MASK) gsurf->stvis.render_buffer = ST_ATTACHMENT_FRONT_LEFT; gsurf->stfbi = egl_g3d_create_st_framebuffer(&gsurf->base); @@ -390,7 +403,6 @@ egl_g3d_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, const EGLint *attribs) { struct egl_g3d_surface *gsurf; - struct pipe_resource *ptex = NULL; gsurf = create_pbuffer_surface(dpy, conf, attribs, "eglCreatePbufferSurface"); @@ -399,13 +411,6 @@ egl_g3d_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *dpy, gsurf->client_buffer_type = EGL_NONE; - if (!gsurf->stfbi->validate(gsurf->stfbi, - &gsurf->stvis.render_buffer, 1, &ptex)) { - egl_g3d_destroy_st_framebuffer(gsurf->stfbi); - FREE(gsurf); - return NULL; - } - return &gsurf->base; } @@ -465,12 +470,14 @@ egl_g3d_create_pbuffer_from_client_buffer(_EGLDriver *drv, _EGLDisplay *dpy, gsurf->client_buffer_type = buftype; gsurf->client_buffer = buffer; + /* validate now so that it fails if the client buffer is invalid */ if (!gsurf->stfbi->validate(gsurf->stfbi, &gsurf->stvis.render_buffer, 1, &ptex)) { egl_g3d_destroy_st_framebuffer(gsurf->stfbi); FREE(gsurf); return NULL; } + pipe_resource_reference(&ptex, NULL); return &gsurf->base; } @@ -631,19 +638,13 @@ egl_g3d_copy_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf, struct egl_g3d_display *gdpy = egl_g3d_display(dpy); struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); _EGLContext *ctx = _eglGetCurrentContext(); - struct egl_g3d_config *gconf; struct native_surface *nsurf; struct pipe_resource *ptex; if (!gsurf->render_texture) return EGL_TRUE; - gconf = egl_g3d_config(egl_g3d_find_pixmap_config(dpy, target)); - if (!gconf) - return _eglError(EGL_BAD_NATIVE_PIXMAP, "eglCopyBuffers"); - - nsurf = gdpy->native->create_pixmap_surface(gdpy->native, - target, gconf->native); + nsurf = gdpy->native->create_pixmap_surface(gdpy->native, target, NULL); if (!nsurf) return _eglError(EGL_BAD_NATIVE_PIXMAP, "eglCopyBuffers"); @@ -664,18 +665,13 @@ egl_g3d_copy_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf, ptex = get_pipe_resource(gdpy->native, nsurf, NATIVE_ATTACHMENT_FRONT_LEFT); if (ptex) { - struct pipe_resource *psrc = gsurf->render_texture; - struct pipe_subresource subsrc, subdst; - subsrc.face = 0; - subsrc.level = 0; - subdst.face = 0; - subdst.level = 0; - - if (psrc) { - gdpy->pipe->resource_copy_region(gdpy->pipe, ptex, subdst, 0, 0, 0, - gsurf->render_texture, subsrc, 0, 0, 0, ptex->width0, ptex->height0); - nsurf->present(nsurf, NATIVE_ATTACHMENT_FRONT_LEFT, FALSE, 0); - } + struct pipe_box src_box; + + u_box_origin_2d(ptex->width0, ptex->height0, &src_box); + gdpy->pipe->resource_copy_region(gdpy->pipe, ptex, 0, 0, 0, 0, + gsurf->render_texture, 0, &src_box); + gdpy->pipe->flush(gdpy->pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + nsurf->present(nsurf, NATIVE_ATTACHMENT_FRONT_LEFT, FALSE, 0); pipe_resource_reference(&ptex, NULL); } @@ -880,25 +876,6 @@ egl_g3d_show_screen_surface(_EGLDriver *drv, _EGLDisplay *dpy, #endif /* EGL_MESA_screen_surface */ -/** - * Find a config that supports the pixmap. 
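The eglCopyBuffers hunk above replaces the old subresource/offset/size argument list with a pipe_box built by u_box_origin_2d(). A minimal sketch of that pattern, assuming dst is the pixmap's front-left resource and src the surface's render texture (both placeholder names):

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_box.h"

static void
copy_to_front(struct pipe_context *pipe,
              struct pipe_resource *dst, struct pipe_resource *src)
{
   struct pipe_box src_box;

   /* full-size box at the origin; depth is implicitly 1 */
   u_box_origin_2d(dst->width0, dst->height0, &src_box);

   /* level 0 to level 0, destination offset (0, 0, 0) */
   pipe->resource_copy_region(pipe, dst, 0, 0, 0, 0, src, 0, &src_box);
   pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
}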
- */ -_EGLConfig * -egl_g3d_find_pixmap_config(_EGLDisplay *dpy, EGLNativePixmapType pix) -{ - struct egl_g3d_display *gdpy = egl_g3d_display(dpy); - struct egl_g3d_config *gconf; - EGLint i; - - for (i = 0; i < dpy->Configs->Size; i++) { - gconf = egl_g3d_config((_EGLConfig *) dpy->Configs->Elements[i]); - if (gdpy->native->is_pixmap_supported(gdpy->native, pix, gconf->native)) - break; - } - - return (i < dpy->Configs->Size) ? &gconf->base : NULL; -} - void egl_g3d_init_driver_api(_EGLDriver *drv) { diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_api.h b/src/gallium/state_trackers/egl/common/egl_g3d_api.h index d5196c12fe9..17fd7958aa6 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_api.h +++ b/src/gallium/state_trackers/egl/common/egl_g3d_api.h @@ -31,7 +31,4 @@ void egl_g3d_init_driver_api(_EGLDriver *drv); -_EGLConfig * -egl_g3d_find_pixmap_config(_EGLDisplay *dpy, EGLNativePixmapType pix); - #endif /* _EGL_G3D_API_H_ */ diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_image.c b/src/gallium/state_trackers/egl/common/egl_g3d_image.c index 6a1f8cc697b..78c035a2af0 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_image.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_image.c @@ -35,7 +35,6 @@ #include "native.h" #include "egl_g3d.h" -#include "egl_g3d_api.h" #include "egl_g3d_image.h" /* for struct winsys_handle */ @@ -48,17 +47,11 @@ static struct pipe_resource * egl_g3d_reference_native_pixmap(_EGLDisplay *dpy, EGLNativePixmapType pix) { struct egl_g3d_display *gdpy = egl_g3d_display(dpy); - struct egl_g3d_config *gconf; struct native_surface *nsurf; struct pipe_resource *textures[NUM_NATIVE_ATTACHMENTS]; enum native_attachment natt; - gconf = egl_g3d_config(egl_g3d_find_pixmap_config(dpy, pix)); - if (!gconf) - return NULL; - - nsurf = gdpy->native->create_pixmap_surface(gdpy->native, - pix, gconf->native); + nsurf = gdpy->native->create_pixmap_surface(gdpy->native, pix, NULL); if (!nsurf) return NULL; @@ -173,6 +166,7 @@ egl_g3d_reference_drm_buffer(_EGLDisplay *dpy, EGLint name, templ.width0 = attrs.Width; templ.height0 = attrs.Height; templ.depth0 = 1; + templ.array_size = 1; memset(&wsh, 0, sizeof(wsh)); wsh.handle = (unsigned) name; @@ -191,7 +185,7 @@ egl_g3d_create_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx, { struct pipe_resource *ptex; struct egl_g3d_image *gimg; - unsigned face = 0, level = 0, zslice = 0; + unsigned level = 0, layer = 0; gimg = CALLOC_STRUCT(egl_g3d_image); if (!gimg) { @@ -231,7 +225,7 @@ egl_g3d_create_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx, FREE(gimg); return NULL; } - if (zslice > ptex->depth0) { + if (layer >= (u_minify(ptex->depth0, level) + ptex->array_size - 1)) { _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR"); pipe_resource_reference(&gimg->texture, NULL); FREE(gimg); @@ -240,9 +234,8 @@ egl_g3d_create_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx, /* transfer the ownership to the image */ gimg->texture = ptex; - gimg->face = face; gimg->level = level; - gimg->zslice = zslice; + gimg->layer = layer; return &gimg->base; } @@ -288,9 +281,8 @@ egl_g3d_create_drm_image(_EGLDriver *drv, _EGLDisplay *dpy, /* transfer the ownership to the image */ gimg->texture = ptex; - gimg->face = 0; gimg->level = 0; - gimg->zslice = 0; + gimg->layer = 0; return &gimg->base; } diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_st.c b/src/gallium/state_trackers/egl/common/egl_g3d_st.c index 25e2999590c..2b944b521a4 100644 --- 
a/src/gallium/state_trackers/egl/common/egl_g3d_st.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_st.c @@ -72,9 +72,8 @@ egl_g3d_st_manager_get_egl_image(struct st_manager *smapi, out->texture = NULL; pipe_resource_reference(&out->texture, gimg->texture); - out->face = gimg->face; out->level = gimg->level; - out->zslice = gimg->zslice; + out->layer = gimg->layer; _eglUnlockMutex(&gsmapi->display->Mutex); @@ -140,6 +139,7 @@ pbuffer_allocate_render_texture(struct egl_g3d_surface *gsurf) templ.width0 = gsurf->base.Width; templ.height0 = gsurf->base.Height; templ.depth0 = 1; + templ.array_size = 1; templ.format = gsurf->stvis.color_format; templ.bind = PIPE_BIND_RENDER_TARGET; diff --git a/src/gallium/state_trackers/egl/common/native.h b/src/gallium/state_trackers/egl/common/native.h index 3886ca20562..6461b5edbdf 100644 --- a/src/gallium/state_trackers/egl/common/native.h +++ b/src/gallium/state_trackers/egl/common/native.h @@ -126,8 +126,6 @@ struct native_config { int native_visual_id; int native_visual_type; int level; - int samples; - boolean slow_config; boolean transparent_rgb; int transparent_rgb_values[3]; }; @@ -185,7 +183,9 @@ struct native_display { const struct native_config *nconf); /** - * Create a pixmap surface. Required unless no config has pixmap_bit set. + * Create a pixmap surface. The native config may be NULL. In that case, a + * "best config" will be picked. Required unless no config has pixmap_bit + * set. */ struct native_surface *(*create_pixmap_surface)(struct native_display *ndpy, EGLNativePixmapType pix, @@ -226,8 +226,9 @@ native_attachment_mask_test(uint mask, enum native_attachment att) struct native_platform { const char *name; + void (*set_event_handler)(struct native_event_handler *handler); struct native_display *(*create_display)(void *dpy, - struct native_event_handler *handler, + boolean use_sw, void *user_data); }; diff --git a/src/gallium/state_trackers/egl/common/native_helper.c b/src/gallium/state_trackers/egl/common/native_helper.c index 7832b2b693f..0f2d02032b5 100644 --- a/src/gallium/state_trackers/egl/common/native_helper.c +++ b/src/gallium/state_trackers/egl/common/native_helper.c @@ -40,7 +40,6 @@ struct resource_surface { uint bind; struct pipe_resource *resources[NUM_NATIVE_ATTACHMENTS]; - struct pipe_surface *present_surfaces[NUM_NATIVE_ATTACHMENTS]; uint resource_mask; uint width, height; }; @@ -67,8 +66,6 @@ resource_surface_free_resources(struct resource_surface *rsurf) int i; for (i = 0; i < NUM_NATIVE_ATTACHMENTS; i++) { - if (rsurf->present_surfaces[i]) - pipe_surface_reference(&rsurf->present_surfaces[i], NULL); if (rsurf->resources[i]) pipe_resource_reference(&rsurf->resources[i], NULL); } @@ -130,6 +127,7 @@ resource_surface_add_resources(struct resource_surface *rsurf, templ.width0 = rsurf->width; templ.height0 = rsurf->height; templ.depth0 = 1; + templ.array_size = 1; for (i = 0; i < NUM_NATIVE_ATTACHMENTS; i++) { if (resource_mask & (1 <<i)) { @@ -193,8 +191,6 @@ resource_surface_swap_buffers(struct resource_surface *rsurf, pointer_swap((const void **) &rsurf->resources[buf1], (const void **) &rsurf->resources[buf2]); - pointer_swap((const void **) &rsurf->present_surfaces[buf1], - (const void **) &rsurf->present_surfaces[buf2]); /* swap mask bits */ mask = rsurf->resource_mask & ~(buf1_bit | buf2_bit); @@ -212,24 +208,12 @@ resource_surface_present(struct resource_surface *rsurf, void *winsys_drawable_handle) { struct pipe_resource *pres = rsurf->resources[which]; - struct pipe_surface *psurf = 
rsurf->present_surfaces[which]; if (!pres) return TRUE; - if (!psurf) { - psurf = rsurf->screen->get_tex_surface(rsurf->screen, - pres, 0, 0, 0, PIPE_BIND_DISPLAY_TARGET); - if (!psurf) - return FALSE; - - rsurf->present_surfaces[which] = psurf; - } - - assert(psurf->texture == pres); - rsurf->screen->flush_frontbuffer(rsurf->screen, - psurf, winsys_drawable_handle); + pres, 0, 0, winsys_drawable_handle); return TRUE; } diff --git a/src/gallium/state_trackers/egl/drm/native_drm.c b/src/gallium/state_trackers/egl/drm/native_drm.c index 3759c2a26dd..14c134ea1ad 100644 --- a/src/gallium/state_trackers/egl/drm/native_drm.c +++ b/src/gallium/state_trackers/egl/drm/native_drm.c @@ -34,7 +34,7 @@ /* see get_drm_screen_name */ #include <radeon_drm.h> -#include "radeon/drm/radeon_drm.h" +#include "radeon/drm/radeon_drm_public.h" static boolean drm_display_is_format_supported(struct native_display *ndpy, @@ -237,9 +237,16 @@ drm_create_display(int fd, struct native_event_handler *event_handler, return &drmdpy->base; } +static struct native_event_handler *drm_event_handler; + +static void +native_set_event_handler(struct native_event_handler *event_handler) +{ + drm_event_handler = event_handler; +} + static struct native_display * -native_create_display(void *dpy, struct native_event_handler *event_handler, - void *user_data) +native_create_display(void *dpy, boolean use_sw, void *user_data) { int fd; @@ -252,11 +259,12 @@ native_create_display(void *dpy, struct native_event_handler *event_handler, if (fd < 0) return NULL; - return drm_create_display(fd, event_handler, user_data); + return drm_create_display(fd, drm_event_handler, user_data); } static const struct native_platform drm_platform = { "DRM", /* name */ + native_set_event_handler, native_create_display }; diff --git a/src/gallium/state_trackers/egl/fbdev/native_fbdev.c b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c index 1b5ea8bf9d5..a1e91ba701c 100644 --- a/src/gallium/state_trackers/egl/fbdev/native_fbdev.c +++ b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c @@ -459,9 +459,16 @@ fbdev_display_create(int fd, struct native_event_handler *event_handler, return &fbdpy->base; } +static struct native_event_handler *fbdev_event_handler; + +static void +native_set_event_handler(struct native_event_handler *event_handler) +{ + fbdev_event_handler = event_handler; +} + static struct native_display * -native_create_display(void *dpy, struct native_event_handler *event_handler, - void *user_data) +native_create_display(void *dpy, boolean use_sw, void *user_data) { struct native_display *ndpy; int fd; @@ -476,7 +483,7 @@ native_create_display(void *dpy, struct native_event_handler *event_handler, if (fd < 0) return NULL; - ndpy = fbdev_display_create(fd, event_handler, user_data); + ndpy = fbdev_display_create(fd, fbdev_event_handler, user_data); if (!ndpy) close(fd); @@ -485,6 +492,7 @@ native_create_display(void *dpy, struct native_event_handler *event_handler, static const struct native_platform fbdev_platform = { "FBDEV", /* name */ + native_set_event_handler, native_create_display }; diff --git a/src/gallium/state_trackers/egl/gdi/native_gdi.c b/src/gallium/state_trackers/egl/gdi/native_gdi.c index d259e6edc89..3cc4aefa937 100644 --- a/src/gallium/state_trackers/egl/gdi/native_gdi.c +++ b/src/gallium/state_trackers/egl/gdi/native_gdi.c @@ -319,7 +319,6 @@ gdi_display_get_configs(struct native_display *ndpy, int *num_configs) nconf->color_format = formats[i]; nconf->window_bit = TRUE; - nconf->slow_config = TRUE; } gdpy->num_configs 
= count; @@ -407,15 +406,23 @@ gdi_create_display(HDC hDC, struct native_event_handler *event_handler, return &gdpy->base; } +static struct native_event_handler *gdi_event_handler; + +static void +native_set_event_handler(struct native_event_handler *event_handler) +{ + gdi_event_handler = event_handler; +} + static struct native_display * -native_create_display(void *dpy, struct native_event_handler *event_handler, - void *user_data) +native_create_display(void *dpy, boolean use_sw, void *user_data) { - return gdi_create_display((HDC) dpy, event_handler, user_data); + return gdi_create_display((HDC) dpy, gdi_event_handler, user_data); } static const struct native_platform gdi_platform = { "GDI", /* name */ + native_set_event_handler, native_create_display }; diff --git a/src/gallium/state_trackers/egl/x11/native_dri2.c b/src/gallium/state_trackers/egl/x11/native_dri2.c index 331a7de432a..c82e2da863b 100644 --- a/src/gallium/state_trackers/egl/x11/native_dri2.c +++ b/src/gallium/state_trackers/egl/x11/native_dri2.c @@ -40,11 +40,6 @@ #ifdef GLX_DIRECT_RENDERING -enum dri2_surface_type { - DRI2_SURFACE_TYPE_WINDOW, - DRI2_SURFACE_TYPE_PIXMAP, -}; - struct dri2_display { struct native_display base; Display *dpy; @@ -66,7 +61,6 @@ struct dri2_display { struct dri2_surface { struct native_surface base; Drawable drawable; - enum dri2_surface_type type; enum pipe_format color_format; struct dri2_display *dri2dpy; @@ -136,6 +130,7 @@ dri2_surface_process_drawable_buffers(struct native_surface *nsurf, templ.width0 = dri2surf->width; templ.height0 = dri2surf->height; templ.depth0 = 1; + templ.array_size = 1; templ.format = dri2surf->color_format; templ.bind = PIPE_BIND_RENDER_TARGET; @@ -438,12 +433,10 @@ dri2_surface_destroy(struct native_surface *nsurf) static struct dri2_surface * dri2_display_create_surface(struct native_display *ndpy, - enum dri2_surface_type type, Drawable drawable, - const struct native_config *nconf) + enum pipe_format color_format) { struct dri2_display *dri2dpy = dri2_display(ndpy); - struct dri2_config *dri2conf = dri2_config(nconf); struct dri2_surface *dri2surf; dri2surf = CALLOC_STRUCT(dri2_surface); @@ -451,9 +444,8 @@ dri2_display_create_surface(struct native_display *ndpy, return NULL; dri2surf->dri2dpy = dri2dpy; - dri2surf->type = type; dri2surf->drawable = drawable; - dri2surf->color_format = dri2conf->base.color_format; + dri2surf->color_format = color_format; dri2surf->base.destroy = dri2_surface_destroy; dri2surf->base.present = dri2_surface_present; @@ -479,8 +471,8 @@ dri2_display_create_window_surface(struct native_display *ndpy, { struct dri2_surface *dri2surf; - dri2surf = dri2_display_create_surface(ndpy, DRI2_SURFACE_TYPE_WINDOW, - (Drawable) win, nconf); + dri2surf = dri2_display_create_surface(ndpy, + (Drawable) win, nconf->color_format); return (dri2surf) ? 
&dri2surf->base : NULL; } @@ -491,8 +483,29 @@ dri2_display_create_pixmap_surface(struct native_display *ndpy, { struct dri2_surface *dri2surf; - dri2surf = dri2_display_create_surface(ndpy, DRI2_SURFACE_TYPE_PIXMAP, - (Drawable) pix, nconf); + if (!nconf) { + struct dri2_display *dri2dpy = dri2_display(ndpy); + uint depth, nconf_depth; + int i; + + depth = x11_drawable_get_depth(dri2dpy->xscr, (Drawable) pix); + for (i = 0; i < dri2dpy->num_configs; i++) { + nconf_depth = util_format_get_blocksizebits( + dri2dpy->configs[i].base.color_format); + /* simple depth match for now */ + if (depth == nconf_depth || + (depth == 24 && depth + 8 == nconf_depth)) { + nconf = &dri2dpy->configs[i].base; + break; + } + } + + if (!nconf) + return NULL; + } + + dri2surf = dri2_display_create_surface(ndpy, + (Drawable) pix, nconf->color_format); return (dri2surf) ? &dri2surf->base : NULL; } @@ -547,6 +560,10 @@ dri2_display_convert_config(struct native_display *ndpy, if (!mode->xRenderable || !mode->drawableType) return FALSE; + /* fast/slow configs are probably not relevant */ + if (mode->visualRating == GLX_SLOW_CONFIG) + return FALSE; + nconf->buffer_mask = 1 << NATIVE_ATTACHMENT_FRONT_LEFT; if (mode->doubleBufferMode) nconf->buffer_mask |= 1 << NATIVE_ATTACHMENT_BACK_LEFT; @@ -567,17 +584,33 @@ dri2_display_convert_config(struct native_display *ndpy, if (nconf->color_format == PIPE_FORMAT_NONE) return FALSE; - if (mode->drawableType & GLX_WINDOW_BIT) + if ((mode->drawableType & GLX_WINDOW_BIT) && mode->visualID) nconf->window_bit = TRUE; if (mode->drawableType & GLX_PIXMAP_BIT) nconf->pixmap_bit = TRUE; nconf->native_visual_id = mode->visualID; - nconf->native_visual_type = mode->visualType; + switch (mode->visualType) { + case GLX_TRUE_COLOR: + nconf->native_visual_type = TrueColor; + break; + case GLX_DIRECT_COLOR: + nconf->native_visual_type = DirectColor; + break; + case GLX_PSEUDO_COLOR: + nconf->native_visual_type = PseudoColor; + break; + case GLX_STATIC_COLOR: + nconf->native_visual_type = StaticColor; + break; + case GLX_GRAY_SCALE: + nconf->native_visual_type = GrayScale; + break; + case GLX_STATIC_GRAY: + nconf->native_visual_type = StaticGray; + break; + } nconf->level = mode->level; - nconf->samples = mode->samples; - - nconf->slow_config = (mode->visualRating == GLX_SLOW_CONFIG); if (mode->transparentPixel == GLX_TRANSPARENT_RGB) { nconf->transparent_rgb = TRUE; @@ -613,8 +646,17 @@ dri2_display_get_configs(struct native_display *ndpy, int *num_configs) count = 0; for (i = 0; i < num_modes; i++) { struct native_config *nconf = &dri2dpy->configs[count].base; - if (dri2_display_convert_config(&dri2dpy->base, modes, nconf)) - count++; + + if (dri2_display_convert_config(&dri2dpy->base, modes, nconf)) { + int j; + /* look for duplicates */ + for (j = 0; j < count; j++) { + if (memcmp(&dri2dpy->configs[j], nconf, sizeof(*nconf)) == 0) + break; + } + if (j == count) + count++; + } modes = modes->next; } @@ -757,7 +799,7 @@ dri2_display_hash_table_hash(void *key) static int dri2_display_hash_table_compare(void *key1, void *key2) { - return (key1 - key2); + return ((char *) key1 - (char *) key2); } struct native_display * diff --git a/src/gallium/state_trackers/egl/x11/native_x11.c b/src/gallium/state_trackers/egl/x11/native_x11.c index 37c8b01541f..a0bcad4c734 100644 --- a/src/gallium/state_trackers/egl/x11/native_x11.c +++ b/src/gallium/state_trackers/egl/x11/native_x11.c @@ -30,25 +30,30 @@ #include "native_x11.h" +static struct native_event_handler *x11_event_handler; + +static void 
+native_set_event_handler(struct native_event_handler *event_handler) +{ + x11_event_handler = event_handler; +} + static struct native_display * -native_create_display(void *dpy, struct native_event_handler *event_handler, - void *user_data) +native_create_display(void *dpy, boolean use_sw, void *user_data) { struct native_display *ndpy = NULL; boolean force_sw; force_sw = debug_get_bool_option("EGL_SOFTWARE", FALSE); - if (!force_sw) { - ndpy = x11_create_dri2_display((Display *) dpy, - event_handler, user_data); - } - - if (!ndpy) { - EGLint level = (force_sw) ? _EGL_INFO : _EGL_WARNING; - _eglLog(level, "use software fallback"); + if (force_sw || use_sw) { + _eglLog(_EGL_INFO, "use software fallback"); ndpy = x11_create_ximage_display((Display *) dpy, - event_handler, user_data); + x11_event_handler, user_data); + } + else { + ndpy = x11_create_dri2_display((Display *) dpy, + x11_event_handler, user_data); } return ndpy; @@ -56,6 +61,7 @@ native_create_display(void *dpy, struct native_event_handler *event_handler, static const struct native_platform x11_platform = { "X11", /* name */ + native_set_event_handler, native_create_display }; diff --git a/src/gallium/state_trackers/egl/x11/native_ximage.c b/src/gallium/state_trackers/egl/x11/native_ximage.c index 84811fb6e14..d4f4dd04df4 100644 --- a/src/gallium/state_trackers/egl/x11/native_ximage.c +++ b/src/gallium/state_trackers/egl/x11/native_ximage.c @@ -38,11 +38,6 @@ #include "native_x11.h" #include "x11_screen.h" -enum ximage_surface_type { - XIMAGE_SURFACE_TYPE_WINDOW, - XIMAGE_SURFACE_TYPE_PIXMAP, -}; - struct ximage_display { struct native_display base; Display *dpy; @@ -60,7 +55,6 @@ struct ximage_display { struct ximage_surface { struct native_surface base; Drawable drawable; - enum ximage_surface_type type; enum pipe_format color_format; XVisualInfo visual; struct ximage_display *xdpy; @@ -245,7 +239,6 @@ ximage_surface_destroy(struct native_surface *nsurf) static struct ximage_surface * ximage_display_create_surface(struct native_display *ndpy, - enum ximage_surface_type type, Drawable drawable, const struct native_config *nconf) { @@ -258,7 +251,6 @@ ximage_display_create_surface(struct native_display *ndpy, return NULL; xsurf->xdpy = xdpy; - xsurf->type = type; xsurf->color_format = xconf->base.color_format; xsurf->drawable = drawable; @@ -297,11 +289,37 @@ ximage_display_create_window_surface(struct native_display *ndpy, { struct ximage_surface *xsurf; - xsurf = ximage_display_create_surface(ndpy, XIMAGE_SURFACE_TYPE_WINDOW, - (Drawable) win, nconf); + xsurf = ximage_display_create_surface(ndpy, (Drawable) win, nconf); return (xsurf) ? 
&xsurf->base : NULL; } +static enum pipe_format +get_pixmap_format(struct native_display *ndpy, EGLNativePixmapType pix) +{ + struct ximage_display *xdpy = ximage_display(ndpy); + enum pipe_format fmt; + uint depth; + + depth = x11_drawable_get_depth(xdpy->xscr, (Drawable) pix); + + switch (depth) { + case 32: + fmt = PIPE_FORMAT_B8G8R8A8_UNORM; + break; + case 24: + fmt = PIPE_FORMAT_B8G8R8X8_UNORM; + break; + case 16: + fmt = PIPE_FORMAT_B5G6R5_UNORM; + break; + default: + fmt = PIPE_FORMAT_NONE; + break; + } + + return fmt; +} + static struct native_surface * ximage_display_create_pixmap_surface(struct native_display *ndpy, EGLNativePixmapType pix, @@ -309,8 +327,26 @@ ximage_display_create_pixmap_surface(struct native_display *ndpy, { struct ximage_surface *xsurf; - xsurf = ximage_display_create_surface(ndpy, XIMAGE_SURFACE_TYPE_PIXMAP, - (Drawable) pix, nconf); + /* find the config */ + if (!nconf) { + struct ximage_display *xdpy = ximage_display(ndpy); + enum pipe_format fmt = get_pixmap_format(&xdpy->base, pix); + int i; + + if (fmt != PIPE_FORMAT_NONE) { + for (i = 0; i < xdpy->num_configs; i++) { + if (xdpy->configs[i].base.color_format == fmt) { + nconf = &xdpy->configs[i].base; + break; + } + } + } + + if (!nconf) + return NULL; + } + + xsurf = ximage_display_create_surface(ndpy, (Drawable) pix, nconf); return (xsurf) ? &xsurf->base : NULL; } @@ -384,8 +420,6 @@ ximage_display_get_configs(struct native_display *ndpy, int *num_configs) xconf->base.native_visual_type = xconf->visual->class; #endif - xconf->base.slow_config = TRUE; - count++; } @@ -408,24 +442,7 @@ ximage_display_is_pixmap_supported(struct native_display *ndpy, const struct native_config *nconf) { struct ximage_display *xdpy = ximage_display(ndpy); - enum pipe_format fmt; - uint depth; - - depth = x11_drawable_get_depth(xdpy->xscr, (Drawable) pix); - switch (depth) { - case 32: - fmt = PIPE_FORMAT_B8G8R8A8_UNORM; - break; - case 24: - fmt = PIPE_FORMAT_B8G8R8X8_UNORM; - break; - case 16: - fmt = PIPE_FORMAT_B5G6R5_UNORM; - break; - default: - fmt = PIPE_FORMAT_NONE; - break; - } + enum pipe_format fmt = get_pixmap_format(&xdpy->base, pix); return (fmt == nconf->color_format); } diff --git a/src/gallium/state_trackers/glx/xlib/xm_st.c b/src/gallium/state_trackers/glx/xlib/xm_st.c index e7466bdbee5..6bfe8b0788c 100644 --- a/src/gallium/state_trackers/glx/xlib/xm_st.c +++ b/src/gallium/state_trackers/glx/xlib/xm_st.c @@ -42,7 +42,7 @@ struct xmesa_st_framebuffer { unsigned texture_width, texture_height, texture_mask; struct pipe_resource *textures[ST_ATTACHMENT_COUNT]; - struct pipe_surface *display_surface; + struct pipe_resource *display_resource; }; static INLINE struct xmesa_st_framebuffer * @@ -60,25 +60,19 @@ xmesa_st_framebuffer_display(struct st_framebuffer_iface *stfbi, { struct xmesa_st_framebuffer *xstfb = xmesa_st_framebuffer(stfbi); struct pipe_resource *ptex = xstfb->textures[statt]; - struct pipe_surface *psurf; + struct pipe_resource *pres; if (!ptex) return TRUE; - psurf = xstfb->display_surface; + pres = xstfb->display_resource; /* (re)allocate the surface for the texture to be displayed */ - if (!psurf || psurf->texture != ptex) { - pipe_surface_reference(&xstfb->display_surface, NULL); - - psurf = xstfb->screen->get_tex_surface(xstfb->screen, - ptex, 0, 0, 0, PIPE_BIND_DISPLAY_TARGET); - if (!psurf) - return FALSE; - - xstfb->display_surface = psurf; + if (!pres || pres != ptex) { + pipe_resource_reference(&xstfb->display_resource, ptex); + pres = xstfb->display_resource; } - 
xstfb->screen->flush_frontbuffer(xstfb->screen, psurf, &xstfb->buffer->ws); + xstfb->screen->flush_frontbuffer(xstfb->screen, pres, 0, 0, &xstfb->buffer->ws); return TRUE; } @@ -96,7 +90,7 @@ xmesa_st_framebuffer_copy_textures(struct st_framebuffer_iface *stfbi, struct xmesa_st_framebuffer *xstfb = xmesa_st_framebuffer(stfbi); struct pipe_resource *src_ptex = xstfb->textures[src_statt]; struct pipe_resource *dst_ptex = xstfb->textures[dst_statt]; - struct pipe_subresource subsrc, subdst; + struct pipe_box src_box; struct pipe_context *pipe; if (!src_ptex || !dst_ptex) @@ -110,14 +104,11 @@ xmesa_st_framebuffer_copy_textures(struct st_framebuffer_iface *stfbi, xstfb->display->pipe = pipe; } - subsrc.face = 0; - subsrc.level = 0; - subdst.face = 0; - subdst.level = 0; + u_box_2d(x, y, width, height, &src_box); if (src_ptex && dst_ptex) - pipe->resource_copy_region(pipe, dst_ptex, subdst, x, y, 0, - src_ptex, subsrc, x, y, 0, width, height); + pipe->resource_copy_region(pipe, dst_ptex, 0, x, y, 0, + src_ptex, 0, &src_box); } /** @@ -144,6 +135,7 @@ xmesa_st_framebuffer_validate_textures(struct st_framebuffer_iface *stfbi, templ.width0 = width; templ.height0 = height; templ.depth0 = 1; + templ.array_size = 1; templ.last_level = 0; for (i = 0; i < ST_ATTACHMENT_COUNT; i++) { @@ -321,7 +313,7 @@ xmesa_destroy_st_framebuffer(struct st_framebuffer_iface *stfbi) struct xmesa_st_framebuffer *xstfb = xmesa_st_framebuffer(stfbi); int i; - pipe_surface_reference(&xstfb->display_surface, NULL); + pipe_resource_reference(&xstfb->display_resource, NULL); for (i = 0; i < ST_ATTACHMENT_COUNT; i++) pipe_resource_reference(&xstfb->textures[i], NULL); diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i index 40c4603fb9d..d694651eef7 100644 --- a/src/gallium/state_trackers/python/p_context.i +++ b/src/gallium/state_trackers/python/p_context.i @@ -132,6 +132,8 @@ struct st_context { enum pipe_format format = PIPE_FORMAT_NONE, unsigned first_level = 0, unsigned last_level = ~0, + unsigned first_layer = 0, + unsigned last_layer = ~0, unsigned swizzle_r = 0, unsigned swizzle_g = 1, unsigned swizzle_b = 2, @@ -146,9 +148,10 @@ struct st_context { } else { templat.format = format; } - templat.last_level = MIN2(last_level, texture->last_level); - templat.first_level = first_level; - templat.last_level = last_level; + templat.u.tex.last_level = MIN2(last_level, texture->last_level); + templat.u.tex.first_level = first_level; + templat.u.tex.first_layer = first_layer; + templat.u.tex.last_layer = last_layer; templat.swizzle_r = swizzle_r; templat.swizzle_g = swizzle_g; templat.swizzle_b = swizzle_b; @@ -357,7 +360,7 @@ struct st_context { if (!map) goto error2; memcpy(map, vertices, size); - pipe_buffer_unmap(pipe, vbuf, transfer); + pipe_buffer_unmap(pipe, transfer); cso_save_vertex_elements($self->cso); @@ -412,17 +415,15 @@ error1: */ void resource_copy_region(struct pipe_resource *dst, - struct pipe_subresource subdst, + unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, struct pipe_resource *src, - struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height) + unsigned src_level, + const struct pipe_box *src_box) { $self->pipe->resource_copy_region($self->pipe, - dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, - width, height); + dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); } @@ -433,7 +434,7 @@ error1: { struct pipe_surface *_dst = NULL; - _dst = 
st_pipe_surface(dst, PIPE_BIND_RENDER_TARGET); + _dst = st_pipe_surface($self->pipe, dst, PIPE_BIND_RENDER_TARGET); if(!_dst) SWIG_exception(SWIG_ValueError, "couldn't acquire destination surface for writing"); @@ -452,7 +453,7 @@ error1: { struct pipe_surface *_dst = NULL; - _dst = st_pipe_surface(dst, PIPE_BIND_DEPTH_STENCIL); + _dst = st_pipe_surface($self->pipe, dst, PIPE_BIND_DEPTH_STENCIL); if(!_dst) SWIG_exception(SWIG_ValueError, "couldn't acquire destination surface for writing"); @@ -482,9 +483,8 @@ error1: transfer = pipe_get_transfer(pipe, surface->texture, - surface->face, surface->level, - surface->zslice, + surface->layer, PIPE_TRANSFER_READ, x, y, w, h); if(transfer) { @@ -511,9 +511,8 @@ error1: transfer = pipe_get_transfer(pipe, surface->texture, - surface->face, surface->level, - surface->zslice, + surface->layer, PIPE_TRANSFER_WRITE, x, y, w, h); if(!transfer) @@ -535,9 +534,8 @@ error1: struct pipe_transfer *transfer; transfer = pipe_get_transfer(pipe, surface->texture, - surface->face, surface->level, - surface->zslice, + surface->layer, PIPE_TRANSFER_READ, x, y, w, h); if(transfer) { @@ -555,9 +553,8 @@ error1: struct pipe_transfer *transfer; transfer = pipe_get_transfer(pipe, surface->texture, - surface->face, surface->level, - surface->zslice, + surface->layer, PIPE_TRANSFER_WRITE, x, y, w, h); if(transfer) { @@ -597,9 +594,8 @@ error1: transfer = pipe_get_transfer(pipe, surface->texture, - surface->face, surface->level, - surface->zslice, + surface->layer, PIPE_TRANSFER_READ, x, y, w, h); if(transfer) { @@ -624,9 +620,8 @@ error1: struct pipe_transfer *transfer; transfer = pipe_get_transfer(pipe, surface->texture, - surface->face, surface->level, - surface->zslice, + surface->layer, PIPE_TRANSFER_READ, x, y, w, h); if(transfer) { @@ -644,9 +639,8 @@ error1: struct pipe_transfer *transfer; transfer = pipe_get_transfer(pipe, surface->texture, - surface->face, surface->level, - surface->zslice, + surface->layer, PIPE_TRANSFER_WRITE, x, y, w, h); if(transfer) { @@ -681,9 +675,8 @@ error1: transfer = pipe_get_transfer(pipe, surface->texture, - surface->face, surface->level, - surface->zslice, + surface->layer, PIPE_TRANSFER_READ, x, y, w, h); if(!transfer) { @@ -715,16 +708,16 @@ error1: %cstring_input_binary(const char *STRING, unsigned LENGTH); void transfer_inline_write(struct pipe_resource *resource, - struct pipe_subresource *sr, + unsigned level, unsigned usage, const struct pipe_box *box, const char *STRING, unsigned LENGTH, unsigned stride, - unsigned slice_stride) + unsigned layer_stride) { struct pipe_context *pipe = $self->pipe; - pipe->transfer_inline_write(pipe, resource, *sr, usage, box, STRING, stride, slice_stride); + pipe->transfer_inline_write(pipe, resource, level, usage, box, STRING, stride, layer_stride); } %cstring_output_allocate_size(char **STRING, int *LENGTH, free(*$1)); diff --git a/src/gallium/state_trackers/python/p_state.i b/src/gallium/state_trackers/python/p_state.i index c1e6ea1b43c..0537557661d 100644 --- a/src/gallium/state_trackers/python/p_state.i +++ b/src/gallium/state_trackers/python/p_state.i @@ -114,7 +114,8 @@ SWIG_exception(SWIG_ValueError, "index out of bounds"); if(surface) { - _surface = st_pipe_surface(surface, PIPE_BIND_RENDER_TARGET); + /* XXX need a context here */ + _surface = st_pipe_surface(NULL, surface, PIPE_BIND_RENDER_TARGET); if(!_surface) SWIG_exception(SWIG_ValueError, "couldn't acquire surface for writing"); } @@ -131,7 +132,8 @@ struct pipe_surface *_surface = NULL; if(surface) { - _surface = 
st_pipe_surface(surface, PIPE_BIND_DEPTH_STENCIL); + /* XXX need a context here */ + _surface = st_pipe_surface(NULL, surface, PIPE_BIND_DEPTH_STENCIL); if(!_surface) SWIG_exception(SWIG_ValueError, "couldn't acquire surface for writing"); } diff --git a/src/gallium/state_trackers/python/p_texture.i b/src/gallium/state_trackers/python/p_texture.i index ae506944c45..1f4b4fd596b 100644 --- a/src/gallium/state_trackers/python/p_texture.i +++ b/src/gallium/state_trackers/python/p_texture.i @@ -42,9 +42,8 @@ %ignore pipe_resource::screen; %immutable st_surface::texture; -%immutable st_surface::face; %immutable st_surface::level; -%immutable st_surface::zslice; +%immutable st_surface::layer; %newobject pipe_resource::get_surface; @@ -73,25 +72,23 @@ /** Get a surface which is a "view" into a texture */ struct st_surface * - get_surface(unsigned face=0, unsigned level=0, unsigned zslice=0) + get_surface(unsigned level=0, unsigned layer=0) { struct st_surface *surface; - if(face >= ($self->target == PIPE_TEXTURE_CUBE ? 6U : 1U)) - SWIG_exception(SWIG_ValueError, "face out of bounds"); if(level > $self->last_level) SWIG_exception(SWIG_ValueError, "level out of bounds"); - if(zslice >= u_minify($self->depth0, level)) - SWIG_exception(SWIG_ValueError, "zslice out of bounds"); + if(layer >= ($self->target == PIPE_TEXTURE_3D ? + u_minify($self->depth0, level) : $self->depth0)) + SWIG_exception(SWIG_ValueError, "layer out of bounds"); surface = CALLOC_STRUCT(st_surface); if(!surface) return NULL; pipe_resource_reference(&surface->texture, $self); - surface->face = face; surface->level = level; - surface->zslice = zslice; + surface->layer = layer; return surface; @@ -113,9 +110,8 @@ struct st_surface %immutable; struct pipe_resource *texture; - unsigned face; unsigned level; - unsigned zslice; + unsigned layer; }; diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c index 29813456b5f..d0a4295c7a2 100644 --- a/src/gallium/state_trackers/python/st_device.c +++ b/src/gallium/state_trackers/python/st_device.c @@ -240,6 +240,7 @@ st_context_create(struct st_device *st_dev) templat.width0 = 1; templat.height0 = 1; templat.depth0 = 1; + templat.array_size = 1; templat.last_level = 0; templat.bind = PIPE_BIND_SAMPLER_VIEW; @@ -252,7 +253,7 @@ st_context_create(struct st_device *st_dev) pipe->transfer_inline_write(pipe, st_ctx->default_texture, - u_subresource(0,0), + 0, PIPE_TRANSFER_WRITE, &box, &zero, diff --git a/src/gallium/state_trackers/python/st_device.h b/src/gallium/state_trackers/python/st_device.h index 2dca7a1974e..53e2556cb0d 100644 --- a/src/gallium/state_trackers/python/st_device.h +++ b/src/gallium/state_trackers/python/st_device.h @@ -41,9 +41,8 @@ struct st_winsys; struct st_surface { struct pipe_resource *texture; - unsigned face; unsigned level; - unsigned zslice; + unsigned layer; }; @@ -83,11 +82,17 @@ struct st_device static INLINE struct pipe_surface * -st_pipe_surface(struct st_surface *surface, unsigned usage) +st_pipe_surface(struct pipe_context *pipe, struct st_surface *surface, unsigned usage) { struct pipe_resource *texture = surface->texture; - struct pipe_screen *screen = texture->screen; - return screen->get_tex_surface(screen, texture, surface->face, surface->level, surface->zslice, usage); + struct pipe_surface surf_tmpl; + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + surf_tmpl.format = texture->format; + surf_tmpl.usage = usage; + surf_tmpl.u.tex.level = surface->level; + surf_tmpl.u.tex.first_layer = surface->layer; + 
surf_tmpl.u.tex.last_layer = surface->layer; + return pipe->create_surface(pipe, texture, &surf_tmpl); } struct st_context * diff --git a/src/gallium/state_trackers/python/st_sample.c b/src/gallium/state_trackers/python/st_sample.c index 25bfbf1ab73..cac9a1a6204 100644 --- a/src/gallium/state_trackers/python/st_sample.c +++ b/src/gallium/state_trackers/python/st_sample.c @@ -555,9 +555,8 @@ st_sample_surface(struct pipe_context *pipe, transfer = pipe_get_transfer(pipe, surface->texture, - surface->face, surface->level, - surface->zslice, + surface->layer, PIPE_TRANSFER_WRITE, 0, 0, width, diff --git a/src/gallium/state_trackers/vega/.gitignore b/src/gallium/state_trackers/vega/.gitignore new file mode 100644 index 00000000000..c452229f7f4 --- /dev/null +++ b/src/gallium/state_trackers/vega/.gitignore @@ -0,0 +1 @@ +api_tmp.h diff --git a/src/gallium/state_trackers/vega/Makefile b/src/gallium/state_trackers/vega/Makefile index 0543fac0946..7342c124c28 100644 --- a/src/gallium/state_trackers/vega/Makefile +++ b/src/gallium/state_trackers/vega/Makefile @@ -38,7 +38,14 @@ C_SOURCES = \ renderer.c \ stroker.c \ mask.c \ + text.c \ shader.c \ shaders_cache.c +GENERATED_SOURCES := api_tmp.h + include ../../Makefile.template + +MAPI := $(TOP)/src/mapi +api_tmp.h: $(MAPI)/mapi/mapi_abi.py $(MAPI)/vgapi/vgapi.csv + $(PYTHON2) $< --printer vgapi --mode app $(MAPI)/vgapi/vgapi.csv > $@ diff --git a/src/gallium/state_trackers/vega/SConscript b/src/gallium/state_trackers/vega/SConscript index a62783ab18e..4900135a1c5 100644 --- a/src/gallium/state_trackers/vega/SConscript +++ b/src/gallium/state_trackers/vega/SConscript @@ -3,6 +3,8 @@ Import('*') +from sys import executable as python_cmd + env = env.Clone() env.Append(CPPPATH = [ @@ -40,10 +42,16 @@ vega_sources = [ 'mask.c', 'shader.c', 'shaders_cache.c', + 'text.c', ] -# vgapi_header must be generated first -env.Depends(vega_sources, vgapi_header) +api_tmp = env.CodeGenerate( + target = '#/src/gallium/state_trackers/vega/api_tmp.h', + script = '#src/mapi/mapi/mapi_abi.py', + source = '#src/mapi/vgapi/vgapi.csv', + command = python_cmd + ' $SCRIPT --printer vgapi --mode app $SOURCE > $TARGET' +) +env.Depends(vega_sources, api_tmp) st_vega = env.ConvenienceLibrary( target = 'st_vega', diff --git a/src/gallium/state_trackers/vega/api.c b/src/gallium/state_trackers/vega/api.c index bf1d37493af..4bf7c71d453 100644 --- a/src/gallium/state_trackers/vega/api.c +++ b/src/gallium/state_trackers/vega/api.c @@ -28,23 +28,10 @@ #include "mapi/mapi.h" +/* define vega_spec and vega_procs for use with mapi */ +#define API_TMP_DEFINE_SPEC #include "api.h" -static const char vega_spec[] = - "1" -#define MAPI_ABI_ENTRY(ret, name, params) \ - "\0" #name "\0" -#define MAPI_ALIAS_ENTRY(alias, ret, name, params) \ - #name "\0" -#include "vgapi/vgapi_tmp.h" - "\0"; - -static const mapi_proc vega_procs[] = { -#define MAPI_ABI_ENTRY(ret, name, params) \ - (mapi_proc) vega ## name, -#include "vgapi/vgapi_tmp.h" -}; - static void api_init(void) { static boolean initialized = FALSE; diff --git a/src/gallium/state_trackers/vega/api.h b/src/gallium/state_trackers/vega/api.h index 955508dae9a..459dafb4bb0 100644 --- a/src/gallium/state_trackers/vega/api.h +++ b/src/gallium/state_trackers/vega/api.h @@ -33,10 +33,7 @@ #include "VG/vgext.h" #include "vg_manager.h" -/* declare the prototypes */ -#define MAPI_ABI_ENTRY(ret, name, params) \ - ret VG_API_ENTRY vega ## name params; -#include "vgapi/vgapi_tmp.h" +#include "api_tmp.h" struct mapi_table; diff --git 
a/src/gallium/state_trackers/vega/api_filters.c b/src/gallium/state_trackers/vega/api_filters.c index fa1e00d1f88..724e38241b5 100644 --- a/src/gallium/state_trackers/vega/api_filters.c +++ b/src/gallium/state_trackers/vega/api_filters.c @@ -31,7 +31,6 @@ #include "api.h" #include "renderer.h" #include "shaders_cache.h" -#include "st_inlines.h" #include "pipe/p_context.h" #include "pipe/p_state.h" @@ -42,7 +41,6 @@ #include "util/u_sampler.h" #include "util/u_string.h" - #include "asm_filters.h" @@ -73,6 +71,7 @@ static INLINE struct pipe_resource *create_texture_1d(struct vg_context *ctx, templ.width0 = color_data_len; templ.height0 = 1; templ.depth0 = 1; + templ.array_size = 1; templ.bind = PIPE_BIND_SAMPLER_VIEW; tex = screen->resource_create(screen, &templ); @@ -80,9 +79,9 @@ static INLINE struct pipe_resource *create_texture_1d(struct vg_context *ctx, { /* upload color_data */ struct pipe_transfer *transfer = pipe_get_transfer(pipe, tex, - 0, 0, 0, - PIPE_TRANSFER_READ_WRITE , - 0, 0, tex->width0, tex->height0); + 0, 0, + PIPE_TRANSFER_READ_WRITE , + 0, 0, tex->width0, tex->height0); void *map = pipe->transfer_map(pipe, transfer); memcpy(map, color_data, sizeof(VGint)*color_data_len); pipe->transfer_unmap(pipe, transfer); @@ -114,153 +113,11 @@ static INLINE struct pipe_sampler_view *create_texture_1d_view(struct vg_context return view; } -static INLINE struct pipe_surface * setup_framebuffer(struct vg_image *dst) -{ - struct vg_context *ctx = vg_current_context(); - struct pipe_context *pipe = ctx->pipe; - struct pipe_framebuffer_state fb; - struct pipe_surface *dst_surf = pipe->screen->get_tex_surface( - pipe->screen, dst->sampler_view->texture, 0, 0, 0, - PIPE_BIND_RENDER_TARGET); - - /* drawing dest */ - memset(&fb, 0, sizeof(fb)); - fb.width = dst->x + dst_surf->width; - fb.height = dst->y + dst_surf->height; - fb.nr_cbufs = 1; - fb.cbufs[0] = dst_surf; - { - VGint i; - for (i = 1; i < PIPE_MAX_COLOR_BUFS; ++i) - fb.cbufs[i] = 0; - } - cso_set_framebuffer(ctx->cso_context, &fb); - - return dst_surf; -} - -static void setup_viewport(struct vg_image *dst) -{ - struct vg_context *ctx = vg_current_context(); - vg_set_viewport(ctx, VEGA_Y0_TOP); -} - -static void setup_blend() -{ - struct vg_context *ctx = vg_current_context(); - struct pipe_blend_state blend; - memset(&blend, 0, sizeof(blend)); - blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - if (ctx->state.vg.filter_channel_mask & VG_RED) - blend.rt[0].colormask |= PIPE_MASK_R; - if (ctx->state.vg.filter_channel_mask & VG_GREEN) - blend.rt[0].colormask |= PIPE_MASK_G; - if (ctx->state.vg.filter_channel_mask & VG_BLUE) - blend.rt[0].colormask |= PIPE_MASK_B; - if (ctx->state.vg.filter_channel_mask & VG_ALPHA) - blend.rt[0].colormask |= PIPE_MASK_A; - blend.rt[0].blend_enable = 0; - cso_set_blend(ctx->cso_context, &blend); -} - -static void setup_constant_buffer(struct vg_context *ctx, const void *buffer, - VGint param_bytes) -{ - struct pipe_context *pipe = ctx->pipe; - struct pipe_resource **cbuf = &ctx->filter.buffer; - - /* We always need to get a new buffer, to keep the drivers simple and - * avoid gratuitous rendering synchronization. 
*/ - pipe_resource_reference(cbuf, NULL); - - *cbuf = pipe_buffer_create(pipe->screen, - PIPE_BIND_CONSTANT_BUFFER, - param_bytes); - - if (*cbuf) { - st_no_flush_pipe_buffer_write(ctx, *cbuf, - 0, param_bytes, buffer); - } - - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf); -} - -static void setup_samplers(struct vg_context *ctx, struct filter_info *info) -{ - struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS]; - struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; - struct pipe_sampler_state sampler[3]; - int num_samplers = 0; - int num_textures = 0; - - samplers[0] = NULL; - samplers[1] = NULL; - samplers[2] = NULL; - samplers[3] = NULL; - sampler_views[0] = NULL; - sampler_views[1] = NULL; - sampler_views[2] = NULL; - sampler_views[3] = NULL; - - memset(&sampler[0], 0, sizeof(struct pipe_sampler_state)); - sampler[0].wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler[0].wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler[0].wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler[0].min_img_filter = PIPE_TEX_MIPFILTER_LINEAR; - sampler[0].mag_img_filter = PIPE_TEX_MIPFILTER_LINEAR; - sampler[0].normalized_coords = 1; - - switch(info->tiling_mode) { - case VG_TILE_FILL: - sampler[0].wrap_s = PIPE_TEX_WRAP_CLAMP_TO_BORDER; - sampler[0].wrap_t = PIPE_TEX_WRAP_CLAMP_TO_BORDER; - memcpy(sampler[0].border_color, - ctx->state.vg.tile_fill_color, - sizeof(VGfloat) * 4); - break; - case VG_TILE_PAD: - sampler[0].wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler[0].wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - break; - case VG_TILE_REPEAT: - sampler[0].wrap_s = PIPE_TEX_WRAP_REPEAT; - sampler[0].wrap_t = PIPE_TEX_WRAP_REPEAT; - break; - case VG_TILE_REFLECT: - sampler[0].wrap_s = PIPE_TEX_WRAP_MIRROR_REPEAT; - sampler[0].wrap_t = PIPE_TEX_WRAP_MIRROR_REPEAT; - break; - default: - debug_assert(!"Unknown tiling mode"); - } - - samplers[0] = &sampler[0]; - sampler_views[0] = info->src->sampler_view; - ++num_samplers; - ++num_textures; - - if (info->extra_texture_view) { - memcpy(&sampler[1], &sampler[0], sizeof(struct pipe_sampler_state)); - samplers[1] = &sampler[1]; - sampler_views[1] = info->extra_texture_view; - ++num_samplers; - ++num_textures; - } - - - cso_set_samplers(ctx->cso_context, num_samplers, (const struct pipe_sampler_state **)samplers); - cso_set_fragment_sampler_views(ctx->cso_context, num_textures, sampler_views); -} - static struct vg_shader * setup_color_matrix(struct vg_context *ctx, void *user_data) { struct vg_shader *shader = shader_create_from_text(ctx->pipe, color_matrix_asm, 200, PIPE_SHADER_FRAGMENT); - cso_set_fragment_shader_handle(ctx->cso_context, shader->driver); return shader; } @@ -275,7 +132,6 @@ static struct vg_shader * setup_convolution(struct vg_context *ctx, void *user_d shader = shader_create_from_text(ctx->pipe, buffer, 200, PIPE_SHADER_FRAGMENT); - cso_set_fragment_shader_handle(ctx->cso_context, shader->driver); return shader; } @@ -285,7 +141,6 @@ static struct vg_shader * setup_lookup(struct vg_context *ctx, void *user_data) shader_create_from_text(ctx->pipe, lookup_asm, 200, PIPE_SHADER_FRAGMENT); - cso_set_fragment_shader_handle(ctx->cso_context, shader->driver); return shader; } @@ -316,49 +171,68 @@ static struct vg_shader * setup_lookup_single(struct vg_context *ctx, void *user shader = shader_create_from_text(ctx->pipe, buffer, 200, PIPE_SHADER_FRAGMENT); - cso_set_fragment_shader_handle(ctx->cso_context, shader->driver); return shader; } static void execute_filter(struct vg_context *ctx, struct filter_info *info) { - struct 
pipe_surface *dst_surf; struct vg_shader *shader; + const struct pipe_sampler_state *samplers[2]; + struct pipe_sampler_view *views[2]; + struct pipe_sampler_state sampler; + uint tex_wrap; + + memset(&sampler, 0, sizeof(sampler)); + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.normalized_coords = 1; + + switch (info->tiling_mode) { + case VG_TILE_FILL: + tex_wrap = PIPE_TEX_WRAP_CLAMP_TO_BORDER; + /* copy border color */ + memcpy(sampler.border_color, ctx->state.vg.tile_fill_color, + sizeof(sampler.border_color)); + break; + case VG_TILE_PAD: + tex_wrap = PIPE_TEX_WRAP_CLAMP_TO_EDGE;; + break; + case VG_TILE_REPEAT: + tex_wrap = PIPE_TEX_WRAP_REPEAT;; + break; + case VG_TILE_REFLECT: + tex_wrap = PIPE_TEX_WRAP_MIRROR_REPEAT; + break; + default: + debug_assert(!"Unknown tiling mode"); + tex_wrap = 0; + break; + } + + sampler.wrap_s = tex_wrap; + sampler.wrap_t = tex_wrap; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + + samplers[0] = samplers[1] = &sampler; + views[0] = info->src->sampler_view; + views[1] = info->extra_texture_view; - cso_save_framebuffer(ctx->cso_context); - cso_save_fragment_shader(ctx->cso_context); - cso_save_viewport(ctx->cso_context); - cso_save_blend(ctx->cso_context); - cso_save_samplers(ctx->cso_context); - cso_save_fragment_sampler_views(ctx->cso_context); - - dst_surf = setup_framebuffer(info->dst); - setup_viewport(info->dst); - setup_blend(); - setup_constant_buffer(ctx, info->const_buffer, info->const_buffer_len); shader = info->setup_shader(ctx, info->user_data); - setup_samplers(ctx, info); - - renderer_draw_texture(ctx->renderer, - info->src->sampler_view->texture, - info->dst->x, info->dst->y, - info->dst->x + info->dst->width, - info->dst->y + info->dst->height, - info->dst->x, info->dst->y, - info->dst->x + info->dst->width, - info->dst->y + info->dst->height); - - cso_restore_framebuffer(ctx->cso_context); - cso_restore_fragment_shader(ctx->cso_context); - cso_restore_viewport(ctx->cso_context); - cso_restore_blend(ctx->cso_context); - cso_restore_samplers(ctx->cso_context); - cso_restore_fragment_sampler_views(ctx->cso_context); - vg_shader_destroy(ctx, shader); + if (renderer_filter_begin(ctx->renderer, + info->dst->sampler_view->texture, VG_TRUE, + ctx->state.vg.filter_channel_mask, + samplers, views, (info->extra_texture_view) ? 
2 : 1, + shader->driver, info->const_buffer, info->const_buffer_len)) { + renderer_filter(ctx->renderer, + info->dst->x, info->dst->y, info->dst->width, info->dst->height, + info->src->x, info->src->y, info->src->width, info->src->height); + renderer_filter_end(ctx->renderer); + } - pipe_surface_reference(&dst_surf, NULL); + vg_shader_destroy(ctx, shader); } void vegaColorMatrix(VGImage dst, VGImage src, diff --git a/src/gallium/state_trackers/vega/api_images.c b/src/gallium/state_trackers/vega/api_images.c index e9f038c5f92..ad95409cd00 100644 --- a/src/gallium/state_trackers/vega/api_images.c +++ b/src/gallium/state_trackers/vega/api_images.c @@ -303,7 +303,8 @@ void vegaDrawImage(VGImage image) } vg_validate_state(ctx); - image_draw((struct vg_image*)image); + image_draw((struct vg_image*)image, + &ctx->state.vg.image_user_to_surface_matrix); } void vegaSetPixels(VGint dx, VGint dy, @@ -399,7 +400,6 @@ void vegaReadPixels(void * data, VGint dataStride, struct st_framebuffer *stfb = ctx->draw_buffer; struct st_renderbuffer *strb = stfb->strb; - struct pipe_framebuffer_state *fb = &ctx->state.g3d.fb; VGfloat temp[VEGA_MAX_IMAGE_WIDTH][4]; VGfloat *df = (VGfloat*)temp; @@ -435,21 +435,21 @@ void vegaReadPixels(void * data, VGint dataStride, sy = 0; } - if (sx + width > fb->width || sy + height > fb->height) { - width = fb->width - sx; - height = fb->height - sy; + if (sx + width > stfb->width || sy + height > stfb->height) { + width = stfb->width - sx; + height = stfb->height - sy; /* nothing to read */ if (width <= 0 || height <= 0) return; } { - VGint y = (fb->height - sy) - 1, yStep = -1; + VGint y = (stfb->height - sy) - 1, yStep = -1; struct pipe_transfer *transfer; - transfer = pipe_get_transfer(pipe, strb->texture, 0, 0, 0, - PIPE_TRANSFER_READ, - 0, 0, sx + width, fb->height - sy); + transfer = pipe_get_transfer(pipe, strb->texture, 0, 0, + PIPE_TRANSFER_READ, + 0, 0, sx + width, stfb->height - sy); /* Do a row at a time to flip image data vertically */ for (i = 0; i < height; i++) { @@ -472,8 +472,8 @@ void vegaCopyPixels(VGint dx, VGint dy, VGint width, VGint height) { struct vg_context *ctx = vg_current_context(); - struct pipe_framebuffer_state *fb = &ctx->state.g3d.fb; - struct st_renderbuffer *strb = ctx->draw_buffer->strb; + struct st_framebuffer *stfb = ctx->draw_buffer; + struct st_renderbuffer *strb = stfb->strb; if (width <= 0 || height <= 0) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); @@ -481,8 +481,8 @@ void vegaCopyPixels(VGint dx, VGint dy, } /* do nothing if we copy from outside the fb */ - if (dx >= (VGint)fb->width || dy >= (VGint)fb->height || - sx >= (VGint)fb->width || sy >= (VGint)fb->height) + if (dx >= (VGint)stfb->width || dy >= (VGint)stfb->height || + sx >= (VGint)stfb->width || sy >= (VGint)stfb->height) return; vg_validate_state(ctx); diff --git a/src/gallium/state_trackers/vega/api_masks.c b/src/gallium/state_trackers/vega/api_masks.c index 189390ec2d3..d96afe66387 100644 --- a/src/gallium/state_trackers/vega/api_masks.c +++ b/src/gallium/state_trackers/vega/api_masks.c @@ -28,126 +28,10 @@ #include "mask.h" #include "api.h" +#include "renderer.h" #include "vg_context.h" #include "pipe/p_context.h" -#include "util/u_inlines.h" - -#include "util/u_pack_color.h" -#include "util/u_draw_quad.h" - -#define DISABLE_1_1_MASKING 1 - -/** - * Draw a screen-aligned quadrilateral. - * Coords are window coords with y=0=bottom. These coords will be transformed - * by the vertex shader and viewport transform. 
- */ -static void -draw_clear_quad(struct vg_context *st, - float x0, float y0, float x1, float y1, float z, - const VGfloat color[4]) -{ - struct pipe_context *pipe = st->pipe; - struct pipe_resource *buf; - VGuint i; - - /* positions */ - st->clear.vertices[0][0][0] = x0; - st->clear.vertices[0][0][1] = y0; - - st->clear.vertices[1][0][0] = x1; - st->clear.vertices[1][0][1] = y0; - - st->clear.vertices[2][0][0] = x1; - st->clear.vertices[2][0][1] = y1; - - st->clear.vertices[3][0][0] = x0; - st->clear.vertices[3][0][1] = y1; - - /* same for all verts: */ - for (i = 0; i < 4; i++) { - st->clear.vertices[i][0][2] = z; - st->clear.vertices[i][0][3] = 1.0; - st->clear.vertices[i][1][0] = color[0]; - st->clear.vertices[i][1][1] = color[1]; - st->clear.vertices[i][1][2] = color[2]; - st->clear.vertices[i][1][3] = color[3]; - } - - - /* put vertex data into vbuf */ - buf = pipe_user_buffer_create(pipe->screen, - st->clear.vertices, - sizeof(st->clear.vertices), - PIPE_BIND_VERTEX_BUFFER); - - - /* draw */ - if (buf) { - cso_set_vertex_elements(st->cso_context, 2, st->velems); - - util_draw_vertex_buffer(pipe, buf, 0, - PIPE_PRIM_TRIANGLE_FAN, - 4, /* verts */ - 2); /* attribs/vert */ - - pipe_resource_reference(&buf, NULL); - } -} - -/** - * Do vgClear by drawing a quadrilateral. - */ -static void -clear_with_quad(struct vg_context *st, float x0, float y0, - float width, float height, const VGfloat clear_color[4]) -{ - VGfloat x1, y1; - - vg_validate_state(st); - - x1 = x0 + width; - y1 = y0 + height; - - /* - printf("%s %f,%f %f,%f\n", __FUNCTION__, - x0, y0, - x1, y1); - */ - - cso_save_blend(st->cso_context); - cso_save_rasterizer(st->cso_context); - cso_save_fragment_shader(st->cso_context); - cso_save_vertex_shader(st->cso_context); - - /* blend state: RGBA masking */ - { - struct pipe_blend_state blend; - memset(&blend, 0, sizeof(blend)); - blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.rt[0].colormask = PIPE_MASK_RGBA; - cso_set_blend(st->cso_context, &blend); - } - - cso_set_rasterizer(st->cso_context, &st->clear.raster); - - cso_set_fragment_shader_handle(st->cso_context, st->clear.fs); - cso_set_vertex_shader_handle(st->cso_context, vg_clear_vs(st)); - - /* draw quad matching scissor rect (XXX verify coord round-off) */ - draw_clear_quad(st, x0, y0, x1, y1, 0, clear_color); - - /* Restore pipe state */ - cso_restore_blend(st->cso_context); - cso_restore_rasterizer(st->cso_context); - cso_restore_fragment_shader(st->cso_context); - cso_restore_vertex_shader(st->cso_context); -} - void vegaMask(VGHandle mask, VGMaskOperation operation, VGint x, VGint y, @@ -176,12 +60,8 @@ void vegaMask(VGHandle mask, VGMaskOperation operation, struct vg_image *image = (struct vg_image *)mask; mask_using_image(image, operation, x, y, width, height); } else if (vg_object_is_valid((void*)mask, VG_OBJECT_MASK)) { -#if DISABLE_1_1_MASKING - return; -#else struct vg_mask_layer *layer = (struct vg_mask_layer *)mask; mask_using_layer(layer, operation, x, y, width, height); -#endif } else { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); } @@ -191,7 +71,7 @@ void vegaClear(VGint x, VGint y, VGint width, VGint height) { struct vg_context *ctx = vg_current_context(); - struct pipe_framebuffer_state *fb; + struct st_framebuffer *stfb = ctx->draw_buffer; if (width <= 0 || height <= 0) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); @@ -208,14 +88,15 
@@ void vegaClear(VGint x, VGint y, ctx->state.vg.clear_color[3]); #endif - fb = &ctx->state.g3d.fb; /* check for a whole surface clear */ if (!ctx->state.vg.scissoring && - (x == 0 && y == 0 && width == fb->width && height == fb->height)) { + (x == 0 && y == 0 && width == stfb->width && height == stfb->height)) { ctx->pipe->clear(ctx->pipe, PIPE_CLEAR_COLOR | PIPE_CLEAR_DEPTHSTENCIL, ctx->state.vg.clear_color, 1., 0); - } else { - clear_with_quad(ctx, x, y, width, height, ctx->state.vg.clear_color); + } else if (renderer_clear_begin(ctx->renderer)) { + /* XXX verify coord round-off */ + renderer_clear(ctx->renderer, x, y, width, height, ctx->state.vg.clear_color); + renderer_clear_end(ctx->renderer); } } @@ -247,10 +128,6 @@ void vegaRenderToMask(VGPath path, return; } -#if DISABLE_1_1_MASKING - return; -#endif - vg_validate_state(ctx); mask_render_to((struct path *)path, paintModes, operation); @@ -328,9 +205,8 @@ void vegaFillMaskLayer(VGMaskLayer maskLayer, return; } -#if DISABLE_1_1_MASKING - return; -#endif + vg_validate_state(ctx); + mask_layer_fill(mask, x, y, width, height, value); } @@ -355,9 +231,7 @@ void vegaCopyMask(VGMaskLayer maskLayer, return; } -#if DISABLE_1_1_MASKING - return; -#endif + vg_validate_state(ctx); mask = (struct vg_mask_layer*)maskLayer; mask_copy(mask, sx, sy, dx, dy, width, height); diff --git a/src/gallium/state_trackers/vega/api_misc.c b/src/gallium/state_trackers/vega/api_misc.c index e648549745b..9e2ab03e01a 100644 --- a/src/gallium/state_trackers/vega/api_misc.c +++ b/src/gallium/state_trackers/vega/api_misc.c @@ -63,8 +63,8 @@ const VGubyte *vegaGetString(VGStringID name) { struct vg_context *ctx = vg_current_context(); static const VGubyte *vendor = (VGubyte *)"Tungsten Graphics, Inc"; - static const VGubyte *renderer = (VGubyte *)"Vega OpenVG 1.0"; - static const VGubyte *version = (VGubyte *)"1.0"; + static const VGubyte *renderer = (VGubyte *)"Vega OpenVG 1.1"; + static const VGubyte *version = (VGubyte *)"1.1"; if (!ctx) return NULL; diff --git a/src/gallium/state_trackers/vega/api_params.c b/src/gallium/state_trackers/vega/api_params.c index a10b009e631..a73b6c3effe 100644 --- a/src/gallium/state_trackers/vega/api_params.c +++ b/src/gallium/state_trackers/vega/api_params.c @@ -30,6 +30,7 @@ #include "paint.h" #include "path.h" #include "image.h" +#include "text.h" #include "matrix.h" #include "api_consts.h" #include "api.h" @@ -174,6 +175,7 @@ void vegaSeti (VGParamType type, VGint value) error = VG_ILLEGAL_ARGUMENT_ERROR; else state->image_mode = value; + break; #ifdef OPENVG_VERSION_1_1 case VG_COLOR_TRANSFORM: state->color_transform = value; @@ -1500,7 +1502,8 @@ VGint vegaGetParameteri(VGHandle object, #ifdef OPENVG_VERSION_1_1 case VG_FONT_NUM_GLYPHS: { - return 1; + struct vg_font *font = (struct vg_font*)object; + return font_num_glyphs(font); } break; #endif diff --git a/src/gallium/state_trackers/vega/api_path.c b/src/gallium/state_trackers/vega/api_path.c index f76adddb584..fe57b7671d9 100644 --- a/src/gallium/state_trackers/vega/api_path.c +++ b/src/gallium/state_trackers/vega/api_path.c @@ -479,6 +479,7 @@ void vegaDrawPath(VGPath path, VGbitfield paintModes) if (path_is_empty((struct path*)path)) return; - path_render((struct path*)path, paintModes); + path_render((struct path*)path, paintModes, + &ctx->state.vg.path_user_to_surface_matrix); } diff --git a/src/gallium/state_trackers/vega/api_text.c b/src/gallium/state_trackers/vega/api_text.c index 2a62da0a1de..7c6b4794099 100644 --- a/src/gallium/state_trackers/vega/api_text.c 
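/* Note (not from the patch): the hand-rolled cso_save/draw-quad/cso_restore
 * paths removed above (clear_with_quad here, the setup_* helpers in
 * api_filters.c) are replaced by bracketed renderer entry points:
 * renderer_clear_begin()/renderer_clear()/renderer_clear_end(),
 * renderer_filter_begin()/renderer_filter()/renderer_filter_end(), and the
 * renderer_copy_*() calls used later in image.c.  In each case the _begin()
 * call binds the state the operation needs and may fail (callers test its
 * return value), the middle call issues the actual draw, and _end() is
 * expected to restore the previous state, so callers no longer save and
 * restore cso state themselves. */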
+++ b/src/gallium/state_trackers/vega/api_text.c @@ -27,21 +27,15 @@ #include "VG/openvg.h" #include "vg_context.h" +#include "text.h" +#include "api.h" #include "util/u_memory.h" #ifdef OPENVG_VERSION_1_1 -struct vg_font { - struct vg_object base; - - VGint glyph_indices[200]; - VGint num_glyphs; -}; - VGFont vegaCreateFont(VGint glyphCapacityHint) { - struct vg_font *font = 0; struct vg_context *ctx = vg_current_context(); if (glyphCapacityHint < 0) { @@ -49,10 +43,7 @@ VGFont vegaCreateFont(VGint glyphCapacityHint) return VG_INVALID_HANDLE; } - font = CALLOC_STRUCT(vg_font); - vg_init_object(&font->base, ctx, VG_OBJECT_FONT); - vg_context_add_object(ctx, VG_OBJECT_FONT, font); - return (VGFont)font; + return (VGFont) font_create(glyphCapacityHint); } void vegaDestroyFont(VGFont f) @@ -64,20 +55,23 @@ void vegaDestroyFont(VGFont f) vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } + if (!vg_object_is_valid((void *) font, VG_OBJECT_FONT)) { + vg_set_error(ctx, VG_BAD_HANDLE_ERROR); + return; + } - vg_context_remove_object(ctx, VG_OBJECT_FONT, font); - /*free(font);*/ + font_destroy(font); } void vegaSetGlyphToPath(VGFont font, VGuint glyphIndex, VGPath path, VGboolean isHinted, - VGfloat glyphOrigin [2], - VGfloat escapement[2]) + const VGfloat glyphOrigin[2], + const VGfloat escapement[2]) { struct vg_context *ctx = vg_current_context(); - struct vg_object *pathObj; + struct path *pathObj; struct vg_font *f; if (font == VG_INVALID_HANDLE || @@ -95,25 +89,22 @@ void vegaSetGlyphToPath(VGFont font, vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } - pathObj = (struct vg_object*)path; - if (pathObj && pathObj->type != VG_OBJECT_PATH) { - vg_set_error(ctx, VG_BAD_HANDLE_ERROR); - return; - } - f = (struct vg_font*)font; - f->glyph_indices[f->num_glyphs] = glyphIndex; - ++f->num_glyphs; + pathObj = (struct path*) path; + f = (struct vg_font*) font; + + font_set_glyph_to_path(f, glyphIndex, pathObj, + isHinted, glyphOrigin, escapement); } void vegaSetGlyphToImage(VGFont font, VGuint glyphIndex, VGImage image, - VGfloat glyphOrigin [2], - VGfloat escapement[2]) + const VGfloat glyphOrigin[2], + const VGfloat escapement[2]) { struct vg_context *ctx = vg_current_context(); - struct vg_object *img_obj; + struct vg_image *img_obj; struct vg_font *f; if (font == VG_INVALID_HANDLE || @@ -131,26 +122,11 @@ void vegaSetGlyphToImage(VGFont font, vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } - img_obj = (struct vg_object*)image; - if (img_obj && img_obj->type != VG_OBJECT_IMAGE) { - vg_set_error(ctx, VG_BAD_HANDLE_ERROR); - return; - } + + img_obj = (struct vg_image*)image; f = (struct vg_font*)font; - f->glyph_indices[f->num_glyphs] = glyphIndex; - ++f->num_glyphs; -} -static INLINE VGboolean font_contains_glyph(struct vg_font *font, - VGuint glyph_index) -{ - VGint i; - for (i = 0; i < font->num_glyphs; ++i) { - if (font->glyph_indices[i] == glyph_index) { - return VG_TRUE; - } - } - return VG_FALSE; + font_set_glyph_to_image(f, glyphIndex, img_obj, glyphOrigin, escapement); } void vegaClearGlyph(VGFont font, @@ -158,30 +134,15 @@ void vegaClearGlyph(VGFont font, { struct vg_context *ctx = vg_current_context(); struct vg_font *f; - VGint i; if (font == VG_INVALID_HANDLE) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } - if (glyphIndex <= 0) { - vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); - return; - } - f = (struct vg_font*)font; - if (!font_contains_glyph(f, glyphIndex)) { - vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); - return; - } - for (i = 0; i < f->num_glyphs; ++i) { - if 
(f->glyph_indices[i] == glyphIndex) { - /*FIXME*/ - f->glyph_indices[f->num_glyphs] = 0; - --f->num_glyphs; - return; - } - } + f = (struct vg_font*) font; + + font_clear_glyph(f, glyphIndex); } void vegaDrawGlyph(VGFont font, @@ -196,31 +157,24 @@ void vegaDrawGlyph(VGFont font, vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } - if (glyphIndex <= 0) { - vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); - return; - } if (paintModes & (~(VG_STROKE_PATH|VG_FILL_PATH))) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); return; } f = (struct vg_font*)font; - if (!font_contains_glyph(f, glyphIndex)) { - vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); - return; - } + + font_draw_glyph(f, glyphIndex, paintModes, allowAutoHinting); } void vegaDrawGlyphs(VGFont font, VGint glyphCount, - VGuint *glyphIndices, - VGfloat *adjustments_x, - VGfloat *adjustments_y, + const VGuint *glyphIndices, + const VGfloat *adjustments_x, + const VGfloat *adjustments_y, VGbitfield paintModes, VGboolean allowAutoHinting) { struct vg_context *ctx = vg_current_context(); - VGint i; struct vg_font *f; if (font == VG_INVALID_HANDLE) { @@ -235,8 +189,8 @@ void vegaDrawGlyphs(VGFont font, vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); return; } - if (!adjustments_x || !is_aligned(adjustments_x) || - !adjustments_y || !is_aligned(adjustments_y)) { + if ((adjustments_x && !is_aligned(adjustments_x)) || + (adjustments_y && !is_aligned(adjustments_y))) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); return; } @@ -246,13 +200,9 @@ void vegaDrawGlyphs(VGFont font, } f = (struct vg_font*)font; - for (i = 0; i < glyphCount; ++i) { - VGuint glyph_index = glyphIndices[i]; - if (!font_contains_glyph(f, glyph_index)) { - vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); - return; - } - } + + font_draw_glyphs(f, glyphCount, glyphIndices, + adjustments_x, adjustments_y, paintModes, allowAutoHinting); } -#endif +#endif /* OPENVG_VERSION_1_1 */ diff --git a/src/gallium/state_trackers/vega/asm_fill.h b/src/gallium/state_trackers/vega/asm_fill.h index 27773467fa8..77e6a14fe99 100644 --- a/src/gallium/state_trackers/vega/asm_fill.h +++ b/src/gallium/state_trackers/vega/asm_fill.h @@ -44,9 +44,31 @@ solid_fill( struct ureg_program *ureg, struct ureg_dst *temp, struct ureg_src *constant) { - ureg_MOV(ureg, *out, constant[0]); + ureg_MOV(ureg, *out, constant[2]); } +/** + * Perform frag-coord-to-paint-coord transform. The transformation is in + * CONST[4..6]. 
+ */ +#define PAINT_TRANSFORM \ + ureg_MOV(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_XY), in[0]); \ + ureg_MOV(ureg, \ + ureg_writemask(temp[0], TGSI_WRITEMASK_Z), \ + ureg_scalar(constant[3], TGSI_SWIZZLE_Y)); \ + ureg_DP3(ureg, temp[1], constant[4], ureg_src(temp[0])); \ + ureg_DP3(ureg, temp[2], constant[5], ureg_src(temp[0])); \ + ureg_DP3(ureg, temp[3], constant[6], ureg_src(temp[0])); \ + ureg_RCP(ureg, temp[3], ureg_src(temp[3])); \ + ureg_MUL(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[3])); \ + ureg_MUL(ureg, temp[2], ureg_src(temp[2]), ureg_src(temp[3])); \ + ureg_MOV(ureg, \ + ureg_writemask(temp[4], TGSI_WRITEMASK_X), \ + ureg_src(temp[1])); \ + ureg_MOV(ureg, \ + ureg_writemask(temp[4], TGSI_WRITEMASK_Y), \ + ureg_src(temp[2])); + static INLINE void linear_grad( struct ureg_program *ureg, struct ureg_dst *out, @@ -55,30 +77,19 @@ linear_grad( struct ureg_program *ureg, struct ureg_dst *temp, struct ureg_src *constant) { + PAINT_TRANSFORM - ureg_MOV(ureg, - ureg_writemask(temp[0], TGSI_WRITEMASK_XY), - in[0]); - ureg_MOV(ureg, - ureg_writemask(temp[0], TGSI_WRITEMASK_Z), - ureg_scalar(constant[1], TGSI_SWIZZLE_Y)); - ureg_DP3(ureg, temp[1], constant[2], ureg_src(temp[0])); - ureg_DP3(ureg, temp[2], constant[3], ureg_src(temp[0])); - ureg_DP3(ureg, temp[3], constant[4], ureg_src(temp[0])); - ureg_RCP(ureg, temp[3], ureg_src(temp[3])); - ureg_MUL(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[3])); - ureg_MUL(ureg, temp[2], ureg_src(temp[2]), ureg_src(temp[3])); - ureg_MOV(ureg, ureg_writemask(temp[4], TGSI_WRITEMASK_X), ureg_src(temp[1])); - ureg_MOV(ureg, ureg_writemask(temp[4], TGSI_WRITEMASK_Y), ureg_src(temp[2])); + /* grad = DP2((x, y), CONST[2].xy) * CONST[2].z */ ureg_MUL(ureg, temp[0], - ureg_scalar(constant[0], TGSI_SWIZZLE_Y), + ureg_scalar(constant[2], TGSI_SWIZZLE_Y), ureg_scalar(ureg_src(temp[4]), TGSI_SWIZZLE_Y)); ureg_MAD(ureg, temp[1], - ureg_scalar(constant[0], TGSI_SWIZZLE_X), + ureg_scalar(constant[2], TGSI_SWIZZLE_X), ureg_scalar(ureg_src(temp[4]), TGSI_SWIZZLE_X), ureg_src(temp[0])); ureg_MUL(ureg, temp[2], ureg_src(temp[1]), - ureg_scalar(constant[0], TGSI_SWIZZLE_Z)); + ureg_scalar(constant[2], TGSI_SWIZZLE_Z)); + ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[2]), sampler[0]); } @@ -90,52 +101,32 @@ radial_grad( struct ureg_program *ureg, struct ureg_dst *temp, struct ureg_src *constant) { - - ureg_MOV(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_XY), in[0]); - ureg_MOV(ureg, - ureg_writemask(temp[0], TGSI_WRITEMASK_Z), - ureg_scalar(constant[1], TGSI_SWIZZLE_Y)); - ureg_DP3(ureg, temp[1], constant[2], ureg_src(temp[0])); - ureg_DP3(ureg, temp[2], constant[3], ureg_src(temp[0])); - ureg_DP3(ureg, temp[3], constant[4], ureg_src(temp[0])); + PAINT_TRANSFORM + + /* + * Calculate (sqrt(B^2 + AC) - B) / A, where + * + * A is CONST[2].z, + * B is DP2((x, y), CONST[2].xy), and + * C is DP2((x, y), (x, y)). 
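/* Note (not from the patch), on the math above.  PAINT_TRANSFORM treats the
 * fragment position as p = (x, y, 1) (CONST[3].y holds 1.0f) and multiplies
 * it by the 3x3 row matrix stored in CONST[4..6], dividing by the third
 * row's result, i.e. a projective transform:
 *
 *    paint.x = dot(CONST[4], p) / dot(CONST[6], p)
 *    paint.y = dot(CONST[5], p) / dot(CONST[6], p)
 *
 * For the radial gradient, with A = CONST[2].z, B = dot(paint.xy, CONST[2].xy)
 * and C = dot(paint.xy, paint.xy), the value (sqrt(B^2 + A*C) - B) / A
 * computed below is the non-negative root t of the quadratic
 *
 *    A*t^2 + 2*B*t - C = 0,
 *
 * presumably the form into which the paint code packs the focal-point
 * gradient equation; that packing lives outside this hunk. */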
+ */ + + /* B and C */ + ureg_DP2(ureg, temp[0], ureg_src(temp[4]), constant[2]); + ureg_DP2(ureg, temp[1], ureg_src(temp[4]), ureg_src(temp[4])); + + /* the square root */ + ureg_MUL(ureg, temp[2], ureg_src(temp[0]), ureg_src(temp[0])); + ureg_MAD(ureg, temp[3], ureg_src(temp[1]), + ureg_scalar(constant[2], TGSI_SWIZZLE_Z), ureg_src(temp[2])); + ureg_RSQ(ureg, temp[3], ureg_src(temp[3])); ureg_RCP(ureg, temp[3], ureg_src(temp[3])); - ureg_MUL(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[3])); - ureg_MUL(ureg, temp[2], ureg_src(temp[2]), ureg_src(temp[3])); - ureg_MOV(ureg, ureg_writemask(temp[5], TGSI_WRITEMASK_X), ureg_src(temp[1])); - ureg_MOV(ureg, ureg_writemask(temp[5], TGSI_WRITEMASK_Y), ureg_src(temp[2])); - ureg_MUL(ureg, temp[0], ureg_scalar(constant[0], TGSI_SWIZZLE_Y), - ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_Y)); - ureg_MAD(ureg, temp[1], - ureg_scalar(constant[0], TGSI_SWIZZLE_X), - ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_X), ureg_src(temp[0])); - ureg_ADD(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[1])); - ureg_MUL(ureg, temp[3], - ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_Y), - ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_Y)); - ureg_MAD(ureg, temp[4], - ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_X), - ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_X), - ureg_src(temp[3])); - ureg_MOV(ureg, temp[4], ureg_negate(ureg_src(temp[4]))); - ureg_MUL(ureg, temp[2], - ureg_scalar(constant[0], TGSI_SWIZZLE_Z), - ureg_src(temp[4])); - ureg_MUL(ureg, temp[0], - ureg_scalar(constant[1], TGSI_SWIZZLE_W), - ureg_src(temp[2])); - ureg_MUL(ureg, temp[3], ureg_src(temp[1]), ureg_src(temp[1])); - - ureg_SUB(ureg, temp[2], ureg_src(temp[3]), ureg_src(temp[0])); - ureg_RSQ(ureg, temp[2], ureg_abs(ureg_src(temp[2]))); - ureg_RCP(ureg, temp[2], ureg_src(temp[2])); - ureg_SUB(ureg, temp[1], ureg_src(temp[2]), ureg_src(temp[1])); - ureg_ADD(ureg, temp[0], - ureg_scalar(constant[0], TGSI_SWIZZLE_Z), - ureg_scalar(constant[0], TGSI_SWIZZLE_Z)); - ureg_RCP(ureg, temp[0], ureg_src(temp[0])); - ureg_MUL(ureg, temp[2], ureg_src(temp[1]), ureg_src(temp[0])); - ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[2]), sampler[0]); + ureg_SUB(ureg, temp[3], ureg_src(temp[3]), ureg_src(temp[0])); + ureg_RCP(ureg, temp[0], ureg_scalar(constant[2], TGSI_SWIZZLE_Z)); + ureg_MUL(ureg, temp[0], ureg_src(temp[0]), ureg_src(temp[3])); + + ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[0]), sampler[0]); } @@ -147,22 +138,11 @@ pattern( struct ureg_program *ureg, struct ureg_dst *temp, struct ureg_src *constant) { - ureg_MOV(ureg, - ureg_writemask(temp[0], TGSI_WRITEMASK_XY), - in[0]); - ureg_MOV(ureg, - ureg_writemask(temp[0], TGSI_WRITEMASK_Z), - ureg_scalar(constant[1], TGSI_SWIZZLE_Y)); - ureg_DP3(ureg, temp[1], constant[2], ureg_src(temp[0])); - ureg_DP3(ureg, temp[2], constant[3], ureg_src(temp[0])); - ureg_DP3(ureg, temp[3], constant[4], ureg_src(temp[0])); - ureg_RCP(ureg, temp[3], ureg_src(temp[3])); - ureg_MUL(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[3])); - ureg_MUL(ureg, temp[2], ureg_src(temp[2]), ureg_src(temp[3])); - ureg_MOV(ureg, ureg_writemask(temp[4], TGSI_WRITEMASK_X), ureg_src(temp[1])); - ureg_MOV(ureg, ureg_writemask(temp[4], TGSI_WRITEMASK_Y), ureg_src(temp[2])); + PAINT_TRANSFORM + + /* (s, t) = (x / tex_width, y / tex_height) */ ureg_RCP(ureg, temp[0], - ureg_swizzle(constant[1], + ureg_swizzle(constant[3], TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, @@ -176,22 +156,21 @@ pattern( struct ureg_program *ureg, ureg_writemask(temp[1], TGSI_WRITEMASK_Y), 
ureg_src(temp[1]), ureg_src(temp[0])); + ureg_TEX(ureg, *out, TGSI_TEXTURE_2D, ureg_src(temp[1]), sampler[0]); } static INLINE void -mask( struct ureg_program *ureg, - struct ureg_dst *out, - struct ureg_src *in, - struct ureg_src *sampler, - struct ureg_dst *temp, - struct ureg_src *constant) +paint_degenerate( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) { - ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[1]); - ureg_MUL(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_W), - ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), - ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); - ureg_MOV(ureg, *out, ureg_src(temp[0])); + /* CONST[3].y is 1.0f */ + ureg_MOV(ureg, temp[1], ureg_scalar(constant[3], TGSI_SWIZZLE_Y)); + ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[1]), sampler[0]); } static INLINE void @@ -202,7 +181,9 @@ image_normal( struct ureg_program *ureg, struct ureg_dst *temp, struct ureg_src *constant) { - ureg_TEX(ureg, *out, TGSI_TEXTURE_2D, in[1], sampler[3]); + /* store and pass image color in TEMP[1] */ + ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[1], sampler[3]); + ureg_MOV(ureg, *out, ureg_src(temp[1])); } @@ -214,6 +195,7 @@ image_multiply( struct ureg_program *ureg, struct ureg_dst *temp, struct ureg_src *constant) { + /* store and pass image color in TEMP[1] */ ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[1], sampler[3]); ureg_MUL(ureg, *out, ureg_src(temp[0]), ureg_src(temp[1])); } @@ -227,43 +209,279 @@ image_stencil( struct ureg_program *ureg, struct ureg_dst *temp, struct ureg_src *constant) { + /* store and pass image color in TEMP[1] */ ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[1], sampler[3]); - ureg_MUL(ureg, *out, ureg_src(temp[0]), ureg_src(temp[1])); + ureg_MOV(ureg, *out, ureg_src(temp[0])); +} + +static INLINE void +color_transform( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + /* note that TEMP[1] may already be used for image color */ + + ureg_MAD(ureg, temp[2], ureg_src(temp[0]), constant[0], constant[1]); + /* clamp to [0.0f, 1.0f] */ + ureg_CLAMP(ureg, temp[2], + ureg_src(temp[2]), + ureg_scalar(constant[3], TGSI_SWIZZLE_X), + ureg_scalar(constant[3], TGSI_SWIZZLE_Y)); + ureg_MOV(ureg, *out, ureg_src(temp[2])); +} + +static INLINE void +alpha_normal( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + /* save per-channel alpha in TEMP[1] */ + ureg_MOV(ureg, temp[1], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W)); + + ureg_MOV(ureg, *out, ureg_src(temp[0])); } -#define EXTENDED_BLENDER_OVER_FUNC \ - ureg_SUB(ureg, temp[3], \ - ureg_scalar(constant[1], TGSI_SWIZZLE_Y), \ - ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); \ - ureg_SUB(ureg, temp[3], \ - ureg_scalar(constant[1], TGSI_SWIZZLE_Y), \ - ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W)); \ - ureg_MUL(ureg, temp[3], ureg_src(temp[0]), ureg_src(temp[3])); \ - ureg_MUL(ureg, temp[4], ureg_src(temp[1]), ureg_src(temp[4])); \ - ureg_ADD(ureg, temp[3], ureg_src(temp[3]), ureg_src(temp[4])); +static INLINE void +alpha_per_channel( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + /* save per-channel alpha in TEMP[1] */ + ureg_MUL(ureg, + ureg_writemask(temp[1], 
TGSI_WRITEMASK_W), + ureg_src(temp[0]), + ureg_src(temp[1])); + ureg_MUL(ureg, + ureg_writemask(temp[1], TGSI_WRITEMASK_XYZ), + ureg_src(temp[1]), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + /* update alpha */ + ureg_MOV(ureg, + ureg_writemask(temp[0], TGSI_WRITEMASK_W), + ureg_src(temp[1])); + ureg_MOV(ureg, *out, ureg_src(temp[0])); +} +/** + * Premultiply src and dst. + */ static INLINE void -blend_multiply( struct ureg_program *ureg, +blend_premultiply( struct ureg_program *ureg, + struct ureg_src src, + struct ureg_src src_channel_alpha, + struct ureg_src dst) +{ + /* premultiply src */ + ureg_MUL(ureg, + ureg_writemask(ureg_dst(src), TGSI_WRITEMASK_XYZ), + src, + src_channel_alpha); + /* premultiply dst */ + ureg_MUL(ureg, + ureg_writemask(ureg_dst(dst), TGSI_WRITEMASK_XYZ), + dst, + ureg_scalar(dst, TGSI_SWIZZLE_W)); +} + +/** + * Unpremultiply src. + */ +static INLINE void +blend_unpremultiply( struct ureg_program *ureg, + struct ureg_src src, + struct ureg_src one, + struct ureg_dst temp[1]) +{ + /* replace 0.0f by 1.0f before calculating reciprocal */ + ureg_CMP(ureg, + temp[0], + ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_W)), + ureg_scalar(src, TGSI_SWIZZLE_W), + one); + ureg_RCP(ureg, temp[0], ureg_src(temp[0])); + + ureg_MUL(ureg, + ureg_writemask(ureg_dst(src), TGSI_WRITEMASK_XYZ), + src, + ureg_src(temp[0])); +} + +/** + * Emit instructions for the specified blend mode. Colors will be + * unpremultiplied. Two temporary registers are required. + * + * The output is written back to src. + */ +static INLINE void +blend_generic(struct ureg_program *ureg, + VGBlendMode mode, + struct ureg_src src, + struct ureg_src src_channel_alpha, + struct ureg_src dst, + struct ureg_src one, + struct ureg_dst temp[2]) +{ + struct ureg_dst out; + + blend_premultiply(ureg, src, src_channel_alpha, dst); + + /* blend in-place */ + out = ureg_dst(src); + + switch (mode) { + case VG_BLEND_SRC: + ureg_MOV(ureg, out, src); + break; + case VG_BLEND_SRC_OVER: + /* RGBA_out = RGBA_src + (1 - A_src) * RGBA_dst */ + ureg_SUB(ureg, temp[0], one, src_channel_alpha); + ureg_MAD(ureg, out, ureg_src(temp[0]), dst, src); + break; + case VG_BLEND_DST_OVER: + /* RGBA_out = RGBA_dst + (1 - A_dst) * RGBA_src */ + ureg_SUB(ureg, temp[0], one, ureg_scalar(dst, TGSI_SWIZZLE_W)); + ureg_MAD(ureg, out, ureg_src(temp[0]), src, dst); + break; + case VG_BLEND_SRC_IN: + ureg_MUL(ureg, out, src, ureg_scalar(dst, TGSI_SWIZZLE_W)); + break; + case VG_BLEND_DST_IN: + ureg_MUL(ureg, out, dst, src_channel_alpha); + break; + case VG_BLEND_MULTIPLY: + /* + * RGB_out = (1 - A_dst) * RGB_src + (1 - A_src) * RGB_dst + + * RGB_src * RGB_dst + */ + ureg_MAD(ureg, temp[0], + ureg_scalar(dst, TGSI_SWIZZLE_W), ureg_negate(src), src); + ureg_MAD(ureg, temp[1], + src_channel_alpha, ureg_negate(dst), dst); + ureg_MAD(ureg, temp[0], src, dst, ureg_src(temp[0])); + ureg_ADD(ureg, out, ureg_src(temp[0]), ureg_src(temp[1])); + /* alpha is src over */ + ureg_ADD(ureg, ureg_writemask(out, TGSI_WRITEMASK_W), + src, ureg_src(temp[1])); + break; + case VG_BLEND_SCREEN: + /* RGBA_out = RGBA_src + (1 - RGBA_src) * RGBA_dst */ + ureg_SUB(ureg, temp[0], one, src); + ureg_MAD(ureg, out, ureg_src(temp[0]), dst, src); + break; + case VG_BLEND_DARKEN: + case VG_BLEND_LIGHTEN: + /* src over */ + ureg_SUB(ureg, temp[0], one, src_channel_alpha); + ureg_MAD(ureg, temp[0], ureg_src(temp[0]), dst, src); + /* dst over */ + ureg_SUB(ureg, temp[1], one, ureg_scalar(dst, TGSI_SWIZZLE_W)); + ureg_MAD(ureg, temp[1], ureg_src(temp[1]), src, dst); + /* take 
min/max for colors */ + if (mode == VG_BLEND_DARKEN) { + ureg_MIN(ureg, ureg_writemask(out, TGSI_WRITEMASK_XYZ), + ureg_src(temp[0]), ureg_src(temp[1])); + } + else { + ureg_MAX(ureg, ureg_writemask(out, TGSI_WRITEMASK_XYZ), + ureg_src(temp[0]), ureg_src(temp[1])); + } + break; + case VG_BLEND_ADDITIVE: + /* RGBA_out = RGBA_src + RGBA_dst */ + ureg_ADD(ureg, temp[0], src, dst); + ureg_MIN(ureg, out, ureg_src(temp[0]), one); + break; + default: + assert(0); + break; + } + + blend_unpremultiply(ureg, src, one, temp); +} + +#define BLEND_GENERIC(mode) \ + do { \ + ureg_TEX(ureg, temp[2], TGSI_TEXTURE_2D, in[0], sampler[2]); \ + blend_generic(ureg, (mode), ureg_src(temp[0]), ureg_src(temp[1]), \ + ureg_src(temp[2]), \ + ureg_scalar(constant[3], TGSI_SWIZZLE_Y), temp + 3); \ + ureg_MOV(ureg, *out, ureg_src(temp[0])); \ + } while (0) + +static INLINE void +blend_src( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + BLEND_GENERIC(VG_BLEND_SRC); +} + +static INLINE void +blend_src_over( struct ureg_program *ureg, struct ureg_dst *out, struct ureg_src *in, struct ureg_src *sampler, struct ureg_dst *temp, struct ureg_src *constant) { - ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); - EXTENDED_BLENDER_OVER_FUNC - ureg_MUL(ureg, temp[4], ureg_src(temp[0]), ureg_src(temp[1])); - ureg_ADD(ureg, temp[1], ureg_src(temp[4]), ureg_src(temp[3])); + BLEND_GENERIC(VG_BLEND_SRC_OVER); +} - ureg_MUL(ureg, temp[2], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), - ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); - ureg_ADD(ureg, temp[3], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), - ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); - ureg_SUB(ureg, ureg_writemask(temp[1], TGSI_WRITEMASK_W), - ureg_src(temp[3]), ureg_src(temp[2])); +static INLINE void +blend_dst_over( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + BLEND_GENERIC(VG_BLEND_DST_OVER); +} - ureg_MOV(ureg, *out, ureg_src(temp[1])); +static INLINE void +blend_src_in( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + BLEND_GENERIC(VG_BLEND_SRC_IN); +} + +static INLINE void +blend_dst_in( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + BLEND_GENERIC(VG_BLEND_DST_IN); +} + +static INLINE void +blend_multiply( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + BLEND_GENERIC(VG_BLEND_MULTIPLY); } static INLINE void @@ -274,10 +492,7 @@ blend_screen( struct ureg_program *ureg, struct ureg_dst *temp, struct ureg_src *constant) { - ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); - ureg_ADD(ureg, temp[3], ureg_src(temp[0]), ureg_src(temp[1])); - ureg_MUL(ureg, temp[2], ureg_src(temp[0]), ureg_src(temp[1])); - ureg_SUB(ureg, *out, ureg_src(temp[3]), ureg_src(temp[2])); + BLEND_GENERIC(VG_BLEND_SCREEN); } static INLINE void @@ -288,23 +503,7 @@ blend_darken( struct ureg_program *ureg, struct ureg_dst *temp, struct ureg_src *constant) { - ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); - EXTENDED_BLENDER_OVER_FUNC - ureg_MUL(ureg, temp[4], ureg_src(temp[0]), - 
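/* Note (not from the patch), summarizing blend_generic() above.  The current
 * destination color is read back through sampler[2] (see the BLEND_GENERIC
 * macro), both colors are converted to premultiplied alpha by
 * blend_premultiply(), the per-mode equation from the comments above is
 * applied in premultiplied form (e.g. src-over:
 * RGBA_out = RGBA_src + (1 - A_src) * RGBA_dst), and blend_unpremultiply()
 * divides the result by its alpha again.  The CMP in blend_unpremultiply()
 * substitutes 1.0 for a zero alpha before the RCP, so fully transparent
 * pixels do not divide by zero. */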
ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); - ureg_MUL(ureg, temp[5], ureg_src(temp[1]), - ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W)); - ureg_MIN(ureg, temp[4], ureg_src(temp[4]), ureg_src(temp[5])); - ureg_ADD(ureg, temp[1], ureg_src(temp[3]), ureg_src(temp[4])); - - ureg_MUL(ureg, temp[2], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), - ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); - ureg_ADD(ureg, temp[3], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), - ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); - ureg_SUB(ureg, ureg_writemask(temp[1], TGSI_WRITEMASK_W), - ureg_src(temp[3]), ureg_src(temp[2])); - - ureg_MOV(ureg, *out, ureg_src(temp[1])); + BLEND_GENERIC(VG_BLEND_DARKEN); } static INLINE void @@ -315,23 +514,33 @@ blend_lighten( struct ureg_program *ureg, struct ureg_dst *temp, struct ureg_src *constant) { - ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); - EXTENDED_BLENDER_OVER_FUNC - ureg_MUL(ureg, temp[4], ureg_src(temp[0]), - ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); - ureg_MUL(ureg, temp[5], ureg_src(temp[1]), - ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W)); - ureg_MAX(ureg, temp[4], ureg_src(temp[4]), ureg_src(temp[5])); - ureg_ADD(ureg, temp[1], ureg_src(temp[3]), ureg_src(temp[4])); + BLEND_GENERIC(VG_BLEND_LIGHTEN); +} - ureg_MUL(ureg, temp[2], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), - ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); - ureg_ADD(ureg, temp[3], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), - ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); - ureg_SUB(ureg, ureg_writemask(temp[1], TGSI_WRITEMASK_W), - ureg_src(temp[3]), ureg_src(temp[2])); +static INLINE void +blend_additive( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + BLEND_GENERIC(VG_BLEND_ADDITIVE); +} - ureg_MOV(ureg, *out, ureg_src(temp[1])); +static INLINE void +mask( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[1]); + ureg_MUL(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_W), + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_MOV(ureg, *out, ureg_src(temp[0])); } static INLINE void @@ -369,11 +578,11 @@ color_bw( struct ureg_program *ureg, struct ureg_src *constant) { ureg_ADD(ureg, temp[1], - ureg_scalar(constant[1], TGSI_SWIZZLE_Y), - ureg_scalar(constant[1], TGSI_SWIZZLE_Y)); + ureg_scalar(constant[3], TGSI_SWIZZLE_Y), + ureg_scalar(constant[3], TGSI_SWIZZLE_Y)); ureg_RCP(ureg, temp[2], ureg_src(temp[1])); ureg_ADD(ureg, temp[1], - ureg_scalar(constant[1], TGSI_SWIZZLE_Y), + ureg_scalar(constant[3], TGSI_SWIZZLE_Y), ureg_src(temp[2])); ureg_ADD(ureg, ureg_writemask(temp[2], TGSI_WRITEMASK_X), ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_X), @@ -410,46 +619,75 @@ struct shader_asm_info { }; -static const struct shader_asm_info shaders_asm[] = { - /* fills */ +/* paint types */ +static const struct shader_asm_info shaders_paint_asm[] = { {VEGA_SOLID_FILL_SHADER, solid_fill, - VG_FALSE, 0, 1, 0, 0, 0, 0}, + VG_FALSE, 2, 1, 0, 0, 0, 0}, {VEGA_LINEAR_GRADIENT_SHADER, linear_grad, - VG_TRUE, 0, 5, 0, 1, 0, 5}, + VG_TRUE, 2, 5, 0, 1, 0, 5}, {VEGA_RADIAL_GRADIENT_SHADER, radial_grad, - VG_TRUE, 0, 5, 0, 1, 0, 6}, + VG_TRUE, 2, 5, 0, 1, 0, 5}, {VEGA_PATTERN_SHADER, pattern, - VG_TRUE, 1, 4, 0, 1, 0, 5}, + VG_TRUE, 3, 4, 
0, 1, 0, 5}, + {VEGA_PAINT_DEGENERATE_SHADER, paint_degenerate, + VG_FALSE, 3, 1, 0, 1, 0, 2} +}; - /* image draw modes */ +/* image draw modes */ +static const struct shader_asm_info shaders_image_asm[] = { {VEGA_IMAGE_NORMAL_SHADER, image_normal, - VG_TRUE, 0, 0, 3, 1, 0, 0}, + VG_TRUE, 0, 0, 3, 1, 0, 2}, {VEGA_IMAGE_MULTIPLY_SHADER, image_multiply, VG_TRUE, 0, 0, 3, 1, 0, 2}, {VEGA_IMAGE_STENCIL_SHADER, image_stencil, - VG_TRUE, 0, 0, 3, 1, 0, 2}, + VG_TRUE, 0, 0, 3, 1, 0, 2} +}; +static const struct shader_asm_info shaders_color_transform_asm[] = { + {VEGA_COLOR_TRANSFORM_SHADER, color_transform, + VG_FALSE, 0, 4, 0, 0, 0, 3} +}; + +static const struct shader_asm_info shaders_alpha_asm[] = { + {VEGA_ALPHA_NORMAL_SHADER, alpha_normal, + VG_FALSE, 0, 0, 0, 0, 0, 2}, + {VEGA_ALPHA_PER_CHANNEL_SHADER, alpha_per_channel, + VG_FALSE, 0, 0, 0, 0, 0, 2} +}; + +/* extra blend modes */ +static const struct shader_asm_info shaders_blend_asm[] = { +#define BLEND_ASM_INFO(id, func) { (id), (func), VG_TRUE, 3, 1, 2, 1, 0, 5 } + BLEND_ASM_INFO(VEGA_BLEND_SRC_SHADER, blend_src), + BLEND_ASM_INFO(VEGA_BLEND_SRC_OVER_SHADER, blend_src_over), + BLEND_ASM_INFO(VEGA_BLEND_DST_OVER_SHADER, blend_dst_over), + BLEND_ASM_INFO(VEGA_BLEND_SRC_IN_SHADER, blend_src_in), + BLEND_ASM_INFO(VEGA_BLEND_DST_IN_SHADER, blend_dst_in), + BLEND_ASM_INFO(VEGA_BLEND_MULTIPLY_SHADER, blend_multiply), + BLEND_ASM_INFO(VEGA_BLEND_SCREEN_SHADER, blend_screen), + BLEND_ASM_INFO(VEGA_BLEND_DARKEN_SHADER, blend_darken), + BLEND_ASM_INFO(VEGA_BLEND_LIGHTEN_SHADER, blend_lighten), + BLEND_ASM_INFO(VEGA_BLEND_ADDITIVE_SHADER, blend_additive) +#undef BLEND_ASM_INFO +}; + +static const struct shader_asm_info shaders_mask_asm[] = { {VEGA_MASK_SHADER, mask, - VG_TRUE, 0, 0, 1, 1, 0, 2}, - - /* extra blend modes */ - {VEGA_BLEND_MULTIPLY_SHADER, blend_multiply, - VG_TRUE, 1, 1, 2, 1, 0, 5}, - {VEGA_BLEND_SCREEN_SHADER, blend_screen, - VG_TRUE, 0, 0, 2, 1, 0, 4}, - {VEGA_BLEND_DARKEN_SHADER, blend_darken, - VG_TRUE, 1, 1, 2, 1, 0, 6}, - {VEGA_BLEND_LIGHTEN_SHADER, blend_lighten, - VG_TRUE, 1, 1, 2, 1, 0, 6}, - - /* premultiply */ + VG_TRUE, 0, 0, 1, 1, 0, 2} +}; + +/* premultiply */ +static const struct shader_asm_info shaders_premultiply_asm[] = { {VEGA_PREMULTIPLY_SHADER, premultiply, VG_FALSE, 0, 0, 0, 0, 0, 1}, {VEGA_UNPREMULTIPLY_SHADER, unpremultiply, VG_FALSE, 0, 0, 0, 0, 0, 1}, +}; - /* color transform to black and white */ +/* color transform to black and white */ +static const struct shader_asm_info shaders_bw_asm[] = { {VEGA_BW_SHADER, color_bw, - VG_FALSE, 1, 1, 0, 0, 0, 3}, + VG_FALSE, 3, 1, 0, 0, 0, 3}, }; + #endif diff --git a/src/gallium/state_trackers/vega/asm_util.h b/src/gallium/state_trackers/vega/asm_util.h index 903bfc88a4d..ae1842a62cd 100644 --- a/src/gallium/state_trackers/vega/asm_util.h +++ b/src/gallium/state_trackers/vega/asm_util.h @@ -27,16 +27,6 @@ #ifndef ASM_UTIL_H #define ASM_UTIL_H - -static const char pass_through_depth_asm[] = - "FRAG\n" - "DCL IN[0], POSITION, LINEAR\n" - "DCL OUT[0].z, POSITION, CONSTANT\n" - "0: MOV OUT[0].z, IN[0].zzzz\n" - "1: END\n"; - - - /* μnew = μmask */ static const char set_mask_asm[] = "FRAG\n" @@ -92,45 +82,4 @@ static const char subtract_mask_asm[] = "3: MUL OUT[0], TEMP[2].wwww, TEMP[0].wwww\n" "4: END\n"; - -static const char vs_plain_asm[] = - "VERT\n" - "DCL IN[0]\n" - "DCL OUT[0], POSITION\n" - "DCL TEMP[0]\n" - "DCL CONST[0..1]\n" - "0: MUL TEMP[0], IN[0], CONST[0]\n" - "1: ADD TEMP[0], TEMP[0], CONST[1]\n" - "2: MOV OUT[0], TEMP[0]\n" - "3: END\n"; - -static const 
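/* Note (not from the patch): the single shaders_asm[] table is split into
 * per-stage tables (paint, image, color transform, alpha, blend, mask,
 * premultiply, black-and-white), presumably so the shader cache can combine
 * one entry per stage.  Reading the entries against the snippets above, the
 * numeric fields after the needs-position flag appear to be (first, count)
 * pairs for constant registers, samplers and temporaries; e.g. linear_grad
 * is now listed as CONST[2..6], one sampler and TEMP[0..4], matching the
 * PAINT_TRANSFORM rewrite. */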
char vs_clear_asm[] = - "VERT\n" - "DCL IN[0]\n" - "DCL IN[1]\n" - "DCL OUT[0], POSITION\n" - "DCL OUT[1], COLOR\n" - "DCL TEMP[0]\n" - "DCL CONST[0..1]\n" - "0: MUL TEMP[0], IN[0], CONST[0]\n" - "1: ADD TEMP[0], TEMP[0], CONST[1]\n" - "2: MOV OUT[0], TEMP[0]\n" - "3: MOV OUT[1], IN[1]\n" - "4: END\n"; - - -static const char vs_texture_asm[] = - "VERT\n" - "DCL IN[0]\n" - "DCL IN[1]\n" - "DCL OUT[0], POSITION\n" - "DCL OUT[1], GENERIC\n" - "DCL TEMP[0]\n" - "DCL CONST[0..1]\n" - "0: MUL TEMP[0], IN[0], CONST[0]\n" - "1: ADD TEMP[0], TEMP[0], CONST[1]\n" - "2: MOV OUT[0], TEMP[0]\n" - "3: MOV OUT[1], IN[1]\n" - "4: END\n"; - #endif diff --git a/src/gallium/state_trackers/vega/image.c b/src/gallium/state_trackers/vega/image.c index 28bbe420a21..318ea94bdfb 100644 --- a/src/gallium/state_trackers/vega/image.c +++ b/src/gallium/state_trackers/vega/image.c @@ -42,6 +42,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_sampler.h" +#include "util/u_surface.h" static enum pipe_format vg_format_to_pipe(VGImageFormat format) { @@ -77,33 +78,23 @@ static INLINE void vg_sync_size(VGfloat *src_loc, VGfloat *dst_loc) dst_loc[3] = src_loc[3]; } - -static void vg_copy_texture(struct vg_context *ctx, - struct pipe_resource *dst, VGint dx, VGint dy, - struct pipe_sampler_view *src, VGint sx, VGint sy, - VGint width, VGint height) +static void vg_get_copy_coords(VGfloat *src_loc, + VGfloat src_width, VGfloat src_height, + VGfloat *dst_loc, + VGfloat dst_width, VGfloat dst_height) { - VGfloat dst_loc[4], src_loc[4]; VGfloat dst_bounds[4], src_bounds[4]; VGfloat src_shift[4], dst_shift[4], shift[4]; - dst_loc[0] = dx; - dst_loc[1] = dy; - dst_loc[2] = width; - dst_loc[3] = height; dst_bounds[0] = 0.f; dst_bounds[1] = 0.f; - dst_bounds[2] = dst->width0; - dst_bounds[3] = dst->height0; + dst_bounds[2] = dst_width; + dst_bounds[3] = dst_height; - src_loc[0] = sx; - src_loc[1] = sy; - src_loc[2] = width; - src_loc[3] = height; src_bounds[0] = 0.f; src_bounds[1] = 0.f; - src_bounds[2] = src->texture->width0; - src_bounds[3] = src->texture->height0; + src_bounds[2] = src_width; + src_bounds[3] = src_height; vg_bound_rect(src_loc, src_bounds, src_shift); vg_bound_rect(dst_loc, dst_bounds, dst_shift); @@ -121,22 +112,44 @@ static void vg_copy_texture(struct vg_context *ctx, vg_shift_recty(dst_loc, dst_bounds, shift[1]); vg_sync_size(src_loc, dst_loc); +} + +static void vg_copy_texture(struct vg_context *ctx, + struct pipe_resource *dst, VGint dx, VGint dy, + struct pipe_sampler_view *src, VGint sx, VGint sy, + VGint width, VGint height) +{ + VGfloat dst_loc[4], src_loc[4]; + + dst_loc[0] = dx; + dst_loc[1] = dy; + dst_loc[2] = width; + dst_loc[3] = height; + + src_loc[0] = sx; + src_loc[1] = sy; + src_loc[2] = width; + src_loc[3] = height; + + vg_get_copy_coords(src_loc, src->texture->width0, src->texture->height0, + dst_loc, dst->width0, dst->height0); if (src_loc[2] >= 0 && src_loc[3] >= 0 && dst_loc[2] >= 0 && dst_loc[3] >= 0) { - renderer_copy_texture(ctx->renderer, - src, - src_loc[0], - src_loc[1] + src_loc[3], - src_loc[0] + src_loc[2], - src_loc[1], - dst, - dst_loc[0], - dst_loc[1] + dst_loc[3], - dst_loc[0] + dst_loc[2], - dst_loc[1]); - } + struct pipe_surface *surf, surf_tmpl; + + /* get the destination surface */ + u_surface_default_template(&surf_tmpl, dst, PIPE_BIND_RENDER_TARGET); + surf = ctx->pipe->create_surface(ctx->pipe, dst, &surf_tmpl); + if (surf && renderer_copy_begin(ctx->renderer, surf, VG_TRUE, src)) { + renderer_copy(ctx->renderer, + dst_loc[0], dst_loc[1], 
dst_loc[2], dst_loc[3], + src_loc[0], src_loc[1], src_loc[2], src_loc[3]); + renderer_copy_end(ctx->renderer); + } + pipe_surface_reference(&surf, NULL); + } } void vg_copy_surface(struct vg_context *ctx, @@ -145,43 +158,19 @@ void vg_copy_surface(struct vg_context *ctx, VGint width, VGint height) { VGfloat dst_loc[4], src_loc[4]; - VGfloat dst_bounds[4], src_bounds[4]; - VGfloat src_shift[4], dst_shift[4], shift[4]; dst_loc[0] = dx; dst_loc[1] = dy; dst_loc[2] = width; dst_loc[3] = height; - dst_bounds[0] = 0.f; - dst_bounds[1] = 0.f; - dst_bounds[2] = dst->width; - dst_bounds[3] = dst->height; src_loc[0] = sx; src_loc[1] = sy; src_loc[2] = width; src_loc[3] = height; - src_bounds[0] = 0.f; - src_bounds[1] = 0.f; - src_bounds[2] = src->width; - src_bounds[3] = src->height; - - vg_bound_rect(src_loc, src_bounds, src_shift); - vg_bound_rect(dst_loc, dst_bounds, dst_shift); - shift[0] = src_shift[0] - dst_shift[0]; - shift[1] = src_shift[1] - dst_shift[1]; - - if (shift[0] < 0) - vg_shift_rectx(src_loc, src_bounds, -shift[0]); - else - vg_shift_rectx(dst_loc, dst_bounds, shift[0]); - - if (shift[1] < 0) - vg_shift_recty(src_loc, src_bounds, -shift[1]); - else - vg_shift_recty(dst_loc, dst_bounds, shift[1]); - vg_sync_size(src_loc, dst_loc); + vg_get_copy_coords(src_loc, src->width, src->height, + dst_loc, dst->width, dst->height); if (src_loc[2] > 0 && src_loc[3] > 0 && dst_loc[2] > 0 && dst_loc[3] > 0) { @@ -277,6 +266,7 @@ struct vg_image * image_create(VGImageFormat format, pt.width0 = width; pt.height0 = height; pt.depth0 = 1; + pt.array_size = 1; pt.bind = PIPE_BIND_SAMPLER_VIEW; newtex = screen->resource_create(screen, &pt); @@ -284,6 +274,13 @@ struct vg_image * image_create(VGImageFormat format, debug_assert(newtex); u_sampler_view_default_template(&view_templ, newtex, newtex->format); + /* R, G, and B are treated as 1.0 for alpha-only formats in OpenVG */ + if (newtex->format == PIPE_FORMAT_A8_UNORM) { + view_templ.swizzle_r = PIPE_SWIZZLE_ONE; + view_templ.swizzle_g = PIPE_SWIZZLE_ONE; + view_templ.swizzle_b = PIPE_SWIZZLE_ONE; + } + view = pipe->create_sampler_view(pipe, newtex, &view_templ); /* want the texture to go away if the view is freed */ pipe_resource_reference(&newtex, NULL); @@ -420,7 +417,7 @@ void image_sub_data(struct vg_image *image, { /* upload color_data */ struct pipe_transfer *transfer = pipe_get_transfer( - pipe, texture, 0, 0, 0, + pipe, texture, 0, 0, PIPE_TRANSFER_WRITE, 0, 0, texture->width0, texture->height0); src += (dataStride * yoffset); for (i = 0; i < height; i++) { @@ -451,11 +448,11 @@ void image_get_sub_data(struct vg_image * image, { struct pipe_transfer *transfer = pipe_get_transfer(pipe, - image->sampler_view->texture, 0, 0, 0, - PIPE_TRANSFER_READ, - 0, 0, - image->x + image->width, - image->y + image->height); + image->sampler_view->texture, 0, 0, + PIPE_TRANSFER_READ, + 0, 0, + image->x + image->width, + image->y + image->height); /* Do a row at a time to flip image data vertically */ for (i = 0; i < height; i++) { #if 0 @@ -525,14 +522,20 @@ void image_copy(struct vg_image *dst, VGint dx, VGint dy, src->sampler_view, src->x + sx, src->y + sy, width, height); } -void image_draw(struct vg_image *img) +void image_draw(struct vg_image *img, struct matrix *matrix) { struct vg_context *ctx = vg_current_context(); + struct matrix paint_matrix; VGfloat x1, y1; VGfloat x2, y2; VGfloat x3, y3; VGfloat x4, y4; - struct matrix *matrix; + + if (!vg_get_paint_matrix(ctx, + &ctx->state.vg.fill_paint_to_user_matrix, + matrix, + &paint_matrix)) + return; x1 
= 0; y1 = 0; @@ -543,15 +546,10 @@ void image_draw(struct vg_image *img) x4 = 0; y4 = img->height; - matrix = &ctx->state.vg.image_user_to_surface_matrix; - - matrix_map_point(matrix, x1, y1, &x1, &y1); - matrix_map_point(matrix, x2, y2, &x2, &y2); - matrix_map_point(matrix, x3, y3, &x3, &y3); - matrix_map_point(matrix, x4, y4, &x4, &y4); - + shader_set_surface_matrix(ctx->shader, matrix); shader_set_drawing_image(ctx->shader, VG_TRUE); shader_set_paint(ctx->shader, ctx->state.vg.fill_paint); + shader_set_paint_matrix(ctx->shader, &paint_matrix); shader_set_image(ctx->shader, img); shader_bind(ctx->shader); @@ -566,20 +564,21 @@ void image_set_pixels(VGint dx, VGint dy, { struct vg_context *ctx = vg_current_context(); struct pipe_context *pipe = ctx->pipe; - struct pipe_screen *screen = pipe->screen; - struct pipe_surface *surf; + struct pipe_surface *surf, surf_tmpl; struct st_renderbuffer *strb = ctx->draw_buffer->strb; /* make sure rendering has completed */ pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); - surf = screen->get_tex_surface(screen, image_texture(src), 0, 0, 0, - 0 /* no bind flags as surf isn't actually used??? */); + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + u_surface_default_template(&surf_tmpl, image_texture(src), + 0 /* no bind flag - not a surface*/); + surf = pipe->create_surface(pipe, image_texture(src), &surf_tmpl); vg_copy_surface(ctx, strb->surface, dx, dy, surf, sx+src->x, sy+src->y, width, height); - screen->tex_surface_destroy(surf); + pipe->surface_destroy(pipe, surf); } void image_get_pixels(struct vg_image *dst, VGint dx, VGint dy, @@ -588,8 +587,7 @@ void image_get_pixels(struct vg_image *dst, VGint dx, VGint dy, { struct vg_context *ctx = vg_current_context(); struct pipe_context *pipe = ctx->pipe; - struct pipe_screen *screen = pipe->screen; - struct pipe_surface *surf; + struct pipe_surface *surf, surf_tmpl; struct st_renderbuffer *strb = ctx->draw_buffer->strb; /* flip the y coordinates */ @@ -598,8 +596,10 @@ void image_get_pixels(struct vg_image *dst, VGint dx, VGint dy, /* make sure rendering has completed */ pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); - surf = screen->get_tex_surface(screen, image_texture(dst), 0, 0, 0, - 0 /* no bind flags as surf isn't actually used??? 
*/); + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + u_surface_default_template(&surf_tmpl, image_texture(dst), + PIPE_BIND_RENDER_TARGET); + surf = pipe->create_surface(pipe, image_texture(dst), &surf_tmpl); vg_copy_surface(ctx, surf, dst->x + dx, dst->y + dy, strb->surface, sx, sy, width, height); diff --git a/src/gallium/state_trackers/vega/image.h b/src/gallium/state_trackers/vega/image.h index a990c9c5873..391c0485948 100644 --- a/src/gallium/state_trackers/vega/image.h +++ b/src/gallium/state_trackers/vega/image.h @@ -79,7 +79,7 @@ void image_copy(struct vg_image *dst, VGint dx, VGint dy, VGint width, VGint height, VGboolean dither); -void image_draw(struct vg_image *img); +void image_draw(struct vg_image *img, struct matrix *matrix); void image_set_pixels(VGint dx, VGint dy, struct vg_image *src, VGint sx, VGint sy, diff --git a/src/gallium/state_trackers/vega/mask.c b/src/gallium/state_trackers/vega/mask.c index ef28ebd740c..dfd0600e444 100644 --- a/src/gallium/state_trackers/vega/mask.c +++ b/src/gallium/state_trackers/vega/mask.c @@ -31,13 +31,14 @@ #include "shaders_cache.h" #include "renderer.h" #include "asm_util.h" -#include "st_inlines.h" #include "pipe/p_context.h" #include "pipe/p_screen.h" #include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_memory.h" +#include "util/u_surface.h" +#include "util/u_sampler.h" struct vg_mask_layer { struct vg_object base; @@ -48,17 +49,6 @@ struct vg_mask_layer { struct pipe_sampler_view *sampler_view; }; -static INLINE struct pipe_surface * -alpha_mask_surface(struct vg_context *ctx, int usage) -{ - struct pipe_screen *screen = ctx->pipe->screen; - struct st_framebuffer *stfb = ctx->draw_buffer; - return screen->get_tex_surface(screen, - stfb->alpha_mask_view->texture, - 0, 0, 0, - usage); -} - static INLINE VGboolean intersect_rectangles(VGint dwidth, VGint dheight, VGint swidth, VGint sheight, @@ -110,15 +100,13 @@ static void read_alpha_mask(void * data, VGint dataStride, { struct vg_context *ctx = vg_current_context(); struct pipe_context *pipe = ctx->pipe; - struct pipe_screen *screen = pipe->screen; struct st_framebuffer *stfb = ctx->draw_buffer; struct st_renderbuffer *strb = stfb->alpha_mask; - struct pipe_framebuffer_state *fb = &ctx->state.g3d.fb; VGfloat temp[VEGA_MAX_IMAGE_WIDTH][4]; VGfloat *df = (VGfloat*)temp; - VGint y = (fb->height - sy) - 1, yStep = -1; + VGint y = (stfb->height - sy) - 1, yStep = -1; VGint i; VGubyte *dst = (VGubyte *)data; VGint xoffset = 0, yoffset = 0; @@ -135,15 +123,15 @@ static void read_alpha_mask(void * data, VGint dataStride, yoffset = -sy; height += sy; sy = 0; - y = (fb->height - sy) - 1; + y = (stfb->height - sy) - 1; yoffset *= dataStride; } { struct pipe_surface *surf; - surf = screen->get_tex_surface(screen, strb->texture, 0, 0, 0, - PIPE_BIND_TRANSFER_READ); + surf = pipe->create_surface(pipe, strb->texture, 0, 0, 0, + PIPE_BIND_TRANSFER_READ); /* Do a row at a time to flip image data vertically */ for (i = 0; i < height; i++) { @@ -164,23 +152,23 @@ static void read_alpha_mask(void * data, VGint dataStride, void save_alpha_to_file(const char *filename) { struct vg_context *ctx = vg_current_context(); - struct pipe_framebuffer_state *fb = &ctx->state.g3d.fb; + struct st_framebuffer *stfb = ctx->draw_buffer; VGint *data; int i, j; - data = malloc(sizeof(int) * fb->width * fb->height); - read_alpha_mask(data, fb->width * sizeof(int), + data = malloc(sizeof(int) * stfb->width * stfb->height); + read_alpha_mask(data, stfb->width * sizeof(int), VG_sRGBA_8888, - 0, 0, 
fb->width, fb->height); + 0, 0, stfb->width, stfb->height); fprintf(stderr, "/*---------- start */\n"); fprintf(stderr, "const int image_width = %d;\n", - fb->width); + stfb->width); fprintf(stderr, "const int image_height = %d;\n", - fb->height); + stfb->height); fprintf(stderr, "const int image_data = {\n"); - for (i = 0; i < fb->height; ++i) { - for (j = 0; j < fb->width; ++j) { - int rgba = data[i * fb->height + j]; + for (i = 0; i < stfb->height; ++i) { + for (j = 0; j < stfb->width; ++j) { + int rgba = data[i * stfb->height + j]; int argb = 0; argb = (rgba >> 8); argb |= ((rgba & 0xff) << 24); @@ -193,49 +181,12 @@ void save_alpha_to_file(const char *filename) } #endif -static void setup_mask_framebuffer(struct pipe_surface *surf, - VGint surf_width, VGint surf_height) -{ - struct vg_context *ctx = vg_current_context(); - struct pipe_framebuffer_state fb; - - memset(&fb, 0, sizeof(fb)); - fb.width = surf_width; - fb.height = surf_height; - fb.nr_cbufs = 1; - fb.cbufs[0] = surf; - { - VGint i; - for (i = 1; i < PIPE_MAX_COLOR_BUFS; ++i) - fb.cbufs[i] = 0; - } - cso_set_framebuffer(ctx->cso_context, &fb); -} - - -/* setup shader constants */ -static void setup_mask_operation(VGMaskOperation operation) +/* setup mask shader */ +static void *setup_mask_operation(VGMaskOperation operation) { struct vg_context *ctx = vg_current_context(); - struct pipe_resource **cbuf = &ctx->mask.cbuf; - const VGint param_bytes = 4 * sizeof(VGfloat); - const VGfloat ones[4] = {1.f, 1.f, 1.f, 1.f}; void *shader = 0; - /* We always need to get a new buffer, to keep the drivers simple and - * avoid gratuitous rendering synchronization. - */ - pipe_resource_reference(cbuf, NULL); - - *cbuf = pipe_buffer_create(ctx->pipe->screen, - PIPE_BIND_CONSTANT_BUFFER, - param_bytes); - if (*cbuf) { - st_no_flush_pipe_buffer_write(ctx, *cbuf, - 0, param_bytes, ones); - } - - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf); switch (operation) { case VG_UNION_MASK: { if (!ctx->mask.union_fs) { @@ -281,94 +232,21 @@ static void setup_mask_operation(VGMaskOperation operation) assert(0); break; } - cso_set_fragment_shader_handle(ctx->cso_context, shader); -} - -static void setup_mask_samplers(struct pipe_sampler_view *umask) -{ - struct vg_context *ctx = vg_current_context(); - struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS]; - struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; - struct st_framebuffer *fb_buffers = ctx->draw_buffer; - struct pipe_sampler_view *uprev = NULL; - struct pipe_sampler_state sampler; - - uprev = fb_buffers->blend_texture_view; - sampler = ctx->mask.sampler; - sampler.normalized_coords = 1; - - samplers[0] = NULL; - samplers[1] = NULL; - sampler_views[0] = NULL; - sampler_views[1] = NULL; - - samplers[0] = &sampler; - samplers[1] = &ctx->mask.sampler; - - sampler_views[0] = umask; - sampler_views[1] = uprev; - - cso_set_samplers(ctx->cso_context, 2, - (const struct pipe_sampler_state **)samplers); - cso_set_fragment_sampler_views(ctx->cso_context, 2, sampler_views); -} - - -/* setup shader constants */ -static void setup_mask_fill(const VGfloat color[4]) -{ - struct vg_context *ctx = vg_current_context(); - struct pipe_resource **cbuf = &ctx->mask.cbuf; - const VGint param_bytes = 4 * sizeof(VGfloat); - - /* We always need to get a new buffer, to keep the drivers simple and - * avoid gratuitous rendering synchronization. 
- */ - pipe_resource_reference(cbuf, NULL); - - *cbuf = pipe_buffer_create(ctx->pipe->screen, - PIPE_BIND_CONSTANT_BUFFER, - param_bytes); - if (*cbuf) { - st_no_flush_pipe_buffer_write(ctx, *cbuf, 0, param_bytes, color); - } - - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf); - cso_set_fragment_shader_handle(ctx->cso_context, - shaders_cache_fill(ctx->sc, - VEGA_SOLID_FILL_SHADER)); -} - -static void setup_mask_viewport() -{ - struct vg_context *ctx = vg_current_context(); - vg_set_viewport(ctx, VEGA_Y0_TOP); -} - -static void setup_mask_blend() -{ - struct vg_context *ctx = vg_current_context(); - - struct pipe_blend_state blend; - memset(&blend, 0, sizeof(struct pipe_blend_state)); - blend.rt[0].blend_enable = 0; - blend.rt[0].colormask = PIPE_MASK_RGBA; - blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - - cso_set_blend(ctx->cso_context, &blend); + return shader; } - -static void surface_fill(struct pipe_surface *surf, - int surf_width, int surf_height, - int x, int y, int width, int height, - const VGfloat color[4]) +static void mask_resource_fill(struct pipe_resource *dst, + int x, int y, int width, int height, + VGfloat coverage) { struct vg_context *ctx = vg_current_context(); + VGfloat fs_consts[12] = { + 0.0f, 0.0f, 0.0f, 0.0f, /* not used */ + 0.0f, 0.0f, 0.0f, 0.0f, /* not used */ + 0.0f, 0.0f, 0.0f, coverage /* color */ + }; + void *fs; if (x < 0) { width += x; @@ -379,50 +257,38 @@ static void surface_fill(struct pipe_surface *surf, y = 0; } - cso_save_framebuffer(ctx->cso_context); - cso_save_blend(ctx->cso_context); - cso_save_fragment_shader(ctx->cso_context); - cso_save_viewport(ctx->cso_context); - - setup_mask_blend(); - setup_mask_fill(color); - setup_mask_framebuffer(surf, surf_width, surf_height); - setup_mask_viewport(); + fs = shaders_cache_fill(ctx->sc, VEGA_SOLID_FILL_SHADER); - renderer_draw_quad(ctx->renderer, x, y, - x + width, y + height, 0.0f/*depth should be disabled*/); - - - /* make sure rendering has completed */ - ctx->pipe->flush(ctx->pipe, - PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, - NULL); + if (renderer_filter_begin(ctx->renderer, dst, VG_FALSE, ~0, + NULL, NULL, 0, fs, (const void *) fs_consts, sizeof(fs_consts))) { + renderer_filter(ctx->renderer, x, y, width, height, 0, 0, 0, 0); + renderer_filter_end(ctx->renderer); + } #if DEBUG_MASKS save_alpha_to_file(0); #endif - - cso_restore_blend(ctx->cso_context); - cso_restore_framebuffer(ctx->cso_context); - cso_restore_fragment_shader(ctx->cso_context); - cso_restore_viewport(ctx->cso_context); } static void mask_using_texture(struct pipe_sampler_view *sampler_view, + VGboolean is_layer, VGMaskOperation operation, VGint x, VGint y, VGint width, VGint height) { struct vg_context *ctx = vg_current_context(); + struct pipe_sampler_view *dst_view = vg_get_surface_mask(ctx); + struct pipe_resource *dst = dst_view->texture; struct pipe_resource *texture = sampler_view->texture; - struct pipe_surface *surface = - alpha_mask_surface(ctx, PIPE_BIND_RENDER_TARGET); + const struct pipe_sampler_state *samplers[2]; + struct pipe_sampler_view *views[2]; + struct pipe_sampler_state sampler; VGint offsets[4], loc[4]; + const VGfloat ones[4] = {1.f, 1.f, 1.f, 1.f}; + void *fs; - if (!surface) - return; - if (!intersect_rectangles(surface->width, surface->height, + if (!intersect_rectangles(dst->width0, dst->height0, 
texture->width0, texture->height0, x, y, width, height, offsets, loc)) @@ -434,38 +300,30 @@ static void mask_using_texture(struct pipe_sampler_view *sampler_view, loc[1], loc[2], loc[3]); #endif + + sampler = ctx->mask.sampler; + sampler.normalized_coords = 1; + samplers[0] = &sampler; + views[0] = sampler_view; + /* prepare our blend surface */ - vg_prepare_blend_surface_from_mask(ctx); - - cso_save_samplers(ctx->cso_context); - cso_save_fragment_sampler_views(ctx->cso_context); - cso_save_framebuffer(ctx->cso_context); - cso_save_blend(ctx->cso_context); - cso_save_fragment_shader(ctx->cso_context); - cso_save_viewport(ctx->cso_context); - - setup_mask_samplers(sampler_view); - setup_mask_blend(); - setup_mask_operation(operation); - setup_mask_framebuffer(surface, surface->width, surface->height); - setup_mask_viewport(); - - /* render the quad to propagate the rendering from stencil */ - renderer_draw_texture(ctx->renderer, texture, - offsets[0], offsets[1], - offsets[0] + offsets[2], offsets[1] + offsets[3], - loc[0], loc[1], loc[0] + loc[2], loc[1] + loc[3]); + samplers[1] = &ctx->mask.sampler; + views[1] = vg_prepare_blend_surface_from_mask(ctx); - /* make sure rendering has completed */ - ctx->pipe->flush(ctx->pipe, PIPE_FLUSH_RENDER_CACHE, NULL); - cso_restore_blend(ctx->cso_context); - cso_restore_framebuffer(ctx->cso_context); - cso_restore_fragment_shader(ctx->cso_context); - cso_restore_samplers(ctx->cso_context); - cso_restore_fragment_sampler_views(ctx->cso_context); - cso_restore_viewport(ctx->cso_context); - - pipe_surface_reference(&surface, NULL); + fs = setup_mask_operation(operation); + + if (renderer_filter_begin(ctx->renderer, dst, VG_FALSE, + ~0, samplers, views, 2, fs, (const void *) ones, sizeof(ones))) { + /* layer should be flipped when used as a texture */ + if (is_layer) { + offsets[1] += offsets[3]; + offsets[3] = -offsets[3]; + } + renderer_filter(ctx->renderer, + loc[0], loc[1], loc[2], loc[3], + offsets[0], offsets[1], offsets[2], offsets[3]); + renderer_filter_end(ctx->renderer); + } } @@ -496,8 +354,8 @@ struct vg_mask_layer * mask_layer_create(VGint width, VGint height) pt.width0 = width; pt.height0 = height; pt.depth0 = 1; + pt.array_size = 1; pt.bind = PIPE_BIND_SAMPLER_VIEW; - pt.compressed = 0; texture = screen->resource_create(screen, &pt); @@ -519,7 +377,7 @@ void mask_layer_destroy(struct vg_mask_layer *layer) struct vg_context *ctx = vg_current_context(); vg_context_remove_object(ctx, VG_OBJECT_MASK, layer); - pipe_resource_release(&layer->texture); + pipe_sampler_view_reference(&layer->sampler_view, NULL); FREE(layer); } @@ -528,22 +386,12 @@ void mask_layer_fill(struct vg_mask_layer *layer, VGint width, VGint height, VGfloat value) { - struct vg_context *ctx = vg_current_context(); VGfloat alpha_color[4] = {0, 0, 0, 0}; - struct pipe_surface *surface; alpha_color[3] = value; - surface = ctx->pipe->screen->get_tex_surface( - ctx->pipe->screen, layer->sampler_view->texture, - 0, 0, 0, - PIPE_BIND_RENDER_TARGET); - - surface_fill(surface, - layer->width, layer->height, - x, y, width, height, alpha_color); - - ctx->pipe->screen->tex_surface_release(ctx->pipe->screen, &surface); + mask_resource_fill(layer->sampler_view->texture, + x, y, width, height, value); } void mask_copy(struct vg_mask_layer *layer, @@ -551,16 +399,27 @@ void mask_copy(struct vg_mask_layer *layer, VGint dx, VGint dy, VGint width, VGint height) { - struct vg_context *ctx = vg_current_context(); - struct st_framebuffer *fb_buffers = ctx->draw_buffer; - - 
renderer_copy_texture(ctx->renderer, - layer->sampler_view, - sx, sy, - sx + width, sy + height, - fb_buffers->alpha_mask_view->texture, - dx, dy, - dx + width, dy + height); + struct vg_context *ctx = vg_current_context(); + struct pipe_sampler_view *src = vg_get_surface_mask(ctx); + struct pipe_surface *surf, surf_tmpl; + + /* get the destination surface */ + u_surface_default_template(&surf_tmpl, layer->sampler_view->texture, + PIPE_BIND_RENDER_TARGET); + surf = ctx->pipe->create_surface(ctx->pipe, layer->sampler_view->texture, + &surf_tmpl); + if (surf && renderer_copy_begin(ctx->renderer, surf, VG_FALSE, src)) { + /* layer should be flipped when used as a texture */ + sy += height; + height = -height; + + renderer_copy(ctx->renderer, + dx, dy, width, height, + sx, sy, width, height); + renderer_copy_end(ctx->renderer); + } + + pipe_surface_reference(&surf, NULL); } static void mask_layer_render_to(struct vg_mask_layer *layer, @@ -568,41 +427,25 @@ static void mask_layer_render_to(struct vg_mask_layer *layer, VGbitfield paint_modes) { struct vg_context *ctx = vg_current_context(); - const VGfloat fill_color[4] = {1.f, 1.f, 1.f, 1.f}; - struct pipe_screen *screen = ctx->pipe->screen; - struct pipe_surface *surface; - - surface = screen->get_tex_surface(screen, layer->sampler_view->texture, 0, 0, 0, - PIPE_BIND_RENDER_TARGET); - - cso_save_framebuffer(ctx->cso_context); - cso_save_fragment_shader(ctx->cso_context); - cso_save_viewport(ctx->cso_context); + struct pipe_context *pipe = ctx->pipe; + struct pipe_sampler_view *view = vg_get_surface_mask(ctx); + struct matrix *mat = &ctx->state.vg.path_user_to_surface_matrix; + struct pipe_surface *surf, surf_tmpl; + u_surface_default_template(&surf_tmpl, view->texture, + PIPE_BIND_RENDER_TARGET); + surf = pipe->create_surface(pipe, view->texture, &surf_tmpl); - setup_mask_blend(); - setup_mask_fill(fill_color); - setup_mask_framebuffer(surface, layer->width, layer->height); - setup_mask_viewport(); + renderer_validate_for_mask_rendering(ctx->renderer, surf, mat); if (paint_modes & VG_FILL_PATH) { - struct matrix *mat = &ctx->state.vg.path_user_to_surface_matrix; - path_fill(path, mat); + path_fill(path); } if (paint_modes & VG_STROKE_PATH){ path_stroke(path); } - - /* make sure rendering has completed */ - ctx->pipe->flush(ctx->pipe, PIPE_FLUSH_RENDER_CACHE, NULL); - - cso_restore_framebuffer(ctx->cso_context); - cso_restore_fragment_shader(ctx->cso_context); - cso_restore_viewport(ctx->cso_context); - ctx->state.dirty |= BLEND_DIRTY; - - screen->tex_surface_release(ctx->pipe->screen, &surface); + pipe_surface_reference(&surf, NULL); } void mask_render_to(struct path *path, @@ -610,19 +453,19 @@ void mask_render_to(struct path *path, VGMaskOperation operation) { struct vg_context *ctx = vg_current_context(); - struct st_framebuffer *fb_buffers = ctx->draw_buffer; + struct st_framebuffer *stfb = ctx->draw_buffer; struct vg_mask_layer *temp_layer; VGint width, height; - width = fb_buffers->alpha_mask_view->texture->width0; - height = fb_buffers->alpha_mask_view->texture->width0; + width = stfb->width; + height = stfb->height; temp_layer = mask_layer_create(width, height); + mask_layer_fill(temp_layer, 0, 0, width, height, 0.0f); mask_layer_render_to(temp_layer, path, paint_modes); - mask_using_layer(temp_layer, 0, 0, width, height, - operation); + mask_using_layer(temp_layer, operation, 0, 0, width, height); mask_layer_destroy(temp_layer); } @@ -632,7 +475,7 @@ void mask_using_layer(struct vg_mask_layer *layer, VGint x, VGint y, VGint width, 
VGint height) { - mask_using_texture(layer->sampler_view, operation, + mask_using_texture(layer->sampler_view, VG_TRUE, operation, x, y, width, height); } @@ -654,7 +497,7 @@ void mask_using_image(struct vg_image *image, VGint x, VGint y, VGint width, VGint height) { - mask_using_texture(image->sampler_view, operation, + mask_using_texture(image->sampler_view, VG_FALSE, operation, x, y, width, height); } @@ -662,23 +505,15 @@ void mask_fill(VGint x, VGint y, VGint width, VGint height, VGfloat value) { struct vg_context *ctx = vg_current_context(); - VGfloat alpha_color[4] = {.0f, .0f, .0f, value}; - struct pipe_surface *surf = alpha_mask_surface( - ctx, PIPE_BIND_RENDER_TARGET); + struct pipe_sampler_view *view = vg_get_surface_mask(ctx); #if DEBUG_MASKS debug_printf("mask_fill(%d, %d, %d, %d) with rgba(%f, %f, %f, %f)\n", x, y, width, height, - alpha_color[0], alpha_color[1], - alpha_color[2], alpha_color[3]); - debug_printf("XXX %f === %f \n", - alpha_color[3], value); + 0.0f, 0.0f, 0.0f, value); #endif - surface_fill(surf, surf->width, surf->height, - x, y, width, height, alpha_color); - - pipe_surface_reference(&surf, NULL); + mask_resource_fill(view->texture, x, y, width, height, value); } VGint mask_bind_samplers(struct pipe_sampler_state **samplers, @@ -687,10 +522,8 @@ VGint mask_bind_samplers(struct pipe_sampler_state **samplers, struct vg_context *ctx = vg_current_context(); if (ctx->state.vg.masking) { - struct st_framebuffer *fb_buffers = ctx->draw_buffer; - samplers[1] = &ctx->mask.sampler; - sampler_views[1] = fb_buffers->alpha_mask_view; + sampler_views[1] = vg_get_surface_mask(ctx); return 1; } else return 0; diff --git a/src/gallium/state_trackers/vega/matrix.h b/src/gallium/state_trackers/vega/matrix.h index 4c207f912a8..bed2b3193d0 100644 --- a/src/gallium/state_trackers/vega/matrix.h +++ b/src/gallium/state_trackers/vega/matrix.h @@ -129,7 +129,7 @@ static INLINE void matrix_make_affine(struct matrix *matrix) } static INLINE void matrix_mult(struct matrix *dst, - struct matrix *src) + const struct matrix *src) { VGfloat m11 = dst->m[0]*src->m[0] + dst->m[3]*src->m[1] + dst->m[6]*src->m[2]; VGfloat m12 = dst->m[0]*src->m[3] + dst->m[3]*src->m[4] + dst->m[6]*src->m[5]; diff --git a/src/gallium/state_trackers/vega/paint.c b/src/gallium/state_trackers/vega/paint.c index c0c8cddabe9..2db8cbcf7c8 100644 --- a/src/gallium/state_trackers/vega/paint.c +++ b/src/gallium/state_trackers/vega/paint.c @@ -28,7 +28,6 @@ #include "matrix.h" #include "image.h" -#include "st_inlines.h" #include "pipe/p_compiler.h" #include "util/u_inlines.h" @@ -155,14 +154,15 @@ static INLINE struct pipe_resource *create_gradient_texture(struct vg_paint *p) templ.width0 = 1024; templ.height0 = 1; templ.depth0 = 1; + templ.array_size = 1; templ.bind = PIPE_BIND_SAMPLER_VIEW; tex = screen->resource_create(screen, &templ); { /* upload color_data */ struct pipe_transfer *transfer = - st_no_flush_get_transfer(p->base.ctx, tex, 0, 0, 0, - PIPE_TRANSFER_WRITE, 0, 0, 1024, 1); + pipe_get_transfer(p->base.ctx->pipe, tex, 0, 0, + PIPE_TRANSFER_WRITE, 0, 0, 1024, 1); void *map = pipe->transfer_map(pipe, transfer); memcpy(map, p->gradient.color_data, sizeof(VGint)*1024); pipe->transfer_unmap(pipe, transfer); @@ -261,14 +261,18 @@ static INLINE void paint_color_buffer(struct vg_paint *paint, void *buffer) map[7] = 4.f; } -static INLINE void paint_linear_gradient_buffer(struct vg_paint *paint, void *buffer) +static INLINE void paint_linear_gradient_buffer(struct vg_paint *paint, + const struct matrix *inv, + void 
*buffer) { - struct vg_context *ctx = paint->base.ctx; VGfloat *map = (VGfloat*)buffer; + VGfloat dd; map[0] = paint->gradient.linear.coords[2] - paint->gradient.linear.coords[0]; map[1] = paint->gradient.linear.coords[3] - paint->gradient.linear.coords[1]; - map[2] = 1.f / (map[0] * map[0] + map[1] * map[1]); + dd = (map[0] * map[0] + map[1] * map[1]); + + map[2] = (dd > 0.0f) ? 1.f / dd : 0.f; map[3] = 1.f; map[4] = 0.f; @@ -277,15 +281,10 @@ static INLINE void paint_linear_gradient_buffer(struct vg_paint *paint, void *bu map[7] = 4.f; { struct matrix mat; - struct matrix inv; matrix_load_identity(&mat); + /* VEGA_LINEAR_GRADIENT_SHADER expects the first point to be at (0, 0) */ matrix_translate(&mat, -paint->gradient.linear.coords[0], -paint->gradient.linear.coords[1]); - memcpy(&inv, &ctx->state.vg.fill_paint_to_user_matrix, - sizeof(struct matrix)); - matrix_invert(&inv); - matrix_mult(&inv, &mat); - memcpy(&mat, &inv, - sizeof(struct matrix)); + matrix_mult(&mat, inv); map[8] = mat.m[0]; map[9] = mat.m[3]; map[10] = mat.m[6]; map[11] = 0.f; map[12] = mat.m[1]; map[13] = mat.m[4]; map[14] = mat.m[7]; map[15] = 0.f; @@ -298,17 +297,37 @@ static INLINE void paint_linear_gradient_buffer(struct vg_paint *paint, void *bu } -static INLINE void paint_radial_gradient_buffer(struct vg_paint *paint, void *buffer) +static INLINE void paint_radial_gradient_buffer(struct vg_paint *paint, + const struct matrix *inv, + void *buffer) { - VGfloat *radialCoords = paint->gradient.radial.vals; - struct vg_context *ctx = paint->base.ctx; - + const VGfloat *center = &paint->gradient.radial.vals[0]; + const VGfloat *focal = &paint->gradient.radial.vals[2]; + VGfloat rr = paint->gradient.radial.vals[4]; VGfloat *map = (VGfloat*)buffer; + VGfloat dd, new_focal[2]; + + rr *= rr; + + map[0] = center[0] - focal[0]; + map[1] = center[1] - focal[1]; + dd = map[0] * map[0] + map[1] * map[1]; + + /* focal point must lie inside the circle */ + if (0.998f * rr < dd) { + VGfloat scale; + + scale = (dd > 0.0f) ? sqrt(0.998f * rr / dd) : 0.0f; + map[0] *= scale; + map[1] *= scale; - map[0] = radialCoords[0] - radialCoords[2]; - map[1] = radialCoords[1] - radialCoords[3]; - map[2] = -map[0] * map[0] - map[1] * map[1] + - radialCoords[4] * radialCoords[4]; + new_focal[0] = center[0] - map[0]; + new_focal[1] = center[1] - map[1]; + dd = map[0] * map[0] + map[1] * map[1]; + focal = new_focal; + } + + map[2] = (rr > dd) ? 
rr - dd : 1.0f; map[3] = 1.f; map[4] = 0.f; @@ -318,15 +337,9 @@ static INLINE void paint_radial_gradient_buffer(struct vg_paint *paint, void *bu { struct matrix mat; - struct matrix inv; matrix_load_identity(&mat); - matrix_translate(&mat, -radialCoords[2], -radialCoords[3]); - memcpy(&inv, &ctx->state.vg.fill_paint_to_user_matrix, - sizeof(struct matrix)); - matrix_invert(&inv); - matrix_mult(&inv, &mat); - memcpy(&mat, &inv, - sizeof(struct matrix)); + matrix_translate(&mat, -focal[0], -focal[1]); + matrix_mult(&mat, inv); map[8] = mat.m[0]; map[9] = mat.m[3]; map[10] = mat.m[6]; map[11] = 0.f; map[12] = mat.m[1]; map[13] = mat.m[4]; map[14] = mat.m[7]; map[15] = 0.f; @@ -340,10 +353,10 @@ static INLINE void paint_radial_gradient_buffer(struct vg_paint *paint, void *bu } -static INLINE void paint_pattern_buffer(struct vg_paint *paint, void *buffer) +static INLINE void paint_pattern_buffer(struct vg_paint *paint, + const struct matrix *inv, + void *buffer) { - struct vg_context *ctx = paint->base.ctx; - VGfloat *map = (VGfloat *)buffer; memcpy(map, paint->solid.color, 4 * sizeof(VGfloat)); @@ -353,17 +366,8 @@ static INLINE void paint_pattern_buffer(struct vg_paint *paint, void *buffer) map[7] = paint->pattern.sampler_view->texture->height0; { struct matrix mat; - memcpy(&mat, &ctx->state.vg.fill_paint_to_user_matrix, - sizeof(struct matrix)); - matrix_invert(&mat); - { - struct matrix pm; - memcpy(&pm, &ctx->state.vg.path_user_to_surface_matrix, - sizeof(struct matrix)); - matrix_invert(&pm); - matrix_mult(&pm, &mat); - memcpy(&mat, &pm, sizeof(struct matrix)); - } + + memcpy(&mat, inv, sizeof(*inv)); map[8] = mat.m[0]; map[9] = mat.m[3]; map[10] = mat.m[6]; map[11] = 0.f; map[12] = mat.m[1]; map[13] = mat.m[4]; map[14] = mat.m[7]; map[15] = 0.f; @@ -672,6 +676,34 @@ void paint_resolve_type(struct vg_paint *paint) } } +VGboolean paint_is_degenerate(struct vg_paint *paint) +{ + VGboolean degen; + VGfloat *vals; + + + switch (paint->type) { + case VG_PAINT_TYPE_LINEAR_GRADIENT: + vals = paint->gradient.linear.coords; + /* two points are coincident */ + degen = (floatsEqual(vals[0], vals[2]) && + floatsEqual(vals[1], vals[3])); + break; + case VG_PAINT_TYPE_RADIAL_GRADIENT: + vals = paint->gradient.radial.vals; + /* radius <= 0 */ + degen = (vals[4] <= 0.0f); + break; + case VG_PAINT_TYPE_COLOR: + case VG_PAINT_TYPE_PATTERN: + default: + degen = VG_FALSE; + break; + } + + return degen; +} + VGint paint_constant_buffer_size(struct vg_paint *paint) { switch(paint->type) { @@ -695,6 +727,7 @@ VGint paint_constant_buffer_size(struct vg_paint *paint) } void paint_fill_constant_buffer(struct vg_paint *paint, + const struct matrix *mat, void *buffer) { switch(paint->type) { @@ -702,13 +735,13 @@ void paint_fill_constant_buffer(struct vg_paint *paint, paint_color_buffer(paint, buffer); break; case VG_PAINT_TYPE_LINEAR_GRADIENT: - paint_linear_gradient_buffer(paint, buffer); + paint_linear_gradient_buffer(paint, mat, buffer); break; case VG_PAINT_TYPE_RADIAL_GRADIENT: - paint_radial_gradient_buffer(paint, buffer); + paint_radial_gradient_buffer(paint, mat, buffer); break; case VG_PAINT_TYPE_PATTERN: - paint_pattern_buffer(paint, buffer); + paint_pattern_buffer(paint, mat, buffer); break; default: diff --git a/src/gallium/state_trackers/vega/paint.h b/src/gallium/state_trackers/vega/paint.h index 012cd3e5618..3de3bbe12ed 100644 --- a/src/gallium/state_trackers/vega/paint.h +++ b/src/gallium/state_trackers/vega/paint.h @@ -110,8 +110,12 @@ VGboolean paint_color_ramp_premultiplied(struct vg_paint 
*paint); VGint paint_bind_samplers(struct vg_paint *paint, struct pipe_sampler_state **samplers, struct pipe_sampler_view **sampler_views); +VGboolean paint_is_degenerate(struct vg_paint *paint); + VGint paint_constant_buffer_size(struct vg_paint *paint); + void paint_fill_constant_buffer(struct vg_paint *paint, + const struct matrix *mat, void *buffer); diff --git a/src/gallium/state_trackers/vega/path.c b/src/gallium/state_trackers/vega/path.c index 05f8b0d9979..d7253befd03 100644 --- a/src/gallium/state_trackers/vega/path.c +++ b/src/gallium/state_trackers/vega/path.c @@ -207,13 +207,29 @@ struct path * path_create(VGPathDatatype dt, VGfloat scale, VGfloat bias, return path; } +static void polygon_array_cleanup(struct polygon_array *polyarray) +{ + if (polyarray->array) { + VGint i; + + for (i = 0; i < polyarray->array->num_elements; i++) { + struct polygon *p = ((struct polygon **) polyarray->array->data)[i]; + polygon_destroy(p); + } + + array_destroy(polyarray->array); + polyarray->array = NULL; + } +} + void path_destroy(struct path *p) { vg_context_remove_object(vg_current_context(), VG_OBJECT_PATH, p); array_destroy(p->segments); array_destroy(p->control_points); - array_destroy(p->fill_polys.polygon_array.array); + + polygon_array_cleanup(&p->fill_polys.polygon_array); if (p->stroked.path) path_destroy(p->stroked.path); @@ -302,7 +318,6 @@ static void convert_path(struct path *p, } } - static void polygon_array_calculate_bounds( struct polygon_array *polyarray ) { struct array *polys = polyarray->array; @@ -312,7 +327,15 @@ static void polygon_array_calculate_bounds( struct polygon_array *polyarray ) unsigned i; assert(polys); - assert(polys->num_elements); + + if (!polys->num_elements) { + polyarray->min_x = 0.0f; + polyarray->min_y = 0.0f; + polyarray->max_x = 0.0f; + polyarray->max_y = 0.0f; + return; + } + polygon_bounding_rect((((struct polygon**)polys->data)[0]), bounds); min_x = bounds[0]; min_y = bounds[1]; @@ -353,16 +376,17 @@ static struct polygon_array * path_get_fill_polygons(struct path *p, struct matr return &p->fill_polys.polygon_array; } else { - array_destroy( p->fill_polys.polygon_array.array ); - p->fill_polys.polygon_array.array = NULL; + polygon_array_cleanup(&p->fill_polys.polygon_array); } } - array = array_create(sizeof(struct array*)); + /* an array of pointers to polygons */ + array = array_create(sizeof(struct polygon *)); sx = sy = px = py = ox = oy = 0.f; - current = polygon_create(32); + if (p->num_segments) + current = polygon_create(32); for (i = 0; i < p->num_segments; ++i) { VGubyte segment = ((VGubyte*)(p->segments->data))[i]; @@ -1519,10 +1543,11 @@ struct path * path_create_stroke(struct path *p, return stroker.base.path; } -void path_render(struct path *p, VGbitfield paintModes) +void path_render(struct path *p, VGbitfield paintModes, + struct matrix *mat) { struct vg_context *ctx = vg_current_context(); - struct matrix *mat = &ctx->state.vg.path_user_to_surface_matrix; + struct matrix paint_matrix; vg_validate_state(ctx); @@ -1534,29 +1559,45 @@ void path_render(struct path *p, VGbitfield paintModes) mat->m[3], mat->m[4], mat->m[5], mat->m[6], mat->m[7], mat->m[8]); #endif - if (paintModes & VG_FILL_PATH) { + if ((paintModes & VG_FILL_PATH) && + vg_get_paint_matrix(ctx, + &ctx->state.vg.fill_paint_to_user_matrix, + mat, + &paint_matrix)) { /* First the fill */ + shader_set_surface_matrix(ctx->shader, mat); shader_set_paint(ctx->shader, ctx->state.vg.fill_paint); + shader_set_paint_matrix(ctx->shader, &paint_matrix); 
shader_bind(ctx->shader); - path_fill(p, mat); + path_fill(p); } - if (paintModes & VG_STROKE_PATH){ + if ((paintModes & VG_STROKE_PATH) && + vg_get_paint_matrix(ctx, + &ctx->state.vg.stroke_paint_to_user_matrix, + mat, + &paint_matrix)) { /* 8.7.5: "line width less than or equal to 0 prevents stroking from * taking place."*/ if (ctx->state.vg.stroke.line_width.f <= 0) return; + shader_set_surface_matrix(ctx->shader, mat); shader_set_paint(ctx->shader, ctx->state.vg.stroke_paint); + shader_set_paint_matrix(ctx->shader, &paint_matrix); shader_bind(ctx->shader); path_stroke(p); } } -void path_fill(struct path *p, struct matrix *mat) +void path_fill(struct path *p) { struct vg_context *ctx = vg_current_context(); + struct matrix identity; + + matrix_load_identity(&identity); + { - struct polygon_array *polygon_array = path_get_fill_polygons(p, mat); + struct polygon_array *polygon_array = path_get_fill_polygons(p, &identity); struct array *polys = polygon_array->array; if (!polygon_array || !polys || !polys->num_elements) { @@ -1569,7 +1610,6 @@ void path_fill(struct path *p, struct matrix *mat) void path_stroke(struct path *p) { struct vg_context *ctx = vg_current_context(); - struct matrix *mat = &ctx->state.vg.path_user_to_surface_matrix; VGFillRule old_fill = ctx->state.vg.fill_rule; struct matrix identity; struct path *stroke; @@ -1579,7 +1619,7 @@ void path_stroke(struct path *p) if (stroke && !path_is_empty(stroke)) { ctx->state.vg.fill_rule = VG_NON_ZERO; - path_fill(stroke, mat); + path_fill(stroke); ctx->state.vg.fill_rule = old_fill; } diff --git a/src/gallium/state_trackers/vega/path.h b/src/gallium/state_trackers/vega/path.h index e34538b7368..d84b1f083ce 100644 --- a/src/gallium/state_trackers/vega/path.h +++ b/src/gallium/state_trackers/vega/path.h @@ -104,8 +104,8 @@ VGboolean path_interpolate(struct path *dst, VGfloat amount); void path_clear(struct path *p, VGbitfield capabilities); -void path_render(struct path *p, VGbitfield paintModes); -void path_fill(struct path *p, struct matrix *mat); +void path_render(struct path *p, VGbitfield paintModes, struct matrix *mat); +void path_fill(struct path *p); void path_stroke(struct path *p); void path_move_to(struct path *p, float x, float y); diff --git a/src/gallium/state_trackers/vega/polygon.c b/src/gallium/state_trackers/vega/polygon.c index ca8831064c9..a491de27fa6 100644 --- a/src/gallium/state_trackers/vega/polygon.c +++ b/src/gallium/state_trackers/vega/polygon.c @@ -244,24 +244,11 @@ VGboolean polygon_is_closed(struct polygon *p) return floatsEqual(start[0], end[0]) && floatsEqual(start[1], end[1]); } -static void set_blend_for_fill(struct pipe_blend_state *blend) -{ - memset(blend, 0, sizeof(struct pipe_blend_state)); - blend->rt[0].colormask = 0; /*disable colorwrites*/ - - blend->rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend->rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; - blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; -} - -static void draw_polygon(struct vg_context *ctx, - struct polygon *poly) +static void polygon_prepare_buffer(struct vg_context *ctx, + struct polygon *poly) { int vert_size; struct pipe_context *pipe; - struct pipe_vertex_buffer vbuffer; - struct pipe_vertex_element velement; vert_size = poly->num_verts * COMPONENTS * sizeof(float); @@ -281,35 +268,15 @@ static void draw_polygon(struct vg_context *ctx, PIPE_BIND_VERTEX_BUFFER); poly->dirty = VG_FALSE; } - - - /* tell pipe about the vertex buffer */ - 
memset(&vbuffer, 0, sizeof(vbuffer)); - vbuffer.buffer = poly->vbuf; - vbuffer.stride = COMPONENTS * sizeof(float); /* vertex size */ - vbuffer.buffer_offset = 0; - vbuffer.max_index = poly->num_verts - 1; - pipe->set_vertex_buffers(pipe, 1, &vbuffer); - - /* tell pipe about the vertex attributes */ - memset(&velement, 0, sizeof(velement)); - velement.src_offset = 0; - velement.instance_divisor = 0; - velement.vertex_buffer_index = 0; - velement.src_format = PIPE_FORMAT_R32G32_FLOAT; - cso_set_vertex_elements(ctx->cso_context, 1, &velement); - - /* draw */ - util_draw_arrays(pipe, PIPE_PRIM_TRIANGLE_FAN, 0, (uint) poly->num_verts); } void polygon_fill(struct polygon *poly, struct vg_context *ctx) { - struct pipe_depth_stencil_alpha_state dsa; - struct pipe_stencil_ref sr; - struct pipe_blend_state blend; + struct pipe_vertex_element velement; + struct pipe_vertex_buffer vbuffer; VGfloat bounds[4]; VGfloat min_x, min_y, max_x, max_y; + assert(poly); polygon_bounding_rect(poly, bounds); min_x = bounds[0]; @@ -322,113 +289,42 @@ void polygon_fill(struct polygon *poly, struct vg_context *ctx) min_x, min_y, max_x, max_y); #endif - set_blend_for_fill(&blend); - - memset(&dsa, 0, sizeof(struct pipe_depth_stencil_alpha_state)); - memset(&sr, 0, sizeof(struct pipe_stencil_ref)); - /* only need a fixed 0. Rely on default or move it out at least? */ - cso_set_stencil_ref(ctx->cso_context, &sr); - - cso_save_blend(ctx->cso_context); - cso_save_depth_stencil_alpha(ctx->cso_context); - - dsa.stencil[0].enabled = 1; - if (ctx->state.vg.fill_rule == VG_EVEN_ODD) { - dsa.stencil[0].writemask = 1; - dsa.stencil[0].fail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_INVERT; - dsa.stencil[0].func = PIPE_FUNC_ALWAYS; - dsa.stencil[0].valuemask = ~0; - - cso_set_blend(ctx->cso_context, &blend); - cso_set_depth_stencil_alpha(ctx->cso_context, &dsa); - draw_polygon(ctx, poly); - } else if (ctx->state.vg.fill_rule == VG_NON_ZERO) { - struct pipe_screen *screen = ctx->pipe->screen; - - if (screen->get_param(screen, PIPE_CAP_TWO_SIDED_STENCIL)) { - /* front */ - dsa.stencil[0].writemask = ~0; - dsa.stencil[0].fail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_INCR_WRAP; - dsa.stencil[0].func = PIPE_FUNC_ALWAYS; - dsa.stencil[0].valuemask = ~0; - - /* back */ - dsa.stencil[1].enabled = 1; - dsa.stencil[1].writemask = ~0; - dsa.stencil[1].fail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[1].zfail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[1].zpass_op = PIPE_STENCIL_OP_DECR_WRAP; - dsa.stencil[1].func = PIPE_FUNC_ALWAYS; - dsa.stencil[1].valuemask = ~0; - - cso_set_blend(ctx->cso_context, &blend); - cso_set_depth_stencil_alpha(ctx->cso_context, &dsa); - draw_polygon(ctx, poly); - } else { - struct pipe_rasterizer_state raster; - - memcpy(&raster, &ctx->state.g3d.rasterizer, sizeof(struct pipe_rasterizer_state)); - - cso_save_rasterizer(ctx->cso_context); - dsa.stencil[0].func = PIPE_FUNC_ALWAYS; - dsa.stencil[0].valuemask = ~0; - - raster.cull_face = PIPE_FACE_BACK; - dsa.stencil[0].fail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_INCR_WRAP; - - cso_set_blend(ctx->cso_context, &blend); - cso_set_depth_stencil_alpha(ctx->cso_context, &dsa); - cso_set_rasterizer(ctx->cso_context, &raster); - draw_polygon(ctx, poly); - - raster.cull_face = PIPE_FACE_FRONT; - dsa.stencil[0].fail_op = PIPE_STENCIL_OP_KEEP; - 
dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_DECR_WRAP; - cso_set_depth_stencil_alpha(ctx->cso_context, &dsa); - cso_set_rasterizer(ctx->cso_context, &raster); - draw_polygon(ctx, poly); - - cso_restore_rasterizer(ctx->cso_context); - } - } + polygon_prepare_buffer(ctx, poly); + + /* tell renderer about the vertex attributes */ + memset(&velement, 0, sizeof(velement)); + velement.src_offset = 0; + velement.instance_divisor = 0; + velement.vertex_buffer_index = 0; + velement.src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* tell renderer about the vertex buffer */ + memset(&vbuffer, 0, sizeof(vbuffer)); + vbuffer.buffer = poly->vbuf; + vbuffer.stride = COMPONENTS * sizeof(float); /* vertex size */ + vbuffer.buffer_offset = 0; + vbuffer.max_index = poly->num_verts - 1; + + renderer_polygon_stencil_begin(ctx->renderer, + &velement, ctx->state.vg.fill_rule, VG_FALSE); + renderer_polygon_stencil(ctx->renderer, &vbuffer, + PIPE_PRIM_TRIANGLE_FAN, 0, (VGuint) poly->num_verts); + renderer_polygon_stencil_end(ctx->renderer); - /* restore color writes */ - cso_restore_blend(ctx->cso_context); - /* setup stencil ops */ - dsa.stencil[0].func = PIPE_FUNC_NOTEQUAL; - dsa.stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE; - dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE; - dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; - dsa.stencil[0].valuemask = dsa.stencil[0].writemask; - dsa.stencil[1].enabled = 0; - memcpy(&dsa.depth, &ctx->state.g3d.dsa.depth, - sizeof(struct pipe_depth_state)); - cso_set_depth_stencil_alpha(ctx->cso_context, &dsa); - - /* render the quad to propagate the rendering from stencil */ - renderer_draw_quad(ctx->renderer, min_x, min_y, - max_x, max_y, 0.0f/*depth should be disabled*/); - - cso_restore_depth_stencil_alpha(ctx->cso_context); + renderer_polygon_fill_begin(ctx->renderer, VG_FALSE); + renderer_polygon_fill(ctx->renderer, min_x, min_y, max_x, max_y); + renderer_polygon_fill_end(ctx->renderer); } void polygon_array_fill(struct polygon_array *polyarray, struct vg_context *ctx) { struct array *polys = polyarray->array; - struct pipe_depth_stencil_alpha_state dsa; - struct pipe_stencil_ref sr; - struct pipe_blend_state blend; VGfloat min_x = polyarray->min_x; VGfloat min_y = polyarray->min_y; VGfloat max_x = polyarray->max_x; VGfloat max_y = polyarray->max_y; + struct pipe_vertex_element velement; + struct pipe_vertex_buffer vbuffer; VGint i; @@ -438,111 +334,35 @@ void polygon_array_fill(struct polygon_array *polyarray, struct vg_context *ctx) min_x, min_y, max_x, max_y); #endif - set_blend_for_fill(&blend); - - memset(&dsa, 0, sizeof(struct pipe_depth_stencil_alpha_state)); - memset(&sr, 0, sizeof(struct pipe_stencil_ref)); - /* only need a fixed 0. Rely on default or move it out at least? 
*/ - cso_set_stencil_ref(ctx->cso_context, &sr); - - cso_save_blend(ctx->cso_context); - cso_save_depth_stencil_alpha(ctx->cso_context); - - dsa.stencil[0].enabled = 1; - if (ctx->state.vg.fill_rule == VG_EVEN_ODD) { - dsa.stencil[0].writemask = 1; - dsa.stencil[0].fail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_INVERT; - dsa.stencil[0].func = PIPE_FUNC_ALWAYS; - dsa.stencil[0].valuemask = ~0; - - cso_set_blend(ctx->cso_context, &blend); - cso_set_depth_stencil_alpha(ctx->cso_context, &dsa); - for (i = 0; i < polys->num_elements; ++i) { - struct polygon *poly = (((struct polygon**)polys->data)[i]); - draw_polygon(ctx, poly); - } - } else if (ctx->state.vg.fill_rule == VG_NON_ZERO) { - struct pipe_screen *screen = ctx->pipe->screen; - - if (screen->get_param(screen, PIPE_CAP_TWO_SIDED_STENCIL)) { - /* front */ - dsa.stencil[0].writemask = ~0; - dsa.stencil[0].fail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_INCR_WRAP; - dsa.stencil[0].func = PIPE_FUNC_ALWAYS; - dsa.stencil[0].valuemask = ~0; - - /* back */ - dsa.stencil[1].enabled = 1; - dsa.stencil[1].writemask = ~0; - dsa.stencil[1].fail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[1].zfail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[1].zpass_op = PIPE_STENCIL_OP_DECR_WRAP; - dsa.stencil[1].func = PIPE_FUNC_ALWAYS; - dsa.stencil[1].valuemask = ~0; - - cso_set_blend(ctx->cso_context, &blend); - cso_set_depth_stencil_alpha(ctx->cso_context, &dsa); - for (i = 0; i < polys->num_elements; ++i) { - struct polygon *poly = (((struct polygon**)polys->data)[i]); - draw_polygon(ctx, poly); - } - } else { - struct pipe_rasterizer_state raster; - - memcpy(&raster, &ctx->state.g3d.rasterizer, sizeof(struct pipe_rasterizer_state)); - - cso_save_rasterizer(ctx->cso_context); - dsa.stencil[0].func = PIPE_FUNC_ALWAYS; - dsa.stencil[0].valuemask = ~0; - - raster.cull_face = PIPE_FACE_BACK; - dsa.stencil[0].fail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_INCR_WRAP; - - cso_set_blend(ctx->cso_context, &blend); - cso_set_depth_stencil_alpha(ctx->cso_context, &dsa); - cso_set_rasterizer(ctx->cso_context, &raster); - for (i = 0; i < polys->num_elements; ++i) { - struct polygon *poly = (((struct polygon**)polys->data)[i]); - draw_polygon(ctx, poly); - } - - raster.cull_face = PIPE_FACE_FRONT; - dsa.stencil[0].fail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_DECR_WRAP; - cso_set_depth_stencil_alpha(ctx->cso_context, &dsa); - cso_set_rasterizer(ctx->cso_context, &raster); - for (i = 0; i < polys->num_elements; ++i) { - struct polygon *poly = (((struct polygon**)polys->data)[i]); - draw_polygon(ctx, poly); - } - - cso_restore_rasterizer(ctx->cso_context); - } + /* tell renderer about the vertex attributes */ + memset(&velement, 0, sizeof(velement)); + velement.src_offset = 0; + velement.instance_divisor = 0; + velement.vertex_buffer_index = 0; + velement.src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* tell renderer about the vertex buffer */ + memset(&vbuffer, 0, sizeof(vbuffer)); + vbuffer.stride = COMPONENTS * sizeof(float); /* vertex size */ + vbuffer.buffer_offset = 0; + + /* prepare the stencil buffer */ + renderer_polygon_stencil_begin(ctx->renderer, + &velement, ctx->state.vg.fill_rule, VG_FALSE); + for (i = 0; i < polys->num_elements; ++i) { + struct polygon 
*poly = (((struct polygon**)polys->data)[i]); + + polygon_prepare_buffer(ctx, poly); + vbuffer.buffer = poly->vbuf; + vbuffer.max_index = poly->num_verts - 1; + + renderer_polygon_stencil(ctx->renderer, &vbuffer, + PIPE_PRIM_TRIANGLE_FAN, 0, (VGuint) poly->num_verts); } + renderer_polygon_stencil_end(ctx->renderer); - /* restore color writes */ - cso_restore_blend(ctx->cso_context); - /* setup stencil ops */ - dsa.stencil[0].func = PIPE_FUNC_NOTEQUAL; - dsa.stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE; - dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE; - dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; - dsa.stencil[0].valuemask = dsa.stencil[0].writemask; - dsa.stencil[1].enabled = 0; - memcpy(&dsa.depth, &ctx->state.g3d.dsa.depth, - sizeof(struct pipe_depth_state)); - cso_set_depth_stencil_alpha(ctx->cso_context, &dsa); - - /* render the quad to propagate the rendering from stencil */ - renderer_draw_quad(ctx->renderer, min_x, min_y, - max_x, max_y, 0.0f/*depth should be disabled*/); - - cso_restore_depth_stencil_alpha(ctx->cso_context); + /* fill it */ + renderer_polygon_fill_begin(ctx->renderer, VG_FALSE); + renderer_polygon_fill(ctx->renderer, min_x, min_y, max_x, max_y); + renderer_polygon_fill_end(ctx->renderer); } diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c index c6f5f7a05b8..e42bad76492 100644 --- a/src/gallium/state_trackers/vega/renderer.c +++ b/src/gallium/state_trackers/vega/renderer.c @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright 2009 VMware, Inc. All Rights Reserved. + * Copyright 2010 LunarG, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -38,364 +39,1382 @@ #include "util/u_simple_shaders.h" #include "util/u_memory.h" #include "util/u_sampler.h" +#include "util/u_surface.h" +#include "util/u_math.h" +#include "util/u_format.h" #include "cso_cache/cso_context.h" +#include "tgsi/tgsi_ureg.h" + +typedef enum { + RENDERER_STATE_INIT, + RENDERER_STATE_COPY, + RENDERER_STATE_DRAWTEX, + RENDERER_STATE_SCISSOR, + RENDERER_STATE_CLEAR, + RENDERER_STATE_FILTER, + RENDERER_STATE_POLYGON_STENCIL, + RENDERER_STATE_POLYGON_FILL, + NUM_RENDERER_STATES +} RendererState; + +typedef enum { + RENDERER_VS_PLAIN, + RENDERER_VS_COLOR, + RENDERER_VS_TEXTURE, + NUM_RENDERER_VS +} RendererVs; + +typedef enum { + RENDERER_FS_COLOR, + RENDERER_FS_TEXTURE, + RENDERER_FS_SCISSOR, + RENDERER_FS_WHITE, + NUM_RENDERER_FS +} RendererFs; struct renderer { struct pipe_context *pipe; - struct vg_context *owner; - struct cso_context *cso; - void *fs; + VGbitfield dirty; + struct { + struct pipe_rasterizer_state rasterizer; + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_framebuffer_state fb; + } g3d; + struct matrix projection; + + struct matrix mvp; + struct pipe_resource *vs_cbuf; + + struct pipe_resource *fs_cbuf; + VGfloat fs_cbuf_data[32]; + VGint fs_cbuf_len; + struct pipe_vertex_element velems[2]; VGfloat vertices[4][2][4]; + + void *cached_vs[NUM_RENDERER_VS]; + void *cached_fs[NUM_RENDERER_FS]; + + RendererState state; + + /* state data */ + union { + struct { + VGint tex_width; + VGint tex_height; + } copy; + + struct { + VGint tex_width; + VGint tex_height; + } drawtex; + + struct { + VGboolean restore_dsa; + } scissor; + + struct { + VGboolean use_sampler; + VGint tex_width, tex_height; + } filter; + + struct { + struct 
pipe_depth_stencil_alpha_state dsa; + VGboolean manual_two_sides; + VGboolean restore_dsa; + } polygon_stencil; + } u; }; -static void setup_shaders(struct renderer *ctx) +/** + * Return VG_TRUE if the renderer can use the resource as the asked bindings. + */ +static VGboolean renderer_can_support(struct renderer *renderer, + struct pipe_resource *res, + unsigned bindings) { - struct pipe_context *pipe = ctx->pipe; - /* fragment shader */ - ctx->fs = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D, - TGSI_INTERPOLATE_LINEAR); -} - -static struct pipe_resource * -setup_vertex_data(struct renderer *ctx, - float x0, float y0, float x1, float y1, float z) -{ - ctx->vertices[0][0][0] = x0; - ctx->vertices[0][0][1] = y0; - ctx->vertices[0][0][2] = z; - ctx->vertices[0][1][0] = 0.0f; /*s*/ - ctx->vertices[0][1][1] = 0.0f; /*t*/ - - ctx->vertices[1][0][0] = x1; - ctx->vertices[1][0][1] = y0; - ctx->vertices[1][0][2] = z; - ctx->vertices[1][1][0] = 1.0f; /*s*/ - ctx->vertices[1][1][1] = 0.0f; /*t*/ - - ctx->vertices[2][0][0] = x1; - ctx->vertices[2][0][1] = y1; - ctx->vertices[2][0][2] = z; - ctx->vertices[2][1][0] = 1.0f; - ctx->vertices[2][1][1] = 1.0f; - - ctx->vertices[3][0][0] = x0; - ctx->vertices[3][0][1] = y1; - ctx->vertices[3][0][2] = z; - ctx->vertices[3][1][0] = 0.0f; - ctx->vertices[3][1][1] = 1.0f; - - return pipe_user_buffer_create( ctx->pipe->screen, - ctx->vertices, - sizeof(ctx->vertices), - PIPE_BIND_VERTEX_BUFFER); -} - -static struct pipe_resource * -setup_vertex_data_tex(struct renderer *ctx, - float x0, float y0, float x1, float y1, - float s0, float t0, float s1, float t1, - float z) -{ - ctx->vertices[0][0][0] = x0; - ctx->vertices[0][0][1] = y0; - ctx->vertices[0][0][2] = z; - ctx->vertices[0][1][0] = s0; /*s*/ - ctx->vertices[0][1][1] = t0; /*t*/ - - ctx->vertices[1][0][0] = x1; - ctx->vertices[1][0][1] = y0; - ctx->vertices[1][0][2] = z; - ctx->vertices[1][1][0] = s1; /*s*/ - ctx->vertices[1][1][1] = t0; /*t*/ - - ctx->vertices[2][0][0] = x1; - ctx->vertices[2][0][1] = y1; - ctx->vertices[2][0][2] = z; - ctx->vertices[2][1][0] = s1; - ctx->vertices[2][1][1] = t1; - - ctx->vertices[3][0][0] = x0; - ctx->vertices[3][0][1] = y1; - ctx->vertices[3][0][2] = z; - ctx->vertices[3][1][0] = s0; - ctx->vertices[3][1][1] = t1; - - return pipe_user_buffer_create( ctx->pipe->screen, - ctx->vertices, - sizeof(ctx->vertices), - PIPE_BIND_VERTEX_BUFFER); -} - - -static struct pipe_resource * -setup_vertex_data_qtex(struct renderer *ctx, - float x0, float y0, float x1, float y1, - float x2, float y2, float x3, float y3, - float s0, float t0, float s1, float t1, - float z) -{ - ctx->vertices[0][0][0] = x0; - ctx->vertices[0][0][1] = y0; - ctx->vertices[0][0][2] = z; - ctx->vertices[0][1][0] = s0; /*s*/ - ctx->vertices[0][1][1] = t0; /*t*/ - - ctx->vertices[1][0][0] = x1; - ctx->vertices[1][0][1] = y1; - ctx->vertices[1][0][2] = z; - ctx->vertices[1][1][0] = s1; /*s*/ - ctx->vertices[1][1][1] = t0; /*t*/ - - ctx->vertices[2][0][0] = x2; - ctx->vertices[2][0][1] = y2; - ctx->vertices[2][0][2] = z; - ctx->vertices[2][1][0] = s1; - ctx->vertices[2][1][1] = t1; - - ctx->vertices[3][0][0] = x3; - ctx->vertices[3][0][1] = y3; - ctx->vertices[3][0][2] = z; - ctx->vertices[3][1][0] = s0; - ctx->vertices[3][1][1] = t1; - - return pipe_user_buffer_create( ctx->pipe->screen, - ctx->vertices, - sizeof(ctx->vertices), - PIPE_BIND_VERTEX_BUFFER); + struct pipe_screen *screen = renderer->pipe->screen; + + return screen->is_format_supported(screen, + res->format, res->target, 0, bindings, 0); } 
-struct renderer * renderer_create(struct vg_context *owner) +/** + * Set the model-view-projection matrix used by vertex shaders. + */ +static void renderer_set_mvp(struct renderer *renderer, + const struct matrix *mvp) { + struct matrix *cur = &renderer->mvp; + struct pipe_resource *cbuf; + VGfloat consts[3][4]; VGint i; - struct renderer *renderer = CALLOC_STRUCT(renderer); - if (!renderer) + /* projection only */ + if (!mvp) + mvp = &renderer->projection; + + /* re-upload only if necessary */ + if (memcmp(cur, mvp, sizeof(*mvp)) == 0) + return; + + /* 3x3 matrix to 3 constant vectors (no Z) */ + for (i = 0; i < 3; i++) { + consts[i][0] = mvp->m[i + 0]; + consts[i][1] = mvp->m[i + 3]; + consts[i][2] = 0.0f; + consts[i][3] = mvp->m[i + 6]; + } + + cbuf = renderer->vs_cbuf; + pipe_resource_reference(&cbuf, NULL); + cbuf = pipe_buffer_create(renderer->pipe->screen, + PIPE_BIND_CONSTANT_BUFFER, + sizeof(consts)); + if (cbuf) { + pipe_buffer_write(renderer->pipe, cbuf, + 0, sizeof(consts), consts); + } + renderer->pipe->set_constant_buffer(renderer->pipe, + PIPE_SHADER_VERTEX, 0, cbuf); + + memcpy(cur, mvp, sizeof(*mvp)); + renderer->vs_cbuf = cbuf; +} + +/** + * Create a simple vertex shader that passes through position and the given + * attribute. + */ +static void *create_passthrough_vs(struct pipe_context *pipe, int semantic_name) +{ + struct ureg_program *ureg; + struct ureg_src src[2], constants[3]; + struct ureg_dst dst[2], tmp; + int i; + + ureg = ureg_create(TGSI_PROCESSOR_VERTEX); + if (!ureg) return NULL; - renderer->owner = owner; - renderer->pipe = owner->pipe; - renderer->cso = owner->cso_context; + /* position is in user coordinates */ + src[0] = ureg_DECL_vs_input(ureg, 0); + dst[0] = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); + tmp = ureg_DECL_temporary(ureg); + for (i = 0; i < Elements(constants); i++) + constants[i] = ureg_DECL_constant(ureg, i); + + /* transform to clipped coordinates */ + ureg_DP4(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), src[0], constants[0]); + ureg_DP4(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), src[0], constants[1]); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), src[0]); + ureg_DP4(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), src[0], constants[2]); + ureg_MOV(ureg, dst[0], ureg_src(tmp)); + + if (semantic_name >= 0) { + src[1] = ureg_DECL_vs_input(ureg, 1); + dst[1] = ureg_DECL_output(ureg, semantic_name, 0); + ureg_MOV(ureg, dst[1], src[1]); + } - setup_shaders(renderer); + ureg_END(ureg); - /* init vertex data that doesn't change */ - for (i = 0; i < 4; i++) { - renderer->vertices[i][0][3] = 1.0f; /* w */ - renderer->vertices[i][1][2] = 0.0f; /* r */ - renderer->vertices[i][1][3] = 1.0f; /* q */ + return ureg_create_shader_and_destroy(ureg, pipe); +} + +/** + * Set renderer vertex shader. + * + * This function modifies vertex_shader state. + */ +static void renderer_set_vs(struct renderer *r, RendererVs id) +{ + /* create as needed */ + if (!r->cached_vs[id]) { + int semantic_name = -1; + + switch (id) { + case RENDERER_VS_PLAIN: + break; + case RENDERER_VS_COLOR: + semantic_name = TGSI_SEMANTIC_COLOR; + break; + case RENDERER_VS_TEXTURE: + semantic_name = TGSI_SEMANTIC_GENERIC; + break; + default: + assert(!"Unknown renderer vs id"); + break; + } + + r->cached_vs[id] = create_passthrough_vs(r->pipe, semantic_name); } - return renderer; + cso_set_vertex_shader_handle(r->cso, r->cached_vs[id]); } -void renderer_destroy(struct renderer *ctx) +/** + * Create a simple fragment shader that sets the depth to 0.0f. 
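As a back-of-the-envelope check of the constant packing in renderer_set_mvp() above: consts[i] = { m[i], m[i+3], 0, m[i+6] } and the vertex arrives as (x, y, z, 1), since w is fixed to 1.0 in renderer_create(), so the DP4/MOV sequence in create_passthrough_vs() computes the mapping spelled out below. The helper is purely illustrative:

static void example_apply_mvp(const VGfloat m[9],
                              VGfloat x, VGfloat y, VGfloat z,
                              VGfloat clip[4])
{
   clip[0] = m[0] * x + m[3] * y + m[6];   /* DP4 against consts[0] */
   clip[1] = m[1] * x + m[4] * y + m[7];   /* DP4 against consts[1] */
   clip[2] = z;                            /* plain MOV, z is untouched */
   clip[3] = m[2] * x + m[5] * y + m[8];   /* DP4 against consts[2] */
}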
+ */ +static void *create_scissor_fs(struct pipe_context *pipe) +{ + struct ureg_program *ureg; + struct ureg_dst out; + struct ureg_src imm; + + ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT); + out = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); + imm = ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f); + + ureg_MOV(ureg, ureg_writemask(out, TGSI_WRITEMASK_Z), imm); + ureg_END(ureg); + + return ureg_create_shader_and_destroy(ureg, pipe); +} + +/** + * Create a simple fragment shader that sets the color to white. + */ +static void *create_white_fs(struct pipe_context *pipe) +{ + struct ureg_program *ureg; + struct ureg_dst out; + struct ureg_src imm; + + ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT); + out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); + imm = ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f); + + ureg_MOV(ureg, out, imm); + ureg_END(ureg); + + return ureg_create_shader_and_destroy(ureg, pipe); +} + +/** + * Set renderer fragment shader. + * + * This function modifies fragment_shader state. + */ +static void renderer_set_fs(struct renderer *r, RendererFs id) { -#if 0 - if (ctx->fs) { - cso_delete_fragment_shader(ctx->cso, ctx->fs); - ctx->fs = NULL; + /* create as needed */ + if (!r->cached_fs[id]) { + void *fs = NULL; + + switch (id) { + case RENDERER_FS_COLOR: + fs = util_make_fragment_passthrough_shader(r->pipe); + break; + case RENDERER_FS_TEXTURE: + fs = util_make_fragment_tex_shader(r->pipe, + TGSI_TEXTURE_2D, TGSI_INTERPOLATE_LINEAR); + break; + case RENDERER_FS_SCISSOR: + fs = create_scissor_fs(r->pipe); + break; + case RENDERER_FS_WHITE: + fs = create_white_fs(r->pipe); + break; + default: + assert(!"Unknown renderer fs id"); + break; + } + + r->cached_fs[id] = fs; } -#endif - FREE(ctx); + + cso_set_fragment_shader_handle(r->cso, r->cached_fs[id]); } -void renderer_draw_quad(struct renderer *r, - VGfloat x1, VGfloat y1, - VGfloat x2, VGfloat y2, - VGfloat depth) +typedef enum { + VEGA_Y0_TOP, + VEGA_Y0_BOTTOM +} VegaOrientation; + +static void vg_set_viewport(struct renderer *r, + VegaOrientation orientation) { - struct pipe_resource *buf; + const struct pipe_framebuffer_state *fb = &r->g3d.fb; + struct pipe_viewport_state viewport; + VGfloat y_scale = (orientation == VEGA_Y0_BOTTOM) ? -2.f : 2.f; + + viewport.scale[0] = fb->width / 2.f; + viewport.scale[1] = fb->height / y_scale; + viewport.scale[2] = 1.0; + viewport.scale[3] = 1.0; + viewport.translate[0] = fb->width / 2.f; + viewport.translate[1] = fb->height / 2.f; + viewport.translate[2] = 0.0; + viewport.translate[3] = 0.0; + + cso_set_viewport(r->cso, &viewport); +} + +/** + * Set renderer target. + * + * This function modifies framebuffer and viewport states. + */ +static void renderer_set_target(struct renderer *r, + struct pipe_surface *cbuf, + struct pipe_surface *zsbuf, + VGboolean y0_top) +{ + struct pipe_framebuffer_state fb; + + memset(&fb, 0, sizeof(fb)); + fb.width = cbuf->width; + fb.height = cbuf->height; + fb.cbufs[0] = cbuf; + fb.nr_cbufs = 1; + fb.zsbuf = zsbuf; + cso_set_framebuffer(r->cso, &fb); + + vg_set_viewport(r, (y0_top) ? VEGA_Y0_TOP : VEGA_Y0_BOTTOM); +} + +/** + * Set renderer blend state. Blending is disabled. + * + * This function modifies blend state. 
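For orientation (not from the patch): renderer_set_blend() below is where an OpenVG channel mask becomes a pipe colormask, and the rest of the file leans on it with three kinds of arguments; the VG_RED | VG_ALPHA case is a hypothetical example:

   renderer_set_blend(renderer, ~0);                /* all of RGBA: copy, drawtex, clear */
   renderer_set_blend(renderer, 0);                 /* no color writes: scissor and stencil passes */
   renderer_set_blend(renderer, VG_RED | VG_ALPHA); /* a subset, e.g. an image filter's channel mask */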
+ */ +static void renderer_set_blend(struct renderer *r, + VGbitfield channel_mask) +{ + struct pipe_blend_state blend; + + memset(&blend, 0, sizeof(blend)); + + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + + if (channel_mask & VG_RED) + blend.rt[0].colormask |= PIPE_MASK_R; + if (channel_mask & VG_GREEN) + blend.rt[0].colormask |= PIPE_MASK_G; + if (channel_mask & VG_BLUE) + blend.rt[0].colormask |= PIPE_MASK_B; + if (channel_mask & VG_ALPHA) + blend.rt[0].colormask |= PIPE_MASK_A; + + cso_set_blend(r->cso, &blend); +} + +/** + * Set renderer sampler and view states. + * + * This function modifies samplers and fragment_sampler_views states. + */ +static void renderer_set_samplers(struct renderer *r, + uint num_views, + struct pipe_sampler_view **views) +{ + struct pipe_sampler_state sampler; + unsigned tex_filter = PIPE_TEX_FILTER_NEAREST; + unsigned tex_wrap = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + uint i; + + memset(&sampler, 0, sizeof(sampler)); + + sampler.min_img_filter = tex_filter; + sampler.mag_img_filter = tex_filter; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + + sampler.wrap_s = tex_wrap; + sampler.wrap_t = tex_wrap; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + + sampler.normalized_coords = 1; + + /* set samplers */ + for (i = 0; i < num_views; i++) + cso_single_sampler(r->cso, i, &sampler); + cso_single_sampler_done(r->cso); + + /* set views */ + cso_set_fragment_sampler_views(r->cso, num_views, views); +} + +/** + * Set custom renderer fragment shader, and optionally set samplers and views + * and upload the fragment constant buffer. + * + * This function modifies fragment_shader, samplers and fragment_sampler_views + * states. + */ +static void renderer_set_custom_fs(struct renderer *renderer, + void *fs, + const struct pipe_sampler_state **samplers, + struct pipe_sampler_view **views, + VGint num_samplers, + const void *const_buffer, + VGint const_buffer_len) +{ + cso_set_fragment_shader_handle(renderer->cso, fs); + + /* set samplers and views */ + if (num_samplers) { + cso_set_samplers(renderer->cso, num_samplers, samplers); + cso_set_fragment_sampler_views(renderer->cso, num_samplers, views); + } + + /* upload fs constant buffer */ + if (const_buffer_len) { + struct pipe_resource *cbuf = renderer->fs_cbuf; + + if (!cbuf || renderer->fs_cbuf_len != const_buffer_len || + memcmp(renderer->fs_cbuf_data, const_buffer, const_buffer_len)) { + pipe_resource_reference(&cbuf, NULL); + + cbuf = pipe_buffer_create(renderer->pipe->screen, + PIPE_BIND_CONSTANT_BUFFER, const_buffer_len); + pipe_buffer_write(renderer->pipe, cbuf, 0, + const_buffer_len, const_buffer); + renderer->pipe->set_constant_buffer(renderer->pipe, + PIPE_SHADER_FRAGMENT, 0, cbuf); + + renderer->fs_cbuf = cbuf; + if (const_buffer_len <= sizeof(renderer->fs_cbuf_data)) { + memcpy(renderer->fs_cbuf_data, const_buffer, const_buffer_len); + renderer->fs_cbuf_len = const_buffer_len; + } + else { + renderer->fs_cbuf_len = 0; + } + } + } +} + +/** + * Setup renderer quad position. + */ +static void renderer_quad_pos(struct renderer *r, + VGfloat x0, VGfloat y0, + VGfloat x1, VGfloat y1, + VGboolean scissor) +{ + VGfloat z; + + /* the depth test is used for scissoring */ + z = (scissor) ? 
0.0f : 1.0f; + + /* positions */ + r->vertices[0][0][0] = x0; + r->vertices[0][0][1] = y0; + r->vertices[0][0][2] = z; - buf = setup_vertex_data(r, x1, y1, x2, y2, depth); + r->vertices[1][0][0] = x1; + r->vertices[1][0][1] = y0; + r->vertices[1][0][2] = z; + r->vertices[2][0][0] = x1; + r->vertices[2][0][1] = y1; + r->vertices[2][0][2] = z; + + r->vertices[3][0][0] = x0; + r->vertices[3][0][1] = y1; + r->vertices[3][0][2] = z; +} + +/** + * Setup renderer quad texture coordinates. + */ +static void renderer_quad_texcoord(struct renderer *r, + VGfloat x0, VGfloat y0, + VGfloat x1, VGfloat y1, + VGint tex_width, VGint tex_height) +{ + VGfloat s0, t0, s1, t1, r0, q0; + VGint i; + + s0 = x0 / tex_width; + s1 = x1 / tex_width; + t0 = y0 / tex_height; + t1 = y1 / tex_height; + r0 = 0.0f; + q0 = 1.0f; + + /* texcoords */ + r->vertices[0][1][0] = s0; + r->vertices[0][1][1] = t0; + + r->vertices[1][1][0] = s1; + r->vertices[1][1][1] = t0; + + r->vertices[2][1][0] = s1; + r->vertices[2][1][1] = t1; + + r->vertices[3][1][0] = s0; + r->vertices[3][1][1] = t1; + + for (i = 0; i < 4; i++) { + r->vertices[i][1][2] = r0; + r->vertices[i][1][3] = q0; + } +} + +/** + * Draw renderer quad. + */ +static void renderer_quad_draw(struct renderer *r) +{ + struct pipe_resource *buf; + + buf = pipe_user_buffer_create(r->pipe->screen, + r->vertices, + sizeof(r->vertices), + PIPE_BIND_VERTEX_BUFFER); if (buf) { - cso_set_vertex_elements(r->cso, 2, r->owner->velems); util_draw_vertex_buffer(r->pipe, buf, 0, PIPE_PRIM_TRIANGLE_FAN, - 4, /* verts */ - 2); /* attribs/vert */ + Elements(r->vertices), /* verts */ + Elements(r->vertices[0])); /* attribs/vert */ - pipe_resource_reference( &buf, - NULL ); + pipe_resource_reference(&buf, NULL); } } -void renderer_draw_texture(struct renderer *r, - struct pipe_resource *tex, - VGfloat x1offset, VGfloat y1offset, - VGfloat x2offset, VGfloat y2offset, - VGfloat x1, VGfloat y1, - VGfloat x2, VGfloat y2) +/** + * Prepare the renderer for copying. 
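The copy path that follows effectively replaces the removed renderer_draw_texture()/renderer_copy_texture() pair: _begin() retargets the framebuffer at dst, binds the texture shaders and a nearest/clamp sampler, and _end() restores everything through the cso save/restore calls. A sketch of the intended use, mirroring what renderer_copy_surface() does near the end of this file; the example_* wrapper is hypothetical:

static VGboolean example_copy_rect(struct renderer *renderer,
                                   struct pipe_surface *dst_surf,
                                   struct pipe_sampler_view *src_view,
                                   VGint x, VGint y, VGint w, VGint h)
{
   /* fails if dst/src cannot be bound as render target/sampler view */
   if (!renderer_copy_begin(renderer, dst_surf, VG_TRUE, src_view))
      return VG_FALSE;

   /* destination rect and source rect, both in surface coordinates */
   renderer_copy(renderer, x, y, w, h, x, y, w, h);
   renderer_copy_end(renderer);

   return VG_TRUE;
}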
+ */ +VGboolean renderer_copy_begin(struct renderer *renderer, + struct pipe_surface *dst, + VGboolean y0_top, + struct pipe_sampler_view *src) { - struct pipe_context *pipe = r->pipe; - struct pipe_resource *buf; - VGfloat s0, t0, s1, t1; + assert(renderer->state == RENDERER_STATE_INIT); - assert(tex->width0 != 0); - assert(tex->height0 != 0); + /* sanity check */ + if (!renderer_can_support(renderer, + dst->texture, PIPE_BIND_RENDER_TARGET) || + !renderer_can_support(renderer, + src->texture, PIPE_BIND_SAMPLER_VIEW)) + return VG_FALSE; - s0 = x1offset / tex->width0; - s1 = x2offset / tex->width0; - t0 = y1offset / tex->height0; - t1 = y2offset / tex->height0; + cso_save_framebuffer(renderer->cso); + cso_save_viewport(renderer->cso); + cso_save_blend(renderer->cso); + cso_save_samplers(renderer->cso); + cso_save_fragment_sampler_views(renderer->cso); + cso_save_fragment_shader(renderer->cso); + cso_save_vertex_shader(renderer->cso); - cso_save_vertex_shader(r->cso); - /* shaders */ - cso_set_vertex_shader_handle(r->cso, vg_texture_vs(r->owner)); + renderer_set_target(renderer, dst, NULL, y0_top); - /* draw quad */ - buf = setup_vertex_data_tex(r, x1, y1, x2, y2, - s0, t0, s1, t1, 0.0f); + renderer_set_blend(renderer, ~0); + renderer_set_samplers(renderer, 1, &src); - if (buf) { - cso_set_vertex_elements(r->cso, 2, r->owner->velems); - util_draw_vertex_buffer(pipe, buf, 0, - PIPE_PRIM_TRIANGLE_FAN, - 4, /* verts */ - 2); /* attribs/vert */ - - pipe_resource_reference( &buf, - NULL ); + renderer_set_fs(renderer, RENDERER_FS_TEXTURE); + renderer_set_vs(renderer, RENDERER_VS_TEXTURE); + + renderer_set_mvp(renderer, NULL); + + /* remember the texture size */ + renderer->u.copy.tex_width = src->texture->width0; + renderer->u.copy.tex_height = src->texture->height0; + renderer->state = RENDERER_STATE_COPY; + + return VG_TRUE; +} + +/** + * Draw into the destination rectangle given by (x, y, w, h). The texture is + * sampled from within the rectangle given by (sx, sy, sw, sh). + * + * The coordinates are in surface coordinates. + */ +void renderer_copy(struct renderer *renderer, + VGint x, VGint y, VGint w, VGint h, + VGint sx, VGint sy, VGint sw, VGint sh) +{ + assert(renderer->state == RENDERER_STATE_COPY); + + /* there is no depth buffer for scissoring anyway */ + renderer_quad_pos(renderer, x, y, x + w, y + h, VG_FALSE); + renderer_quad_texcoord(renderer, sx, sy, sx + sw, sy + sh, + renderer->u.copy.tex_width, + renderer->u.copy.tex_height); + + renderer_quad_draw(renderer); +} + +/** + * End copying and restore the states. + */ +void renderer_copy_end(struct renderer *renderer) +{ + assert(renderer->state == RENDERER_STATE_COPY); + + cso_restore_framebuffer(renderer->cso); + cso_restore_viewport(renderer->cso); + cso_restore_blend(renderer->cso); + cso_restore_samplers(renderer->cso); + cso_restore_fragment_sampler_views(renderer->cso); + cso_restore_fragment_shader(renderer->cso); + cso_restore_vertex_shader(renderer->cso); + + renderer->state = RENDERER_STATE_INIT; +} + +/** + * Prepare the renderer for textured drawing. 
+ */ +VGboolean renderer_drawtex_begin(struct renderer *renderer, + struct pipe_sampler_view *src) +{ + assert(renderer->state == RENDERER_STATE_INIT); + + if (!renderer_can_support(renderer, src->texture, PIPE_BIND_SAMPLER_VIEW)) + return VG_FALSE; + + cso_save_blend(renderer->cso); + cso_save_samplers(renderer->cso); + cso_save_fragment_sampler_views(renderer->cso); + cso_save_fragment_shader(renderer->cso); + cso_save_vertex_shader(renderer->cso); + + renderer_set_blend(renderer, ~0); + + renderer_set_samplers(renderer, 1, &src); + + renderer_set_fs(renderer, RENDERER_FS_TEXTURE); + renderer_set_vs(renderer, RENDERER_VS_TEXTURE); + + renderer_set_mvp(renderer, NULL); + + /* remember the texture size */ + renderer->u.drawtex.tex_width = src->texture->width0; + renderer->u.drawtex.tex_height = src->texture->height0; + renderer->state = RENDERER_STATE_DRAWTEX; + + return VG_TRUE; +} + +/** + * Draw into the destination rectangle given by (x, y, w, h). The texture is + * sampled from within the rectangle given by (sx, sy, sw, sh). + * + * The coordinates are in surface coordinates. + */ +void renderer_drawtex(struct renderer *renderer, + VGint x, VGint y, VGint w, VGint h, + VGint sx, VGint sy, VGint sw, VGint sh) +{ + assert(renderer->state == RENDERER_STATE_DRAWTEX); + + /* with scissoring */ + renderer_quad_pos(renderer, x, y, x + w, y + h, VG_TRUE); + renderer_quad_texcoord(renderer, sx, sy, sx + sw, sy + sh, + renderer->u.drawtex.tex_width, + renderer->u.drawtex.tex_height); + + renderer_quad_draw(renderer); +} + +/** + * End textured drawing and restore the states. + */ +void renderer_drawtex_end(struct renderer *renderer) +{ + assert(renderer->state == RENDERER_STATE_DRAWTEX); + + cso_restore_blend(renderer->cso); + cso_restore_samplers(renderer->cso); + cso_restore_fragment_sampler_views(renderer->cso); + cso_restore_fragment_shader(renderer->cso); + cso_restore_vertex_shader(renderer->cso); + + renderer->state = RENDERER_STATE_INIT; +} + +/** + * Prepare the renderer for scissor update. This will reset the depth buffer + * to 1.0f. + */ +VGboolean renderer_scissor_begin(struct renderer *renderer, + VGboolean restore_dsa) +{ + struct pipe_depth_stencil_alpha_state dsa; + + assert(renderer->state == RENDERER_STATE_INIT); + + if (restore_dsa) + cso_save_depth_stencil_alpha(renderer->cso); + cso_save_blend(renderer->cso); + cso_save_fragment_shader(renderer->cso); + + /* enable depth writes */ + memset(&dsa, 0, sizeof(dsa)); + dsa.depth.enabled = 1; + dsa.depth.writemask = 1; + dsa.depth.func = PIPE_FUNC_ALWAYS; + cso_set_depth_stencil_alpha(renderer->cso, &dsa); + + /* disable color writes */ + renderer_set_blend(renderer, 0); + renderer_set_fs(renderer, RENDERER_FS_SCISSOR); + + renderer_set_mvp(renderer, NULL); + + renderer->u.scissor.restore_dsa = restore_dsa; + renderer->state = RENDERER_STATE_SCISSOR; + + /* clear the depth buffer to 1.0f */ + renderer->pipe->clear(renderer->pipe, + PIPE_CLEAR_DEPTHSTENCIL, NULL, 1.0f, 0); + + return VG_TRUE; +} + +/** + * Add a scissor rectangle. Depth values inside the rectangle will be set to + * 0.0f. + */ +void renderer_scissor(struct renderer *renderer, + VGint x, VGint y, VGint width, VGint height) +{ + assert(renderer->state == RENDERER_STATE_SCISSOR); + + renderer_quad_pos(renderer, x, y, x + width, y + height, VG_FALSE); + renderer_quad_draw(renderer); +} + +/** + * End scissor update and restore the states. 
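The scissor functions around this point implement OpenVG scissoring with the depth buffer rather than a hardware scissor list: _begin() clears depth to 1.0, each rectangle is drawn with the RENDERER_FS_SCISSOR shader that forces fragment depth to 0.0, and ordinary drawing is later clipped by a GEQUAL depth test against that buffer (update_clip_state() further down sets it up). A hypothetical driver of the update, with the rectangle array as a placeholder:

static void example_update_scissor(struct renderer *renderer,
                                   const VGint *rects, VGint num_rects)
{
   VGint i;

   /* VG_FALSE: the caller installs its own DSA right afterwards,
    * as update_clip_state() does */
   renderer_scissor_begin(renderer, VG_FALSE);

   for (i = 0; i < num_rects; i++)
      renderer_scissor(renderer,
                       rects[i * 4 + 0], rects[i * 4 + 1],   /* x, y */
                       rects[i * 4 + 2], rects[i * 4 + 3]);  /* width, height */

   renderer_scissor_end(renderer);
}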
+ */ +void renderer_scissor_end(struct renderer *renderer) +{ + assert(renderer->state == RENDERER_STATE_SCISSOR); + + if (renderer->u.scissor.restore_dsa) + cso_restore_depth_stencil_alpha(renderer->cso); + cso_restore_blend(renderer->cso); + cso_restore_fragment_shader(renderer->cso); + + renderer->state = RENDERER_STATE_INIT; +} + +/** + * Prepare the renderer for clearing. + */ +VGboolean renderer_clear_begin(struct renderer *renderer) +{ + assert(renderer->state == RENDERER_STATE_INIT); + + cso_save_blend(renderer->cso); + cso_save_fragment_shader(renderer->cso); + cso_save_vertex_shader(renderer->cso); + + renderer_set_blend(renderer, ~0); + renderer_set_fs(renderer, RENDERER_FS_COLOR); + renderer_set_vs(renderer, RENDERER_VS_COLOR); + + renderer_set_mvp(renderer, NULL); + + renderer->state = RENDERER_STATE_CLEAR; + + return VG_TRUE; +} + +/** + * Clear the framebuffer with the specified region and color. + * + * The coordinates are in surface coordinates. + */ +void renderer_clear(struct renderer *renderer, + VGint x, VGint y, VGint width, VGint height, + const VGfloat color[4]) +{ + VGuint i; + + assert(renderer->state == RENDERER_STATE_CLEAR); + + renderer_quad_pos(renderer, x, y, x + width, y + height, VG_TRUE); + for (i = 0; i < 4; i++) + memcpy(renderer->vertices[i][1], color, sizeof(VGfloat) * 4); + + renderer_quad_draw(renderer); +} + +/** + * End clearing and retore the states. + */ +void renderer_clear_end(struct renderer *renderer) +{ + assert(renderer->state == RENDERER_STATE_CLEAR); + + cso_restore_blend(renderer->cso); + cso_restore_fragment_shader(renderer->cso); + cso_restore_vertex_shader(renderer->cso); + + renderer->state = RENDERER_STATE_INIT; +} + +/** + * Prepare the renderer for image filtering. + */ +VGboolean renderer_filter_begin(struct renderer *renderer, + struct pipe_resource *dst, + VGboolean y0_top, + VGbitfield channel_mask, + const struct pipe_sampler_state **samplers, + struct pipe_sampler_view **views, + VGint num_samplers, + void *fs, + const void *const_buffer, + VGint const_buffer_len) +{ + struct pipe_surface *surf, surf_tmpl; + + assert(renderer->state == RENDERER_STATE_INIT); + + if (!fs) + return VG_FALSE; + if (!renderer_can_support(renderer, dst, PIPE_BIND_RENDER_TARGET)) + return VG_FALSE; + + u_surface_default_template(&surf_tmpl, dst, + PIPE_BIND_RENDER_TARGET); + surf = renderer->pipe->create_surface(renderer->pipe, dst, &surf_tmpl); + if (!surf) + return VG_FALSE; + + cso_save_framebuffer(renderer->cso); + cso_save_viewport(renderer->cso); + cso_save_blend(renderer->cso); + + /* set the image as the target */ + renderer_set_target(renderer, surf, NULL, y0_top); + pipe_surface_reference(&surf, NULL); + + renderer_set_blend(renderer, channel_mask); + + if (num_samplers) { + struct pipe_resource *tex; + + cso_save_samplers(renderer->cso); + cso_save_fragment_sampler_views(renderer->cso); + cso_save_fragment_shader(renderer->cso); + cso_save_vertex_shader(renderer->cso); + + renderer_set_custom_fs(renderer, fs, + samplers, views, num_samplers, + const_buffer, const_buffer_len); + renderer_set_vs(renderer, RENDERER_VS_TEXTURE); + + tex = views[0]->texture; + renderer->u.filter.tex_width = tex->width0; + renderer->u.filter.tex_height = tex->height0; + renderer->u.filter.use_sampler = VG_TRUE; } + else { + cso_save_fragment_shader(renderer->cso); - cso_restore_vertex_shader(r->cso); + renderer_set_custom_fs(renderer, fs, NULL, NULL, 0, + const_buffer, const_buffer_len); + + renderer->u.filter.use_sampler = VG_FALSE; + } + + 
renderer_set_mvp(renderer, NULL); + + renderer->state = RENDERER_STATE_FILTER; + + return VG_TRUE; } -void renderer_copy_texture(struct renderer *ctx, - struct pipe_sampler_view *src, - VGfloat sx1, VGfloat sy1, - VGfloat sx2, VGfloat sy2, - struct pipe_resource *dst, - VGfloat dx1, VGfloat dy1, - VGfloat dx2, VGfloat dy2) +/** + * Draw into a rectangle of the destination with the specified region of the + * texture(s). + * + * The coordinates are in surface coordinates. + */ +void renderer_filter(struct renderer *renderer, + VGint x, VGint y, VGint w, VGint h, + VGint sx, VGint sy, VGint sw, VGint sh) { - struct pipe_context *pipe = ctx->pipe; - struct pipe_screen *screen = pipe->screen; - struct pipe_resource *tex = src->texture; - struct pipe_resource *buf; - struct pipe_surface *dst_surf = screen->get_tex_surface( - screen, dst, 0, 0, 0, - PIPE_BIND_RENDER_TARGET); - struct pipe_framebuffer_state fb; - float s0, t0, s1, t1; + assert(renderer->state == RENDERER_STATE_FILTER); - assert(tex->width0 != 0); - assert(tex->height0 != 0); - assert(dst->width0 != 0); - assert(dst->height0 != 0); - -#if 0 - debug_printf("copy texture [%f, %f, %f, %f], [%f, %f, %f, %f]\n", - sx1, sy1, sx2, sy2, dx1, dy1, dx2, dy2); -#endif - -#if 1 - s0 = sx1 / tex->width0; - s1 = sx2 / tex->width0; - t0 = sy1 / tex->height0; - t1 = sy2 / tex->height0; -#else - s0 = 0; - s1 = 1; - t0 = 0; - t1 = 1; -#endif - - assert(screen->is_format_supported(screen, dst_surf->format, PIPE_TEXTURE_2D, - 0, PIPE_BIND_RENDER_TARGET, 0)); + renderer_quad_pos(renderer, x, y, x + w, y + h, VG_FALSE); + if (renderer->u.filter.use_sampler) { + renderer_quad_texcoord(renderer, sx, sy, sx + sw, sy + sh, + renderer->u.filter.tex_width, + renderer->u.filter.tex_height); + } - /* save state (restored below) */ - cso_save_blend(ctx->cso); - cso_save_samplers(ctx->cso); - cso_save_fragment_sampler_views(ctx->cso); - cso_save_framebuffer(ctx->cso); - cso_save_fragment_shader(ctx->cso); - cso_save_vertex_shader(ctx->cso); + renderer_quad_draw(renderer); +} + +/** + * End image filtering and restore the states. + */ +void renderer_filter_end(struct renderer *renderer) +{ + assert(renderer->state == RENDERER_STATE_FILTER); - cso_save_viewport(ctx->cso); + if (renderer->u.filter.use_sampler) { + cso_restore_samplers(renderer->cso); + cso_restore_fragment_sampler_views(renderer->cso); + cso_restore_vertex_shader(renderer->cso); + } + cso_restore_framebuffer(renderer->cso); + cso_restore_viewport(renderer->cso); + cso_restore_blend(renderer->cso); + cso_restore_fragment_shader(renderer->cso); - /* set misc state we care about */ - { - struct pipe_blend_state blend; - memset(&blend, 0, sizeof(blend)); - blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.rt[0].colormask = PIPE_MASK_RGBA; - cso_set_blend(ctx->cso, &blend); + renderer->state = RENDERER_STATE_INIT; +} + +/** + * Prepare the renderer for polygon silhouette rendering. 
+ */ +VGboolean renderer_polygon_stencil_begin(struct renderer *renderer, + struct pipe_vertex_element *velem, + VGFillRule rule, + VGboolean restore_dsa) +{ + struct pipe_depth_stencil_alpha_state *dsa; + VGboolean manual_two_sides; + + assert(renderer->state == RENDERER_STATE_INIT); + + cso_save_vertex_elements(renderer->cso); + cso_save_blend(renderer->cso); + cso_save_depth_stencil_alpha(renderer->cso); + + cso_set_vertex_elements(renderer->cso, 1, velem); + + /* disable color writes */ + renderer_set_blend(renderer, 0); + + manual_two_sides = VG_FALSE; + dsa = &renderer->u.polygon_stencil.dsa; + memset(dsa, 0, sizeof(*dsa)); + if (rule == VG_EVEN_ODD) { + dsa->stencil[0].enabled = 1; + dsa->stencil[0].writemask = 1; + dsa->stencil[0].fail_op = PIPE_STENCIL_OP_KEEP; + dsa->stencil[0].zfail_op = PIPE_STENCIL_OP_KEEP; + dsa->stencil[0].zpass_op = PIPE_STENCIL_OP_INVERT; + dsa->stencil[0].func = PIPE_FUNC_ALWAYS; + dsa->stencil[0].valuemask = ~0; } + else { + assert(rule == VG_NON_ZERO); + + /* front face */ + dsa->stencil[0].enabled = 1; + dsa->stencil[0].writemask = ~0; + dsa->stencil[0].fail_op = PIPE_STENCIL_OP_KEEP; + dsa->stencil[0].zfail_op = PIPE_STENCIL_OP_KEEP; + dsa->stencil[0].zpass_op = PIPE_STENCIL_OP_INCR_WRAP; + dsa->stencil[0].func = PIPE_FUNC_ALWAYS; + dsa->stencil[0].valuemask = ~0; + + if (renderer->pipe->screen->get_param(renderer->pipe->screen, + PIPE_CAP_TWO_SIDED_STENCIL)) { + /* back face */ + dsa->stencil[1] = dsa->stencil[0]; + dsa->stencil[1].zpass_op = PIPE_STENCIL_OP_DECR_WRAP; + } + else { + manual_two_sides = VG_TRUE; + } + } + cso_set_depth_stencil_alpha(renderer->cso, dsa); + + if (manual_two_sides) + cso_save_rasterizer(renderer->cso); - /* sampler */ - { - struct pipe_sampler_state sampler; - memset(&sampler, 0, sizeof(sampler)); - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler.normalized_coords = 1; - cso_single_sampler(ctx->cso, 0, &sampler); - cso_single_sampler_done(ctx->cso); + renderer->u.polygon_stencil.manual_two_sides = manual_two_sides; + renderer->u.polygon_stencil.restore_dsa = restore_dsa; + renderer->state = RENDERER_STATE_POLYGON_STENCIL; + + return VG_TRUE; +} + +/** + * Render a polygon silhouette to stencil buffer. 
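Together with the fill stage below this is a stencil-then-cover scheme: the silhouette pass accumulates winding with color writes off (INCR/DECR_WRAP for VG_NON_ZERO, with a two-pass cull-face fallback when PIPE_CAP_TWO_SIDED_STENCIL is absent, INVERT for VG_EVEN_ODD), and the fill pass then covers the bounding box wherever the stencil is non-zero, its REPLACE ops resetting the stencil back to the zero reference as it goes. The polygon code at the top of this diff drives it roughly like this; the example_* wrapper and the VG_FALSE flags are illustrative:

static void example_fill_polygon(struct renderer *renderer,
                                 struct pipe_vertex_element *velem,
                                 struct pipe_vertex_buffer *vbuf,
                                 VGuint num_verts,
                                 VGfloat min_x, VGfloat min_y,
                                 VGfloat max_x, VGfloat max_y)
{
   /* pass 1: rasterize the fan into the stencil buffer only */
   renderer_polygon_stencil_begin(renderer, velem, VG_NON_ZERO, VG_FALSE);
   renderer_polygon_stencil(renderer, vbuf,
                            PIPE_PRIM_TRIANGLE_FAN, 0, num_verts);
   renderer_polygon_stencil_end(renderer);

   /* pass 2: cover the bounding box where the stencil is non-zero */
   renderer_polygon_fill_begin(renderer, VG_FALSE);
   renderer_polygon_fill(renderer, min_x, min_y, max_x, max_y);
   renderer_polygon_fill_end(renderer);
}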
+ */ +void renderer_polygon_stencil(struct renderer *renderer, + struct pipe_vertex_buffer *vbuf, + VGuint mode, VGuint start, VGuint count) +{ + assert(renderer->state == RENDERER_STATE_POLYGON_STENCIL); + + renderer->pipe->set_vertex_buffers(renderer->pipe, 1, vbuf); + + if (!renderer->u.polygon_stencil.manual_two_sides) { + util_draw_arrays(renderer->pipe, mode, start, count); } + else { + struct pipe_rasterizer_state raster; + struct pipe_depth_stencil_alpha_state dsa; - vg_set_viewport(ctx->owner, VEGA_Y0_TOP); + raster = renderer->g3d.rasterizer; + dsa = renderer->u.polygon_stencil.dsa; - /* texture */ - cso_set_fragment_sampler_views(ctx->cso, 1, &src); + /* front */ + raster.cull_face = PIPE_FACE_BACK; + dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_INCR_WRAP; - /* shaders */ - cso_set_vertex_shader_handle(ctx->cso, vg_texture_vs(ctx->owner)); - cso_set_fragment_shader_handle(ctx->cso, ctx->fs); + cso_set_rasterizer(renderer->cso, &raster); + cso_set_depth_stencil_alpha(renderer->cso, &dsa); + util_draw_arrays(renderer->pipe, mode, start, count); - /* drawing dest */ - memset(&fb, 0, sizeof(fb)); - fb.width = dst_surf->width; - fb.height = dst_surf->height; - fb.nr_cbufs = 1; - fb.cbufs[0] = dst_surf; - { - VGint i; - for (i = 1; i < PIPE_MAX_COLOR_BUFS; ++i) - fb.cbufs[i] = 0; + /* back */ + raster.cull_face = PIPE_FACE_FRONT; + dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_DECR_WRAP; + + cso_set_rasterizer(renderer->cso, &raster); + cso_set_depth_stencil_alpha(renderer->cso, &dsa); + util_draw_arrays(renderer->pipe, mode, start, count); } - cso_set_framebuffer(ctx->cso, &fb); +} + +/** + * End polygon silhouette rendering. + */ +void renderer_polygon_stencil_end(struct renderer *renderer) +{ + assert(renderer->state == RENDERER_STATE_POLYGON_STENCIL); - /* draw quad */ - buf = setup_vertex_data_tex(ctx, - dx1, dy1, - dx2, dy2, - s0, t0, s1, t1, - 0.0f); + if (renderer->u.polygon_stencil.manual_two_sides) + cso_restore_rasterizer(renderer->cso); - if (buf) { - cso_set_vertex_elements(ctx->cso, 2, ctx->owner->velems); - util_draw_vertex_buffer(ctx->pipe, buf, 0, - PIPE_PRIM_TRIANGLE_FAN, - 4, /* verts */ - 2); /* attribs/vert */ + cso_restore_vertex_elements(renderer->cso); + + /* restore color writes */ + cso_restore_blend(renderer->cso); + + if (renderer->u.polygon_stencil.restore_dsa) + cso_restore_depth_stencil_alpha(renderer->cso); + + renderer->state = RENDERER_STATE_INIT; +} + +/** + * Prepare the renderer for polygon filling. + */ +VGboolean renderer_polygon_fill_begin(struct renderer *renderer, + VGboolean save_dsa) +{ + struct pipe_depth_stencil_alpha_state dsa; + + assert(renderer->state == RENDERER_STATE_INIT); + + if (save_dsa) + cso_save_depth_stencil_alpha(renderer->cso); + + /* setup stencil ops */ + memset(&dsa, 0, sizeof(dsa)); + dsa.stencil[0].enabled = 1; + dsa.stencil[0].func = PIPE_FUNC_NOTEQUAL; + dsa.stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE; + dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE; + dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; + dsa.stencil[0].valuemask = ~0; + dsa.stencil[0].writemask = ~0; + dsa.depth = renderer->g3d.dsa.depth; + cso_set_depth_stencil_alpha(renderer->cso, &dsa); + + renderer->state = RENDERER_STATE_POLYGON_FILL; + + return VG_TRUE; +} + +/** + * Fill a polygon. 
+ */ +void renderer_polygon_fill(struct renderer *renderer, + VGfloat min_x, VGfloat min_y, + VGfloat max_x, VGfloat max_y) +{ + assert(renderer->state == RENDERER_STATE_POLYGON_FILL); + + renderer_quad_pos(renderer, min_x, min_y, max_x, max_y, VG_TRUE); + renderer_quad_draw(renderer); +} + +/** + * End polygon filling. + */ +void renderer_polygon_fill_end(struct renderer *renderer) +{ + assert(renderer->state == RENDERER_STATE_POLYGON_FILL); + + cso_restore_depth_stencil_alpha(renderer->cso); - pipe_resource_reference( &buf, - NULL ); + renderer->state = RENDERER_STATE_INIT; +} + +struct renderer * renderer_create(struct vg_context *owner) +{ + struct renderer *renderer; + struct pipe_rasterizer_state *raster; + struct pipe_stencil_ref sr; + VGint i; + + renderer = CALLOC_STRUCT(renderer); + if (!renderer) + return NULL; + + renderer->pipe = owner->pipe; + renderer->cso = owner->cso_context; + + /* init vertex data that doesn't change */ + for (i = 0; i < 4; i++) + renderer->vertices[i][0][3] = 1.0f; /* w */ + + for (i = 0; i < 2; i++) { + renderer->velems[i].src_offset = i * 4 * sizeof(float); + renderer->velems[i].instance_divisor = 0; + renderer->velems[i].vertex_buffer_index = 0; + renderer->velems[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + } + cso_set_vertex_elements(renderer->cso, 2, renderer->velems); + + /* GL rasterization rules */ + raster = &renderer->g3d.rasterizer; + memset(raster, 0, sizeof(*raster)); + raster->gl_rasterization_rules = 1; + cso_set_rasterizer(renderer->cso, raster); + + /* fixed at 0 */ + memset(&sr, 0, sizeof(sr)); + cso_set_stencil_ref(renderer->cso, &sr); + + renderer_set_vs(renderer, RENDERER_VS_PLAIN); + + renderer->state = RENDERER_STATE_INIT; + + return renderer; +} + +void renderer_destroy(struct renderer *ctx) +{ + int i; + + for (i = 0; i < NUM_RENDERER_VS; i++) { + if (ctx->cached_vs[i]) + cso_delete_vertex_shader(ctx->cso, ctx->cached_vs[i]); + } + for (i = 0; i < NUM_RENDERER_FS; i++) { + if (ctx->cached_fs[i]) + cso_delete_fragment_shader(ctx->cso, ctx->cached_fs[i]); + } + + pipe_resource_reference(&ctx->vs_cbuf, NULL); + pipe_resource_reference(&ctx->fs_cbuf, NULL); + + FREE(ctx); +} + +static void update_clip_state(struct renderer *renderer, + const struct vg_state *state) +{ + struct pipe_depth_stencil_alpha_state *dsa = &renderer->g3d.dsa; + + memset(dsa, 0, sizeof(struct pipe_depth_stencil_alpha_state)); + + if (state->scissoring) { + struct pipe_framebuffer_state *fb = &renderer->g3d.fb; + int i; + + renderer_scissor_begin(renderer, VG_FALSE); + + for (i = 0; i < state->scissor_rects_num; ++i) { + const float x = state->scissor_rects[i * 4 + 0].f; + const float y = state->scissor_rects[i * 4 + 1].f; + const float width = state->scissor_rects[i * 4 + 2].f; + const float height = state->scissor_rects[i * 4 + 3].f; + VGint x0, y0, x1, y1, iw, ih; + + x0 = (VGint) x; + y0 = (VGint) y; + if (x0 < 0) + x0 = 0; + if (y0 < 0) + y0 = 0; + + /* note that x1 and y1 are exclusive */ + x1 = (VGint) ceilf(x + width); + y1 = (VGint) ceilf(y + height); + if (x1 > fb->width) + x1 = fb->width; + if (y1 > fb->height) + y1 = fb->height; + + iw = x1 - x0; + ih = y1 - y0; + if (iw > 0 && ih> 0 ) + renderer_scissor(renderer, x0, y0, iw, ih); + } + + renderer_scissor_end(renderer); + + dsa->depth.enabled = 1; /* glEnable(GL_DEPTH_TEST); */ + dsa->depth.writemask = 0;/*glDepthMask(FALSE);*/ + dsa->depth.func = PIPE_FUNC_GEQUAL; } +} + +static void renderer_validate_blend(struct renderer *renderer, + const struct vg_state *state, + enum pipe_format 
fb_format) +{ + struct pipe_blend_state blend; + + memset(&blend, 0, sizeof(blend)); + blend.rt[0].colormask = PIPE_MASK_RGBA; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - /* restore state we changed */ - cso_restore_blend(ctx->cso); - cso_restore_samplers(ctx->cso); - cso_restore_fragment_sampler_views(ctx->cso); - cso_restore_framebuffer(ctx->cso); - cso_restore_vertex_shader(ctx->cso); - cso_restore_fragment_shader(ctx->cso); - cso_restore_viewport(ctx->cso); + /* TODO alpha masking happens after blending? */ + + switch (state->blend_mode) { + case VG_BLEND_SRC: + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + break; + case VG_BLEND_SRC_OVER: + if (!util_format_has_alpha(fb_format)) { + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; + blend.rt[0].blend_enable = 1; + } + break; + case VG_BLEND_SRC_IN: + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_DST_ALPHA; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].blend_enable = 1; + break; + case VG_BLEND_DST_IN: + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_SRC_ALPHA; + blend.rt[0].blend_enable = 1; + break; + case VG_BLEND_DST_OVER: + case VG_BLEND_MULTIPLY: + case VG_BLEND_SCREEN: + case VG_BLEND_DARKEN: + case VG_BLEND_LIGHTEN: + case VG_BLEND_ADDITIVE: + /* need a shader */ + break; + default: + assert(!"not implemented blend mode"); + break; + } - pipe_surface_reference(&dst_surf, NULL); + cso_set_blend(renderer->cso, &blend); +} + +/** + * Propogate OpenVG state changes to the renderer. Only framebuffer, blending + * and scissoring states are relevant here. 
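renderer_validate() below is meant to be fed accumulated dirty bits by the context before drawing; the blend modes the fixed-function blender cannot express (DST_OVER, MULTIPLY, SCREEN, DARKEN, LIGHTEN, ADDITIVE) are deliberately left untouched here and handled by the shader path instead (see blend_use_shader() in shader.c further down). A hypothetical call site, with ctx standing for the owning vg_context:

   /* e.g. after the framebuffer was replaced and the app changed the blend mode */
   renderer_validate(ctx->renderer,
                     FRAMEBUFFER_DIRTY | DEPTH_STENCIL_DIRTY | BLEND_DIRTY,
                     ctx->draw_buffer, &ctx->state.vg);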
+ */ +void renderer_validate(struct renderer *renderer, + VGbitfield dirty, + const struct st_framebuffer *stfb, + const struct vg_state *state) +{ + assert(renderer->state == RENDERER_STATE_INIT); + + dirty |= renderer->dirty; + renderer->dirty = 0; + + if (dirty & FRAMEBUFFER_DIRTY) { + struct pipe_framebuffer_state *fb = &renderer->g3d.fb; + struct matrix *proj = &renderer->projection; + + memset(fb, 0, sizeof(struct pipe_framebuffer_state)); + fb->width = stfb->width; + fb->height = stfb->height; + fb->nr_cbufs = 1; + fb->cbufs[0] = stfb->strb->surface; + fb->zsbuf = stfb->dsrb->surface; + + cso_set_framebuffer(renderer->cso, fb); + vg_set_viewport(renderer, VEGA_Y0_BOTTOM); + + matrix_load_identity(proj); + matrix_translate(proj, -1.0f, -1.0f); + matrix_scale(proj, 2.0f / fb->width, 2.0f / fb->height); + + /* we also got a new depth buffer */ + if (dirty & DEPTH_STENCIL_DIRTY) { + renderer->pipe->clear(renderer->pipe, + PIPE_CLEAR_DEPTHSTENCIL, NULL, 0.0, 0); + } + } + + /* must be last because it renders to the depth buffer*/ + if (dirty & DEPTH_STENCIL_DIRTY) { + update_clip_state(renderer, state); + cso_set_depth_stencil_alpha(renderer->cso, &renderer->g3d.dsa); + } + + if (dirty & BLEND_DIRTY) + renderer_validate_blend(renderer, state, stfb->strb->format); +} + +/** + * Prepare the renderer for OpenVG pipeline. + */ +void renderer_validate_for_shader(struct renderer *renderer, + const struct pipe_sampler_state **samplers, + struct pipe_sampler_view **views, + VGint num_samplers, + const struct matrix *modelview, + void *fs, + const void *const_buffer, + VGint const_buffer_len) +{ + struct matrix mvp = renderer->projection; + + /* will be used in POLYGON_STENCIL and POLYGON_FILL */ + matrix_mult(&mvp, modelview); + renderer_set_mvp(renderer, &mvp); + + renderer_set_custom_fs(renderer, fs, + samplers, views, num_samplers, + const_buffer, const_buffer_len); +} + +void renderer_validate_for_mask_rendering(struct renderer *renderer, + struct pipe_surface *dst, + const struct matrix *modelview) +{ + struct matrix mvp = renderer->projection; + + /* will be used in POLYGON_STENCIL and POLYGON_FILL */ + matrix_mult(&mvp, modelview); + renderer_set_mvp(renderer, &mvp); + + renderer_set_target(renderer, dst, renderer->g3d.fb.zsbuf, VG_FALSE); + renderer_set_blend(renderer, ~0); + renderer_set_fs(renderer, RENDERER_FS_WHITE); + + /* set internal dirty flags (hacky!) 
*/ + renderer->dirty = FRAMEBUFFER_DIRTY | BLEND_DIRTY; } void renderer_copy_surface(struct renderer *ctx, @@ -409,13 +1428,11 @@ void renderer_copy_surface(struct renderer *ctx, { struct pipe_context *pipe = ctx->pipe; struct pipe_screen *screen = pipe->screen; - struct pipe_resource *buf; struct pipe_sampler_view view_templ; struct pipe_sampler_view *view; + struct pipe_box src_box; struct pipe_resource texTemp, *tex; - struct pipe_subresource subsrc, subdst; - struct pipe_framebuffer_state fb; - struct st_framebuffer *stfb = ctx->owner->draw_buffer; + const struct pipe_framebuffer_state *fb = &ctx->g3d.fb; const int srcW = abs(srcX1 - srcX0); const int srcH = abs(srcY1 - srcY0); const int srcLeft = MIN2(srcX0, srcX1); @@ -458,6 +1475,7 @@ void renderer_copy_surface(struct renderer *ctx, texTemp.width0 = srcW; texTemp.height0 = srcH; texTemp.depth0 = 1; + texTemp.array_size = 1; texTemp.bind = PIPE_BIND_SAMPLER_VIEW; tex = screen->resource_create(screen, &texTemp); @@ -470,100 +1488,35 @@ void renderer_copy_surface(struct renderer *ctx, if (!view) return; - subdst.face = 0; - subdst.level = 0; - subsrc.face = src->face; - subsrc.level = src->level; + u_box_2d_zslice(srcLeft, srcTop, src->u.tex.first_layer, srcW, srcH, &src_box); pipe->resource_copy_region(pipe, - tex, subdst, 0, 0, 0, /* dest */ - src->texture, subsrc, srcLeft, srcTop, src->zslice, /* src */ - srcW, srcH); /* size */ - - /* save state (restored below) */ - cso_save_blend(ctx->cso); - cso_save_samplers(ctx->cso); - cso_save_fragment_sampler_views(ctx->cso); - cso_save_framebuffer(ctx->cso); - cso_save_fragment_shader(ctx->cso); - cso_save_vertex_shader(ctx->cso); - cso_save_viewport(ctx->cso); - - /* set misc state we care about */ - { - struct pipe_blend_state blend; - memset(&blend, 0, sizeof(blend)); - blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.rt[0].colormask = PIPE_MASK_RGBA; - cso_set_blend(ctx->cso, &blend); + tex, 0, 0, 0, 0, /* dest */ + src->texture, 0, &src_box); + + assert(floatsEqual(z, 0.0f)); + + /* draw */ + if (fb->cbufs[0] == dst) { + /* transform back to surface coordinates */ + dstY0 = dst->height - dstY0; + dstY1 = dst->height - dstY1; + + if (renderer_drawtex_begin(ctx, view)) { + renderer_drawtex(ctx, + dstX0, dstY0, dstX1 - dstX0, dstY1 - dstY0, + 0, 0, view->texture->width0, view->texture->height0); + renderer_drawtex_end(ctx); + } } - - vg_set_viewport(ctx->owner, VEGA_Y0_TOP); - - /* sampler */ - { - struct pipe_sampler_state sampler; - memset(&sampler, 0, sizeof(sampler)); - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler.normalized_coords = 1; - cso_single_sampler(ctx->cso, 0, &sampler); - cso_single_sampler_done(ctx->cso); - } - - /* texture */ - cso_set_fragment_sampler_views(ctx->cso, 1, &view); - - /* shaders */ - cso_set_fragment_shader_handle(ctx->cso, ctx->fs); - cso_set_vertex_shader_handle(ctx->cso, vg_texture_vs(ctx->owner)); - - /* drawing dest */ - if (stfb->strb->surface != dst) { - memset(&fb, 0, sizeof(fb)); - fb.width = dst->width; - fb.height = dst->height; - fb.nr_cbufs = 1; - fb.cbufs[0] = dst; - fb.zsbuf = stfb->dsrb->surface; - 
cso_set_framebuffer(ctx->cso, &fb); + else { + if (renderer_copy_begin(ctx, dst, VG_TRUE, view)) { + renderer_copy(ctx, + dstX0, dstY0, dstX1 - dstX0, dstY1 - dstY0, + 0, 0, view->texture->width0, view->texture->height0); + renderer_copy_end(ctx); + } } - - /* draw quad */ - buf = setup_vertex_data(ctx, - (float) dstX0, (float) dstY0, - (float) dstX1, (float) dstY1, z); - - if (buf) { - cso_set_vertex_elements(ctx->cso, 2, ctx->owner->velems); - util_draw_vertex_buffer(ctx->pipe, buf, 0, - PIPE_PRIM_TRIANGLE_FAN, - 4, /* verts */ - 2); /* attribs/vert */ - - pipe_resource_reference( &buf, - NULL ); - } - - - /* restore state we changed */ - cso_restore_blend(ctx->cso); - cso_restore_samplers(ctx->cso); - cso_restore_fragment_sampler_views(ctx->cso); - cso_restore_framebuffer(ctx->cso); - cso_restore_fragment_shader(ctx->cso); - cso_restore_vertex_shader(ctx->cso); - cso_restore_viewport(ctx->cso); - - pipe_resource_reference(&tex, NULL); - pipe_sampler_view_reference(&view, NULL); } void renderer_texture_quad(struct renderer *r, @@ -575,36 +1528,38 @@ void renderer_texture_quad(struct renderer *r, VGfloat x3, VGfloat y3, VGfloat x4, VGfloat y4) { - struct pipe_context *pipe = r->pipe; - struct pipe_resource *buf; - VGfloat s0, t0, s1, t1; + const VGfloat z = 0.0f; + assert(r->state == RENDERER_STATE_INIT); assert(tex->width0 != 0); assert(tex->height0 != 0); - s0 = x1offset / tex->width0; - s1 = x2offset / tex->width0; - t0 = y1offset / tex->height0; - t1 = y2offset / tex->height0; - cso_save_vertex_shader(r->cso); - /* shaders */ - cso_set_vertex_shader_handle(r->cso, vg_texture_vs(r->owner)); - /* draw quad */ - buf = setup_vertex_data_qtex(r, x1, y1, x2, y2, x3, y3, x4, y4, - s0, t0, s1, t1, 0.0f); + renderer_set_vs(r, RENDERER_VS_TEXTURE); - if (buf) { - cso_set_vertex_elements(r->cso, 2, r->owner->velems); - util_draw_vertex_buffer(pipe, buf, 0, - PIPE_PRIM_TRIANGLE_FAN, - 4, /* verts */ - 2); /* attribs/vert */ + /* manually set up positions */ + r->vertices[0][0][0] = x1; + r->vertices[0][0][1] = y1; + r->vertices[0][0][2] = z; - pipe_resource_reference(&buf, - NULL); - } + r->vertices[1][0][0] = x2; + r->vertices[1][0][1] = y2; + r->vertices[1][0][2] = z; + + r->vertices[2][0][0] = x3; + r->vertices[2][0][1] = y3; + r->vertices[2][0][2] = z; + + r->vertices[3][0][0] = x4; + r->vertices[3][0][1] = y4; + r->vertices[3][0][2] = z; + + /* texcoords */ + renderer_quad_texcoord(r, x1offset, y1offset, + x2offset, y2offset, tex->width0, tex->height0); + + renderer_quad_draw(r); cso_restore_vertex_shader(r->cso); } diff --git a/src/gallium/state_trackers/vega/renderer.h b/src/gallium/state_trackers/vega/renderer.h index b1a9fb58be6..fe719936580 100644 --- a/src/gallium/state_trackers/vega/renderer.h +++ b/src/gallium/state_trackers/vega/renderer.h @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright 2009 VMware, Inc. All Rights Reserved. + * Copyright 2010 LunarG, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -32,23 +33,110 @@ struct renderer; struct vg_context; +struct vg_state; +struct st_framebuffer; struct pipe_resource; +struct pipe_sampler_state; struct pipe_sampler_view; struct pipe_surface; +struct pipe_vertex_element; +struct pipe_vertex_buffer; +struct matrix; struct renderer *renderer_create(struct vg_context *owner); void renderer_destroy(struct renderer *); -void renderer_draw_quad(struct renderer *, - VGfloat x1, VGfloat y1, - VGfloat x2, VGfloat y2, - VGfloat depth); -void renderer_draw_texture(struct renderer *, - struct pipe_resource *texture, - VGfloat x1offset, VGfloat y1offset, - VGfloat x2offset, VGfloat y2offset, - VGfloat x1, VGfloat y1, - VGfloat x2, VGfloat y2); +void renderer_validate(struct renderer *renderer, + VGbitfield dirty, + const struct st_framebuffer *stfb, + const struct vg_state *state); + +void renderer_validate_for_shader(struct renderer *renderer, + const struct pipe_sampler_state **samplers, + struct pipe_sampler_view **views, + VGint num_samplers, + const struct matrix *modelview, + void *fs, + const void *const_buffer, + VGint const_buffer_len); + +void renderer_validate_for_mask_rendering(struct renderer *renderer, + struct pipe_surface *dst, + const struct matrix *modelview); + +VGboolean renderer_copy_begin(struct renderer *renderer, + struct pipe_surface *dst, + VGboolean y0_top, + struct pipe_sampler_view *src); + +void renderer_copy(struct renderer *renderer, + VGint x, VGint y, VGint w, VGint h, + VGint sx, VGint sy, VGint sw, VGint sh); + +void renderer_copy_end(struct renderer *renderer); + +VGboolean renderer_drawtex_begin(struct renderer *renderer, + struct pipe_sampler_view *src); + +void renderer_drawtex(struct renderer *renderer, + VGint x, VGint y, VGint w, VGint h, + VGint sx, VGint sy, VGint sw, VGint sh); + +void renderer_drawtex_end(struct renderer *renderer); + +VGboolean renderer_scissor_begin(struct renderer *renderer, + VGboolean restore_dsa); + +void renderer_scissor(struct renderer *renderer, + VGint x, VGint y, VGint width, VGint height); + +void renderer_scissor_end(struct renderer *renderer); + +VGboolean renderer_clear_begin(struct renderer *renderer); + +void renderer_clear(struct renderer *renderer, + VGint x, VGint y, VGint width, VGint height, + const VGfloat color[4]); + +void renderer_clear_end(struct renderer *renderer); + +VGboolean renderer_filter_begin(struct renderer *renderer, + struct pipe_resource *dst, + VGboolean y0_top, + VGbitfield channel_mask, + const struct pipe_sampler_state **samplers, + struct pipe_sampler_view **views, + VGint num_samplers, + void *fs, + const void *const_buffer, + VGint const_buffer_len); + +void renderer_filter(struct renderer *renderer, + VGint x, VGint y, VGint w, VGint h, + VGint sx, VGint sy, VGint sw, VGint sh); + +void renderer_filter_end(struct renderer *renderer); + +VGboolean renderer_polygon_stencil_begin(struct renderer *renderer, + struct pipe_vertex_element *velem, + VGFillRule rule, + VGboolean restore_dsa); + +void renderer_polygon_stencil(struct renderer *renderer, + struct pipe_vertex_buffer *vbuf, + VGuint mode, VGuint start, VGuint count); + +void renderer_polygon_stencil_end(struct renderer *renderer); + +VGboolean renderer_polygon_fill_begin(struct renderer *renderer, + VGboolean save_dsa); + +void renderer_polygon_fill(struct renderer *renderer, + VGfloat min_x, VGfloat min_y, + VGfloat max_x, VGfloat max_y); + +void 
renderer_polygon_fill_end(struct renderer *renderer); + void renderer_texture_quad(struct renderer *, struct pipe_resource *texture, VGfloat x1offset, VGfloat y1offset, @@ -57,13 +145,7 @@ void renderer_texture_quad(struct renderer *, VGfloat x2, VGfloat y2, VGfloat x3, VGfloat y3, VGfloat x4, VGfloat y4); -void renderer_copy_texture(struct renderer *r, - struct pipe_sampler_view *src, - VGfloat sx1, VGfloat sy1, - VGfloat sx2, VGfloat sy2, - struct pipe_resource *dst, - VGfloat dx1, VGfloat dy1, - VGfloat dx2, VGfloat dy2); + void renderer_copy_surface(struct renderer *r, struct pipe_surface *src, int sx1, int sy1, diff --git a/src/gallium/state_trackers/vega/shader.c b/src/gallium/state_trackers/vega/shader.c index eab1349639c..2a6bae8a630 100644 --- a/src/gallium/state_trackers/vega/shader.c +++ b/src/gallium/state_trackers/vega/shader.c @@ -31,22 +31,27 @@ #include "paint.h" #include "mask.h" #include "image.h" +#include "renderer.h" #include "pipe/p_context.h" -#include "pipe/p_screen.h" #include "pipe/p_state.h" -#include "util/u_inlines.h" #include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_format.h" -#define MAX_CONSTANTS 20 +#define MAX_CONSTANTS 28 struct shader { struct vg_context *context; + VGboolean color_transform; VGboolean masking; struct vg_paint *paint; struct vg_image *image; + struct matrix modelview; + struct matrix paint_matrix; + VGboolean drawing_image; VGImageMode image_mode; @@ -71,6 +76,11 @@ void shader_destroy(struct shader *shader) FREE(shader); } +void shader_set_color_transform(struct shader *shader, VGboolean set) +{ + shader->color_transform = set; +} + void shader_set_masking(struct shader *shader, VGboolean set) { shader->masking = set; @@ -91,49 +101,64 @@ struct vg_paint * shader_paint(struct shader *shader) return shader->paint; } - -static void setup_constant_buffer(struct shader *shader) +static VGint setup_constant_buffer(struct shader *shader) { - struct vg_context *ctx = shader->context; - struct pipe_context *pipe = shader->context->pipe; - struct pipe_resource **cbuf = &shader->cbuf; + const struct vg_state *state = &shader->context->state.vg; VGint param_bytes = paint_constant_buffer_size(shader->paint); - float temp_buf[MAX_CONSTANTS]; + VGint i; + + param_bytes += sizeof(VGfloat) * 8; + assert(param_bytes <= sizeof(shader->constants)); + + if (state->color_transform) { + for (i = 0; i < 8; i++) { + VGfloat val = (i < 4) ? 
127.0f : 1.0f; + shader->constants[i] = + CLAMP(state->color_transform_values[i], -val, val); + } + } + else { + memset(shader->constants, 0, sizeof(VGfloat) * 8); + } - assert(param_bytes <= sizeof(temp_buf)); - paint_fill_constant_buffer(shader->paint, temp_buf); + paint_fill_constant_buffer(shader->paint, + &shader->paint_matrix, shader->constants + 8); - if (*cbuf == NULL || - memcmp(temp_buf, shader->constants, param_bytes) != 0) - { - pipe_resource_reference(cbuf, NULL); + return param_bytes; +} + +static VGboolean blend_use_shader(struct vg_context *ctx) +{ + VGboolean advanced_blending; - memcpy(shader->constants, temp_buf, param_bytes); - *cbuf = pipe_user_buffer_create(pipe->screen, - &shader->constants, - sizeof(shader->constants), - PIPE_BIND_VERTEX_BUFFER); + switch (ctx->state.vg.blend_mode) { + case VG_BLEND_SRC_OVER: + advanced_blending = + util_format_has_alpha(ctx->draw_buffer->strb->format); + break; + case VG_BLEND_DST_OVER: + case VG_BLEND_MULTIPLY: + case VG_BLEND_SCREEN: + case VG_BLEND_DARKEN: + case VG_BLEND_LIGHTEN: + case VG_BLEND_ADDITIVE: + advanced_blending = VG_TRUE; + break; + default: + advanced_blending = VG_FALSE; + break; } - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf); + return advanced_blending; } static VGint blend_bind_samplers(struct vg_context *ctx, struct pipe_sampler_state **samplers, struct pipe_sampler_view **sampler_views) { - VGBlendMode bmode = ctx->state.vg.blend_mode; - - if (bmode == VG_BLEND_MULTIPLY || - bmode == VG_BLEND_SCREEN || - bmode == VG_BLEND_DARKEN || - bmode == VG_BLEND_LIGHTEN) { - struct st_framebuffer *stfb = ctx->draw_buffer; - - vg_prepare_blend_surface(ctx); - + if (blend_use_shader(ctx)) { samplers[2] = &ctx->blend_sampler; - sampler_views[2] = stfb->blend_texture_view; + sampler_views[2] = vg_prepare_blend_surface(ctx); if (!samplers[0] || !sampler_views[0]) { samplers[0] = samplers[2]; @@ -149,10 +174,10 @@ static VGint blend_bind_samplers(struct vg_context *ctx, return 0; } -static void setup_samplers(struct shader *shader) +static VGint setup_samplers(struct shader *shader, + struct pipe_sampler_state **samplers, + struct pipe_sampler_view **sampler_views) { - struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS]; - struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; struct vg_context *ctx = shader->context; /* a little wonky: we use the num as a boolean that just says * whether any sampler/textures have been set. the actual numbering @@ -179,10 +204,7 @@ static void setup_samplers(struct shader *shader) if (shader->drawing_image && shader->image) num += image_bind_samplers(shader->image, samplers, sampler_views); - if (num) { - cso_set_samplers(ctx->cso_context, 4, (const struct pipe_sampler_state **)samplers); - cso_set_fragment_sampler_views(ctx->cso_context, 4, sampler_views); - } + return (num) ? 
4 : 0; } static INLINE VGboolean is_format_bw(struct shader *shader) @@ -227,6 +249,9 @@ static void setup_shader_program(struct shader *shader) default: abort(); } + + if (paint_is_degenerate(shader->paint)) + shader_id = VEGA_PAINT_DEGENERATE_SHADER; } /* second stage image */ @@ -246,43 +271,86 @@ static void setup_shader_program(struct shader *shader) } } - if (shader->masking) - shader_id |= VEGA_MASK_SHADER; + if (shader->color_transform) + shader_id |= VEGA_COLOR_TRANSFORM_SHADER; - switch(blend_mode) { - case VG_BLEND_MULTIPLY: - shader_id |= VEGA_BLEND_MULTIPLY_SHADER; - break; - case VG_BLEND_SCREEN: - shader_id |= VEGA_BLEND_SCREEN_SHADER; - break; - case VG_BLEND_DARKEN: - shader_id |= VEGA_BLEND_DARKEN_SHADER; - break; - case VG_BLEND_LIGHTEN: - shader_id |= VEGA_BLEND_LIGHTEN_SHADER; - break; - default: - /* handled by pipe_blend_state */ - break; + if (blend_use_shader(ctx)) { + if (shader->drawing_image && shader->image_mode == VG_DRAW_IMAGE_STENCIL) + shader_id |= VEGA_ALPHA_PER_CHANNEL_SHADER; + else + shader_id |= VEGA_ALPHA_NORMAL_SHADER; + + switch(blend_mode) { + case VG_BLEND_SRC: + shader_id |= VEGA_BLEND_SRC_SHADER; + break; + case VG_BLEND_SRC_OVER: + shader_id |= VEGA_BLEND_SRC_OVER_SHADER; + break; + case VG_BLEND_DST_OVER: + shader_id |= VEGA_BLEND_DST_OVER_SHADER; + break; + case VG_BLEND_SRC_IN: + shader_id |= VEGA_BLEND_SRC_IN_SHADER; + break; + case VG_BLEND_DST_IN: + shader_id |= VEGA_BLEND_DST_IN_SHADER; + break; + case VG_BLEND_MULTIPLY: + shader_id |= VEGA_BLEND_MULTIPLY_SHADER; + break; + case VG_BLEND_SCREEN: + shader_id |= VEGA_BLEND_SCREEN_SHADER; + break; + case VG_BLEND_DARKEN: + shader_id |= VEGA_BLEND_DARKEN_SHADER; + break; + case VG_BLEND_LIGHTEN: + shader_id |= VEGA_BLEND_LIGHTEN_SHADER; + break; + case VG_BLEND_ADDITIVE: + shader_id |= VEGA_BLEND_ADDITIVE_SHADER; + break; + default: + assert(0); + break; + } + } + else { + /* update alpha of the source */ + if (shader->drawing_image && shader->image_mode == VG_DRAW_IMAGE_STENCIL) + shader_id |= VEGA_ALPHA_PER_CHANNEL_SHADER; } + if (shader->masking) + shader_id |= VEGA_MASK_SHADER; + if (black_white) shader_id |= VEGA_BW_SHADER; shader->fs = shaders_cache_fill(ctx->sc, shader_id); - cso_set_fragment_shader_handle(ctx->cso_context, shader->fs); } void shader_bind(struct shader *shader) { + struct vg_context *ctx = shader->context; + struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; + VGint num_samplers, param_bytes; + /* first resolve the real paint type */ paint_resolve_type(shader->paint); - setup_constant_buffer(shader); - setup_samplers(shader); + num_samplers = setup_samplers(shader, samplers, sampler_views); + param_bytes = setup_constant_buffer(shader); setup_shader_program(shader); + + renderer_validate_for_shader(ctx->renderer, + (const struct pipe_sampler_state **) samplers, + sampler_views, num_samplers, + &shader->modelview, + shader->fs, (const void *) shader->constants, param_bytes); } void shader_set_image_mode(struct shader *shader, VGImageMode image_mode) @@ -309,3 +377,28 @@ void shader_set_image(struct shader *shader, struct vg_image *img) { shader->image = img; } + +/** + * Set the transformation to map a vertex to the surface coordinates. + */ +void shader_set_surface_matrix(struct shader *shader, + const struct matrix *mat) +{ + shader->modelview = *mat; +} + +/** + * Set the transformation to map a pixel to the paint coordinates. 
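The translate/scale appended in shader_set_paint_matrix() below amounts to a fixed prefix on the fragment's window position before the caller-supplied mat is applied, assuming the matrix helpers append their transform on the right, the same convention the projection setup in renderer_validate() relies on. A hypothetical helper spelling that prefix out:

static void example_window_to_paint_prefix(VGfloat wx, VGfloat wy,
                                           VGfloat stfb_height,
                                           VGfloat *px, VGfloat *py)
{
   /* move to the pixel center and flip y across the surface height;
    * the caller-supplied 'mat' is applied after this step */
   *px = wx + 0.5f;
   *py = (stfb_height - 0.5f) - wy;
}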
+ */ +void shader_set_paint_matrix(struct shader *shader, const struct matrix *mat) +{ + const struct st_framebuffer *stfb = shader->context->draw_buffer; + const VGfloat px_center_offset = 0.5f; + + memcpy(&shader->paint_matrix, mat, sizeof(*mat)); + + /* make it window-to-paint for the shaders */ + matrix_translate(&shader->paint_matrix, px_center_offset, + stfb->height - 1.0f + px_center_offset); + matrix_scale(&shader->paint_matrix, 1.0f, -1.0f); +} diff --git a/src/gallium/state_trackers/vega/shader.h b/src/gallium/state_trackers/vega/shader.h index 847eee6a310..8b97e537efe 100644 --- a/src/gallium/state_trackers/vega/shader.h +++ b/src/gallium/state_trackers/vega/shader.h @@ -33,10 +33,13 @@ struct shader; struct vg_paint; struct vg_context; struct vg_image; +struct matrix; struct shader *shader_create(struct vg_context *context); void shader_destroy(struct shader *shader); +void shader_set_color_transform(struct shader *shader, VGboolean set); + void shader_set_masking(struct shader *shader, VGboolean set); VGboolean shader_is_masking(struct shader *shader); @@ -51,6 +54,10 @@ VGboolean shader_drawing_image(struct shader *shader); void shader_set_image(struct shader *shader, struct vg_image *img); +void shader_set_surface_matrix(struct shader *shader, + const struct matrix *mat); +void shader_set_paint_matrix(struct shader *shader, const struct matrix *mat); + void shader_bind(struct shader *shader); #endif diff --git a/src/gallium/state_trackers/vega/shaders_cache.c b/src/gallium/state_trackers/vega/shaders_cache.c index e002a7ed428..023996ce2d8 100644 --- a/src/gallium/state_trackers/vega/shaders_cache.c +++ b/src/gallium/state_trackers/vega/shaders_cache.c @@ -50,14 +50,16 @@ /* Essentially we construct an ubber-shader based on the state * of the pipeline. 
The stages are: - * 1) Fill (mandatory, solid color/gradient/pattern/image draw) - * 2) Image composition (image mode multiply and stencil) - * 3) Mask - * 4) Extended blend (multiply/screen/darken/lighten) - * 5) Premultiply/Unpremultiply - * 6) Color transform (to black and white) + * 1) Paint generation (color/gradient/pattern) + * 2) Image composition (normal/multiply/stencil) + * 3) Color transform + * 4) Per-channel alpha generation + * 5) Extended blend (multiply/screen/darken/lighten) + * 6) Mask + * 7) Premultiply/Unpremultiply + * 8) Color transform (to black and white) */ -#define SHADER_STAGES 6 +#define SHADER_STAGES 8 struct cached_shader { void *driver_shader; @@ -86,13 +88,6 @@ static INLINE struct tgsi_token *tokens_from_assembly(const char *txt, int num_t return tokens; } -#define ALL_FILLS (VEGA_SOLID_FILL_SHADER | \ - VEGA_LINEAR_GRADIENT_SHADER | \ - VEGA_RADIAL_GRADIENT_SHADER | \ - VEGA_PATTERN_SHADER | \ - VEGA_IMAGE_NORMAL_SHADER) - - /* static const char max_shader_preamble[] = "FRAG\n" @@ -257,98 +252,126 @@ create_shader(struct pipe_context *pipe, int id, struct pipe_shader_state *shader) { - int idx = 0; + int idx = 0, sh; const struct shader_asm_info * shaders[SHADER_STAGES]; - /* the shader has to have a fill */ - debug_assert(id & ALL_FILLS); - /* first stage */ - if (id & VEGA_SOLID_FILL_SHADER) { - debug_assert(idx == 0); - shaders[idx] = &shaders_asm[0]; - debug_assert(shaders_asm[0].id == VEGA_SOLID_FILL_SHADER); - ++idx; - } - if ((id & VEGA_LINEAR_GRADIENT_SHADER)) { - debug_assert(idx == 0); - shaders[idx] = &shaders_asm[1]; - debug_assert(shaders_asm[1].id == VEGA_LINEAR_GRADIENT_SHADER); - ++idx; - } - if ((id & VEGA_RADIAL_GRADIENT_SHADER)) { - debug_assert(idx == 0); - shaders[idx] = &shaders_asm[2]; - debug_assert(shaders_asm[2].id == VEGA_RADIAL_GRADIENT_SHADER); - ++idx; - } - if ((id & VEGA_PATTERN_SHADER)) { - debug_assert(idx == 0); - debug_assert(shaders_asm[3].id == VEGA_PATTERN_SHADER); - shaders[idx] = &shaders_asm[3]; - ++idx; - } - if ((id & VEGA_IMAGE_NORMAL_SHADER)) { - debug_assert(idx == 0); - debug_assert(shaders_asm[4].id == VEGA_IMAGE_NORMAL_SHADER); - shaders[idx] = &shaders_asm[4]; - ++idx; + sh = SHADERS_GET_PAINT_SHADER(id); + switch (sh << SHADERS_PAINT_SHIFT) { + case VEGA_SOLID_FILL_SHADER: + case VEGA_LINEAR_GRADIENT_SHADER: + case VEGA_RADIAL_GRADIENT_SHADER: + case VEGA_PATTERN_SHADER: + case VEGA_PAINT_DEGENERATE_SHADER: + shaders[idx] = &shaders_paint_asm[(sh >> SHADERS_PAINT_SHIFT) - 1]; + assert(shaders[idx]->id == sh); + idx++; + break; + default: + break; } /* second stage */ - if ((id & VEGA_IMAGE_MULTIPLY_SHADER)) { - debug_assert(shaders_asm[5].id == VEGA_IMAGE_MULTIPLY_SHADER); - shaders[idx] = &shaders_asm[5]; - ++idx; - } else if ((id & VEGA_IMAGE_STENCIL_SHADER)) { - debug_assert(shaders_asm[6].id == VEGA_IMAGE_STENCIL_SHADER); - shaders[idx] = &shaders_asm[6]; - ++idx; + sh = SHADERS_GET_IMAGE_SHADER(id); + switch (sh) { + case VEGA_IMAGE_NORMAL_SHADER: + case VEGA_IMAGE_MULTIPLY_SHADER: + case VEGA_IMAGE_STENCIL_SHADER: + shaders[idx] = &shaders_image_asm[(sh >> SHADERS_IMAGE_SHIFT) - 1]; + assert(shaders[idx]->id == sh); + idx++; + break; + default: + break; } + /* sanity check */ + assert(idx == ((!sh || sh == VEGA_IMAGE_NORMAL_SHADER) ? 
1 : 2)); + /* third stage */ - if ((id & VEGA_MASK_SHADER)) { - debug_assert(idx == 1); - debug_assert(shaders_asm[7].id == VEGA_MASK_SHADER); - shaders[idx] = &shaders_asm[7]; - ++idx; + sh = SHADERS_GET_COLOR_TRANSFORM_SHADER(id); + switch (sh) { + case VEGA_COLOR_TRANSFORM_SHADER: + shaders[idx] = &shaders_color_transform_asm[ + (sh >> SHADERS_COLOR_TRANSFORM_SHIFT) - 1]; + assert(shaders[idx]->id == sh); + idx++; + break; + default: + break; } /* fourth stage */ - if ((id & VEGA_BLEND_MULTIPLY_SHADER)) { - debug_assert(shaders_asm[8].id == VEGA_BLEND_MULTIPLY_SHADER); - shaders[idx] = &shaders_asm[8]; - ++idx; - } else if ((id & VEGA_BLEND_SCREEN_SHADER)) { - debug_assert(shaders_asm[9].id == VEGA_BLEND_SCREEN_SHADER); - shaders[idx] = &shaders_asm[9]; - ++idx; - } else if ((id & VEGA_BLEND_DARKEN_SHADER)) { - debug_assert(shaders_asm[10].id == VEGA_BLEND_DARKEN_SHADER); - shaders[idx] = &shaders_asm[10]; - ++idx; - } else if ((id & VEGA_BLEND_LIGHTEN_SHADER)) { - debug_assert(shaders_asm[11].id == VEGA_BLEND_LIGHTEN_SHADER); - shaders[idx] = &shaders_asm[11]; - ++idx; + sh = SHADERS_GET_ALPHA_SHADER(id); + switch (sh) { + case VEGA_ALPHA_NORMAL_SHADER: + case VEGA_ALPHA_PER_CHANNEL_SHADER: + shaders[idx] = &shaders_alpha_asm[ + (sh >> SHADERS_ALPHA_SHIFT) - 1]; + assert(shaders[idx]->id == sh); + idx++; + break; + default: + break; } /* fifth stage */ - if ((id & VEGA_PREMULTIPLY_SHADER)) { - debug_assert(shaders_asm[12].id == VEGA_PREMULTIPLY_SHADER); - shaders[idx] = &shaders_asm[12]; - ++idx; - } else if ((id & VEGA_UNPREMULTIPLY_SHADER)) { - debug_assert(shaders_asm[13].id == VEGA_UNPREMULTIPLY_SHADER); - shaders[idx] = &shaders_asm[13]; - ++idx; + sh = SHADERS_GET_BLEND_SHADER(id); + switch (sh) { + case VEGA_BLEND_SRC_SHADER: + case VEGA_BLEND_SRC_OVER_SHADER: + case VEGA_BLEND_DST_OVER_SHADER: + case VEGA_BLEND_SRC_IN_SHADER: + case VEGA_BLEND_DST_IN_SHADER: + case VEGA_BLEND_MULTIPLY_SHADER: + case VEGA_BLEND_SCREEN_SHADER: + case VEGA_BLEND_DARKEN_SHADER: + case VEGA_BLEND_LIGHTEN_SHADER: + case VEGA_BLEND_ADDITIVE_SHADER: + shaders[idx] = &shaders_blend_asm[(sh >> SHADERS_BLEND_SHIFT) - 1]; + assert(shaders[idx]->id == sh); + idx++; + break; + default: + break; } /* sixth stage */ - if ((id & VEGA_BW_SHADER)) { - debug_assert(shaders_asm[14].id == VEGA_BW_SHADER); - shaders[idx] = &shaders_asm[14]; - ++idx; + sh = SHADERS_GET_MASK_SHADER(id); + switch (sh) { + case VEGA_MASK_SHADER: + shaders[idx] = &shaders_mask_asm[(sh >> SHADERS_MASK_SHIFT) - 1]; + assert(shaders[idx]->id == sh); + idx++; + break; + default: + break; + } + + /* seventh stage */ + sh = SHADERS_GET_PREMULTIPLY_SHADER(id); + switch (sh) { + case VEGA_PREMULTIPLY_SHADER: + case VEGA_UNPREMULTIPLY_SHADER: + shaders[idx] = &shaders_premultiply_asm[ + (sh >> SHADERS_PREMULTIPLY_SHIFT) - 1]; + assert(shaders[idx]->id == sh); + idx++; + break; + default: + break; + } + + /* eighth stage */ + sh = SHADERS_GET_BW_SHADER(id); + switch (sh) { + case VEGA_BW_SHADER: + shaders[idx] = &shaders_bw_asm[(sh >> SHADERS_BW_SHIFT) - 1]; + assert(shaders[idx]->id == sh); + idx++; + break; + default: + break; } return combine_shaders(shaders, idx, pipe, shader); diff --git a/src/gallium/state_trackers/vega/shaders_cache.h b/src/gallium/state_trackers/vega/shaders_cache.h index feca58b61a9..05014f25dcc 100644 --- a/src/gallium/state_trackers/vega/shaders_cache.h +++ b/src/gallium/state_trackers/vega/shaders_cache.h @@ -33,26 +33,69 @@ struct pipe_context; struct tgsi_token; struct shaders_cache; +#define _SHADERS_PAINT_BITS 3 
+#define _SHADERS_IMAGE_BITS 2 +#define _SHADERS_COLOR_TRANSFORM_BITS 1 +#define _SHADERS_ALPHA_BITS 2 +#define _SHADERS_BLEND_BITS 4 +#define _SHADERS_MASK_BITS 1 +#define _SHADERS_PREMULTIPLY_BITS 2 +#define _SHADERS_BW_BITS 1 + +#define SHADERS_PAINT_SHIFT (0) +#define SHADERS_IMAGE_SHIFT (SHADERS_PAINT_SHIFT + _SHADERS_PAINT_BITS) +#define SHADERS_COLOR_TRANSFORM_SHIFT (SHADERS_IMAGE_SHIFT + _SHADERS_IMAGE_BITS) +#define SHADERS_ALPHA_SHIFT (SHADERS_COLOR_TRANSFORM_SHIFT + _SHADERS_COLOR_TRANSFORM_BITS) +#define SHADERS_BLEND_SHIFT (SHADERS_ALPHA_SHIFT + _SHADERS_ALPHA_BITS) +#define SHADERS_MASK_SHIFT (SHADERS_BLEND_SHIFT + _SHADERS_BLEND_BITS) +#define SHADERS_PREMULTIPLY_SHIFT (SHADERS_MASK_SHIFT + _SHADERS_MASK_BITS) +#define SHADERS_BW_SHIFT (SHADERS_PREMULTIPLY_SHIFT + _SHADERS_PREMULTIPLY_BITS) + +#define _SHADERS_GET_STAGE(stage, id) \ + ((id) & (((1 << _SHADERS_ ## stage ## _BITS) - 1) << SHADERS_ ## stage ## _SHIFT)) + +#define SHADERS_GET_PAINT_SHADER(id) _SHADERS_GET_STAGE(PAINT, id) +#define SHADERS_GET_IMAGE_SHADER(id) _SHADERS_GET_STAGE(IMAGE, id) +#define SHADERS_GET_COLOR_TRANSFORM_SHADER(id) _SHADERS_GET_STAGE(COLOR_TRANSFORM, id) +#define SHADERS_GET_ALPHA_SHADER(id) _SHADERS_GET_STAGE(ALPHA, id) +#define SHADERS_GET_BLEND_SHADER(id) _SHADERS_GET_STAGE(BLEND, id) +#define SHADERS_GET_MASK_SHADER(id) _SHADERS_GET_STAGE(MASK, id) +#define SHADERS_GET_PREMULTIPLY_SHADER(id) _SHADERS_GET_STAGE(PREMULTIPLY, id) +#define SHADERS_GET_BW_SHADER(id) _SHADERS_GET_STAGE(BW, id) + enum VegaShaderType { - VEGA_SOLID_FILL_SHADER = 1 << 0, - VEGA_LINEAR_GRADIENT_SHADER = 1 << 1, - VEGA_RADIAL_GRADIENT_SHADER = 1 << 2, - VEGA_PATTERN_SHADER = 1 << 3, - VEGA_IMAGE_NORMAL_SHADER = 1 << 4, - VEGA_IMAGE_MULTIPLY_SHADER = 1 << 5, - VEGA_IMAGE_STENCIL_SHADER = 1 << 6, + VEGA_SOLID_FILL_SHADER = 1 << SHADERS_PAINT_SHIFT, + VEGA_LINEAR_GRADIENT_SHADER = 2 << SHADERS_PAINT_SHIFT, + VEGA_RADIAL_GRADIENT_SHADER = 3 << SHADERS_PAINT_SHIFT, + VEGA_PATTERN_SHADER = 4 << SHADERS_PAINT_SHIFT, + VEGA_PAINT_DEGENERATE_SHADER = 5 << SHADERS_PAINT_SHIFT, + + VEGA_IMAGE_NORMAL_SHADER = 1 << SHADERS_IMAGE_SHIFT, + VEGA_IMAGE_MULTIPLY_SHADER = 2 << SHADERS_IMAGE_SHIFT, + VEGA_IMAGE_STENCIL_SHADER = 3 << SHADERS_IMAGE_SHIFT, + + VEGA_COLOR_TRANSFORM_SHADER = 1 << SHADERS_COLOR_TRANSFORM_SHIFT, + + VEGA_ALPHA_NORMAL_SHADER = 1 << SHADERS_ALPHA_SHIFT, + VEGA_ALPHA_PER_CHANNEL_SHADER = 2 << SHADERS_ALPHA_SHIFT, - VEGA_MASK_SHADER = 1 << 7, + VEGA_BLEND_SRC_SHADER = 1 << SHADERS_BLEND_SHIFT, + VEGA_BLEND_SRC_OVER_SHADER = 2 << SHADERS_BLEND_SHIFT, + VEGA_BLEND_DST_OVER_SHADER = 3 << SHADERS_BLEND_SHIFT, + VEGA_BLEND_SRC_IN_SHADER = 4 << SHADERS_BLEND_SHIFT, + VEGA_BLEND_DST_IN_SHADER = 5 << SHADERS_BLEND_SHIFT, + VEGA_BLEND_MULTIPLY_SHADER = 6 << SHADERS_BLEND_SHIFT, + VEGA_BLEND_SCREEN_SHADER = 7 << SHADERS_BLEND_SHIFT, + VEGA_BLEND_DARKEN_SHADER = 8 << SHADERS_BLEND_SHIFT, + VEGA_BLEND_LIGHTEN_SHADER = 9 << SHADERS_BLEND_SHIFT, + VEGA_BLEND_ADDITIVE_SHADER = 10<< SHADERS_BLEND_SHIFT, - VEGA_BLEND_MULTIPLY_SHADER = 1 << 8, - VEGA_BLEND_SCREEN_SHADER = 1 << 9, - VEGA_BLEND_DARKEN_SHADER = 1 << 10, - VEGA_BLEND_LIGHTEN_SHADER = 1 << 11, + VEGA_MASK_SHADER = 1 << SHADERS_MASK_SHIFT, - VEGA_PREMULTIPLY_SHADER = 1 << 12, - VEGA_UNPREMULTIPLY_SHADER = 1 << 13, + VEGA_PREMULTIPLY_SHADER = 1 << SHADERS_PREMULTIPLY_SHIFT, + VEGA_UNPREMULTIPLY_SHADER = 2 << SHADERS_PREMULTIPLY_SHIFT, - VEGA_BW_SHADER = 1 << 14 + VEGA_BW_SHADER = 1 << SHADERS_BW_SHIFT }; struct vg_shader { diff --git 
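/* Editor's note: the following is an illustrative sketch, not part of the
 * diff above.  It shows how the bit-field macros and enum introduced in
 * shaders_cache.h are meant to compose and decode a fragment-shader id.
 * The SHADERS_* / VEGA_* names come from the hunk above; the demo function
 * itself and the quoted include path are assumptions.
 */
#include <assert.h>
#include "shaders_cache.h"   /* the header modified above */

static int example_compose_shader_id(void)
{
   /* setup_shader_program() builds one id by OR'ing one value per stage;
    * every stage lives in its own, non-overlapping bit range */
   int id = VEGA_LINEAR_GRADIENT_SHADER |  /* paint stage */
            VEGA_IMAGE_MULTIPLY_SHADER  |  /* image stage */
            VEGA_BLEND_SCREEN_SHADER    |  /* blend stage */
            VEGA_MASK_SHADER;              /* mask stage  */

   /* each stage can be read back independently */
   assert(SHADERS_GET_PAINT_SHADER(id) == VEGA_LINEAR_GRADIENT_SHADER);
   assert(SHADERS_GET_IMAGE_SHADER(id) == VEGA_IMAGE_MULTIPLY_SHADER);
   assert(SHADERS_GET_BLEND_SHADER(id) == VEGA_BLEND_SCREEN_SHADER);
   assert(SHADERS_GET_MASK_SHADER(id)  == VEGA_MASK_SHADER);
   assert(SHADERS_GET_BW_SHADER(id)    == 0);   /* stage not selected */

   /* create_shader() maps a stage value to a per-stage table index with
    * (sh >> SHADERS_<STAGE>_SHIFT) - 1; for the blend stage above: */
   assert((SHADERS_GET_BLEND_SHADER(id) >> SHADERS_BLEND_SHIFT) - 1 == 6);

   return id;
}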
a/src/gallium/state_trackers/vega/st_inlines.h b/src/gallium/state_trackers/vega/st_inlines.h deleted file mode 100644 index 7eaa67c76ae..00000000000 --- a/src/gallium/state_trackers/vega/st_inlines.h +++ /dev/null @@ -1,122 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Functions for checking if buffers/textures are referenced when we need - * to read/write from/to them. Flush when needed. - */ - -#ifndef ST_INLINES_H -#define ST_INLINES_H - -#include "vg_context.h" - -#include "pipe/p_context.h" -#include "pipe/p_screen.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" -#include "pipe/p_state.h" - -static INLINE struct pipe_transfer * -st_cond_flush_get_transfer(struct vg_context *st, - struct pipe_resource *pt, - unsigned int face, - unsigned int level, - unsigned int zslice, - enum pipe_transfer_usage usage, - unsigned int x, unsigned int y, - unsigned int w, unsigned int h) -{ - struct pipe_context *pipe = st->pipe; - - return pipe_get_transfer(pipe, pt, face, level, zslice, usage, - x, y, w, h); -} - -static INLINE struct pipe_transfer * -st_no_flush_get_transfer(struct vg_context *st, - struct pipe_resource *pt, - unsigned int face, - unsigned int level, - unsigned int zslice, - enum pipe_transfer_usage usage, - unsigned int x, unsigned int y, - unsigned int w, unsigned int h) -{ - struct pipe_context *pipe = st->pipe; - - return pipe_get_transfer(pipe, pt, face, level, - zslice, usage, x, y, w, h); -} - - -static INLINE void -st_cond_flush_pipe_buffer_write(struct vg_context *st, - struct pipe_resource *buf, - unsigned int offset, - unsigned int size, - const void * data) -{ - struct pipe_context *pipe = st->pipe; - - pipe_buffer_write(pipe, buf, offset, size, data); -} - -static INLINE void -st_no_flush_pipe_buffer_write(struct vg_context *st, - struct pipe_resource *buf, - unsigned int offset, - unsigned int size, - const void * data) -{ - pipe_buffer_write(st->pipe, buf, offset, size, data); -} - -static INLINE void -st_cond_flush_pipe_buffer_read(struct vg_context *st, - struct pipe_resource *buf, - unsigned int offset, - unsigned int size, - void * data) -{ - struct pipe_context *pipe = st->pipe; - - pipe_buffer_read(pipe, buf, offset, size, data); -} - -static INLINE 
void -st_no_flush_pipe_buffer_read(struct vg_context *st, - struct pipe_resource *buf, - unsigned int offset, - unsigned int size, - void * data) -{ - pipe_buffer_read(st->pipe, buf, offset, size, data); -} - -#endif - diff --git a/src/gallium/state_trackers/vega/text.c b/src/gallium/state_trackers/vega/text.c new file mode 100644 index 00000000000..6714ee9ad35 --- /dev/null +++ b/src/gallium/state_trackers/vega/text.c @@ -0,0 +1,249 @@ +/************************************************************************** + * + * Copyright 2010 LunarG, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_memory.h" +#include "cso_cache/cso_hash.h" + +#include "text.h" +#include "image.h" +#include "path.h" + +#ifdef OPENVG_VERSION_1_1 + +struct vg_font { + struct vg_object base; + struct cso_hash *glyphs; +}; + +struct vg_glyph { + struct vg_object *object; /* it could be NULL */ + VGboolean is_hinted; + VGfloat glyph_origin[2]; + VGfloat escapement[2]; +}; + +static VGboolean del_glyph(struct vg_font *font, + VGuint glyphIndex) +{ + struct vg_glyph *glyph; + + glyph = (struct vg_glyph *) + cso_hash_take(font->glyphs, (unsigned) glyphIndex); + if (glyph) + FREE(glyph); + + return (glyph != NULL); +} + +static void add_glyph(struct vg_font *font, + VGuint glyphIndex, + struct vg_object *obj, + VGboolean isHinted, + const VGfloat glyphOrigin[2], + const VGfloat escapement[2]) +{ + struct vg_glyph *glyph; + + /* remove the existing one */ + del_glyph(font, glyphIndex); + + glyph = CALLOC_STRUCT(vg_glyph); + glyph->object = obj; + glyph->is_hinted = isHinted; + memcpy(glyph->glyph_origin, glyphOrigin, sizeof(glyphOrigin)); + memcpy(glyph->escapement, escapement, sizeof(escapement)); + + cso_hash_insert(font->glyphs, (unsigned) glyphIndex, glyph); +} + +static struct vg_glyph *get_glyph(struct vg_font *font, + VGuint glyphIndex) +{ + struct cso_hash_iter iter; + + iter = cso_hash_find(font->glyphs, (unsigned) glyphIndex); + return (struct vg_glyph *) cso_hash_iter_data(iter); +} + +static void vg_render_glyph(struct vg_context *ctx, + struct vg_glyph *glyph, + VGbitfield paintModes, + VGboolean allowAutoHinting) +{ + if (glyph->object && paintModes) { + struct vg_state *state = &ctx->state.vg; + struct matrix m; + + m = state->glyph_user_to_surface_matrix; + matrix_translate(&m, + state->glyph_origin[0].f - 
glyph->glyph_origin[0], + state->glyph_origin[1].f - glyph->glyph_origin[1]); + + if (glyph->object->type == VG_OBJECT_PATH) { + path_render((struct path *) glyph->object, paintModes, &m); + } + else { + assert(glyph->object->type == VG_OBJECT_IMAGE); + image_draw((struct vg_image *) glyph->object, &m); + } + } +} + +static void vg_advance_glyph(struct vg_context *ctx, + struct vg_glyph *glyph, + VGfloat adjustment_x, + VGfloat adjustment_y, + VGboolean last) +{ + struct vg_value *glyph_origin = ctx->state.vg.glyph_origin; + + glyph_origin[0].f += glyph->escapement[0] + adjustment_x; + glyph_origin[1].f += glyph->escapement[1] + adjustment_y; + + if (last) { + glyph_origin[0].i = float_to_int_floor(glyph_origin[0].f); + glyph_origin[1].i = float_to_int_floor(glyph_origin[1].f); + } +} + +struct vg_font *font_create(VGint glyphCapacityHint) +{ + struct vg_context *ctx = vg_current_context(); + struct vg_font *font; + + font = CALLOC_STRUCT(vg_font); + vg_init_object(&font->base, ctx, VG_OBJECT_FONT); + font->glyphs = cso_hash_create(); + + vg_context_add_object(ctx, VG_OBJECT_FONT, font); + + return font; +} + +void font_destroy(struct vg_font *font) +{ + struct vg_context *ctx = vg_current_context(); + struct cso_hash_iter iter; + + vg_context_remove_object(ctx, VG_OBJECT_FONT, font); + + iter = cso_hash_first_node(font->glyphs); + while (!cso_hash_iter_is_null(iter)) { + struct vg_glyph *glyph = (struct vg_glyph *) cso_hash_iter_data(iter); + FREE(glyph); + iter = cso_hash_iter_next(iter); + } + cso_hash_delete(font->glyphs); + + FREE(font); +} + +void font_set_glyph_to_path(struct vg_font *font, + VGuint glyphIndex, + struct path *path, + VGboolean isHinted, + const VGfloat glyphOrigin[2], + const VGfloat escapement[2]) +{ + add_glyph(font, glyphIndex, (struct vg_object *) path, + isHinted, glyphOrigin, escapement); +} + +void font_set_glyph_to_image(struct vg_font *font, + VGuint glyphIndex, + struct vg_image *image, + const VGfloat glyphOrigin[2], + const VGfloat escapement[2]) +{ + add_glyph(font, glyphIndex, (struct vg_object *) image, + VG_TRUE, glyphOrigin, escapement); +} + +void font_clear_glyph(struct vg_font *font, + VGuint glyphIndex) +{ + if (!del_glyph(font, glyphIndex)) { + struct vg_context *ctx = vg_current_context(); + vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); + } +} + +void font_draw_glyph(struct vg_font *font, + VGuint glyphIndex, + VGbitfield paintModes, + VGboolean allowAutoHinting) +{ + struct vg_context *ctx = vg_current_context(); + struct vg_glyph *glyph; + + glyph = get_glyph(font, glyphIndex); + if (!glyph) { + vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); + return; + } + + vg_render_glyph(ctx, glyph, paintModes, allowAutoHinting); + vg_advance_glyph(ctx, glyph, 0.0f, 0.0f, VG_TRUE); +} + +void font_draw_glyphs(struct vg_font *font, + VGint glyphCount, + const VGuint *glyphIndices, + const VGfloat *adjustments_x, + const VGfloat *adjustments_y, + VGbitfield paintModes, + VGboolean allowAutoHinting) +{ + struct vg_context *ctx = vg_current_context(); + VGint i; + + for (i = 0; i < glyphCount; ++i) { + if (!get_glyph(font, glyphIndices[i])) { + vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); + return; + } + } + + for (i = 0; i < glyphCount; ++i) { + struct vg_glyph *glyph; + VGfloat adj_x, adj_y; + + glyph = get_glyph(font, glyphIndices[i]); + + vg_render_glyph(ctx, glyph, paintModes, allowAutoHinting); + + adj_x = (adjustments_x) ? adjustments_x[i] : 0.0f; + adj_y = (adjustments_y) ? 
adjustments_y[i] : 0.0f; + vg_advance_glyph(ctx, glyph, adj_x, adj_y, (i == glyphCount - 1)); + } +} + +VGint font_num_glyphs(struct vg_font *font) +{ + return cso_hash_size(font->glyphs); +} + +#endif /* OPENVG_VERSION_1_1 */ diff --git a/src/gallium/state_trackers/vega/text.h b/src/gallium/state_trackers/vega/text.h new file mode 100644 index 00000000000..6b3fa7323e9 --- /dev/null +++ b/src/gallium/state_trackers/vega/text.h @@ -0,0 +1,71 @@ +/************************************************************************** + * + * Copyright 2010 LunarG, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _TEXT_H +#define _TEXT_H + +#include "vg_context.h" +#include "cso_cache/cso_hash.h" + +struct vg_font; +struct vg_image; +struct path; + +struct vg_font *font_create(VGint glyphCapacityHint); +void font_destroy(struct vg_font *font); + +void font_set_glyph_to_path(struct vg_font *font, + VGuint glyphIndex, + struct path *path, + VGboolean isHinted, + const VGfloat glyphOrigin[2], + const VGfloat escapement[2]); + +void font_set_glyph_to_image(struct vg_font *font, + VGuint glyphIndex, + struct vg_image *image, + const VGfloat glyphOrigin[2], + const VGfloat escapement[2]); + +void font_clear_glyph(struct vg_font *font, + VGuint glyphIndex); + +void font_draw_glyph(struct vg_font *font, + VGuint glyphIndex, + VGbitfield paintModes, + VGboolean allowAutoHinting); + +void font_draw_glyphs(struct vg_font *font, + VGint glyphCount, + const VGuint *glyphIndices, + const VGfloat *adjustments_x, + const VGfloat *adjustments_y, + VGbitfield paintModes, + VGboolean allowAutoHinting); + +VGint font_num_glyphs(struct vg_font *font); + +#endif /* _TEXT_H */ diff --git a/src/gallium/state_trackers/vega/vg_context.c b/src/gallium/state_trackers/vega/vg_context.c index 99e444affdf..f36f55d6c89 100644 --- a/src/gallium/state_trackers/vega/vg_context.c +++ b/src/gallium/state_trackers/vega/vg_context.c @@ -30,20 +30,20 @@ #include "renderer.h" #include "shaders_cache.h" #include "shader.h" -#include "asm_util.h" -#include "st_inlines.h" #include "vg_manager.h" #include "api.h" +#include "mask.h" #include "pipe/p_context.h" #include "util/u_inlines.h" #include "cso_cache/cso_context.h" -#include "util/u_simple_shaders.h" #include "util/u_memory.h" #include "util/u_blit.h" #include "util/u_sampler.h" +#include "util/u_surface.h" +#include 
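/* Editor's note: illustrative usage sketch, not part of the diff.  It shows
 * how the OpenVG font API added in text.c/text.h above is meant to be used.
 * The font_* functions, struct path and the VG_* constants come from the
 * hunks above or from openvg.h; the helper below, the glyph index and the
 * metric values are made up for the example, and a current vg_context is
 * assumed (font_create() etc. look it up internally).
 */
#include "text.h"

static void example_register_and_draw_glyph(struct path *digit_path)
{
   /* hypothetical metrics: glyph origin at (0, 0), advance 10 units in x */
   const VGfloat origin[2] = { 0.0f, 0.0f };
   const VGfloat escapement[2] = { 10.0f, 0.0f };
   struct vg_font *font;

   font = font_create(16 /* glyphCapacityHint */);

   /* glyph index 7 will render digit_path; not hinted */
   font_set_glyph_to_path(font, 7, digit_path, VG_FALSE, origin, escapement);

   /* draws at the current glyph origin, then advances the origin by the
    * escapement (see vg_advance_glyph() above) */
   font_draw_glyph(font, 7, VG_FILL_PATH, VG_FALSE);

   font_destroy(font);
}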
"util/u_format.h" struct vg_context *_vg_context = 0; @@ -52,19 +52,6 @@ struct vg_context * vg_current_context(void) return _vg_context; } -static void init_clear(struct vg_context *st) -{ - struct pipe_context *pipe = st->pipe; - - /* rasterizer state: bypass clipping */ - memset(&st->clear.raster, 0, sizeof(st->clear.raster)); - st->clear.raster.gl_rasterization_rules = 1; - - /* fragment shader state: color pass-through program */ - st->clear.fs = - util_make_fragment_passthrough_shader(pipe); -} - /** * A depth/stencil rb will be needed regardless of what the visual says. */ @@ -101,7 +88,6 @@ struct vg_context * vg_create_context(struct pipe_context *pipe, struct vg_context *share) { struct vg_context *ctx; - unsigned i; ctx = CALLOC_STRUCT(vg_context); @@ -118,8 +104,6 @@ struct vg_context * vg_create_context(struct pipe_context *pipe, ctx->cso_context = cso_create_context(pipe); - init_clear(ctx); - ctx->default_paint = paint_create(ctx); ctx->state.vg.stroke_paint = ctx->default_paint; ctx->state.vg.fill_paint = ctx->default_paint; @@ -141,13 +125,6 @@ struct vg_context * vg_create_context(struct pipe_context *pipe, ctx->blend_sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; ctx->blend_sampler.normalized_coords = 0; - for (i = 0; i < 2; i++) { - ctx->velems[i].src_offset = i * 4 * sizeof(float); - ctx->velems[i].instance_divisor = 0; - ctx->velems[i].vertex_buffer_index = 0; - ctx->velems[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - } - vg_set_error(ctx, VG_NO_ERROR); ctx->owned_objects[VG_OBJECT_PAINT] = cso_hash_create(); @@ -168,7 +145,6 @@ struct vg_context * vg_create_context(struct pipe_context *pipe, void vg_destroy_context(struct vg_context *ctx) { struct pipe_resource **cbuf = &ctx->mask.cbuf; - struct pipe_resource **vsbuf = &ctx->vs_const_buffer; util_destroy_blit(ctx->blit); renderer_destroy(ctx->renderer); @@ -179,29 +155,6 @@ void vg_destroy_context(struct vg_context *ctx) if (*cbuf) pipe_resource_reference(cbuf, NULL); - if (*vsbuf) - pipe_resource_reference(vsbuf, NULL); - - if (ctx->clear.fs) { - cso_delete_fragment_shader(ctx->cso_context, ctx->clear.fs); - ctx->clear.fs = NULL; - } - - if (ctx->plain_vs) { - vg_shader_destroy(ctx, ctx->plain_vs); - ctx->plain_vs = NULL; - } - if (ctx->clear_vs) { - vg_shader_destroy(ctx, ctx->clear_vs); - ctx->clear_vs = NULL; - } - if (ctx->texture_vs) { - vg_shader_destroy(ctx, ctx->texture_vs); - ctx->texture_vs = NULL; - } - - if (ctx->pass_through_depth_fs) - vg_shader_destroy(ctx, ctx->pass_through_depth_fs); if (ctx->mask.union_fs) vg_shader_destroy(ctx, ctx->mask.union_fs); if (ctx->mask.intersect_fs) @@ -268,186 +221,195 @@ void vg_context_remove_object(struct vg_context *ctx, } } -static void update_clip_state(struct vg_context *ctx) +static struct pipe_resource * +create_texture(struct pipe_context *pipe, enum pipe_format format, + VGint width, VGint height) { - struct pipe_depth_stencil_alpha_state *dsa = &ctx->state.g3d.dsa; - struct vg_state *state = &ctx->state.vg; - - memset(dsa, 0, sizeof(struct pipe_depth_stencil_alpha_state)); - - if (state->scissoring) { - struct pipe_blend_state *blend = &ctx->state.g3d.blend; - struct pipe_framebuffer_state *fb = &ctx->state.g3d.fb; - int i; - - dsa->depth.writemask = 1;/*glDepthMask(TRUE);*/ - dsa->depth.func = PIPE_FUNC_ALWAYS; - dsa->depth.enabled = 1; - - cso_save_blend(ctx->cso_context); - cso_save_fragment_shader(ctx->cso_context); - /* set a passthrough shader */ - if (!ctx->pass_through_depth_fs) - ctx->pass_through_depth_fs = 
shader_create_from_text(ctx->pipe, - pass_through_depth_asm, - 40, - PIPE_SHADER_FRAGMENT); - cso_set_fragment_shader_handle(ctx->cso_context, - ctx->pass_through_depth_fs->driver); - cso_set_depth_stencil_alpha(ctx->cso_context, dsa); - - ctx->pipe->clear(ctx->pipe, PIPE_CLEAR_DEPTHSTENCIL, NULL, 1.0, 0); - - /* disable color writes */ - blend->rt[0].colormask = 0; /*disable colorwrites*/ - cso_set_blend(ctx->cso_context, blend); - - /* enable scissoring */ - for (i = 0; i < state->scissor_rects_num; ++i) { - const float x = state->scissor_rects[i * 4 + 0].f; - const float y = state->scissor_rects[i * 4 + 1].f; - const float width = state->scissor_rects[i * 4 + 2].f; - const float height = state->scissor_rects[i * 4 + 3].f; - VGfloat minx, miny, maxx, maxy; - - minx = 0; - miny = 0; - maxx = fb->width; - maxy = fb->height; - - if (x > minx) - minx = x; - if (y > miny) - miny = y; - - if (x + width < maxx) - maxx = x + width; - if (y + height < maxy) - maxy = y + height; - - /* check for null space */ - if (minx >= maxx || miny >= maxy) - minx = miny = maxx = maxy = 0; - - /*glClear(GL_DEPTH_BUFFER_BIT);*/ - renderer_draw_quad(ctx->renderer, minx, miny, maxx, maxy, 0.0f); - } - - cso_restore_blend(ctx->cso_context); - cso_restore_fragment_shader(ctx->cso_context); - - dsa->depth.enabled = 1; /* glEnable(GL_DEPTH_TEST); */ - dsa->depth.writemask = 0;/*glDepthMask(FALSE);*/ - dsa->depth.func = PIPE_FUNC_GEQUAL; + struct pipe_resource templ; + + memset(&templ, 0, sizeof(templ)); + + if (format != PIPE_FORMAT_NONE) { + templ.format = format; + } + else { + templ.format = PIPE_FORMAT_B8G8R8A8_UNORM; + } + + templ.target = PIPE_TEXTURE_2D; + templ.width0 = width; + templ.height0 = height; + templ.depth0 = 1; + templ.array_size = 1; + templ.last_level = 0; + + if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 1)) { + templ.bind = PIPE_BIND_DEPTH_STENCIL; + } else { + templ.bind = (PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_RENDER_TARGET | + PIPE_BIND_SAMPLER_VIEW); } + + return pipe->screen->resource_create(pipe->screen, &templ); } -void vg_validate_state(struct vg_context *ctx) +static struct pipe_sampler_view * +create_tex_and_view(struct pipe_context *pipe, enum pipe_format format, + VGint width, VGint height) { - vg_manager_validate_framebuffer(ctx); + struct pipe_resource *texture; + struct pipe_sampler_view view_templ; + struct pipe_sampler_view *view; - if ((ctx->state.dirty & BLEND_DIRTY)) { - struct pipe_blend_state *blend = &ctx->state.g3d.blend; - memset(blend, 0, sizeof(struct pipe_blend_state)); - blend->rt[0].blend_enable = 1; - blend->rt[0].colormask = PIPE_MASK_RGBA; - - switch (ctx->state.vg.blend_mode) { - case VG_BLEND_SRC: - blend->rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend->rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend->rt[0].blend_enable = 0; - break; - case VG_BLEND_SRC_OVER: - blend->rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA; - blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend->rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; - blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; - break; - case VG_BLEND_DST_OVER: - blend->rt[0].rgb_src_factor = PIPE_BLENDFACTOR_INV_DST_ALPHA; - blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_INV_DST_ALPHA; - blend->rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_DST_ALPHA; - blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_DST_ALPHA; - break; - case 
VG_BLEND_SRC_IN: - blend->rt[0].rgb_src_factor = PIPE_BLENDFACTOR_DST_ALPHA; - blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_DST_ALPHA; - blend->rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - break; - case VG_BLEND_DST_IN: - blend->rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ZERO; - blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ZERO; - blend->rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_SRC_ALPHA; - blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_SRC_ALPHA; - break; - case VG_BLEND_MULTIPLY: - case VG_BLEND_SCREEN: - case VG_BLEND_DARKEN: - case VG_BLEND_LIGHTEN: - blend->rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend->rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend->rt[0].blend_enable = 0; - break; - case VG_BLEND_ADDITIVE: - blend->rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend->rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; - blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; - break; - default: - assert(!"not implemented blend mode"); - } - cso_set_blend(ctx->cso_context, &ctx->state.g3d.blend); - } - if ((ctx->state.dirty & RASTERIZER_DIRTY)) { - struct pipe_rasterizer_state *raster = &ctx->state.g3d.rasterizer; - memset(raster, 0, sizeof(struct pipe_rasterizer_state)); - raster->gl_rasterization_rules = 1; - cso_set_rasterizer(ctx->cso_context, &ctx->state.g3d.rasterizer); - } - if ((ctx->state.dirty & VIEWPORT_DIRTY)) { - struct pipe_framebuffer_state *fb = &ctx->state.g3d.fb; - const VGint param_bytes = 8 * sizeof(VGfloat); - VGfloat vs_consts[8] = { - 2.f/fb->width, 2.f/fb->height, 1, 1, - -1, -1, 0, 0 - }; - struct pipe_resource **cbuf = &ctx->vs_const_buffer; + texture = create_texture(pipe, format, width, height); - vg_set_viewport(ctx, VEGA_Y0_BOTTOM); + if (!texture) + return NULL; - pipe_resource_reference(cbuf, NULL); - *cbuf = pipe_buffer_create(ctx->pipe->screen, - PIPE_BIND_CONSTANT_BUFFER, - param_bytes); - - if (*cbuf) { - st_no_flush_pipe_buffer_write(ctx, *cbuf, - 0, param_bytes, vs_consts); - } - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_VERTEX, 0, *cbuf); + u_sampler_view_default_template(&view_templ, texture, texture->format); + view = pipe->create_sampler_view(pipe, texture, &view_templ); + /* want the texture to go away if the view is freed */ + pipe_resource_reference(&texture, NULL); + + return view; +} + +static void +vg_context_update_surface_mask_view(struct vg_context *ctx, + uint width, uint height) +{ + struct st_framebuffer *stfb = ctx->draw_buffer; + struct pipe_sampler_view *old_sampler_view = stfb->surface_mask_view; + struct pipe_context *pipe = ctx->pipe; + + if (old_sampler_view && + old_sampler_view->texture->width0 == width && + old_sampler_view->texture->height0 == height) + return; + + /* + we use PIPE_FORMAT_B8G8R8A8_UNORM because we want to render to + this texture and use it as a sampler, so while this wastes some + space it makes both of those a lot simpler + */ + stfb->surface_mask_view = create_tex_and_view(pipe, + PIPE_FORMAT_B8G8R8A8_UNORM, width, height); + + if (!stfb->surface_mask_view) { + if (old_sampler_view) + pipe_sampler_view_reference(&old_sampler_view, NULL); + return; } - if ((ctx->state.dirty & VS_DIRTY)) { - cso_set_vertex_shader_handle(ctx->cso_context, - vg_plain_vs(ctx)); + + /* XXX could this call be avoided? 
*/ + vg_validate_state(ctx); + + /* alpha mask starts with 1.f alpha */ + mask_fill(0, 0, width, height, 1.f); + + /* if we had an old surface copy it over */ + if (old_sampler_view) { + struct pipe_box src_box; + u_box_origin_2d(MIN2(old_sampler_view->texture->width0, + stfb->surface_mask_view->texture->width0), + MIN2(old_sampler_view->texture->height0, + stfb->surface_mask_view->texture->height0), + &src_box); + + pipe->resource_copy_region(pipe, + stfb->surface_mask_view->texture, + 0, 0, 0, 0, + old_sampler_view->texture, + 0, &src_box); } - /* must be last because it renders to the depth buffer*/ - if ((ctx->state.dirty & DEPTH_STENCIL_DIRTY)) { - update_clip_state(ctx); - cso_set_depth_stencil_alpha(ctx->cso_context, &ctx->state.g3d.dsa); + /* Free the old texture + */ + if (old_sampler_view) + pipe_sampler_view_reference(&old_sampler_view, NULL); +} + +static void +vg_context_update_blend_texture_view(struct vg_context *ctx, + uint width, uint height) +{ + struct pipe_context *pipe = ctx->pipe; + struct st_framebuffer *stfb = ctx->draw_buffer; + struct pipe_sampler_view *old = stfb->blend_texture_view; + + if (old && + old->texture->width0 == width && + old->texture->height0 == height) + return; + + stfb->blend_texture_view = create_tex_and_view(pipe, + PIPE_FORMAT_B8G8R8A8_UNORM, width, height); + + pipe_sampler_view_reference(&old, NULL); +} + +static boolean +vg_context_update_depth_stencil_rb(struct vg_context * ctx, + uint width, uint height) +{ + struct st_renderbuffer *dsrb = ctx->draw_buffer->dsrb; + struct pipe_context *pipe = ctx->pipe; + struct pipe_surface surf_tmpl; + + if ((dsrb->width == width && dsrb->height == height) && dsrb->texture) + return FALSE; + + /* unreference existing ones */ + pipe_surface_reference(&dsrb->surface, NULL); + pipe_resource_reference(&dsrb->texture, NULL); + dsrb->width = dsrb->height = 0; + + dsrb->texture = create_texture(pipe, dsrb->format, width, height); + if (!dsrb->texture) + return TRUE; + + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + u_surface_default_template(&surf_tmpl, dsrb->texture, + PIPE_BIND_DEPTH_STENCIL); + dsrb->surface = pipe->create_surface(pipe, + dsrb->texture, + &surf_tmpl); + if (!dsrb->surface) { + pipe_resource_reference(&dsrb->texture, NULL); + return TRUE; } + dsrb->width = width; + dsrb->height = height; + + assert(dsrb->surface->width == width); + assert(dsrb->surface->height == height); + + return TRUE; +} + +void vg_validate_state(struct vg_context *ctx) +{ + struct st_framebuffer *stfb = ctx->draw_buffer; + + vg_manager_validate_framebuffer(ctx); + + if (vg_context_update_depth_stencil_rb(ctx, stfb->width, stfb->height)) + ctx->state.dirty |= DEPTH_STENCIL_DIRTY; + + /* blend state depends on fb format */ + if (ctx->state.dirty & FRAMEBUFFER_DIRTY) + ctx->state.dirty |= BLEND_DIRTY; + + renderer_validate(ctx->renderer, ctx->state.dirty, + ctx->draw_buffer, &ctx->state.vg); + + ctx->state.dirty = 0; + shader_set_masking(ctx->shader, ctx->state.vg.masking); shader_set_image_mode(ctx->shader, ctx->state.vg.image_mode); - - ctx->state.dirty = NONE_DIRTY; + shader_set_color_transform(ctx->shader, ctx->state.vg.color_transform); } VGboolean vg_object_is_valid(void *ptr, enum vg_object_type type) @@ -470,130 +432,94 @@ void vg_set_error(struct vg_context *ctx, ctx->_error = code; } -void vg_prepare_blend_surface(struct vg_context *ctx) +static void vg_prepare_blend_texture(struct vg_context *ctx, + struct pipe_sampler_view *src) +{ + struct st_framebuffer *stfb = ctx->draw_buffer; + struct pipe_surface *surf; + 
struct pipe_surface surf_tmpl; + + vg_context_update_blend_texture_view(ctx, stfb->width, stfb->height); + + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + u_surface_default_template(&surf_tmpl, stfb->blend_texture_view->texture, + PIPE_BIND_RENDER_TARGET); + surf = ctx->pipe->create_surface(ctx->pipe, + stfb->blend_texture_view->texture, + &surf_tmpl); + if (surf) { + util_blit_pixels_tex(ctx->blit, + src, 0, 0, stfb->width, stfb->height, + surf, 0, 0, stfb->width, stfb->height, + 0.0, PIPE_TEX_MIPFILTER_NEAREST); + + pipe_surface_reference(&surf, NULL); + } +} + +struct pipe_sampler_view *vg_prepare_blend_surface(struct vg_context *ctx) { - struct pipe_surface *dest_surface = NULL; struct pipe_context *pipe = ctx->pipe; struct pipe_sampler_view *view; struct pipe_sampler_view view_templ; struct st_framebuffer *stfb = ctx->draw_buffer; struct st_renderbuffer *strb = stfb->strb; - /* first finish all pending rendering */ - vgFinish(); + vg_validate_state(ctx); u_sampler_view_default_template(&view_templ, strb->texture, strb->texture->format); view = pipe->create_sampler_view(pipe, strb->texture, &view_templ); - dest_surface = pipe->screen->get_tex_surface(pipe->screen, - stfb->blend_texture_view->texture, - 0, 0, 0, - PIPE_BIND_RENDER_TARGET); - /* flip it, because we want to use it as a sampler */ - util_blit_pixels_tex(ctx->blit, - view, - 0, strb->height, - strb->width, 0, - dest_surface, - 0, 0, - strb->width, strb->height, - 0.0, PIPE_TEX_MIPFILTER_NEAREST); - - if (dest_surface) - pipe_surface_reference(&dest_surface, NULL); - - /* make sure it's complete */ - vgFinish(); + vg_prepare_blend_texture(ctx, view); pipe_sampler_view_reference(&view, NULL); + + return stfb->blend_texture_view; } -void vg_prepare_blend_surface_from_mask(struct vg_context *ctx) +struct pipe_sampler_view *vg_prepare_blend_surface_from_mask(struct vg_context *ctx) { - struct pipe_surface *dest_surface = NULL; - struct pipe_context *pipe = ctx->pipe; struct st_framebuffer *stfb = ctx->draw_buffer; - struct st_renderbuffer *strb = stfb->strb; vg_validate_state(ctx); - /* first finish all pending rendering */ - vgFinish(); - - dest_surface = pipe->screen->get_tex_surface(pipe->screen, - stfb->blend_texture_view->texture, - 0, 0, 0, - PIPE_BIND_RENDER_TARGET); - - /* flip it, because we want to use it as a sampler */ - util_blit_pixels_tex(ctx->blit, - stfb->alpha_mask_view, - 0, strb->height, - strb->width, 0, - dest_surface, - 0, 0, - strb->width, strb->height, - 0.0, PIPE_TEX_MIPFILTER_NEAREST); - - /* make sure it's complete */ - vgFinish(); - - if (dest_surface) - pipe_surface_reference(&dest_surface, NULL); + vg_context_update_surface_mask_view(ctx, stfb->width, stfb->height); + vg_prepare_blend_texture(ctx, stfb->surface_mask_view); + + return stfb->blend_texture_view; } -void * vg_plain_vs(struct vg_context *ctx) +struct pipe_sampler_view *vg_get_surface_mask(struct vg_context *ctx) { - if (!ctx->plain_vs) { - ctx->plain_vs = shader_create_from_text(ctx->pipe, - vs_plain_asm, - 200, - PIPE_SHADER_VERTEX); - } + struct st_framebuffer *stfb = ctx->draw_buffer; - return ctx->plain_vs->driver; -} + vg_context_update_surface_mask_view(ctx, stfb->width, stfb->height); + return stfb->surface_mask_view; +} -void * vg_clear_vs(struct vg_context *ctx) +/** + * A transformation from window coordinates to paint coordinates. 
+ */ +VGboolean vg_get_paint_matrix(struct vg_context *ctx, + const struct matrix *paint_to_user, + const struct matrix *user_to_surface, + struct matrix *mat) { - if (!ctx->clear_vs) { - ctx->clear_vs = shader_create_from_text(ctx->pipe, - vs_clear_asm, - 200, - PIPE_SHADER_VERTEX); - } + struct matrix tmp; - return ctx->clear_vs->driver; -} + /* get user-to-paint matrix */ + memcpy(mat, paint_to_user, sizeof(*paint_to_user)); + if (!matrix_invert(mat)) + return VG_FALSE; -void * vg_texture_vs(struct vg_context *ctx) -{ - if (!ctx->texture_vs) { - ctx->texture_vs = shader_create_from_text(ctx->pipe, - vs_texture_asm, - 200, - PIPE_SHADER_VERTEX); - } + /* get surface-to-user matrix */ + memcpy(&tmp, user_to_surface, sizeof(*user_to_surface)); + if (!matrix_invert(&tmp)) + return VG_FALSE; - return ctx->texture_vs->driver; -} + matrix_mult(mat, &tmp); -void vg_set_viewport(struct vg_context *ctx, VegaOrientation orientation) -{ - struct pipe_viewport_state viewport; - struct pipe_framebuffer_state *fb = &ctx->state.g3d.fb; - VGfloat y_scale = (orientation == VEGA_Y0_BOTTOM) ? -2.f : 2.f; - - viewport.scale[0] = fb->width / 2.f; - viewport.scale[1] = fb->height / y_scale; - viewport.scale[2] = 1.0; - viewport.scale[3] = 1.0; - viewport.translate[0] = fb->width / 2.f; - viewport.translate[1] = fb->height / 2.f; - viewport.translate[2] = 0.0; - viewport.translate[3] = 0.0; - - cso_set_viewport(ctx->cso_context, &viewport); + return VG_TRUE; } diff --git a/src/gallium/state_trackers/vega/vg_context.h b/src/gallium/state_trackers/vega/vg_context.h index 80a6c07c693..d616a20a3d9 100644 --- a/src/gallium/state_trackers/vega/vg_context.h +++ b/src/gallium/state_trackers/vega/vg_context.h @@ -56,7 +56,7 @@ struct st_framebuffer { struct st_renderbuffer *strb; struct st_renderbuffer *dsrb; - struct pipe_sampler_view *alpha_mask_view; + struct pipe_sampler_view *surface_mask_view; struct pipe_sampler_view *blend_texture_view; @@ -78,14 +78,13 @@ enum vg_object_type { VG_OBJECT_LAST }; enum dirty_state { - NONE_DIRTY = 0<<0, - BLEND_DIRTY = 1<<1, - RASTERIZER_DIRTY = 1<<2, - VIEWPORT_DIRTY = 1<<3, - VS_DIRTY = 1<<4, - DEPTH_STENCIL_DIRTY = 1<<5, - ALL_DIRTY = BLEND_DIRTY | RASTERIZER_DIRTY | - VIEWPORT_DIRTY | VS_DIRTY | DEPTH_STENCIL_DIRTY + BLEND_DIRTY = 1 << 0, + FRAMEBUFFER_DIRTY = 1 << 1, + DEPTH_STENCIL_DIRTY = 1 << 2, + + ALL_DIRTY = BLEND_DIRTY | + FRAMEBUFFER_DIRTY | + DEPTH_STENCIL_DIRTY }; struct vg_context @@ -98,13 +97,6 @@ struct vg_context struct { struct vg_state vg; - struct { - struct pipe_blend_state blend; - struct pipe_rasterizer_state rasterizer; - struct pipe_shader_state vs_state; - struct pipe_depth_stencil_alpha_state dsa; - struct pipe_framebuffer_state fb; - } g3d; VGbitfield dirty; } state; @@ -116,14 +108,6 @@ struct vg_context struct cso_hash *owned_objects[VG_OBJECT_LAST]; struct { - struct pipe_shader_state vert_shader; - struct pipe_shader_state frag_shader; - struct pipe_rasterizer_state raster; - void *fs; - float vertices[4][2][4]; /**< vertex pos + color */ - } clear; - - struct { struct pipe_resource *cbuf; struct pipe_sampler_state sampler; @@ -133,31 +117,16 @@ struct vg_context struct vg_shader *set_fs; } mask; - struct vg_shader *pass_through_depth_fs; - struct cso_context *cso_context; - struct pipe_resource *stencil_quad; - VGfloat stencil_vertices[4][2][4]; - struct renderer *renderer; struct shaders_cache *sc; struct shader *shader; struct pipe_sampler_state blend_sampler; - struct { - struct pipe_resource *buffer; - void *color_matrix_fs; - } filter; 
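/* Editor's note: a worked reading of vg_get_paint_matrix() above, not part
 * of the diff.  With
 *
 *    paint_to_user   : paint coordinates -> user coordinates
 *    user_to_surface : user coordinates  -> surface (window) coordinates
 *
 * the function inverts both and composes them, so (up to the library's
 * matrix-multiplication convention) the result is
 *
 *    mat = paint_to_user^-1 . user_to_surface^-1
 *        = (user_to_surface . paint_to_user)^-1
 *
 * i.e. the inverse of the full paint -> surface mapping, which is the
 * surface/window -> paint transform described in its comment.  If either
 * matrix is singular the mapping is undefined and VG_FALSE is returned.
 */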
struct vg_paint *default_paint; struct blit_state *blit; - - struct vg_shader *plain_vs; - struct vg_shader *clear_vs; - struct vg_shader *texture_vs; - struct pipe_resource *vs_const_buffer; - struct pipe_vertex_element velems[2]; }; struct vg_object { @@ -189,9 +158,15 @@ void vg_validate_state(struct vg_context *ctx); void vg_set_error(struct vg_context *ctx, VGErrorCode code); -void vg_prepare_blend_surface(struct vg_context *ctx); -void vg_prepare_blend_surface_from_mask(struct vg_context *ctx); +struct pipe_sampler_view *vg_prepare_blend_surface(struct vg_context *ctx); +struct pipe_sampler_view *vg_prepare_blend_surface_from_mask(struct vg_context *ctx); +struct pipe_sampler_view *vg_get_surface_mask(struct vg_context *ctx); + +VGboolean vg_get_paint_matrix(struct vg_context *ctx, + const struct matrix *paint_to_user, + const struct matrix *user_to_surface, + struct matrix *mat); static INLINE VGboolean is_aligned_to(const void *ptr, VGbyte alignment) { @@ -292,13 +267,4 @@ static INLINE void vg_bound_rect(VGfloat coords[4], } } -void *vg_plain_vs(struct vg_context *ctx); -void *vg_clear_vs(struct vg_context *ctx); -void *vg_texture_vs(struct vg_context *ctx); -typedef enum { - VEGA_Y0_TOP, - VEGA_Y0_BOTTOM -} VegaOrientation; -void vg_set_viewport(struct vg_context *ctx, VegaOrientation orientation); - #endif diff --git a/src/gallium/state_trackers/vega/vg_manager.c b/src/gallium/state_trackers/vega/vg_manager.c index bb15ec024f2..ec713b7fb1d 100644 --- a/src/gallium/state_trackers/vega/vg_manager.c +++ b/src/gallium/state_trackers/vega/vg_manager.c @@ -33,168 +33,20 @@ #include "pipe/p_screen.h" #include "util/u_memory.h" #include "util/u_inlines.h" -#include "util/u_format.h" -#include "util/u_sampler.h" +#include "util/u_box.h" +#include "util/u_surface.h" #include "vg_api.h" #include "vg_manager.h" #include "vg_context.h" -#include "image.h" -#include "mask.h" #include "api.h" -static struct pipe_resource * -create_texture(struct pipe_context *pipe, enum pipe_format format, - VGint width, VGint height) -{ - struct pipe_resource templ; - - memset(&templ, 0, sizeof(templ)); - - if (format != PIPE_FORMAT_NONE) { - templ.format = format; - } - else { - templ.format = PIPE_FORMAT_B8G8R8A8_UNORM; - } - - templ.target = PIPE_TEXTURE_2D; - templ.width0 = width; - templ.height0 = height; - templ.depth0 = 1; - templ.last_level = 0; - - if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 1)) { - templ.bind = PIPE_BIND_DEPTH_STENCIL; - } else { - templ.bind = (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_SAMPLER_VIEW); - } - - return pipe->screen->resource_create(pipe->screen, &templ); -} - -static struct pipe_sampler_view * -create_tex_and_view(struct pipe_context *pipe, enum pipe_format format, - VGint width, VGint height) -{ - struct pipe_resource *texture; - struct pipe_sampler_view view_templ; - struct pipe_sampler_view *view; - - texture = create_texture(pipe, format, width, height); - - if (!texture) - return NULL; - - u_sampler_view_default_template(&view_templ, texture, texture->format); - view = pipe->create_sampler_view(pipe, texture, &view_templ); - /* want the texture to go away if the view is freed */ - pipe_resource_reference(&texture, NULL); - - return view; -} - -static void -setup_new_alpha_mask(struct vg_context *ctx, struct st_framebuffer *stfb) -{ - struct pipe_context *pipe = ctx->pipe; - struct pipe_sampler_view *old_sampler_view = stfb->alpha_mask_view; - - /* - we use PIPE_FORMAT_B8G8R8A8_UNORM because we want to render to - 
this texture and use it as a sampler, so while this wastes some - space it makes both of those a lot simpler - */ - stfb->alpha_mask_view = create_tex_and_view(pipe, - PIPE_FORMAT_B8G8R8A8_UNORM, stfb->width, stfb->height); - - if (!stfb->alpha_mask_view) { - if (old_sampler_view) - pipe_sampler_view_reference(&old_sampler_view, NULL); - return; - } - - /* XXX could this call be avoided? */ - vg_validate_state(ctx); - - /* alpha mask starts with 1.f alpha */ - mask_fill(0, 0, stfb->width, stfb->height, 1.f); - - /* if we had an old surface copy it over */ - if (old_sampler_view) { - struct pipe_subresource subsurf, subold_surf; - subsurf.face = 0; - subsurf.level = 0; - subold_surf.face = 0; - subold_surf.level = 0; - pipe->resource_copy_region(pipe, - stfb->alpha_mask_view->texture, - subsurf, - 0, 0, 0, - old_sampler_view->texture, - subold_surf, - 0, 0, 0, - MIN2(old_sampler_view->texture->width0, - stfb->alpha_mask_view->texture->width0), - MIN2(old_sampler_view->texture->height0, - stfb->alpha_mask_view->texture->height0)); - } - - /* Free the old texture - */ - if (old_sampler_view) - pipe_sampler_view_reference(&old_sampler_view, NULL); -} - -static boolean -vg_context_update_depth_stencil_rb(struct vg_context * ctx, - uint width, uint height) -{ - struct st_renderbuffer *dsrb = ctx->draw_buffer->dsrb; - struct pipe_context *pipe = ctx->pipe; - unsigned surface_usage; - - if ((dsrb->width == width && dsrb->height == height) && dsrb->texture) - return FALSE; - - /* unreference existing ones */ - pipe_surface_reference(&dsrb->surface, NULL); - pipe_resource_reference(&dsrb->texture, NULL); - dsrb->width = dsrb->height = 0; - - /* Probably need dedicated flags for surface usage too: - */ - surface_usage = PIPE_BIND_DEPTH_STENCIL; /* XXX: was: RENDER_TARGET */ - - dsrb->texture = create_texture(pipe, dsrb->format, width, height); - if (!dsrb->texture) - return TRUE; - - dsrb->surface = pipe->screen->get_tex_surface(pipe->screen, - dsrb->texture, - 0, 0, 0, - surface_usage); - if (!dsrb->surface) { - pipe_resource_reference(&dsrb->texture, NULL); - return TRUE; - } - - dsrb->width = width; - dsrb->height = height; - - assert(dsrb->surface->width == width); - assert(dsrb->surface->height == height); - - return TRUE; -} - static boolean vg_context_update_color_rb(struct vg_context *ctx, struct pipe_resource *pt) { struct st_renderbuffer *strb = ctx->draw_buffer->strb; - struct pipe_screen *screen = ctx->pipe->screen; + struct pipe_context *pipe = ctx->pipe; + struct pipe_surface surf_tmpl; if (strb->texture == pt) { pipe_resource_reference(&pt, NULL); @@ -207,8 +59,12 @@ vg_context_update_color_rb(struct vg_context *ctx, struct pipe_resource *pt) strb->width = strb->height = 0; strb->texture = pt; - strb->surface = screen->get_tex_surface(screen, strb->texture, 0, 0, 0, - PIPE_BIND_RENDER_TARGET); + + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + u_surface_default_template(&surf_tmpl, strb->texture, + PIPE_BIND_RENDER_TARGET); + strb->surface = pipe->create_surface(pipe, strb->texture, &surf_tmpl); + if (!strb->surface) { pipe_resource_reference(&strb->texture, NULL); return TRUE; @@ -220,49 +76,6 @@ vg_context_update_color_rb(struct vg_context *ctx, struct pipe_resource *pt) return TRUE; } -static void -vg_context_update_draw_buffer(struct vg_context *ctx, struct pipe_resource *pt) -{ - struct st_framebuffer *stfb = ctx->draw_buffer; - boolean new_cbuf, new_zsbuf, new_size; - - new_cbuf = vg_context_update_color_rb(ctx, pt); - new_zsbuf = - vg_context_update_depth_stencil_rb(ctx, 
pt->width0, pt->height0); - - new_size = (stfb->width != pt->width0 || stfb->height != pt->height0); - stfb->width = pt->width0; - stfb->height = pt->height0; - - if (new_cbuf || new_zsbuf || new_size) { - struct pipe_framebuffer_state *state = &ctx->state.g3d.fb; - - memset(state, 0, sizeof(struct pipe_framebuffer_state)); - state->width = stfb->width; - state->height = stfb->height; - state->nr_cbufs = 1; - state->cbufs[0] = stfb->strb->surface; - state->zsbuf = stfb->dsrb->surface; - - cso_set_framebuffer(ctx->cso_context, state); - } - - if (new_zsbuf || new_size) { - ctx->state.dirty |= VIEWPORT_DIRTY; - ctx->state.dirty |= DEPTH_STENCIL_DIRTY;/*to reset the scissors*/ - - ctx->pipe->clear(ctx->pipe, PIPE_CLEAR_DEPTHSTENCIL, NULL, 0.0, 0); - - /* we need all the other state already set */ - - setup_new_alpha_mask(ctx, stfb); - - pipe_sampler_view_reference( &stfb->blend_texture_view, NULL); - stfb->blend_texture_view = create_tex_and_view(ctx->pipe, - PIPE_FORMAT_B8G8R8A8_UNORM, stfb->width, stfb->height); - } -} - /** * Flush the front buffer if the current context renders to the front buffer. */ @@ -304,15 +117,16 @@ vg_manager_validate_framebuffer(struct vg_context *ctx) if (!stfb->iface->validate(stfb->iface, &stfb->strb_att, 1, &pt) || !pt) return; - /* - * unset draw_buffer_invalid first because vg_context_update_draw_buffer - * will cause the framebuffer to be validated again because of a call to - * vg_validate_state - */ p_atomic_set(&ctx->draw_buffer_invalid, FALSE); - vg_context_update_draw_buffer(ctx, pt); -} + if (vg_context_update_color_rb(ctx, pt) || + stfb->width != pt->width0 || + stfb->height != pt->height0) + ctx->state.dirty |= FRAMEBUFFER_DIRTY; + + stfb->width = pt->width0; + stfb->height = pt->height0; +} static void vg_context_notify_invalid_framebuffer(struct st_context_iface *stctxi, @@ -336,7 +150,10 @@ static void vg_context_destroy(struct st_context_iface *stctxi) { struct vg_context *ctx = (struct vg_context *) stctxi; + struct pipe_context *pipe = ctx->pipe; + vg_destroy_context(ctx); + pipe->destroy(pipe); } static struct st_context_iface * diff --git a/src/gallium/state_trackers/wgl/SConscript b/src/gallium/state_trackers/wgl/SConscript index ec55f042f90..1b7597de440 100644 --- a/src/gallium/state_trackers/wgl/SConscript +++ b/src/gallium/state_trackers/wgl/SConscript @@ -13,7 +13,6 @@ env.Append(CPPPATH = [ env.AppendUnique(CPPDEFINES = [ '_GDI32_', # prevent wgl* being declared __declspec(dllimport) 'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers - 'WIN32_THREADS', # use Win32 thread API 'WIN32_LEAN_AND_MEAN', # http://msdn2.microsoft.com/en-us/library/6dwk3a1z.aspx ]) @@ -22,6 +21,7 @@ sources = [ 'stw_device.c', 'stw_ext_extensionsstring.c', 'stw_ext_gallium.c', + 'stw_ext_pbuffer.c', 'stw_ext_pixelformat.c', 'stw_ext_swapinterval.c', 'stw_framebuffer.c', diff --git a/src/gallium/state_trackers/wgl/stw_context.c b/src/gallium/state_trackers/wgl/stw_context.c index 86c0a28e8da..cd4f3c8b3e2 100644 --- a/src/gallium/state_trackers/wgl/stw_context.c +++ b/src/gallium/state_trackers/wgl/stw_context.c @@ -264,75 +264,82 @@ stw_make_current( struct stw_context *curctx = NULL; struct stw_context *ctx = NULL; struct stw_framebuffer *fb = NULL; + BOOL ret = FALSE; if (!stw_dev) - goto fail; + return FALSE; curctx = stw_current_context(); if (curctx != NULL) { - if (curctx->dhglrc != dhglrc) + if (curctx->dhglrc == dhglrc) { + if (curctx->hdc == hdc) { + /* Return if already current. 
*/ + return TRUE; + } + } else { curctx->st->flush(curctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); - - /* Return if already current. */ - if (curctx->dhglrc == dhglrc && curctx->hdc == hdc) { - ctx = curctx; - fb = stw_framebuffer_from_hdc( hdc ); - goto success; } - - stw_framebuffer_reference(&curctx->current_framebuffer, NULL); } - if (hdc == NULL || dhglrc == 0) { - return stw_dev->stapi->make_current(stw_dev->stapi, NULL, NULL, NULL); - } - - pipe_mutex_lock( stw_dev->ctx_mutex ); - ctx = stw_lookup_context_locked( dhglrc ); - pipe_mutex_unlock( stw_dev->ctx_mutex ); - if(!ctx) - goto fail; - - fb = stw_framebuffer_from_hdc( hdc ); - if (fb) { - stw_framebuffer_update(fb); - } - else { - /* Applications should call SetPixelFormat before creating a context, - * but not all do, and the opengl32 runtime seems to use a default pixel - * format in some cases, so we must create a framebuffer for those here - */ - int iPixelFormat = GetPixelFormat(hdc); - if(iPixelFormat) - fb = stw_framebuffer_create( hdc, iPixelFormat ); - if(!fb) + if (dhglrc) { + pipe_mutex_lock( stw_dev->ctx_mutex ); + ctx = stw_lookup_context_locked( dhglrc ); + pipe_mutex_unlock( stw_dev->ctx_mutex ); + if (!ctx) { goto fail; - } - - if(fb->iPixelFormat != ctx->iPixelFormat) - goto fail; + } - /* Bind the new framebuffer */ - ctx->hdc = hdc; + fb = stw_framebuffer_from_hdc( hdc ); + if (fb) { + stw_framebuffer_update(fb); + } + else { + /* Applications should call SetPixelFormat before creating a context, + * but not all do, and the opengl32 runtime seems to use a default pixel + * format in some cases, so we must create a framebuffer for those here + */ + int iPixelFormat = GetPixelFormat(hdc); + if (iPixelFormat) + fb = stw_framebuffer_create( hdc, iPixelFormat ); + if (!fb) + goto fail; + } - if (!stw_dev->stapi->make_current(stw_dev->stapi, ctx->st, fb->stfb, fb->stfb)) - goto fail; + if (fb->iPixelFormat != ctx->iPixelFormat) { + SetLastError(ERROR_INVALID_PIXEL_FORMAT); + goto fail; + } - stw_framebuffer_reference(&ctx->current_framebuffer, fb); + /* Bind the new framebuffer */ + ctx->hdc = hdc; -success: - assert(fb); - if(fb) { - stw_framebuffer_release(fb); + ret = stw_dev->stapi->make_current(stw_dev->stapi, ctx->st, fb->stfb, fb->stfb); + stw_framebuffer_reference(&ctx->current_framebuffer, fb); + } else { + ret = stw_dev->stapi->make_current(stw_dev->stapi, NULL, NULL, NULL); } - return TRUE; - fail: - if(fb) + + if (fb) { stw_framebuffer_release(fb); - stw_dev->stapi->make_current(stw_dev->stapi, NULL, NULL, NULL); - return FALSE; + } + + /* On failure, make the thread's current rendering context not current + * before returning */ + if (!ret) { + stw_dev->stapi->make_current(stw_dev->stapi, NULL, NULL, NULL); + ctx = NULL; + } + + /* Unreference the previous framebuffer if any. It must be done after + * make_current, as it can be referenced inside. 
+ */ + if (curctx && curctx != ctx) { + stw_framebuffer_reference(&curctx->current_framebuffer, NULL); + } + + return ret; } /** diff --git a/src/gallium/state_trackers/wgl/stw_device.c b/src/gallium/state_trackers/wgl/stw_device.c index 37809d084ce..c4822d4d8aa 100644 --- a/src/gallium/state_trackers/wgl/stw_device.c +++ b/src/gallium/state_trackers/wgl/stw_device.c @@ -41,9 +41,7 @@ #include "stw_framebuffer.h" #include "stw_st.h" -#ifdef WIN32_THREADS extern _glthread_Mutex OneTimeLock; -#endif struct stw_device *stw_dev = NULL; @@ -76,9 +74,7 @@ stw_init(const struct stw_winsys *stw_winsys) stw_dev->stw_winsys = stw_winsys; -#ifdef WIN32_THREADS _glthread_INIT_MUTEX(OneTimeLock); -#endif stw_dev->stapi = stw_st_create_api(); stw_dev->smapi = CALLOC_STRUCT(st_manager); @@ -96,6 +92,10 @@ stw_init(const struct stw_winsys *stw_winsys) stw_dev->smapi->get_param = stw_get_param; stw_dev->screen = screen; + stw_dev->max_2d_levels = + screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); + stw_dev->max_2d_length = 1 << (stw_dev->max_2d_levels - 1); + pipe_mutex_init( stw_dev->ctx_mutex ); pipe_mutex_init( stw_dev->fb_mutex ); @@ -168,11 +168,9 @@ stw_cleanup(void) stw_dev->screen->destroy(stw_dev->screen); -#ifdef WIN32_THREADS _glthread_DESTROY_MUTEX(OneTimeLock); _glapi_destroy_multithread(); -#endif #ifdef DEBUG debug_memory_end(stw_dev->memdbg_no); diff --git a/src/gallium/state_trackers/wgl/stw_device.h b/src/gallium/state_trackers/wgl/stw_device.h index 1b836960d0d..3c2b6d9c076 100644 --- a/src/gallium/state_trackers/wgl/stw_device.h +++ b/src/gallium/state_trackers/wgl/stw_device.h @@ -50,6 +50,10 @@ struct stw_device struct pipe_screen *screen; + /* Cache some PIPE_CAP_* */ + unsigned max_2d_levels; + unsigned max_2d_length; + struct st_api *stapi; struct st_manager *smapi; diff --git a/src/gallium/state_trackers/wgl/stw_ext_extensionsstring.c b/src/gallium/state_trackers/wgl/stw_ext_extensionsstring.c index 62c859e1f92..ecb326f1cf6 100644 --- a/src/gallium/state_trackers/wgl/stw_ext_extensionsstring.c +++ b/src/gallium/state_trackers/wgl/stw_ext_extensionsstring.c @@ -37,6 +37,7 @@ static const char *stw_extension_string = "WGL_ARB_extensions_string " "WGL_ARB_multisample " + "WGL_ARB_pbuffer " "WGL_ARB_pixel_format " /* "WGL_EXT_swap_interval " */ "WGL_EXT_extensions_string"; diff --git a/src/gallium/state_trackers/wgl/stw_ext_pbuffer.c b/src/gallium/state_trackers/wgl/stw_ext_pbuffer.c new file mode 100644 index 00000000000..32636c603dc --- /dev/null +++ b/src/gallium/state_trackers/wgl/stw_ext_pbuffer.c @@ -0,0 +1,212 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <windows.h> + +#define WGL_WGLEXT_PROTOTYPES + +#include <GL/gl.h> +#include <GL/wglext.h> + +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" + +#include "stw_device.h" +#include "stw_pixelformat.h" +#include "stw_framebuffer.h" + + +HPBUFFERARB WINAPI +wglCreatePbufferARB(HDC _hDC, + int iPixelFormat, + int iWidth, + int iHeight, + const int *piAttribList) +{ + static boolean first = TRUE; + const int *piAttrib; + int useLargest = 0; + const struct stw_pixelformat_info *info; + struct stw_framebuffer *fb; + HWND hWnd; + HDC hDC; + + info = stw_pixelformat_get_info(iPixelFormat); + if (!info) { + SetLastError(ERROR_INVALID_PIXEL_FORMAT); + return 0; + } + + if (iWidth <= 0 || iHeight <= 0) { + SetLastError(ERROR_INVALID_DATA); + return 0; + } + + for (piAttrib = piAttribList; *piAttrib; piAttrib++) { + switch (*piAttrib) { + case WGL_PBUFFER_LARGEST_ARB: + piAttrib++; + useLargest = *piAttrib; + break; + default: + SetLastError(ERROR_INVALID_DATA); + return 0; + } + } + + if (iWidth > stw_dev->max_2d_length) { + if (useLargest) { + iWidth = stw_dev->max_2d_length; + } else { + SetLastError(ERROR_NO_SYSTEM_RESOURCES); + return 0; + } + } + + if (iHeight > stw_dev->max_2d_length) { + if (useLargest) { + iHeight = stw_dev->max_2d_length; + } else { + SetLastError(ERROR_NO_SYSTEM_RESOURCES); + return 0; + } + } + + /* + * Implement pbuffers through invisible windows + */ + + if (first) { + WNDCLASS wc; + memset(&wc, 0, sizeof wc); + wc.hbrBackground = (HBRUSH) (COLOR_BTNFACE + 1); + wc.hCursor = LoadCursor(NULL, IDC_ARROW); + wc.hIcon = LoadIcon(NULL, IDI_APPLICATION); + wc.lpfnWndProc = DefWindowProc; + wc.lpszClassName = "wglpbuffer"; + wc.style = CS_OWNDC | CS_HREDRAW | CS_VREDRAW; + RegisterClass(&wc); + first = FALSE; + } + + hWnd = CreateWindowEx(0, + "wglpbuffer", /* wc.lpszClassName */ + "wglpbuffer", +#if 0 /* Useful for debugging what the application is drawing */ + WS_VISIBLE | +#endif + WS_CLIPSIBLINGS | WS_CLIPCHILDREN, + CW_USEDEFAULT, CW_USEDEFAULT, /* x, y */ + iWidth, iHeight, + NULL, + NULL, + NULL, + NULL); + if (!hWnd) { + return 0; + } + + hDC = GetDC(hWnd); + if (!hDC) { + return 0; + } + + SetPixelFormat(hDC, iPixelFormat, &info->pfd); + + fb = stw_framebuffer_create(hDC, iPixelFormat); + if (!fb) { + SetLastError(ERROR_NO_SYSTEM_RESOURCES); + } + + return (HPBUFFERARB)fb; +} + + +HDC WINAPI +wglGetPbufferDCARB(HPBUFFERARB hPbuffer) +{ + struct stw_framebuffer *fb; + HDC hDC; + + fb = (struct stw_framebuffer *)hPbuffer; + + hDC = GetDC(fb->hWnd); + SetPixelFormat(hDC, fb->iPixelFormat, &fb->pfi->pfd); + + return hDC; +} + + +int WINAPI +wglReleasePbufferDCARB(HPBUFFERARB hPbuffer, + HDC hDC) +{ + struct stw_framebuffer *fb; + + fb = (struct stw_framebuffer *)hPbuffer; + + return ReleaseDC(fb->hWnd, hDC); +} + + +BOOL WINAPI +wglDestroyPbufferARB(HPBUFFERARB hPbuffer) +{ + struct stw_framebuffer *fb; + + fb = (struct stw_framebuffer *)hPbuffer; + + /* This will destroy all our data */ + 
return DestroyWindow(fb->hWnd); +} + + +BOOL WINAPI +wglQueryPbufferARB(HPBUFFERARB hPbuffer, + int iAttribute, + int *piValue) +{ + struct stw_framebuffer *fb; + + fb = (struct stw_framebuffer *)hPbuffer; + + switch (iAttribute) { + case WGL_PBUFFER_WIDTH_ARB: + *piValue = fb->width; + return TRUE; + case WGL_PBUFFER_HEIGHT_ARB: + *piValue = fb->width; + return TRUE; + case WGL_PBUFFER_LOST_ARB: + /* We assume that no content is ever lost due to display mode change */ + *piValue = FALSE; + return TRUE; + default: + SetLastError(ERROR_INVALID_DATA); + return FALSE; + } +} diff --git a/src/gallium/state_trackers/wgl/stw_ext_pixelformat.c b/src/gallium/state_trackers/wgl/stw_ext_pixelformat.c index ab56800e28d..d0a95863bb4 100644 --- a/src/gallium/state_trackers/wgl/stw_ext_pixelformat.c +++ b/src/gallium/state_trackers/wgl/stw_ext_pixelformat.c @@ -43,6 +43,7 @@ #include "pipe/p_compiler.h" #include "util/u_memory.h" +#include "stw_device.h" #include "stw_pixelformat.h" @@ -234,6 +235,23 @@ stw_query_attrib( *pvalue = pfi->stvis.samples; break; + + /* WGL_ARB_pbuffer */ + + case WGL_MAX_PBUFFER_WIDTH_ARB: + case WGL_MAX_PBUFFER_HEIGHT_ARB: + *pvalue = stw_dev->max_2d_length; + break; + + case WGL_MAX_PBUFFER_PIXELS_ARB: + *pvalue = stw_dev->max_2d_length * stw_dev->max_2d_length; + break; + + case WGL_DRAW_TO_PBUFFER_ARB: + *pvalue = 1; + break; + + default: return FALSE; } diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.c b/src/gallium/state_trackers/wgl/stw_framebuffer.c index 259b22f22c7..733222aa21c 100644 --- a/src/gallium/state_trackers/wgl/stw_framebuffer.c +++ b/src/gallium/state_trackers/wgl/stw_framebuffer.c @@ -469,7 +469,7 @@ DrvPresentBuffers(HDC hdc, PGLPRESENTBUFFERSDATA data) { struct stw_framebuffer *fb; struct pipe_screen *screen; - struct pipe_surface *surface; + struct pipe_resource *res; if (!stw_dev) return FALSE; @@ -480,7 +480,7 @@ DrvPresentBuffers(HDC hdc, PGLPRESENTBUFFERSDATA data) screen = stw_dev->screen; - surface = (struct pipe_surface *)data->pPrivateData; + res = (struct pipe_resource *)data->pPrivateData; if(data->hSharedSurface != fb->hSharedSurface) { if(fb->shared_surface) { @@ -498,13 +498,13 @@ DrvPresentBuffers(HDC hdc, PGLPRESENTBUFFERSDATA data) if(fb->shared_surface) { stw_dev->stw_winsys->compose(screen, - surface, + res, fb->shared_surface, &fb->client_rect, data->PresentHistoryToken); } else { - stw_dev->stw_winsys->present( screen, surface, hdc ); + stw_dev->stw_winsys->present( screen, res, hdc ); } stw_framebuffer_update(fb); @@ -524,7 +524,7 @@ DrvPresentBuffers(HDC hdc, PGLPRESENTBUFFERSDATA data) BOOL stw_framebuffer_present_locked(HDC hdc, struct stw_framebuffer *fb, - struct pipe_surface *surface) + struct pipe_resource *res) { if(stw_dev->callbacks.wglCbPresentBuffers && stw_dev->stw_winsys->compose) { @@ -535,7 +535,7 @@ stw_framebuffer_present_locked(HDC hdc, data.magic2 = 0; data.AdapterLuid = stw_dev->AdapterLuid; data.rect = fb->client_rect; - data.pPrivateData = (void *)surface; + data.pPrivateData = (void *)res; stw_notify_current_locked(fb); stw_framebuffer_release(fb); @@ -545,7 +545,7 @@ stw_framebuffer_present_locked(HDC hdc, else { struct pipe_screen *screen = stw_dev->screen; - stw_dev->stw_winsys->present( screen, surface, hdc ); + stw_dev->stw_winsys->present( screen, res, hdc ); stw_framebuffer_update(fb); stw_notify_current_locked(fb); diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.h b/src/gallium/state_trackers/wgl/stw_framebuffer.h index 89d12300e67..6604e545cbc 100644 --- 
a/src/gallium/state_trackers/wgl/stw_framebuffer.h +++ b/src/gallium/state_trackers/wgl/stw_framebuffer.h @@ -32,7 +32,7 @@ #include "os/os_thread.h" -struct pipe_surface; +struct pipe_resource; struct st_framebuffer_iface; struct stw_pixelformat_info; @@ -143,7 +143,7 @@ stw_framebuffer_from_hdc( BOOL stw_framebuffer_present_locked(HDC hdc, struct stw_framebuffer *fb, - struct pipe_surface *surface); + struct pipe_resource *res); void stw_framebuffer_update( diff --git a/src/gallium/state_trackers/wgl/stw_getprocaddress.c b/src/gallium/state_trackers/wgl/stw_getprocaddress.c index d43a55fa9e6..b0aef943d30 100644 --- a/src/gallium/state_trackers/wgl/stw_getprocaddress.c +++ b/src/gallium/state_trackers/wgl/stw_getprocaddress.c @@ -50,6 +50,13 @@ static const struct stw_extension_entry stw_extension_entries[] = { /* WGL_ARB_extensions_string */ STW_EXTENSION_ENTRY( wglGetExtensionsStringARB ), + /* WGL_ARB_pbuffer */ + STW_EXTENSION_ENTRY( wglCreatePbufferARB ), + STW_EXTENSION_ENTRY( wglGetPbufferDCARB ), + STW_EXTENSION_ENTRY( wglReleasePbufferDCARB ), + STW_EXTENSION_ENTRY( wglDestroyPbufferARB ), + STW_EXTENSION_ENTRY( wglQueryPbufferARB ), + /* WGL_ARB_pixel_format */ STW_EXTENSION_ENTRY( wglChoosePixelFormatARB ), STW_EXTENSION_ENTRY( wglGetPixelFormatAttribfvARB ), diff --git a/src/gallium/state_trackers/wgl/stw_pixelformat.c b/src/gallium/state_trackers/wgl/stw_pixelformat.c index 18ac4823696..5ede1d43220 100644 --- a/src/gallium/state_trackers/wgl/stw_pixelformat.c +++ b/src/gallium/state_trackers/wgl/stw_pixelformat.c @@ -185,7 +185,7 @@ stw_pixelformat_add( */ pfi->stvis.buffer_mask = ST_ATTACHMENT_FRONT_LEFT_MASK; if (doublebuffer) - pfi->stvis.buffer_mask = ST_ATTACHMENT_BACK_LEFT_MASK; + pfi->stvis.buffer_mask |= ST_ATTACHMENT_BACK_LEFT_MASK; pfi->stvis.color_format = color->format; pfi->stvis.depth_stencil_format = depth->format; diff --git a/src/gallium/state_trackers/wgl/stw_st.c b/src/gallium/state_trackers/wgl/stw_st.c index bcdd82e4f66..b58d91673b7 100644 --- a/src/gallium/state_trackers/wgl/stw_st.c +++ b/src/gallium/state_trackers/wgl/stw_st.c @@ -43,8 +43,6 @@ struct stw_st_framebuffer { struct pipe_resource *textures[ST_ATTACHMENT_COUNT]; unsigned texture_width, texture_height; unsigned texture_mask; - - struct pipe_surface *front_surface, *back_surface; }; static INLINE struct stw_st_framebuffer * @@ -65,10 +63,6 @@ stw_st_framebuffer_validate_locked(struct st_framebuffer_iface *stfb, struct pipe_resource templ; unsigned i; - /* remove outdated surface */ - pipe_surface_reference(&stwfb->front_surface, NULL); - pipe_surface_reference(&stwfb->back_surface, NULL); - /* remove outdated textures */ if (stwfb->texture_width != width || stwfb->texture_height != height) { for (i = 0; i < ST_ATTACHMENT_COUNT; i++) @@ -80,6 +74,7 @@ stw_st_framebuffer_validate_locked(struct st_framebuffer_iface *stfb, templ.width0 = width; templ.height0 = height; templ.depth0 = 1; + templ.array_size = 1; templ.last_level = 0; for (i = 0; i < ST_ATTACHMENT_COUNT; i++) { @@ -155,45 +150,6 @@ stw_st_framebuffer_validate(struct st_framebuffer_iface *stfb, return TRUE; } -static struct pipe_surface * -get_present_surface_locked(struct st_framebuffer_iface *stfb, - enum st_attachment_type statt) -{ - struct stw_st_framebuffer *stwfb = stw_st_framebuffer(stfb); - struct pipe_resource *ptex; - struct pipe_surface *psurf, **cache; - - ptex = stwfb->textures[statt]; - if (!ptex) - return NULL; - - psurf = NULL; - - switch (statt) { - case ST_ATTACHMENT_FRONT_LEFT: - cache = &stwfb->front_surface; - 
break; - case ST_ATTACHMENT_BACK_LEFT: - cache = &stwfb->back_surface; - break; - default: - cache = &psurf; - break; - } - - if (!*cache) { - *cache = stw_dev->screen->get_tex_surface(stw_dev->screen, - ptex, 0, 0, 0, - PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_RENDER_TARGET); - } - - if (psurf != *cache) - pipe_surface_reference(&psurf, *cache); - - return psurf; -} - /** * Present an attachment of the framebuffer. */ @@ -202,12 +158,11 @@ stw_st_framebuffer_present_locked(struct st_framebuffer_iface *stfb, enum st_attachment_type statt) { struct stw_st_framebuffer *stwfb = stw_st_framebuffer(stfb); - struct pipe_surface *psurf; - - psurf = get_present_surface_locked(&stwfb->base, statt); - if (psurf) { - stw_framebuffer_present_locked(stwfb->fb->hDC, stwfb->fb, psurf); - pipe_surface_reference(&psurf, NULL); + struct pipe_resource *resource; + + resource = stwfb->textures[statt]; + if (resource) { + stw_framebuffer_present_locked(stwfb->fb->hDC, stwfb->fb, resource); } return TRUE; @@ -255,9 +210,6 @@ stw_st_destroy_framebuffer_locked(struct st_framebuffer_iface *stfb) struct stw_st_framebuffer *stwfb = stw_st_framebuffer(stfb); int i; - pipe_surface_reference(&stwfb->front_surface, NULL); - pipe_surface_reference(&stwfb->back_surface, NULL); - for (i = 0; i < ST_ATTACHMENT_COUNT; i++) pipe_resource_reference(&stwfb->textures[i], NULL); @@ -273,7 +225,6 @@ stw_st_swap_framebuffer_locked(struct st_framebuffer_iface *stfb) struct stw_st_framebuffer *stwfb = stw_st_framebuffer(stfb); unsigned front = ST_ATTACHMENT_FRONT_LEFT, back = ST_ATTACHMENT_BACK_LEFT; struct pipe_resource *ptex; - struct pipe_surface *psurf; unsigned mask; /* swap the textures */ @@ -281,11 +232,6 @@ stw_st_swap_framebuffer_locked(struct st_framebuffer_iface *stfb) stwfb->textures[front] = stwfb->textures[back]; stwfb->textures[back] = ptex; - /* swap the surfaces */ - psurf = stwfb->front_surface; - stwfb->front_surface = stwfb->back_surface; - stwfb->back_surface = psurf; - /* convert to mask */ front = 1 << front; back = 1 << back; diff --git a/src/gallium/state_trackers/wgl/stw_winsys.h b/src/gallium/state_trackers/wgl/stw_winsys.h index 270fad56a19..397065fc159 100644 --- a/src/gallium/state_trackers/wgl/stw_winsys.h +++ b/src/gallium/state_trackers/wgl/stw_winsys.h @@ -34,7 +34,7 @@ struct pipe_screen; struct pipe_context; -struct pipe_surface; +struct pipe_resource; struct stw_shared_surface; @@ -43,12 +43,13 @@ struct stw_winsys struct pipe_screen * (*create_screen)( void ); + /* XXX is it actually possible to have non-zero level/layer ??? */ /** * Present the color buffer to the window associated with the device context. 
*/ void (*present)( struct pipe_screen *screen, - struct pipe_surface *surf, + struct pipe_resource *res, HDC hDC ); /** @@ -85,7 +86,7 @@ struct stw_winsys */ void (*compose)( struct pipe_screen *screen, - struct pipe_surface *src, + struct pipe_resource *res, struct stw_shared_surface *dest, LPCRECT pRect, ULONGLONG PresentHistoryToken ); diff --git a/src/gallium/state_trackers/xorg/xorg_composite.c b/src/gallium/state_trackers/xorg/xorg_composite.c index 4ff48026e50..d4dc84a122b 100644 --- a/src/gallium/state_trackers/xorg/xorg_composite.c +++ b/src/gallium/state_trackers/xorg/xorg_composite.c @@ -473,7 +473,7 @@ boolean xorg_composite_bind_state(struct exa_context *exa, struct exa_pixmap_priv *pMask, struct exa_pixmap_priv *pDst) { - struct pipe_surface *dst_surf = xorg_gpu_surface(exa->scrn, pDst); + struct pipe_surface *dst_surf = xorg_gpu_surface(exa->pipe, pDst); renderer_bind_destination(exa->renderer, dst_surf, pDst->width, @@ -529,7 +529,7 @@ boolean xorg_solid_bind_state(struct exa_context *exa, struct exa_pixmap_priv *pixmap, Pixel fg) { - struct pipe_surface *dst_surf = xorg_gpu_surface(exa->scrn, pixmap); + struct pipe_surface *dst_surf = xorg_gpu_surface(exa->pipe, pixmap); unsigned vs_traits, fs_traits; struct xorg_shader shader; diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c index 80af82d97b2..71f7b8c21d0 100644 --- a/src/gallium/state_trackers/xorg/xorg_crtc.c +++ b/src/gallium/state_trackers/xorg/xorg_crtc.c @@ -133,6 +133,16 @@ crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, if (ret) return FALSE; + /* Only set gamma when needed, to avoid unneeded delays. */ +#if defined(XF86_CRTC_VERSION) && XF86_CRTC_VERSION >= 3 + if (!crtc->active) +#endif + crtc->funcs->gamma_set(crtc, crtc->gamma_red, crtc->gamma_green, + crtc->gamma_blue, crtc->gamma_size); + +#if defined(XF86_CRTC_VERSION) && XF86_CRTC_VERSION >= 3 + crtc->active = TRUE; +#endif crtc->x = x; crtc->y = y; crtc->mode = *mode; @@ -145,7 +155,10 @@ static void crtc_gamma_set(xf86CrtcPtr crtc, CARD16 * red, CARD16 * green, CARD16 * blue, int size) { - /* XXX: hockup */ + modesettingPtr ms = modesettingPTR(crtc->scrn); + struct crtc_private *crtcp = crtc->driver_private; + + drmModeCrtcSetGamma(ms->fd, crtcp->drm_crtc->crtc_id, size, red, green, blue); } #if 0 /* Implement and enable to enable rotation and reflection. 
*/ @@ -210,6 +223,7 @@ crtc_load_cursor_argb_ga3d(xf86CrtcPtr crtc, CARD32 * image) templat.target = PIPE_TEXTURE_2D; templat.last_level = 0; templat.depth0 = 1; + templat.array_size = 1; templat.format = PIPE_FORMAT_B8G8R8A8_UNORM; templat.width0 = 64; templat.height0 = 64; @@ -225,9 +239,9 @@ crtc_load_cursor_argb_ga3d(xf86CrtcPtr crtc, CARD32 * image) } transfer = pipe_get_transfer(ms->ctx, crtcp->cursor_tex, - 0, 0, 0, - PIPE_TRANSFER_WRITE, - 0, 0, 64, 64); + 0, 0, + PIPE_TRANSFER_WRITE, + 0, 0, 64, 64); ptr = ms->ctx->transfer_map(ms->ctx, transfer); util_copy_rect(ptr, crtcp->cursor_tex->format, transfer->stride, 0, 0, diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c index b723a8e9cb0..17c34b7eac8 100644 --- a/src/gallium/state_trackers/xorg/xorg_dri2.c +++ b/src/gallium/state_trackers/xorg/xorg_dri2.c @@ -129,6 +129,7 @@ dri2_do_create_buffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int form template.width0 = pDraw->width; template.height0 = pDraw->height; template.depth0 = 1; + template.array_size = 1; template.last_level = 0; template.bind = PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_SHARED; diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c index 1ee79ae177e..33bcacdcc2e 100644 --- a/src/gallium/state_trackers/xorg/xorg_driver.c +++ b/src/gallium/state_trackers/xorg/xorg_driver.c @@ -44,6 +44,7 @@ #include "xf86Crtc.h" #include "miscstruct.h" #include "dixstruct.h" +#include "xf86cmap.h" #include "xf86xv.h" #include "xorgVersion.h" #ifndef XSERVER_LIBPCIACCESS @@ -414,6 +415,7 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags) return FALSE; switch (pScrn->depth) { + case 8: case 15: case 16: case 24: @@ -538,44 +540,37 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags) return TRUE; } -static void drv_block_handler(int i, pointer blockData, pointer pTimeout, - pointer pReadmask) +void xorg_flush(ScreenPtr pScreen) { - ScreenPtr pScreen = screenInfo.screens[i]; modesettingPtr ms = modesettingPTR(xf86Screens[pScreen->myNum]); - pScreen->BlockHandler = ms->blockHandler; - pScreen->BlockHandler(i, blockData, pTimeout, pReadmask); - pScreen->BlockHandler = drv_block_handler; - if (ms->ctx) { - int j; + int j; - ms->ctx->flush(ms->ctx, PIPE_FLUSH_RENDER_CACHE, - ms->dirtyThrottling ? - &ms->fence[XORG_NR_FENCES-1] : - NULL); + ms->ctx->flush(ms->ctx, PIPE_FLUSH_RENDER_CACHE, + ms->dirtyThrottling ? + &ms->fence[XORG_NR_FENCES-1] : + NULL); - if (ms->dirtyThrottling) { - if (ms->fence[0]) - ms->ctx->screen->fence_finish(ms->ctx->screen, - ms->fence[0], 0); + if (ms->dirtyThrottling) { + if (ms->fence[0]) + ms->ctx->screen->fence_finish(ms->ctx->screen, + ms->fence[0], 0); - /* The amount of rendering generated by a block handler can be - * quite small. Let us get a fair way ahead of hardware before - * throttling. - */ - for (j = 0; j < XORG_NR_FENCES - 1; j++) - ms->screen->fence_reference(ms->screen, - &ms->fence[j], - ms->fence[j+1]); - - ms->screen->fence_reference(ms->screen, - &ms->fence[XORG_NR_FENCES-1], - NULL); - } + /* The amount of rendering generated by a block handler can be + * quite small. Let us get a fair way ahead of hardware before + * throttling. 
+ */ + for (j = 0; j < XORG_NR_FENCES - 1; j++) + ms->screen->fence_reference(ms->screen, + &ms->fence[j], + ms->fence[j+1]); + + ms->screen->fence_reference(ms->screen, + &ms->fence[XORG_NR_FENCES-1], + NULL); + } } - #ifdef DRM_MODE_FEATURE_DIRTYFB { @@ -608,6 +603,19 @@ static void drv_block_handler(int i, pointer blockData, pointer pTimeout, #endif } +static void drv_block_handler(int i, pointer blockData, pointer pTimeout, + pointer pReadmask) +{ + ScreenPtr pScreen = screenInfo.screens[i]; + modesettingPtr ms = modesettingPTR(xf86Screens[pScreen->myNum]); + + pScreen->BlockHandler = ms->blockHandler; + pScreen->BlockHandler(i, blockData, pTimeout, pReadmask); + pScreen->BlockHandler = drv_block_handler; + + xorg_flush(pScreen); +} + static Bool drv_create_screen_resources(ScreenPtr pScreen) { @@ -671,6 +679,65 @@ drv_set_master(ScrnInfoPtr pScrn) } +static void drv_load_palette(ScrnInfoPtr pScrn, int numColors, + int *indices, LOCO *colors, VisualPtr pVisual) +{ + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); + modesettingPtr ms = modesettingPTR(pScrn); + int index, j, i; + int c; + + switch(pScrn->depth) { + case 15: + for (i = 0; i < numColors; i++) { + index = indices[i]; + for (j = 0; j < 8; j++) { + ms->lut_r[index * 8 + j] = colors[index].red << 8; + ms->lut_g[index * 8 + j] = colors[index].green << 8; + ms->lut_b[index * 8 + j] = colors[index].blue << 8; + } + } + break; + case 16: + for (i = 0; i < numColors; i++) { + index = indices[i]; + + if (index < 32) { + for (j = 0; j < 8; j++) { + ms->lut_r[index * 8 + j] = colors[index].red << 8; + ms->lut_b[index * 8 + j] = colors[index].blue << 8; + } + } + + for (j = 0; j < 4; j++) { + ms->lut_g[index * 4 + j] = colors[index].green << 8; + } + } + break; + default: + for (i = 0; i < numColors; i++) { + index = indices[i]; + ms->lut_r[index] = colors[index].red << 8; + ms->lut_g[index] = colors[index].green << 8; + ms->lut_b[index] = colors[index].blue << 8; + } + break; + } + + for (c = 0; c < xf86_config->num_crtc; c++) { + xf86CrtcPtr crtc = xf86_config->crtc[c]; + + /* Make the change through RandR */ +#ifdef RANDR_12_INTERFACE + if (crtc->randr_crtc) + RRCrtcGammaSet(crtc->randr_crtc, ms->lut_r, ms->lut_g, ms->lut_b); + else +#endif + crtc->funcs->gamma_set(crtc, ms->lut_r, ms->lut_g, ms->lut_b, 256); + } +} + + static Bool drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) { @@ -810,6 +877,10 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) if (!miCreateDefColormap(pScreen)) return FALSE; + if (!xf86HandleColormaps(pScreen, 256, 8, drv_load_palette, NULL, + CMAP_PALETTED_TRUECOLOR | + CMAP_RELOAD_ON_MODE_SWITCH)) + return FALSE; xf86DPMSInit(pScreen, xf86DPMSSet, 0); diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c index 4b1c02bad42..718a3453939 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.c +++ b/src/gallium/state_trackers/xorg/xorg_exa.c @@ -46,6 +46,8 @@ #include "util/u_math.h" #include "util/u_debug.h" #include "util/u_format.h" +#include "util/u_box.h" +#include "util/u_surface.h" #define DEBUG_PRINT 0 #define ROUND_UP_TEXTURES 1 @@ -188,8 +190,8 @@ ExaDownloadFromScreen(PixmapPtr pPix, int x, int y, int w, int h, char *dst, if (!priv || !priv->tex) return FALSE; - transfer = pipe_get_transfer(exa->pipe, priv->tex, 0, 0, 0, - PIPE_TRANSFER_READ, x, y, w, h); + transfer = pipe_get_transfer(exa->pipe, priv->tex, 0, 0, + PIPE_TRANSFER_READ, x, y, w, h); if (!transfer) return FALSE; @@ -222,8 +224,8 
@@ ExaUploadToScreen(PixmapPtr pPix, int x, int y, int w, int h, char *src, if (!priv || !priv->tex) return FALSE; - transfer = pipe_get_transfer(exa->pipe, priv->tex, 0, 0, 0, - PIPE_TRANSFER_WRITE, x, y, w, h); + transfer = pipe_get_transfer(exa->pipe, priv->tex, 0, 0, + PIPE_TRANSFER_WRITE, x, y, w, h); if (!transfer) return FALSE; @@ -265,7 +267,7 @@ ExaPrepareAccess(PixmapPtr pPix, int index) assert(pPix->drawable.height <= priv->tex->height0); priv->map_transfer = - pipe_get_transfer(exa->pipe, priv->tex, 0, 0, 0, + pipe_get_transfer(exa->pipe, priv->tex, 0, 0, #ifdef EXA_MIXED_PIXMAPS PIPE_TRANSFER_MAP_DIRECTLY | #endif @@ -449,6 +451,7 @@ ExaPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir, exa->copy.use_surface_copy = TRUE; } else { + struct pipe_surface surf_tmpl; exa->copy.use_surface_copy = FALSE; if (exa->copy.dst == exa->copy.src) @@ -458,11 +461,13 @@ ExaPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir, pipe_resource_reference(&exa->copy.src_texture, exa->copy.src->tex); + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + u_surface_default_template(&surf_tmpl, exa->copy.dst->tex, + PIPE_BIND_RENDER_TARGET); exa->copy.dst_surface = - exa->scrn->get_tex_surface(exa->scrn, - exa->copy.dst->tex, - 0, 0, 0, - PIPE_BIND_RENDER_TARGET); + exa->pipe->create_surface(exa->pipe, + exa->copy.dst->tex, + &surf_tmpl); renderer_copy_prepare(exa->renderer, @@ -492,19 +497,14 @@ ExaCopy(PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY, (void) priv; if (exa->copy.use_surface_copy) { - struct pipe_subresource subdst, subsrc; - subdst.face = 0; - subdst.level = 0; - subsrc.face = 0; - subsrc.level = 0; + struct pipe_box src_box; + u_box_2d(srcX, srcY, width, height, &src_box); exa->pipe->resource_copy_region( exa->pipe, exa->copy.dst->tex, - subdst, + 0, dstX, dstY, 0, exa->copy.src->tex, - subsrc, - srcX, srcY, 0, - width, height ); + 0, &src_box); } else { renderer_copy_pixmap(exa->renderer, @@ -874,24 +874,21 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, } template.depth0 = 1; + template.array_size = 1; template.last_level = 0; template.bind = PIPE_BIND_RENDER_TARGET | priv->flags; priv->tex_flags = priv->flags; texture = exa->scrn->resource_create(exa->scrn, &template); if (priv->tex) { - struct pipe_subresource subdst, subsrc; - - subdst.face = 0; - subdst.level = 0; - subsrc.face = 0; - subsrc.level = 0; + struct pipe_box src_box; + u_box_origin_2d(min(width, texture->width0), + min(height, texture->height0), + &src_box); exa->pipe->resource_copy_region(exa->pipe, texture, - subdst, 0, 0, 0, + 0, 0, 0, 0, priv->tex, - subsrc, 0, 0, 0, - min(width, texture->width0), - min(height, texture->height0)); + 0, &src_box); } pipe_resource_reference(&priv->tex, texture); @@ -947,6 +944,7 @@ xorg_exa_create_root_texture(ScrnInfoPtr pScrn, template.width0 = width; template.height0 = height; template.depth0 = 1; + template.array_size = 1; template.last_level = 0; template.bind |= PIPE_BIND_RENDER_TARGET; template.bind |= PIPE_BIND_SCANOUT; @@ -1063,10 +1061,14 @@ out_err: } struct pipe_surface * -xorg_gpu_surface(struct pipe_screen *scrn, struct exa_pixmap_priv *priv) +xorg_gpu_surface(struct pipe_context *pipe, struct exa_pixmap_priv *priv) { - return scrn->get_tex_surface(scrn, priv->tex, 0, 0, 0, - PIPE_BIND_RENDER_TARGET); + struct pipe_surface surf_tmpl; + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + u_surface_default_template(&surf_tmpl, priv->tex, + PIPE_BIND_RENDER_TARGET); + + return pipe->create_surface(pipe, priv->tex, &surf_tmpl); } 
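Editor's note: the xorg_exa.c hunks above repeatedly apply the same two Gallium interface updates seen throughout this patch: surfaces are now created through the context's create_surface() with a template filled in by u_surface_default_template(), and resource_copy_region() takes a destination level/x/y/z plus a pipe_box built with u_box_2d()/u_box_origin_2d() instead of a pipe_subresource pair. The sketch below condenses that pattern into a single hypothetical helper; copy_to_new_rt_surface() is not part of the patch, and the sketch simply assumes the post-change signatures exactly as they appear in these hunks.

/* Hypothetical illustration of the post-change idiom used in xorg_exa.c;
 * copy_to_new_rt_surface() does not exist in the patch itself. */
#include <string.h>

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_box.h"
#include "util/u_surface.h"

static struct pipe_surface *
copy_to_new_rt_surface(struct pipe_context *pipe,
                       struct pipe_resource *dst_tex,
                       struct pipe_resource *src_tex,
                       unsigned w, unsigned h)
{
   struct pipe_surface surf_tmpl;
   struct pipe_box src_box;

   /* resource_copy_region() now takes a destination level and x/y/z plus a
    * pipe_box describing the source region, instead of two pipe_subresource
    * structs and explicit width/height. */
   u_box_2d(0, 0, w, h, &src_box);
   pipe->resource_copy_region(pipe,
                              dst_tex, 0,      /* dst, dst_level */
                              0, 0, 0,         /* dstx, dsty, dstz */
                              src_tex, 0,      /* src, src_level */
                              &src_box);

   /* Surfaces are created by the context from a template; the old
    * screen->get_tex_surface() path is gone. */
   memset(&surf_tmpl, 0, sizeof(surf_tmpl));
   u_surface_default_template(&surf_tmpl, dst_tex, PIPE_BIND_RENDER_TARGET);
   return pipe->create_surface(pipe, dst_tex, &surf_tmpl);
}

The same pair of changes accounts for the edits in xorg_renderer.c, xorg_xv.c and the wgl/vega state trackers later in this diff; only the bind flags and box origins differ per call site.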
diff --git a/src/gallium/state_trackers/xorg/xorg_exa.h b/src/gallium/state_trackers/xorg/xorg_exa.h index 86a1afc06e6..1f78f60be74 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.h +++ b/src/gallium/state_trackers/xorg/xorg_exa.h @@ -72,7 +72,7 @@ do { \ } while(0) struct pipe_surface * -xorg_gpu_surface(struct pipe_screen *scrn, struct exa_pixmap_priv *priv); +xorg_gpu_surface(struct pipe_context *pipe, struct exa_pixmap_priv *priv); void xorg_exa_flush(struct exa_context *exa, uint pipeFlushFlags, struct pipe_fence_handle **fence); diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c index 92f1cc50653..a3d7c5a70e2 100644 --- a/src/gallium/state_trackers/xorg/xorg_renderer.c +++ b/src/gallium/state_trackers/xorg/xorg_renderer.c @@ -10,6 +10,7 @@ #include "util/u_sampler.h" #include "util/u_inlines.h" +#include "util/u_box.h" #include <math.h> @@ -535,6 +536,7 @@ renderer_clone_texture(struct xorg_renderer *r, templ.width0 = src->width0; templ.height0 = src->height0; templ.depth0 = 1; + templ.array_size = 1; templ.bind = PIPE_BIND_SAMPLER_VIEW; pt = screen->resource_create(screen, &templ); @@ -546,19 +548,15 @@ renderer_clone_texture(struct xorg_renderer *r, { /* copy source framebuffer surface into texture */ - struct pipe_subresource subsrc, subdst; - subsrc.face = 0; - subsrc.level = 0; - subdst.face = 0; - subdst.level = 0; + struct pipe_box src_box; + u_box_origin_2d(src->width0, src->height0, &src_box); + pipe->resource_copy_region(pipe, pt, /* dest */ - subdst, + 0, /* dest_level */ 0, 0, 0, /* destx/y/z */ src, - subsrc, - 0, 0, 0, - src->width0, src->height0); + 0, &src_box); } return pt; diff --git a/src/gallium/state_trackers/xorg/xorg_tracker.h b/src/gallium/state_trackers/xorg/xorg_tracker.h index a3fb5e5dad0..664e8c75730 100644 --- a/src/gallium/state_trackers/xorg/xorg_tracker.h +++ b/src/gallium/state_trackers/xorg/xorg_tracker.h @@ -129,6 +129,7 @@ typedef struct _modesettingRec /* kms */ struct kms_driver *kms; struct kms_bo *root_bo; + uint16_t lut_r[256], lut_g[256], lut_b[256]; /* gallium */ struct pipe_screen *screen; @@ -156,6 +157,7 @@ CustomizerPtr xorg_customizer(ScrnInfoPtr pScrn); Bool xorg_has_gallium(ScrnInfoPtr pScrn); +void xorg_flush(ScreenPtr pScreen); /*********************************************************************** * xorg_exa.c */ diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c index f64959f00e9..c72ba9ef8db 100644 --- a/src/gallium/state_trackers/xorg/xorg_xv.c +++ b/src/gallium/state_trackers/xorg/xorg_xv.c @@ -171,6 +171,7 @@ create_component_texture(struct pipe_context *pipe, templ.width0 = width; templ.height0 = height; templ.depth0 = 1; + templ.array_size = 1; templ.bind = PIPE_BIND_SAMPLER_VIEW; tex = screen->resource_create(screen, &templ); @@ -312,17 +313,17 @@ copy_packed_data(ScrnInfoPtr pScrn, int y_array_size = w * h; ytrans = pipe_get_transfer(pipe, dst[0], - 0, 0, 0, - PIPE_TRANSFER_WRITE, - left, top, w, h); + 0, 0, + PIPE_TRANSFER_WRITE, + left, top, w, h); utrans = pipe_get_transfer(pipe, dst[1], - 0, 0, 0, - PIPE_TRANSFER_WRITE, - left, top, w, h); + 0, 0, + PIPE_TRANSFER_WRITE, + left, top, w, h); vtrans = pipe_get_transfer(pipe, dst[2], - 0, 0, 0, - PIPE_TRANSFER_WRITE, - left, top, w, h); + 0, 0, + PIPE_TRANSFER_WRITE, + left, top, w, h); ymap = (char*)pipe->transfer_map(pipe, ytrans); umap = (char*)pipe->transfer_map(pipe, utrans); @@ -533,7 +534,7 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv 
*pPriv, int id, if (!dst || !dst->tex) XORG_FALLBACK("Xv destination %s", !dst ? "!dst" : "!dst->tex"); - dst_surf = xorg_gpu_surface(pPriv->r->pipe->screen, dst); + dst_surf = xorg_gpu_surface(pPriv->r->pipe, dst); hdtv = ((src_w >= RES_720P_X) && (src_h >= RES_720P_Y)); #ifdef COMPOSITE diff --git a/src/gallium/targets/dri-noop/Makefile b/src/gallium/targets/dri-noop/Makefile deleted file mode 100644 index 21c5f4f9f2a..00000000000 --- a/src/gallium/targets/dri-noop/Makefile +++ /dev/null @@ -1,34 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = noop_dri.so - -DRIVER_DEFINES = \ - -D__NOT_HAVE_DRM_H - -PIPE_DRIVERS = \ - $(TOP)/src/gallium/state_trackers/dri/sw/libdrisw.a \ - $(TOP)/src/gallium/winsys/sw/dri/libswdri.a \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ - $(TOP)/src/gallium/drivers/noop/libnoop.a - -SWRAST_COMMON_GALLIUM_SOURCES = \ - $(TOP)/src/mesa/drivers/dri/common/utils.c \ - $(TOP)/src/mesa/drivers/dri/common/drisw_util.c \ - $(TOP)/src/mesa/drivers/dri/common/xmlconfig.c - -C_SOURCES = \ - swrast_drm_api.c \ - $(SWRAST_COMMON_GALLIUM_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - -include ../Makefile.dri - -INCLUDES += \ - -I$(TOP)/src/gallium/winsys/sw/dri - -symlinks: diff --git a/src/gallium/targets/dri-noop/SConscript b/src/gallium/targets/dri-noop/SConscript deleted file mode 100644 index 9c04ee66318..00000000000 --- a/src/gallium/targets/dri-noop/SConscript +++ /dev/null @@ -1,31 +0,0 @@ -Import('*') - -env = drienv.Clone() - -env.Append(CPPPATH = [ - '#/src/gallium/winsys/sw/dri', -]) - -env.Prepend(LIBS = [ - st_drisw, - ws_dri, - noop, - mesa, - glsl, - gallium, - COMMON_DRI_SW_OBJECTS -]) - -env.Prepend(LIBS = [noop]) - -swrastg_sources = [ - 'swrast_drm_api.c' -] - -module = env.LoadableModule( - target ='noop_dri.so', - source = swrastg_sources, - SHLIBPREFIX = '', -) - -env.Alias('dri-noop', module) diff --git a/src/gallium/targets/dri-nouveau/Makefile b/src/gallium/targets/dri-nouveau/Makefile index 2f64f312b84..eb1ee859a00 100644 --- a/src/gallium/targets/dri-nouveau/Makefile +++ b/src/gallium/targets/dri-nouveau/Makefile @@ -10,6 +10,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \ $(TOP)/src/gallium/drivers/nv50/libnv50.a \ + $(TOP)/src/gallium/drivers/nvc0/libnvc0.a \ $(TOP)/src/gallium/drivers/nouveau/libnouveau.a C_SOURCES = \ diff --git a/src/gallium/targets/dri-r600/Makefile b/src/gallium/targets/dri-r600/Makefile index 661283de6a8..c8fae2d8585 100644 --- a/src/gallium/targets/dri-r600/Makefile +++ b/src/gallium/targets/dri-r600/Makefile @@ -9,7 +9,8 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a + $(TOP)/src/gallium/drivers/rbug/librbug.a \ + $(TOP)/src/gallium/drivers/noop/libnoop.a C_SOURCES = \ target.c \ @@ -17,7 +18,7 @@ C_SOURCES = \ $(DRIVER_SOURCES) DRIVER_DEFINES = \ - -DGALLIUM_RBUG -DGALLIUM_TRACE + -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_NOOP include ../Makefile.dri diff --git a/src/gallium/targets/dri-r600/target.c b/src/gallium/targets/dri-r600/target.c index 8753e2bab17..2fe345402de 100644 --- a/src/gallium/targets/dri-r600/target.c +++ b/src/gallium/targets/dri-r600/target.c @@ -1,5 +1,5 @@ #include "state_tracker/drm_driver.h" -#include "target-helpers/inline_debug_helper.h" 
+#include "target-helpers/inline_noop_helper.h" #include "r600/drm/r600_drm_public.h" #include "r600/r600_public.h" diff --git a/src/gallium/targets/dri-vmwgfx/SConscript b/src/gallium/targets/dri-vmwgfx/SConscript index 7888e4f2c8b..17dd0210512 100644 --- a/src/gallium/targets/dri-vmwgfx/SConscript +++ b/src/gallium/targets/dri-vmwgfx/SConscript @@ -2,14 +2,14 @@ Import('*') env = drienv.Clone() -env.Append(CPPDEFINES = ['GALLIUM_RBUG', 'GALLIUM_TRACE']) +if True: + env.Append(CPPDEFINES = ['GALLIUM_TRACE', 'GALLIUM_RBUG', 'GALLIUM_GALAHAD', 'GALLIUM_SOFTPIPE']) + env.Prepend(LIBS = [trace, rbug, galahad, softpipe, ws_wrapper]) env.Prepend(LIBS = [ st_dri, svgadrm, svga, - trace, - rbug, mesa, glsl, gallium, @@ -22,4 +22,4 @@ module = env.LoadableModule( SHLIBPREFIX = '', ) -env.Alias('dri-vmwgfx', module)
\ No newline at end of file +env.Alias('dri-vmwgfx', module) diff --git a/src/gallium/targets/dri-vmwgfx/target.c b/src/gallium/targets/dri-vmwgfx/target.c index 15089d6db26..1362851d6be 100644 --- a/src/gallium/targets/dri-vmwgfx/target.c +++ b/src/gallium/targets/dri-vmwgfx/target.c @@ -1,4 +1,5 @@ +#include "target-helpers/inline_wrapper_sw_helper.h" #include "target-helpers/inline_debug_helper.h" #include "state_tracker/drm_driver.h" #include "svga/drm/svga_drm_public.h" @@ -18,6 +19,8 @@ create_screen(int fd) if (!screen) return NULL; + screen = sw_screen_wrap(screen); + screen = debug_screen_wrap(screen); return screen; diff --git a/src/gallium/targets/egl-gdi/SConscript b/src/gallium/targets/egl-gdi/SConscript deleted file mode 100644 index d52eeb70fd5..00000000000 --- a/src/gallium/targets/egl-gdi/SConscript +++ /dev/null @@ -1,55 +0,0 @@ -####################################################################### -# SConscript for egl-gdi target - -Import('*') - -env = env.Clone() - -env.Append(CPPPATH = [ - '#/src/gallium/state_trackers/egl', - '#/src/gallium/state_trackers/vega', - '#/src/egl/main', - '#/src/mesa', -]) - -env.Append(CPPDEFINES = [ - 'FEATURE_VG=1', - 'GALLIUM_SOFTPIPE', - 'GALLIUM_RBUG', - 'GALLIUM_TRACE', -]) - -env.Append(LIBS = [ - 'gdi32', - 'user32', - 'kernel32', - 'ws2_32', -]) - -env.Prepend(LIBS = [ - st_egl_gdi, - ws_gdi, - identity, - trace, - rbug, - softpipe, - vgapi, - st_vega, - gallium, - egl, -]) - -if env['llvm']: - env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE') - env.Prepend(LIBS = [llvmpipe]) - -egl_gallium = env.SharedLibrary( - target ='egl_gallium', - source = 'egl-static.c', -) - -env['no_import_lib'] = 1 - -egl_gdi = env.InstallSharedLibrary(egl_gallium) - -env.Alias('egl-gdi', egl_gdi) diff --git a/src/gallium/targets/egl-static/SConscript b/src/gallium/targets/egl-static/SConscript new file mode 100644 index 00000000000..381ef4e862d --- /dev/null +++ b/src/gallium/targets/egl-static/SConscript @@ -0,0 +1,127 @@ +####################################################################### +# SConscript for egl-static target + +Import('*') + +env = env.Clone() + +env.Append(CPPPATH = [ + '#/include', + '#/src/egl/main', + '#/src/gallium/auxiliary', + '#/src/gallium/drivers', + '#/src/gallium/include', + '#/src/gallium/winsys', + '#/src/gallium/state_trackers/egl', + '#/src/gallium/state_trackers/vega', + '#/src/mesa', +]) + +env.Append(CPPDEFINES = [ + 'GALLIUM_SOFTPIPE', + 'GALLIUM_RBUG', + 'GALLIUM_TRACE', + 'GALLIUM_GALAHAD', + '_EGL_MAIN=_eglBuiltInDriverGALLIUM', +]) + +env.Prepend(LIBS = [ + softpipe, + rbug, + trace, + galahad, + gallium, + egl, + st_egl, +]) + +if env['llvm']: + env.Append(CPPDEFINES = ['GALLIUM_LLVMPIPE']) + env.Prepend(LIBS = [llvmpipe]) + +sources = [ + 'egl.c', + 'egl_pipe.c', + 'egl_st.c', +] + +if env['platform'] == 'windows': + sources.append('#src/egl/main/egl.def') + + env.Append(LIBS = [ + 'gdi32', + 'user32', + 'kernel32', + 'ws2_32', + ]) + + env.Prepend(LIBS = [ + ws_gdi, + ]) +else: + # OpenGL + env.Append(CPPDEFINES = ['FEATURE_GL=1']) + env.Prepend(LIBS = ['GL', 'talloc', glsl, mesa]) + +# OpenVG +if True: + env.Append(CPPDEFINES = ['FEATURE_VG=1']) + env.Prepend(LIBPATH = [openvg.dir]) + # manually add LIBPREFIX on windows + openvg_name = 'OpenVG' if env['platform'] != 'windows' else 'libOpenVG' + env.Prepend(LIBS = [openvg_name, st_vega]) + +if env['x11']: + env.Prepend(LIBS = [ + ws_xlib, + env['X11_LIBS'], + ]) + +if env['dri']: + env.ParseConfig('pkg-config --cflags --libs xfixes') + +# pipe drivers +if 
env['drm']: + env.ParseConfig('pkg-config --cflags --libs libdrm') + + if env['drm_intel']: + env.ParseConfig('pkg-config --cflags --libs libdrm_intel') + env.Append(CPPDEFINES = ['_EGL_PIPE_I915', '_EGL_PIPE_I965']) + env.Prepend(LIBS = [ + i915drm, + i915, + i965drm, + i965, + ws_wrapper, + ]) + + if env['drm_radeon']: + env.ParseConfig('pkg-config --cflags --libs libdrm_radeon') + env.Append(CPPDEFINES = ['_EGL_PIPE_R300', '_EGL_PIPE_R600']) + env.Prepend(LIBS = [ + radeonwinsys, + r300, + r600winsys, + r600, + ]) + + env.Append(CPPDEFINES = ['_EGL_PIPE_VMWGFX']) + env.Prepend(LIBS = [ + svgadrm, + svga, + ]) + +# libEGL.dll +env['LIBPREFIX'] = 'lib' +env['SHLIBPREFIX'] = 'lib' + +egl_gallium = env.SharedLibrary( + target ='EGL', + source = sources, +) + +env.Depends(egl_gallium, [openvg]) + +egl_gallium = env.InstallSharedLibrary(egl_gallium, version=(1, 4, 0)) + +env.Alias('egl-gallium', egl_gallium) diff --git a/src/gallium/targets/egl-gdi/egl-static.c b/src/gallium/targets/egl-static/egl.c index da6e5ce3396..e617ff50208 100644 --- a/src/gallium/targets/egl-gdi/egl-static.c +++ b/src/gallium/targets/egl-static/egl.c @@ -1,8 +1,8 @@ /* * Mesa 3-D graphics library - * Version: 7.9 + * Version: 7.10 * - * Copyright (C) 2010 LunarG Inc. + * Copyright (C) 2010-2011 LunarG Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,41 +27,29 @@ */ #include "common/egl_g3d_loader.h" -#include "state_tracker/st_gl_api.h" -#include "vg_api.h" -#include "target-helpers/inline_sw_helper.h" -#include "target-helpers/inline_debug_helper.h" #include "egldriver.h" -static struct st_api *stapis[ST_API_COUNT]; +#include "egl_pipe.h" +#include "egl_st.h" + +static struct egl_g3d_loader egl_g3d_loader; + +static struct st_module { + boolean initialized; + struct st_api *stapi; +} st_modules[ST_API_COUNT]; static struct st_api * get_st_api(enum st_api_type api) { - struct st_api *stapi; + struct st_module *stmod = &st_modules[api]; - stapi = stapis[api]; - if (stapi) - return stapi; - - switch (api) { -#if FEATURE_GL || FEATURE_ES1 || FEATURE_ES2 - case ST_API_OPENGL: - stapi = st_gl_api_create(); - break; -#endif -#if FEATURE_VG - case ST_API_OPENVG: - stapi = (struct st_api *) vg_api_get(); - break; -#endif - default: - break; + if (!stmod->initialized) { + stmod->stapi = egl_st_create_api(api); + stmod->initialized = TRUE; } - stapis[api] = stapi; - - return stapi; + return stmod->stapi; } static struct st_api * @@ -73,71 +61,69 @@ guess_gl_api(enum st_profile_type profile) static struct pipe_screen * create_drm_screen(const char *name, int fd) { - return NULL; + return egl_pipe_create_drm_screen(name, fd); } static struct pipe_screen * create_sw_screen(struct sw_winsys *ws) { - struct pipe_screen *screen; - - screen = sw_screen_create(ws); - if (screen) - screen = debug_screen_wrap(screen); - - return screen; + return egl_pipe_create_swrast_screen(ws); } -static void -init_loader(struct egl_g3d_loader *loader) +static const struct egl_g3d_loader * +loader_init(void) { -#if FEATURE_GL - loader->profile_masks[ST_API_OPENGL] |= ST_PROFILE_DEFAULT_MASK; -#endif -#if FEATURE_ES1 - loader->profile_masks[ST_API_OPENGL] |= ST_PROFILE_OPENGL_ES1_MASK; -#endif -#if FEATURE_ES2 - loader->profile_masks[ST_API_OPENGL] |= ST_PROFILE_OPENGL_ES2_MASK; -#endif -#if FEATURE_VG - loader->profile_masks[ST_API_OPENVG] |= ST_PROFILE_DEFAULT_MASK; -#endif - - loader->get_st_api = get_st_api; - loader->guess_gl_api = 
guess_gl_api; - loader->create_drm_screen = create_drm_screen; - loader->create_sw_screen = create_sw_screen; + int i; + + for (i = 0; i < ST_API_COUNT; i++) + egl_g3d_loader.profile_masks[i] = egl_st_get_profile_mask(i); + + egl_g3d_loader.get_st_api = get_st_api; + egl_g3d_loader.guess_gl_api = guess_gl_api; + egl_g3d_loader.create_drm_screen = create_drm_screen; + egl_g3d_loader.create_sw_screen = create_sw_screen; + + return &egl_g3d_loader; } static void -egl_g3d_unload(_EGLDriver *drv) +loader_fini(void) { int i; - egl_g3d_destroy_driver(drv); - for (i = 0; i < ST_API_COUNT; i++) { - if (stapis[i]) { - stapis[i]->destroy(stapis[i]); - stapis[i] = NULL; + struct st_module *stmod = &st_modules[i]; + + if (stmod->stapi) { + stmod->stapi->destroy(stmod->stapi); + stmod->stapi = NULL; } + stmod->initialized = FALSE; } } -static struct egl_g3d_loader loader; +static void +egl_g3d_unload(_EGLDriver *drv) +{ + egl_g3d_destroy_driver(drv); + loader_fini(); +} _EGLDriver * -_eglMain(const char *args) +_EGL_MAIN(const char *args) { + const struct egl_g3d_loader *loader; _EGLDriver *drv; - init_loader(&loader); - drv = egl_g3d_create_driver(&loader); - if (drv) { - drv->Name = "Gallium"; - drv->Unload = egl_g3d_unload; + loader = loader_init(); + drv = egl_g3d_create_driver(loader); + if (!drv) { + loader_fini(); + return NULL; } + drv->Name = "Gallium"; + drv->Unload = egl_g3d_unload; + return drv; } diff --git a/src/gallium/targets/egl-static/egl_pipe.c b/src/gallium/targets/egl-static/egl_pipe.c new file mode 100644 index 00000000000..a33d419e0aa --- /dev/null +++ b/src/gallium/targets/egl-static/egl_pipe.c @@ -0,0 +1,215 @@ +/* + * Mesa 3-D graphics library + * Version: 7.10 + * + * Copyright (C) 2011 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu <[email protected]> + */ +#include "target-helpers/inline_debug_helper.h" +#include "target-helpers/inline_sw_helper.h" +#include "egl_pipe.h" + +/* for i915 */ +#include "i915/drm/i915_drm_public.h" +#include "i915/i915_public.h" +/* for i965 */ +#include "target-helpers/inline_wrapper_sw_helper.h" +#include "i965/drm/i965_drm_public.h" +#include "i965/brw_public.h" +/* for nouveau */ +#include "nouveau/drm/nouveau_drm_public.h" +/* for r300 */ +#include "radeon/drm/radeon_drm_public.h" +#include "r300/r300_public.h" +/* for r600 */ +#include "r600/drm/r600_drm_public.h" +#include "r600/r600_public.h" +/* for vmwgfx */ +#include "svga/drm/svga_drm_public.h" +#include "svga/svga_public.h" + +static struct pipe_screen * +pipe_i915_create_screen(int fd) +{ +#if _EGL_PIPE_I915 + struct i915_winsys *iws; + struct pipe_screen *screen; + + iws = i915_drm_winsys_create(fd); + if (!iws) + return NULL; + + screen = i915_screen_create(iws); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +#else + return NULL; +#endif +} + +static struct pipe_screen * +pipe_i965_create_screen(int fd) +{ +#if _EGL_PIPE_I965 + struct brw_winsys_screen *bws; + struct pipe_screen *screen; + + bws = i965_drm_winsys_screen_create(fd); + if (!bws) + return NULL; + + screen = brw_screen_create(bws); + if (!screen) + return NULL; + + screen = sw_screen_wrap(screen); + + screen = debug_screen_wrap(screen); + + return screen; +#else + return NULL; +#endif +} + +static struct pipe_screen * +pipe_nouveau_create_screen(int fd) +{ +#if _EGL_PIPE_NOUVEAU + struct pipe_screen *screen; + + screen = nouveau_drm_screen_create(fd); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +#else + return NULL; +#endif +} + +static struct pipe_screen * +pipe_r300_create_screen(int fd) +{ +#if _EGL_PIPE_R300 + struct r300_winsys_screen *sws; + struct pipe_screen *screen; + + sws = r300_drm_winsys_screen_create(fd); + if (!sws) + return NULL; + + screen = r300_screen_create(sws); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +#else + return NULL; +#endif +} + +static struct pipe_screen * +pipe_r600_create_screen(int fd) +{ +#if _EGL_PIPE_R600 + struct radeon *rw; + struct pipe_screen *screen; + + rw = r600_drm_winsys_create(fd); + if (!rw) + return NULL; + + screen = r600_screen_create(rw); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +#else + return NULL; +#endif +} + +static struct pipe_screen * +pipe_vmwgfx_create_screen(int fd) +{ +#if _EGL_PIPE_VMWGFX + struct svga_winsys_screen *sws; + struct pipe_screen *screen; + + sws = svga_drm_winsys_screen_create(fd); + if (!sws) + return NULL; + + screen = svga_screen_create(sws); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +#else + return NULL; +#endif +} + +struct pipe_screen * +egl_pipe_create_drm_screen(const char *name, int fd) +{ + if (strcmp(name, "i915") == 0) + return pipe_i915_create_screen(fd); + else if (strcmp(name, "i965") == 0) + return pipe_i965_create_screen(fd); + else if (strcmp(name, "nouveau") == 0) + return pipe_nouveau_create_screen(fd); + else if (strcmp(name, "r300") == 0) + return pipe_r300_create_screen(fd); + else if (strcmp(name, "r600") == 0) + return pipe_r600_create_screen(fd); + else if (strcmp(name, "vmwgfx") == 0) + return pipe_vmwgfx_create_screen(fd); + else + return NULL; +} + +struct pipe_screen * 
+egl_pipe_create_swrast_screen(struct sw_winsys *ws) +{ + struct pipe_screen *screen; + + screen = sw_screen_create(ws); + if (screen) + screen = debug_screen_wrap(screen); + + return screen; +} diff --git a/src/gallium/targets/egl-static/egl_pipe.h b/src/gallium/targets/egl-static/egl_pipe.h new file mode 100644 index 00000000000..569b2d067c4 --- /dev/null +++ b/src/gallium/targets/egl-static/egl_pipe.h @@ -0,0 +1,40 @@ +/* + * Mesa 3-D graphics library + * Version: 7.10 + * + * Copyright (C) 2011 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <[email protected]> + */ +#ifndef _EGL_PIPE_H_ +#define _EGL_PIPE_H_ + +struct pipe_screen; +struct sw_winsys; + +struct pipe_screen * +egl_pipe_create_drm_screen(const char *name, int fd); + +struct pipe_screen * +egl_pipe_create_swrast_screen(struct sw_winsys *ws); + +#endif /* _EGL_PIPE_H_ */ diff --git a/src/gallium/targets/egl-static/egl_st.c b/src/gallium/targets/egl-static/egl_st.c new file mode 100644 index 00000000000..3db52621def --- /dev/null +++ b/src/gallium/targets/egl-static/egl_st.c @@ -0,0 +1,105 @@ +/* + * Mesa 3-D graphics library + * Version: 7.10 + * + * Copyright (C) 2011 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu <[email protected]> + */ +#include "util/u_debug.h" +#include "state_tracker/st_api.h" +#include "egl_st.h" + +/* for st/mesa */ +#include "state_tracker/st_gl_api.h" +/* for st/vega */ +#include "vg_api.h" + +static struct st_api * +st_GL_create_api(void) +{ +#if FEATURE_GL || FEATURE_ES1 || FEATURE_ES2 + return st_gl_api_create(); +#else + return NULL; +#endif +} + +static struct st_api * +st_OpenVG_create_api(void) +{ +#if FEATURE_VG + return (struct st_api *) vg_api_get(); +#else + return NULL; +#endif +} + +struct st_api * +egl_st_create_api(enum st_api_type api) +{ + struct st_api *stapi; + + switch (api) { + case ST_API_OPENGL: + stapi = st_GL_create_api(); + break; + case ST_API_OPENVG: + stapi = st_OpenVG_create_api(); + break; + default: + assert(!"Unknown API Type\n"); + stapi = NULL; + break; + } + + return stapi; +} + +uint +egl_st_get_profile_mask(enum st_api_type api) +{ + uint mask = 0x0; + + switch (api) { + case ST_API_OPENGL: +#if FEATURE_GL + mask |= ST_PROFILE_DEFAULT_MASK; +#endif +#if FEATURE_ES1 + mask |= ST_PROFILE_OPENGL_ES1_MASK; +#endif +#if FEATURE_ES2 + mask |= ST_PROFILE_OPENGL_ES2_MASK; +#endif + break; + case ST_API_OPENVG: +#if FEATURE_VG + mask |= ST_PROFILE_DEFAULT_MASK; +#endif + break; + default: + break; + } + + return mask; +} diff --git a/src/gallium/targets/egl-static/egl_st.h b/src/gallium/targets/egl-static/egl_st.h new file mode 100644 index 00000000000..ba82faf0b0e --- /dev/null +++ b/src/gallium/targets/egl-static/egl_st.h @@ -0,0 +1,40 @@ +/* + * Mesa 3-D graphics library + * Version: 7.10 + * + * Copyright (C) 2011 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
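[Illustrative sketch, not part of the patch.] egl_st.c exposes two entry points: egl_st_get_profile_mask() reports which profiles were compiled in (driven by the FEATURE_GL/FEATURE_ES1/FEATURE_ES2/FEATURE_VG defines), and egl_st_create_api() instantiates the matching st_api. A loader could combine them roughly as below; the helper name is invented.

#include "egl_st.h"

static struct st_api *
example_create_gl_for_es2(void)
{
   uint mask = egl_st_get_profile_mask(ST_API_OPENGL);

   if (!(mask & ST_PROFILE_OPENGL_ES2_MASK))
      return NULL;   /* built without FEATURE_ES2 */

   return egl_st_create_api(ST_API_OPENGL);
}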
+ * + * Authors: + * Chia-I Wu <[email protected]> + */ +#ifndef _EGL_ST_H_ +#define _EGL_ST_H_ + +#include "pipe/p_compiler.h" +#include "state_tracker/st_api.h" + +struct st_api * +egl_st_create_api(enum st_api_type api); + +uint +egl_st_get_profile_mask(enum st_api_type api); + +#endif /* _EGL_ST_H_ */ diff --git a/src/gallium/targets/egl/Makefile b/src/gallium/targets/egl/Makefile index 63e9352144e..017c1952141 100644 --- a/src/gallium/targets/egl/Makefile +++ b/src/gallium/targets/egl/Makefile @@ -57,12 +57,6 @@ endif ifneq ($(filter $(GL_LIB), $(EGL_CLIENT_APIS)),) egl_CPPFLAGS += $(API_DEFINES) endif -ifneq ($(filter $(GLESv1_CM_LIB), $(EGL_CLIENT_APIS)),) -egl_CPPFLAGS += -DFEATURE_ES1=1 -endif -ifneq ($(filter $(GLESv2_LIB), $(EGL_CLIENT_APIS)),) -egl_CPPFLAGS += -DFEATURE_ES2=1 -endif ifneq ($(filter $(VG_LIB), $(EGL_CLIENT_APIS)),) egl_CPPFLAGS += -DFEATURE_VG=1 endif @@ -80,7 +74,8 @@ i965_CPPFLAGS := i965_SYS := -ldrm_intel i965_LIBS := \ $(TOP)/src/gallium/winsys/i965/drm/libi965drm.a \ - $(TOP)/src/gallium/drivers/i965/libi965.a + $(TOP)/src/gallium/drivers/i965/libi965.a \ + $(TOP)/src/gallium/winsys/sw/wrapper/libwsw.a # nouveau pipe driver nouveau_CPPFLAGS := @@ -89,6 +84,7 @@ nouveau_LIBS := \ $(TOP)/src/gallium/winsys/nouveau/drm/libnouveaudrm.a \ $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \ $(TOP)/src/gallium/drivers/nv50/libnv50.a \ + $(TOP)/src/gallium/drivers/nvc0/libnvc0.a \ $(TOP)/src/gallium/drivers/nouveau/libnouveau.a # r300 pipe driver @@ -127,20 +123,10 @@ endif # OpenGL state tracker GL_CPPFLAGS := -I$(TOP)/src/mesa $(API_DEFINES) -# do not link to $(GL_LIB) as the it supports GLES too +# cannot not link to $(GL_LIB) as the app might want GLES GL_SYS := $(DRI_LIB_DEPS) GL_LIBS := $(TOP)/src/mesa/libmesagallium.a -# OpenGL ES 1.x state tracker -GLESv1_CM_CPPFLAGS := -I$(TOP)/src/mesa -GLESv1_CM_SYS := $(DRI_LIB_DEPS) -l$(GLESv1_CM_LIB) -GLESv1_CM_LIBS := $(TOP)/src/mesa/libes1gallium.a - -# OpenGL ES 2.x state tracker -GLESv2_CPPFLAGS := -I$(TOP)/src/mesa -GLESv2_SYS := $(DRI_LIB_DEPS) -l$(GLESv2_LIB) -GLESv2_LIBS := $(TOP)/src/mesa/libes2gallium.a - # OpenVG state tracker OpenVG_CPPFLAGS := -I$(TOP)/src/gallium/state_trackers/vega OpenVG_SYS := -lm -l$(VG_LIB) @@ -225,12 +211,6 @@ $(OUTPUT_PATH)/$(PIPE_PREFIX)swrast.so: pipe_swrast.o $(swrast_LIBS) $(OUTPUT_PATH)/$(ST_PREFIX)$(GL_LIB).so: st_GL.o $(GL_LIBS) $(call mklib-cxx,GL) -$(OUTPUT_PATH)/$(ST_PREFIX)$(GLESv1_CM_LIB).so: st_GLESv1_CM.o $(GLESv1_CM_LIBS) - $(call mklib-cxx,GLESv1_CM) - -$(OUTPUT_PATH)/$(ST_PREFIX)$(GLESv2_LIB).so: st_GLESv2.o $(GLESv2_LIBS) - $(call mklib-cxx,GLESv2) - $(OUTPUT_PATH)/$(ST_PREFIX)$(VG_LIB).so: st_OpenVG.o $(OpenVG_LIBS) $(call mklib,OpenVG) diff --git a/src/gallium/targets/egl/egl.c b/src/gallium/targets/egl/egl.c index 786d5d1105e..61fe5069e91 100644 --- a/src/gallium/targets/egl/egl.c +++ b/src/gallium/targets/egl/egl.c @@ -100,9 +100,14 @@ load_st_module(struct st_module *stmod, { struct st_api *(*create_api)(void); - _eglLog(_EGL_DEBUG, "searching for st module %s", name); + if (name) { + _eglLog(_EGL_DEBUG, "searching for st module %s", name); + stmod->name = loader_strdup(name); + } + else { + stmod->name = NULL; + } - stmod->name = loader_strdup(name); if (stmod->name) _eglSearchPathForEach(dlopen_st_module_cb, (void *) stmod); else @@ -200,19 +205,7 @@ get_st_api_full(enum st_api_type api, enum st_profile_type profile) switch (api) { case ST_API_OPENGL: symbol = ST_CREATE_OPENGL_SYMBOL; - switch (profile) { - case ST_PROFILE_OPENGL_ES1: - names[count++] = 
"GLESv1_CM"; - names[count++] = "GL"; - break; - case ST_PROFILE_OPENGL_ES2: - names[count++] = "GLESv2"; - names[count++] = "GL"; - break; - default: - names[count++] = "GL"; - break; - } + names[count++] = "GL"; break; case ST_API_OPENVG: symbol = ST_CREATE_OPENVG_SYMBOL; @@ -232,6 +225,21 @@ get_st_api_full(enum st_api_type api, enum st_profile_type profile) break; } + /* try again with libGL.so loaded */ + if (!stmod->stapi && api == ST_API_OPENGL) { + struct util_dl_library *glapi = util_dl_open("libGL" UTIL_DL_EXT); + + if (glapi) { + _eglLog(_EGL_DEBUG, "retry with libGL" UTIL_DL_EXT " loaded"); + /* skip the last name (which is NULL) */ + for (i = 0; i < count - 1; i++) { + if (load_st_module(stmod, names[i], symbol)) + break; + } + util_dl_close(glapi); + } + } + if (!stmod->stapi) { EGLint level = (egl_g3d_loader.profile_masks[api]) ? _EGL_WARNING : _EGL_DEBUG; diff --git a/src/gallium/targets/egl/st_GLESv1_CM.c b/src/gallium/targets/egl/st_GLESv1_CM.c deleted file mode 100644 index c1df844aa43..00000000000 --- a/src/gallium/targets/egl/st_GLESv1_CM.c +++ /dev/null @@ -1,8 +0,0 @@ -#include "state_tracker/st_gl_api.h" -#include "egl.h" - -PUBLIC struct st_api * -st_api_create_OpenGL(void) -{ - return st_gl_api_create(); -} diff --git a/src/gallium/targets/egl/st_GLESv2.c b/src/gallium/targets/egl/st_GLESv2.c deleted file mode 100644 index c1df844aa43..00000000000 --- a/src/gallium/targets/egl/st_GLESv2.c +++ /dev/null @@ -1,8 +0,0 @@ -#include "state_tracker/st_gl_api.h" -#include "egl.h" - -PUBLIC struct st_api * -st_api_create_OpenGL(void) -{ - return st_gl_api_create(); -} diff --git a/src/gallium/targets/graw-null/graw_util.c b/src/gallium/targets/graw-null/graw_util.c index e5cf526d33a..09cba895d2a 100644 --- a/src/gallium/targets/graw-null/graw_util.c +++ b/src/gallium/targets/graw-null/graw_util.c @@ -1,6 +1,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_context.h" +#include "pipe/p_shader_tokens.h" #include "pipe/p_state.h" #include "tgsi/tgsi_text.h" #include "util/u_debug.h" diff --git a/src/gallium/targets/libgl-gdi/libgl_gdi.c b/src/gallium/targets/libgl-gdi/libgl_gdi.c index 1d6e664eabd..112904ab5fe 100644 --- a/src/gallium/targets/libgl-gdi/libgl_gdi.c +++ b/src/gallium/targets/libgl-gdi/libgl_gdi.c @@ -100,7 +100,7 @@ no_winsys: static void gdi_present(struct pipe_screen *screen, - struct pipe_surface *surface, + struct pipe_resource *res, HDC hDC) { /* This will fail if any interposing layer (trace, debug, etc) has @@ -119,14 +119,14 @@ gdi_present(struct pipe_screen *screen, #ifdef HAVE_LLVMPIPE if (use_llvmpipe) { winsys = llvmpipe_screen(screen)->winsys; - dt = llvmpipe_resource(surface->texture)->dt; + dt = llvmpipe_resource(res)->dt; gdi_sw_display(winsys, dt, hDC); return; } #endif winsys = softpipe_screen(screen)->winsys, - dt = softpipe_resource(surface->texture)->dt, + dt = softpipe_resource(res)->dt, gdi_sw_display(winsys, dt, hDC); } diff --git a/src/gallium/targets/libgl-xlib/xlib.c b/src/gallium/targets/libgl-xlib/xlib.c index 9a3e0e07b0d..1a5892b94a0 100644 --- a/src/gallium/targets/libgl-xlib/xlib.c +++ b/src/gallium/targets/libgl-xlib/xlib.c @@ -35,7 +35,6 @@ #include "state_tracker/xlib_sw_winsys.h" #include "xm_public.h" -#include "state_tracker/st_api.h" #include "state_tracker/st_gl_api.h" #include "target-helpers/inline_sw_helper.h" #include "target-helpers/inline_debug_helper.h" diff --git a/src/gallium/targets/xorg-vmwgfx/vmw_video.c b/src/gallium/targets/xorg-vmwgfx/vmw_video.c index 94465e52043..7979209993d 100644 --- 
a/src/gallium/targets/xorg-vmwgfx/vmw_video.c +++ b/src/gallium/targets/xorg-vmwgfx/vmw_video.c @@ -362,8 +362,10 @@ vmw_video_close(struct vmw_customizer *vmw) /* make sure the port is stoped as well */ vmw_xv_stop_video(pScrn, &video->port[i], TRUE); vmw_ioctl_unref_stream(vmw, video->port[i].streamId); + REGION_UNINIT(pScreen, &video->port[i].clipBoxes); } + /* XXX: I'm sure this function is missing code for turning off Xv */ free(vmw->video_priv); @@ -448,7 +450,16 @@ vmw_video_init_adaptor(ScrnInfoPtr pScrn, struct vmw_customizer *vmw) vmw->video_priv = video; adaptor->type = XvInputMask | XvImageMask | XvWindowMask; - adaptor->flags = VIDEO_OVERLAID_IMAGES | VIDEO_CLIP_TO_VIEWPORT; + + /** + * Note: CLIP_TO_VIEWPORT was removed from the flags, since with the + * crtc/output based modesetting, the viewport is not updated on + * RandR modeswitches. Hence the video may incorrectly be clipped away. + * The correct approach, (if needed) would be to clip against the + * scanout area union of all active crtcs. Revisit if needed. + */ + + adaptor->flags = VIDEO_OVERLAID_IMAGES; adaptor->name = "VMware Video Engine"; adaptor->nEncodings = VMWARE_VID_NUM_ENCODINGS; adaptor->pEncodings = vmwareVideoEncodings; @@ -463,6 +474,7 @@ vmw_video_init_adaptor(ScrnInfoPtr pScrn, struct vmw_customizer *vmw) video->port[i].flags = SVGA_VIDEO_FLAG_COLORKEY; video->port[i].colorKey = VMWARE_VIDEO_COLORKEY; video->port[i].isAutoPaintColorkey = TRUE; + REGION_NULL(pScrn->pScreen, &video->port[i].clipBoxes); adaptor->pPortPrivates[i].ptr = &video->port[i]; } @@ -554,7 +566,9 @@ vmw_video_port_init(ScrnInfoPtr pScrn, struct vmw_video_port *port, REGION_COPY(pScrn->pScreen, &port->clipBoxes, clipBoxes); if (port->isAutoPaintColorkey) - xf86XVFillKeyHelper(pScrn->pScreen, port->colorKey, clipBoxes); + xf86XVFillKeyHelper(pScrn->pScreen, port->colorKey, clipBoxes); + + xorg_flush(pScrn->pScreen); return port->play(pScrn, port, src_x, src_y, drw_x, drw_y, src_w, src_h, drw_w, drw_h, format, buf, width, height, clipBoxes); @@ -641,11 +655,12 @@ vmw_video_port_play(ScrnInfoPtr pScrn, struct vmw_video_port *port, */ if (!REGION_EQUAL(pScrn->pScreen, &port->clipBoxes, clipBoxes)) { REGION_COPY(pScrn->pScreen, &port->clipBoxes, clipBoxes); - if (port->isAutoPaintColorkey) { + if (port->isAutoPaintColorkey) xf86XVFillKeyHelper(pScrn->pScreen, port->colorKey, clipBoxes); - } } + xorg_flush(pScrn->pScreen); + ret = drmCommandWrite(vmw->fd, DRM_VMW_CONTROL_STREAM, &arg, sizeof(arg)); if (ret) { vmw_video_port_cleanup(pScrn, port); @@ -865,6 +880,8 @@ vmw_xv_stop_video(ScrnInfoPtr pScrn, pointer data, Bool cleanup) if (!vmw->video_priv) return; + REGION_EMPTY(pScrn->pScreen, &port->clipBoxes); + if (!cleanup) return; diff --git a/src/gallium/tests/graw/clear.c b/src/gallium/tests/graw/clear.c index ee4581ef1ed..1ff80cadeec 100644 --- a/src/gallium/tests/graw/clear.c +++ b/src/gallium/tests/graw/clear.c @@ -20,6 +20,7 @@ static const int HEIGHT = 300; struct pipe_screen *screen; struct pipe_context *ctx; struct pipe_surface *surf; +struct pipe_resource *tex; static void *window = NULL; static void draw( void ) @@ -31,13 +32,14 @@ static void draw( void ) graw_save_surface_to_file(ctx, surf, NULL); - screen->flush_frontbuffer(screen, surf, window); + screen->flush_frontbuffer(screen, tex, 0, 0, window); } static void init( void ) { struct pipe_framebuffer_state fb; - struct pipe_resource *tex, templat; + struct pipe_resource templat; + struct pipe_surface surf_tmpl; int i; /* It's hard to say whether window or screen should be 
created @@ -66,6 +68,7 @@ static void init( void ) templat.width0 = WIDTH; templat.height0 = HEIGHT; templat.depth0 = 1; + templat.array_size = 1; templat.last_level = 0; templat.nr_samples = 1; templat.bind = (PIPE_BIND_RENDER_TARGET | @@ -76,9 +79,12 @@ static void init( void ) if (tex == NULL) exit(4); - surf = screen->get_tex_surface(screen, tex, 0, 0, 0, - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET); + surf_tmpl.format = templat.format; + surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; + surf_tmpl.u.tex.level = 0; + surf_tmpl.u.tex.first_layer = 0; + surf_tmpl.u.tex.last_layer = 0; + surf = ctx->create_surface(ctx, tex, &surf_tmpl); if (surf == NULL) exit(5); diff --git a/src/gallium/tests/graw/fs-test.c b/src/gallium/tests/graw/fs-test.c index 19af83fda88..37be2d0830c 100644 --- a/src/gallium/tests/graw/fs-test.c +++ b/src/gallium/tests/graw/fs-test.c @@ -119,6 +119,7 @@ static void init_fs_constbuf( void ) templat.width0 = sizeof(constants1); templat.height0 = 1; templat.depth0 = 1; + templat.array_size = 1; templat.last_level = 0; templat.nr_samples = 1; templat.bind = PIPE_BIND_CONSTANT_BUFFER; @@ -139,7 +140,7 @@ static void init_fs_constbuf( void ) ctx->transfer_inline_write(ctx, constbuf1, - u_subresource(0,0), + 0, PIPE_TRANSFER_WRITE, &box, constants1, @@ -156,7 +157,7 @@ static void init_fs_constbuf( void ) ctx->transfer_inline_write(ctx, constbuf2, - u_subresource(0,0), + 0, PIPE_TRANSFER_WRITE, &box, constants2, @@ -280,7 +281,7 @@ static void draw( void ) graw_save_surface_to_file(ctx, surf, NULL); - screen->flush_frontbuffer(screen, surf, window); + screen->flush_frontbuffer(screen, rttex, 0, 0, window); } #define SIZE 16 @@ -340,6 +341,7 @@ static void init_tex( void ) templat.width0 = SIZE; templat.height0 = SIZE; templat.depth0 = 1; + templat.array_size = 1; templat.last_level = 0; templat.nr_samples = 1; templat.bind = PIPE_BIND_SAMPLER_VIEW; @@ -354,7 +356,7 @@ static void init_tex( void ) ctx->transfer_inline_write(ctx, samptex, - u_subresource(0,0), + 0, PIPE_TRANSFER_WRITE, &box, tex2d, @@ -368,7 +370,7 @@ static void init_tex( void ) struct pipe_transfer *t; uint32_t *ptr; t = pipe_get_transfer(ctx, samptex, - 0, 0, 0, /* face, level, zslice */ + 0, 0, /* level, layer */ PIPE_TRANSFER_READ, 0, 0, SIZE, SIZE); /* x, y, width, height */ @@ -387,8 +389,6 @@ static void init_tex( void ) memset(&sv_template, 0, sizeof sv_template); sv_template.format = samptex->format; sv_template.texture = samptex; - sv_template.first_level = 0; - sv_template.last_level = 0; sv_template.swizzle_r = 0; sv_template.swizzle_g = 1; sv_template.swizzle_b = 2; @@ -424,6 +424,7 @@ static void init( void ) { struct pipe_framebuffer_state fb; struct pipe_resource templat; + struct pipe_surface surf_tmpl; int i; /* It's hard to say whether window or screen should be created @@ -450,6 +451,7 @@ static void init( void ) templat.width0 = WIDTH; templat.height0 = HEIGHT; templat.depth0 = 1; + templat.array_size = 1; templat.last_level = 0; templat.nr_samples = 1; templat.bind = (PIPE_BIND_RENDER_TARGET | @@ -460,9 +462,12 @@ static void init( void ) if (rttex == NULL) exit(4); - surf = screen->get_tex_surface(screen, rttex, 0, 0, 0, - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET); + surf_tmpl.format = templat.format; + surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; + surf_tmpl.u.tex.level = 0; + surf_tmpl.u.tex.first_layer = 0; + surf_tmpl.u.tex.last_layer = 0; + surf = ctx->create_surface(ctx, rttex, &surf_tmpl); if (surf == NULL) exit(5); diff --git a/src/gallium/tests/graw/gs-test.c 
b/src/gallium/tests/graw/gs-test.c index ef29f134980..812666a8c84 100644 --- a/src/gallium/tests/graw/gs-test.c +++ b/src/gallium/tests/graw/gs-test.c @@ -156,6 +156,7 @@ static void init_fs_constbuf( void ) templat.width0 = sizeof(constants1); templat.height0 = 1; templat.depth0 = 1; + templat.array_size = 1; templat.last_level = 0; templat.nr_samples = 1; templat.bind = PIPE_BIND_CONSTANT_BUFFER; @@ -172,7 +173,7 @@ static void init_fs_constbuf( void ) ctx->transfer_inline_write(ctx, constbuf1, - u_subresource(0,0), + 0, PIPE_TRANSFER_WRITE, &box, constants1, @@ -189,7 +190,7 @@ static void init_fs_constbuf( void ) ctx->transfer_inline_write(ctx, constbuf2, - u_subresource(0,0), + 0, PIPE_TRANSFER_WRITE, &box, constants2, @@ -344,7 +345,7 @@ static void draw( void ) graw_save_surface_to_file(ctx, surf, NULL); - screen->flush_frontbuffer(screen, surf, window); + screen->flush_frontbuffer(screen, rttex, 0, 0, window); } #define SIZE 16 @@ -404,6 +405,7 @@ static void init_tex( void ) templat.width0 = SIZE; templat.height0 = SIZE; templat.depth0 = 1; + templat.array_size = 1; templat.last_level = 0; templat.nr_samples = 1; templat.bind = PIPE_BIND_SAMPLER_VIEW; @@ -418,7 +420,7 @@ static void init_tex( void ) ctx->transfer_inline_write(ctx, samptex, - u_subresource(0,0), + 0, PIPE_TRANSFER_WRITE, &box, tex2d, @@ -432,7 +434,7 @@ static void init_tex( void ) struct pipe_transfer *t; uint32_t *ptr; t = pipe_get_transfer(ctx, samptex, - 0, 0, 0, /* face, level, zslice */ + 0, 0, /* level, layer */ PIPE_TRANSFER_READ, 0, 0, SIZE, SIZE); /* x, y, width, height */ @@ -451,8 +453,6 @@ static void init_tex( void ) memset(&sv_template, 0, sizeof sv_template); sv_template.format = samptex->format; sv_template.texture = samptex; - sv_template.first_level = 0; - sv_template.last_level = 0; sv_template.swizzle_r = 0; sv_template.swizzle_g = 1; sv_template.swizzle_b = 2; @@ -488,6 +488,7 @@ static void init( void ) { struct pipe_framebuffer_state fb; struct pipe_resource templat; + struct pipe_surface surf_tmpl; int i; /* It's hard to say whether window or screen should be created @@ -514,6 +515,7 @@ static void init( void ) templat.width0 = WIDTH; templat.height0 = HEIGHT; templat.depth0 = 1; + templat.array_size = 1; templat.last_level = 0; templat.nr_samples = 1; templat.bind = (PIPE_BIND_RENDER_TARGET | @@ -524,9 +526,12 @@ static void init( void ) if (rttex == NULL) exit(4); - surf = screen->get_tex_surface(screen, rttex, 0, 0, 0, - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET); + surf_tmpl.format = templat.format; + surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; + surf_tmpl.u.tex.level = 0; + surf_tmpl.u.tex.first_layer = 0; + surf_tmpl.u.tex.last_layer = 0; + surf = ctx->create_surface(ctx, rttex, &surf_tmpl); if (surf == NULL) exit(5); diff --git a/src/gallium/tests/graw/quad-tex.c b/src/gallium/tests/graw/quad-tex.c index 35eade939e6..952131d765b 100644 --- a/src/gallium/tests/graw/quad-tex.c +++ b/src/gallium/tests/graw/quad-tex.c @@ -151,7 +151,7 @@ static void draw( void ) graw_save_surface_to_file(ctx, surf, NULL); - screen->flush_frontbuffer(screen, surf, window); + screen->flush_frontbuffer(screen, rttex, 0, 0, window); } #define SIZE 16 @@ -211,6 +211,7 @@ static void init_tex( void ) templat.width0 = SIZE; templat.height0 = SIZE; templat.depth0 = 1; + templat.array_size = 1; templat.last_level = 0; templat.nr_samples = 1; templat.bind = PIPE_BIND_SAMPLER_VIEW; @@ -225,7 +226,7 @@ static void init_tex( void ) ctx->transfer_inline_write(ctx, samptex, - u_subresource(0,0), + 0, 
PIPE_TRANSFER_WRITE, &box, tex2d, @@ -239,7 +240,7 @@ static void init_tex( void ) struct pipe_transfer *t; uint32_t *ptr; t = pipe_get_transfer(ctx, samptex, - 0, 0, 0, /* face, level, zslice */ + 0, 0, /* level, layer */ PIPE_TRANSFER_READ, 0, 0, SIZE, SIZE); /* x, y, width, height */ @@ -258,8 +259,6 @@ static void init_tex( void ) memset(&sv_template, 0, sizeof sv_template); sv_template.format = samptex->format; sv_template.texture = samptex; - sv_template.first_level = 0; - sv_template.last_level = 0; sv_template.swizzle_r = 0; sv_template.swizzle_g = 1; sv_template.swizzle_b = 2; @@ -295,6 +294,7 @@ static void init( void ) { struct pipe_framebuffer_state fb; struct pipe_resource templat; + struct pipe_surface surf_tmpl; int i; /* It's hard to say whether window or screen should be created @@ -321,6 +321,7 @@ static void init( void ) templat.width0 = WIDTH; templat.height0 = HEIGHT; templat.depth0 = 1; + templat.array_size = 1; templat.last_level = 0; templat.nr_samples = 1; templat.bind = (PIPE_BIND_RENDER_TARGET | @@ -331,9 +332,12 @@ static void init( void ) if (rttex == NULL) exit(4); - surf = screen->get_tex_surface(screen, rttex, 0, 0, 0, - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET); + surf_tmpl.format = templat.format; + surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; + surf_tmpl.u.tex.level = 0; + surf_tmpl.u.tex.first_layer = 0; + surf_tmpl.u.tex.last_layer = 0; + surf = ctx->create_surface(ctx, rttex, &surf_tmpl); if (surf == NULL) exit(5); diff --git a/src/gallium/tests/graw/shader-leak.c b/src/gallium/tests/graw/shader-leak.c index 0a6c362d172..b53f0a046ca 100644 --- a/src/gallium/tests/graw/shader-leak.c +++ b/src/gallium/tests/graw/shader-leak.c @@ -28,6 +28,7 @@ static const int HEIGHT = 300; static struct pipe_screen *screen = NULL; static struct pipe_context *ctx = NULL; static struct pipe_surface *surf = NULL; +static struct pipe_resource *tex = NULL; static void *window = NULL; struct vertex { @@ -155,7 +156,7 @@ static void draw( void ) ctx->delete_fs_state(ctx, fs); } - screen->flush_frontbuffer(screen, surf, window); + screen->flush_frontbuffer(screen, tex, 0, 0, window); ctx->destroy(ctx); exit(0); @@ -165,7 +166,8 @@ static void draw( void ) static void init( void ) { struct pipe_framebuffer_state fb; - struct pipe_resource *tex, templat; + struct pipe_resource templat; + struct pipe_surface surf_tmpl; int i; /* It's hard to say whether window or screen should be created @@ -203,11 +205,16 @@ static void init( void ) exit(4); } - surf = screen->get_tex_surface(screen, tex, 0, 0, 0, - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET); - if (surf == NULL) + surf_tmpl.format = templat.format; + surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; + surf_tmpl.u.tex.level = 0; + surf_tmpl.u.tex.first_layer = 0; + surf_tmpl.u.tex.last_layer = 0; + surf = ctx->create_surface(ctx, tex, &surf_tmpl); + if (surf == NULL) { + fprintf(stderr, "Unable to create tex surface!\n"); exit(5); + } memset(&fb, 0, sizeof fb); fb.nr_cbufs = 1; diff --git a/src/gallium/tests/graw/tri-gs.c b/src/gallium/tests/graw/tri-gs.c index 731c4e10cfd..84ff3e67735 100644 --- a/src/gallium/tests/graw/tri-gs.c +++ b/src/gallium/tests/graw/tri-gs.c @@ -23,6 +23,7 @@ static const int HEIGHT = 300; static struct pipe_screen *screen = NULL; static struct pipe_context *ctx = NULL; static struct pipe_surface *surf = NULL; +static struct pipe_resource *tex = NULL; static void *window = NULL; struct vertex { @@ -164,14 +165,15 @@ static void draw( void ) util_draw_arrays(ctx, PIPE_PRIM_TRIANGLES, 0, 3); 
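[Illustrative sketch, not part of the patch.] The graw test updates in this range all follow the same interface migration: resource templates now set array_size = 1, render-target surfaces are created from the context with a pipe_surface template instead of screen->get_tex_surface(), and flush_frontbuffer() takes the resource plus level/layer rather than a surface. The condensed helper below restates that surface-creation idiom; the function name and the defensive memset are additions for the example, not code from the patch.

#include <string.h>
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"

static struct pipe_surface *
example_create_rt_surface(struct pipe_context *ctx, struct pipe_resource *tex)
{
   struct pipe_surface surf_tmpl;

   memset(&surf_tmpl, 0, sizeof(surf_tmpl));
   surf_tmpl.format = tex->format;
   surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
   surf_tmpl.u.tex.level = 0;
   surf_tmpl.u.tex.first_layer = 0;
   surf_tmpl.u.tex.last_layer = 0;

   return ctx->create_surface(ctx, tex, &surf_tmpl);
}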
ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); - screen->flush_frontbuffer(screen, surf, window); + screen->flush_frontbuffer(screen, tex, 0, 0, window); } static void init( void ) { struct pipe_framebuffer_state fb; - struct pipe_resource *tex, templat; + struct pipe_resource templat; + struct pipe_surface surf_tmpl; int i; /* It's hard to say whether window or screen should be created @@ -198,6 +200,7 @@ static void init( void ) templat.width0 = WIDTH; templat.height0 = HEIGHT; templat.depth0 = 1; + templat.array_size = 1; templat.last_level = 0; templat.nr_samples = 1; templat.bind = (PIPE_BIND_RENDER_TARGET | @@ -208,9 +211,12 @@ static void init( void ) if (tex == NULL) exit(4); - surf = screen->get_tex_surface(screen, tex, 0, 0, 0, - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET); + surf_tmpl.format = templat.format; + surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; + surf_tmpl.u.tex.level = 0; + surf_tmpl.u.tex.first_layer = 0; + surf_tmpl.u.tex.last_layer = 0; + surf = ctx->create_surface(ctx, tex, &surf_tmpl); if (surf == NULL) exit(5); diff --git a/src/gallium/tests/graw/tri-instanced.c b/src/gallium/tests/graw/tri-instanced.c index 76443811629..f33c061b22b 100644 --- a/src/gallium/tests/graw/tri-instanced.c +++ b/src/gallium/tests/graw/tri-instanced.c @@ -27,6 +27,7 @@ static const int HEIGHT = 300; static struct pipe_screen *screen = NULL; static struct pipe_context *ctx = NULL; static struct pipe_surface *surf = NULL; +static struct pipe_resource *tex = NULL; static void *window = NULL; struct vertex { @@ -216,14 +217,15 @@ static void draw( void ) graw_save_surface_to_file(ctx, surf, NULL); - screen->flush_frontbuffer(screen, surf, window); + screen->flush_frontbuffer(screen, tex, 0, 0, window); } static void init( void ) { struct pipe_framebuffer_state fb; - struct pipe_resource *tex, templat; + struct pipe_resource templat; + struct pipe_surface surf_tmpl; int i; /* It's hard to say whether window or screen should be created @@ -250,6 +252,7 @@ static void init( void ) templat.width0 = WIDTH; templat.height0 = HEIGHT; templat.depth0 = 1; + templat.array_size = 1; templat.last_level = 0; templat.nr_samples = 1; templat.bind = (PIPE_BIND_RENDER_TARGET | @@ -260,9 +263,12 @@ static void init( void ) if (tex == NULL) exit(4); - surf = screen->get_tex_surface(screen, tex, 0, 0, 0, - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET); + surf_tmpl.format = templat.format; + surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; + surf_tmpl.u.tex.level = 0; + surf_tmpl.u.tex.first_layer = 0; + surf_tmpl.u.tex.last_layer = 0; + surf = ctx->create_surface(ctx, tex, &surf_tmpl); if (surf == NULL) exit(5); diff --git a/src/gallium/tests/graw/tri.c b/src/gallium/tests/graw/tri.c index 025a1470dc9..2742c7c99e0 100644 --- a/src/gallium/tests/graw/tri.c +++ b/src/gallium/tests/graw/tri.c @@ -25,6 +25,7 @@ static const int HEIGHT = 300; static struct pipe_screen *screen = NULL; static struct pipe_context *ctx = NULL; static struct pipe_surface *surf = NULL; +static struct pipe_resource *tex = NULL; static void *window = NULL; struct vertex { @@ -144,14 +145,15 @@ static void draw( void ) graw_save_surface_to_file(ctx, surf, NULL); - screen->flush_frontbuffer(screen, surf, window); + screen->flush_frontbuffer(screen, tex, 0, 0, window); } static void init( void ) { struct pipe_framebuffer_state fb; - struct pipe_resource *tex, templat; + struct pipe_resource templat; + struct pipe_surface surf_tmpl; int i; /* It's hard to say whether window or screen should be created @@ -180,6 +182,7 @@ static 
void init( void ) templat.width0 = WIDTH; templat.height0 = HEIGHT; templat.depth0 = 1; + templat.array_size = 1; templat.last_level = 0; templat.nr_samples = 1; templat.bind = (PIPE_BIND_RENDER_TARGET | @@ -192,11 +195,14 @@ static void init( void ) exit(4); } - surf = screen->get_tex_surface(screen, tex, 0, 0, 0, - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET); + surf_tmpl.format = templat.format; + surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; + surf_tmpl.u.tex.level = 0; + surf_tmpl.u.tex.first_layer = 0; + surf_tmpl.u.tex.last_layer = 0; + surf = ctx->create_surface(ctx, tex, &surf_tmpl); if (surf == NULL) { - fprintf(stderr, "Unable to get tex surface!\n"); + fprintf(stderr, "Unable to create tex surface!\n"); exit(5); } diff --git a/src/gallium/tests/graw/vs-test.c b/src/gallium/tests/graw/vs-test.c index 440c40bdcda..58908f38a23 100644 --- a/src/gallium/tests/graw/vs-test.c +++ b/src/gallium/tests/graw/vs-test.c @@ -87,6 +87,7 @@ static void init_fs_constbuf( void ) templat.width0 = sizeof(constants); templat.height0 = 1; templat.depth0 = 1; + templat.array_size = 1; templat.last_level = 0; templat.nr_samples = 1; templat.bind = PIPE_BIND_CONSTANT_BUFFER; @@ -101,7 +102,7 @@ static void init_fs_constbuf( void ) ctx->transfer_inline_write(ctx, constbuf, - u_subresource(0,0), + 0, PIPE_TRANSFER_WRITE, &box, constants, @@ -231,7 +232,7 @@ static void draw( void ) graw_save_surface_to_file(ctx, surf, NULL); - screen->flush_frontbuffer(screen, surf, window); + screen->flush_frontbuffer(screen, rttex, 0, 0, window); } #define SIZE 16 @@ -291,6 +292,7 @@ static void init_tex( void ) templat.width0 = SIZE; templat.height0 = SIZE; templat.depth0 = 1; + templat.array_size = 1; templat.last_level = 0; templat.nr_samples = 1; templat.bind = PIPE_BIND_SAMPLER_VIEW; @@ -305,7 +307,7 @@ static void init_tex( void ) ctx->transfer_inline_write(ctx, samptex, - u_subresource(0,0), + 0, PIPE_TRANSFER_WRITE, &box, tex2d, @@ -319,7 +321,7 @@ static void init_tex( void ) struct pipe_transfer *t; uint32_t *ptr; t = pipe_get_transfer(ctx, samptex, - 0, 0, 0, /* face, level, zslice */ + 0, 0, /* level, layer */ PIPE_TRANSFER_READ, 0, 0, SIZE, SIZE); /* x, y, width, height */ @@ -338,8 +340,6 @@ static void init_tex( void ) memset(&sv_template, 0, sizeof sv_template); sv_template.format = samptex->format; sv_template.texture = samptex; - sv_template.first_level = 0; - sv_template.last_level = 0; sv_template.swizzle_r = 0; sv_template.swizzle_g = 1; sv_template.swizzle_b = 2; @@ -375,6 +375,7 @@ static void init( void ) { struct pipe_framebuffer_state fb; struct pipe_resource templat; + struct pipe_surface surf_tmpl; int i; /* It's hard to say whether window or screen should be created @@ -401,6 +402,7 @@ static void init( void ) templat.width0 = WIDTH; templat.height0 = HEIGHT; templat.depth0 = 1; + templat.array_size = 1; templat.last_level = 0; templat.nr_samples = 1; templat.bind = (PIPE_BIND_RENDER_TARGET | @@ -411,9 +413,12 @@ static void init( void ) if (rttex == NULL) exit(4); - surf = screen->get_tex_surface(screen, rttex, 0, 0, 0, - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET); + surf_tmpl.format = templat.format; + surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; + surf_tmpl.u.tex.level = 0; + surf_tmpl.u.tex.first_layer = 0; + surf_tmpl.u.tex.last_layer = 0; + surf = ctx->create_surface(ctx, rttex, &surf_tmpl); if (surf == NULL) exit(5); diff --git a/src/gallium/tests/python/retrace/interpreter.py b/src/gallium/tests/python/retrace/interpreter.py index 954a701a53f..84371223f66 100755 --- 
a/src/gallium/tests/python/retrace/interpreter.py +++ b/src/gallium/tests/python/retrace/interpreter.py @@ -251,14 +251,6 @@ class Screen(Object): def texture_release(self, surface): pass - def get_tex_surface(self, texture, face, level, zslice, usage): - if texture is None: - return None - return texture.get_surface(face, level, zslice) - - def tex_surface_destroy(self, surface): - self.interpreter.unregister_object(surface) - def tex_surface_release(self, surface): pass @@ -282,7 +274,7 @@ class Screen(Object): def fence_reference(self, dst, src): pass - def flush_frontbuffer(self, surface): + def flush_frontbuffer(self, resource): pass @@ -581,7 +573,7 @@ class Context(Object): if transfer and usage & gallium.PIPE_TRANSFER_READ: if self.interpreter.options.all: surface = texture.get_surface(sr.face, sr.level, box.z) - self.interpreter.present(self.real, transfer.surface, 'transf_read', box.x, box.y, box.w, box.h) + self.interpreter.present(self.real, transfer.surface, 'transf_read', box.x, box.y, box.width, box.height) return transfer def tex_transfer_destroy(self, transfer): @@ -589,6 +581,10 @@ class Context(Object): def transfer_inline_write(self, resource, sr, usage, box, stride, slice_stride, data): self.real.transfer_inline_write(resource, sr, usage, box, data, stride, slice_stride) + if self.interpreter.options.all: + for z in range(box.z, box.z + box.depth): + surface = resource.get_surface(sr.face, sr.level, box.z) + self.interpreter.present(self.real, surface, 'transf_inline_write%u' % z, box.x, box.y, box.width, box.height) def _set_dirty(self): if self.interpreter.options.step: @@ -627,7 +623,13 @@ class Context(Object): if self.zsbuf: if self.interpreter.options.all: self.interpreter.present(self.real, self.zsbuf, "zsbuf") - + def create_surface(self, texture, level, layer, usage): + if texture is None: + return None + return texture.get_surface(level, layer) + + def surface_destroy(self, surface): + self.interpreter.unregister_object(surface) class Interpreter(parser.TraceDumper): diff --git a/src/gallium/tests/trivial/quad-tex.c b/src/gallium/tests/trivial/quad-tex.c index cf88edcdc56..92c5b4dbb18 100644 --- a/src/gallium/tests/trivial/quad-tex.c +++ b/src/gallium/tests/trivial/quad-tex.c @@ -92,6 +92,7 @@ struct program static void init_prog(struct program *p) { + struct pipe_surface surf_tmpl; /* create the software rasterizer */ p->screen = sw_screen_create(null_sw_create()); /* wrap the screen with any debugger */ @@ -141,6 +142,7 @@ static void init_prog(struct program *p) tmplt.width0 = WIDTH; tmplt.height0 = HEIGHT; tmplt.depth0 = 1; + tmplt.array_size = 1; tmplt.last_level = 0; tmplt.bind = PIPE_BIND_RENDER_TARGET; @@ -153,7 +155,6 @@ static void init_prog(struct program *p) struct pipe_transfer *t; struct pipe_resource t_tmplt; struct pipe_sampler_view v_tmplt; - struct pipe_subresource sub; struct pipe_box box; memset(&t_tmplt, 0, sizeof(t_tmplt)); @@ -162,17 +163,17 @@ static void init_prog(struct program *p) t_tmplt.width0 = 2; t_tmplt.height0 = 2; t_tmplt.depth0 = 1; + t_tmplt.array_size = 1; t_tmplt.last_level = 0; t_tmplt.bind = PIPE_BIND_RENDER_TARGET; p->tex = p->screen->resource_create(p->screen, &t_tmplt); - memset(&sub, 0, sizeof(sub)); memset(&box, 0, sizeof(box)); box.width = 2; box.height = 2; - t = p->pipe->get_transfer(p->pipe, p->tex, sub, PIPE_TRANSFER_WRITE, &box); + t = p->pipe->get_transfer(p->pipe, p->tex, 0, PIPE_TRANSFER_WRITE, &box); ptr = p->pipe->transfer_map(p->pipe, t); ptr[0] = 0xffff0000; @@ -210,12 +211,17 @@ static void 
init_prog(struct program *p) p->sampler.mag_img_filter = PIPE_TEX_MIPFILTER_LINEAR; p->sampler.normalized_coords = 1; + surf_tmpl.format = templat.format; + surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; + surf_tmpl.u.tex.level = 0; + surf_tmpl.u.tex.first_layer = 0; + surf_tmpl.u.tex.last_layer = 0; /* drawing destination */ memset(&p->framebuffer, 0, sizeof(p->framebuffer)); p->framebuffer.width = WIDTH; p->framebuffer.height = HEIGHT; p->framebuffer.nr_cbufs = 1; - p->framebuffer.cbufs[0] = p->screen->get_tex_surface(p->screen, p->target, 0, 0, 0, PIPE_BIND_RENDER_TARGET); + p->framebuffer.cbufs[0] = p->pipe->create_surface(p->pipe, p->target, &surf_tmpl); /* viewport, depth isn't really needed */ { diff --git a/src/gallium/tests/trivial/tri.c b/src/gallium/tests/trivial/tri.c index 667a27b28ab..37c1573051f 100644 --- a/src/gallium/tests/trivial/tri.c +++ b/src/gallium/tests/trivial/tri.c @@ -87,6 +87,7 @@ struct program static void init_prog(struct program *p) { + struct pipe_surface surf_tmpl; /* create the software rasterizer */ p->screen = sw_screen_create(null_sw_create()); /* wrap the screen with any debugger */ @@ -132,6 +133,7 @@ static void init_prog(struct program *p) tmplt.width0 = WIDTH; tmplt.height0 = HEIGHT; tmplt.depth0 = 1; + tmplt.array_size = 1; tmplt.last_level = 0; tmplt.bind = PIPE_BIND_RENDER_TARGET; @@ -150,12 +152,17 @@ static void init_prog(struct program *p) p->rasterizer.cull_face = PIPE_FACE_NONE; p->rasterizer.gl_rasterization_rules = 1; + surf_tmpl.format = templat.format; + surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; + surf_tmpl.u.tex.level = 0; + surf_tmpl.u.tex.first_layer = 0; + surf_tmpl.u.tex.last_layer = 0; /* drawing destination */ memset(&p->framebuffer, 0, sizeof(p->framebuffer)); p->framebuffer.width = WIDTH; p->framebuffer.height = HEIGHT; p->framebuffer.nr_cbufs = 1; - p->framebuffer.cbufs[0] = p->screen->get_tex_surface(p->screen, p->target, 0, 0, 0, PIPE_BIND_RENDER_TARGET); + p->framebuffer.cbufs[0] = p->pipe->create_surface(p->pipe, p->target, &surf_tmpl); /* viewport, depth isn't really needed */ { diff --git a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c index c6daa52a379..ebe86dcf196 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c +++ b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c @@ -14,9 +14,6 @@ #define INTEL_BATCH_CLIPRECTS 0x2 #undef INTEL_RUN_SYNC -#undef INTEL_MAP_BATCHBUFFER -#undef INTEL_MAP_GTT -#define INTEL_ALWAYS_FLUSH struct i915_drm_batchbuffer { @@ -72,11 +69,7 @@ i915_drm_batchbuffer_create(struct i915_winsys *iws) batch->actual_size = idws->max_batch_size; -#ifdef INTEL_MAP_BATCHBUFFER - batch->base.map = NULL; -#else batch->base.map = MALLOC(batch->actual_size); -#endif batch->base.ptr = NULL; batch->base.size = 0; @@ -94,7 +87,7 @@ static int i915_drm_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch, struct i915_winsys_buffer *buffer, enum i915_winsys_buffer_usage usage, - unsigned pre_add) + unsigned pre_add, bool fenced) { struct i915_drm_batchbuffer *batch = i915_drm_batchbuffer(ibatch); unsigned write_domain = 0; @@ -104,37 +97,44 @@ i915_drm_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch, assert(batch->base.relocs < batch->base.max_relocs); - if (usage == I915_USAGE_SAMPLER) { + switch (usage) { + case I915_USAGE_SAMPLER: write_domain = 0; read_domain = I915_GEM_DOMAIN_SAMPLER; - - } else if (usage == I915_USAGE_RENDER) { + break; + case I915_USAGE_RENDER: write_domain = I915_GEM_DOMAIN_RENDER; read_domain = 
I915_GEM_DOMAIN_RENDER; - - } else if (usage == I915_USAGE_2D_TARGET) { + break; + case I915_USAGE_2D_TARGET: write_domain = I915_GEM_DOMAIN_RENDER; read_domain = I915_GEM_DOMAIN_RENDER; - - } else if (usage == I915_USAGE_2D_SOURCE) { + break; + case I915_USAGE_2D_SOURCE: write_domain = 0; read_domain = I915_GEM_DOMAIN_RENDER; - - } else if (usage == I915_USAGE_VERTEX) { + break; + case I915_USAGE_VERTEX: write_domain = 0; read_domain = I915_GEM_DOMAIN_VERTEX; - - } else { + break; + default: assert(0); return -1; } offset = (unsigned)(batch->base.ptr - batch->base.map); - ret = drm_intel_bo_emit_reloc(batch->bo, offset, - intel_bo(buffer), pre_add, - read_domain, - write_domain); + if (fenced) + ret = drm_intel_bo_emit_reloc_fence(batch->bo, offset, + intel_bo(buffer), pre_add, + read_domain, + write_domain); + else + ret = drm_intel_bo_emit_reloc(batch->bo, offset, + intel_bo(buffer), pre_add, + read_domain, + write_domain); ((uint32_t*)batch->base.ptr)[0] = intel_bo(buffer)->offset + pre_add; batch->base.ptr += 4; @@ -150,70 +150,32 @@ i915_drm_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch, struct pipe_fence_handle **fence) { struct i915_drm_batchbuffer *batch = i915_drm_batchbuffer(ibatch); - unsigned used = 0; - int ret = 0; + unsigned used; + int ret; - assert(i915_winsys_batchbuffer_space(ibatch) >= 0); + /* MI_BATCH_BUFFER_END */ + i915_winsys_batchbuffer_dword_unchecked(ibatch, (0xA<<23)); used = batch->base.ptr - batch->base.map; - assert((used & 3) == 0); - - -#ifdef INTEL_ALWAYS_FLUSH - /* MI_FLUSH | FLUSH_MAP_CACHE */ - i915_winsys_batchbuffer_dword(ibatch, (0x4<<23)|(1<<0)); - used += 4; -#endif - - if ((used & 4) == 0) { + if (used & 4) { /* MI_NOOP */ - i915_winsys_batchbuffer_dword(ibatch, 0); + i915_winsys_batchbuffer_dword_unchecked(ibatch, 0); + used += 4; } - /* MI_BATCH_BUFFER_END */ - i915_winsys_batchbuffer_dword(ibatch, (0xA<<23)); - - used = batch->base.ptr - batch->base.map; - assert((used & 4) == 0); - -#ifdef INTEL_MAP_BATCHBUFFER -#ifdef INTEL_MAP_GTT - drm_intel_gem_bo_unmap_gtt(batch->bo); -#else - drm_intel_bo_unmap(batch->bo); -#endif -#else - drm_intel_bo_subdata(batch->bo, 0, used, batch->base.map); -#endif /* Do the sending to HW */ - if (i915_drm_winsys(ibatch->iws)->send_cmd) + ret = drm_intel_bo_subdata(batch->bo, 0, used, batch->base.map); + if (ret == 0 && i915_drm_winsys(ibatch->iws)->send_cmd) ret = drm_intel_bo_exec(batch->bo, used, NULL, 0, 0); - else - ret = 0; if (ret != 0 || i915_drm_winsys(ibatch->iws)->dump_cmd) { -#ifdef INTEL_MAP_BATCHBUFFER -#ifdef INTEL_MAP_GTT - drm_intel_gem_bo_map_gtt(batch->bo); -#else - drm_intel_bo_map(batch->bo, 0); -#endif -#endif i915_dump_batchbuffer(ibatch); assert(ret == 0); -#ifdef INTEL_MAP_BATCHBUFFER -#ifdef INTEL_MAP_GTT - drm_intel_gem_bo_unmap_gtt(batch->bo); -#else - drm_intel_bo_unmap(batch->bo); -#endif -#endif - } else { + } + #ifdef INTEL_RUN_SYNC - drm_intel_bo_map(batch->bo, FALSE); - drm_intel_bo_unmap(batch->bo); + drm_intel_bo_wait_rendering(batch->bo); #endif - } if (fence) { ibatch->iws->fence_reference(ibatch->iws, fence, NULL); @@ -237,9 +199,7 @@ i915_drm_batchbuffer_destroy(struct i915_winsys_batchbuffer *ibatch) if (batch->bo) drm_intel_bo_unreference(batch->bo); -#ifndef INTEL_MAP_BATCHBUFFER FREE(batch->base.map); -#endif FREE(batch); } diff --git a/src/gallium/winsys/i915/drm/i915_drm_buffer.c b/src/gallium/winsys/i915/drm/i915_drm_buffer.c index 15ec4487457..01dd4bf062f 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_buffer.c +++ 
b/src/gallium/winsys/i915/drm/i915_drm_buffer.c @@ -5,14 +5,31 @@ #include "i915_drm.h" +static char *i915_drm_type_to_name(enum i915_winsys_buffer_type type) +{ + char *name; + + if (type == I915_NEW_TEXTURE) { + name = "gallium3d_texture"; + } else if (type == I915_NEW_VERTEX) { + name = "gallium3d_vertex"; + } else if (type == I915_NEW_SCANOUT) { + name = "gallium3d_scanout"; + } else { + assert(0); + name = "gallium3d_unknown"; + } + + return name; +} + static struct i915_winsys_buffer * i915_drm_buffer_create(struct i915_winsys *iws, - unsigned size, unsigned alignment, + unsigned size, enum i915_winsys_buffer_type type) { struct i915_drm_buffer *buf = CALLOC_STRUCT(i915_drm_buffer); struct i915_drm_winsys *idws = i915_drm_winsys(iws); - char *name; if (!buf) return NULL; @@ -21,22 +38,48 @@ i915_drm_buffer_create(struct i915_winsys *iws, buf->flinked = FALSE; buf->flink = 0; - if (type == I915_NEW_TEXTURE) { - name = "gallium3d_texture"; - } else if (type == I915_NEW_VERTEX) { - name = "gallium3d_vertex"; - } else if (type == I915_NEW_SCANOUT) { - name = "gallium3d_scanout"; - } else { - assert(0); - name = "gallium3d_unknown"; - } + buf->bo = drm_intel_bo_alloc(idws->gem_manager, + i915_drm_type_to_name(type), size, 0); - buf->bo = drm_intel_bo_alloc(idws->gem_manager, name, size, alignment); + if (!buf->bo) + goto err; + + return (struct i915_winsys_buffer *)buf; + +err: + assert(0); + FREE(buf); + return NULL; +} + +static struct i915_winsys_buffer * +i915_drm_buffer_create_tiled(struct i915_winsys *iws, + unsigned *stride, unsigned height, + enum i915_winsys_buffer_tile *tiling, + enum i915_winsys_buffer_type type) +{ + struct i915_drm_buffer *buf = CALLOC_STRUCT(i915_drm_buffer); + struct i915_drm_winsys *idws = i915_drm_winsys(iws); + unsigned long pitch = 0; + uint32_t tiling_mode = *tiling; + + if (!buf) + return NULL; + + buf->magic = 0xDEAD1337; + buf->flinked = FALSE; + buf->flink = 0; + + buf->bo = drm_intel_bo_alloc_tiled(idws->gem_manager, + i915_drm_type_to_name(type), + *stride, height, 1, + &tiling_mode, &pitch, 0); if (!buf->bo) goto err; + *stride = pitch; + *tiling = tiling_mode; return (struct i915_winsys_buffer *)buf; err: @@ -47,8 +90,9 @@ err: static struct i915_winsys_buffer * i915_drm_buffer_from_handle(struct i915_winsys *iws, - struct winsys_handle *whandle, - unsigned *stride) + struct winsys_handle *whandle, + enum i915_winsys_buffer_tile *tiling, + unsigned *stride) { struct i915_drm_winsys *idws = i915_drm_winsys(iws); struct i915_drm_buffer *buf = CALLOC_STRUCT(i915_drm_buffer); @@ -68,6 +112,7 @@ i915_drm_buffer_from_handle(struct i915_winsys *iws, drm_intel_bo_get_tiling(buf->bo, &tile, &swizzle); *stride = whandle->stride; + *tiling = tile; return (struct i915_winsys_buffer *)buf; @@ -103,24 +148,6 @@ i915_drm_buffer_get_handle(struct i915_winsys *iws, return TRUE; } -static int -i915_drm_buffer_set_fence_reg(struct i915_winsys *iws, - struct i915_winsys_buffer *buffer, - unsigned stride, - enum i915_winsys_buffer_tile tile) -{ - struct i915_drm_buffer *buf = i915_drm_buffer(buffer); - assert(I915_TILING_NONE == I915_TILE_NONE); - assert(I915_TILING_X == I915_TILE_X); - assert(I915_TILING_Y == I915_TILE_Y); - - if (tile != I915_TILE_NONE) { - assert(buf->map_count == 0); - } - - return drm_intel_bo_set_tiling(buf->bo, &tile, stride); -} - static void * i915_drm_buffer_map(struct i915_winsys *iws, struct i915_winsys_buffer *buffer, @@ -190,9 +217,9 @@ void i915_drm_winsys_init_buffer_functions(struct i915_drm_winsys *idws) { idws->base.buffer_create 
= i915_drm_buffer_create; + idws->base.buffer_create_tiled = i915_drm_buffer_create_tiled; idws->base.buffer_from_handle = i915_drm_buffer_from_handle; idws->base.buffer_get_handle = i915_drm_buffer_get_handle; - idws->base.buffer_set_fence_reg = i915_drm_buffer_set_fence_reg; idws->base.buffer_map = i915_drm_buffer_map; idws->base.buffer_unmap = i915_drm_buffer_unmap; idws->base.buffer_write = i915_drm_buffer_write; diff --git a/src/gallium/winsys/i915/drm/i915_drm_winsys.c b/src/gallium/winsys/i915/drm/i915_drm_winsys.c index cc0b6a99577..2288b48b2bd 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_winsys.c +++ b/src/gallium/winsys/i915/drm/i915_drm_winsys.c @@ -69,6 +69,7 @@ i915_drm_winsys_create(int drmFD) idws->gem_manager = drm_intel_bufmgr_gem_init(idws->fd, idws->max_batch_size); drm_intel_bufmgr_gem_enable_reuse(idws->gem_manager); + drm_intel_bufmgr_gem_enable_fenced_relocs(idws->gem_manager); idws->dump_cmd = debug_get_bool_option("I915_DUMP_CMD", FALSE); idws->send_cmd = !debug_get_bool_option("I915_NO_HW", FALSE); diff --git a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c index a480cfed57b..44773ae30e7 100644 --- a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c +++ b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c @@ -61,7 +61,7 @@ static int i915_sw_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch, struct i915_winsys_buffer *buffer, enum i915_winsys_buffer_usage usage, - unsigned pre_add) + unsigned pre_add, bool fenced) { struct i915_sw_batchbuffer *batch = i915_sw_batchbuffer(ibatch); int ret = 0; diff --git a/src/gallium/winsys/i915/sw/i915_sw_buffer.c b/src/gallium/winsys/i915/sw/i915_sw_buffer.c index df175688861..834805e621d 100644 --- a/src/gallium/winsys/i915/sw/i915_sw_buffer.c +++ b/src/gallium/winsys/i915/sw/i915_sw_buffer.c @@ -4,28 +4,15 @@ static struct i915_winsys_buffer * i915_sw_buffer_create(struct i915_winsys *iws, - unsigned size, unsigned alignment, + unsigned size, enum i915_winsys_buffer_type type) { struct i915_sw_buffer *buf = CALLOC_STRUCT(i915_sw_buffer); - char *name; if (!buf) return NULL; - if (type == I915_NEW_TEXTURE) { - name = "gallium3d_texture"; - } else if (type == I915_NEW_VERTEX) { - name = "gallium3d_vertex"; - } else if (type == I915_NEW_SCANOUT) { - name = "gallium3d_scanout"; - } else { - assert(0); - name = "gallium3d_unknown"; - } - buf->magic = 0xDEAD1337; - buf->name = name; buf->type = type; buf->ptr = CALLOC(size, 1); @@ -40,21 +27,32 @@ err: return NULL; } -static int -i915_sw_buffer_set_fence_reg(struct i915_winsys *iws, - struct i915_winsys_buffer *buffer, - unsigned stride, - enum i915_winsys_buffer_tile tile) +static struct i915_winsys_buffer * +i915_sw_buffer_create_tiled(struct i915_winsys *iws, + unsigned *stride, unsigned height, + enum i915_winsys_buffer_tile *tiling, + enum i915_winsys_buffer_type type) { - struct i915_sw_buffer *buf = i915_sw_buffer(buffer); + struct i915_sw_buffer *buf = CALLOC_STRUCT(i915_sw_buffer); + + if (!buf) + return NULL; + + buf->magic = 0xDEAD1337; + buf->type = type; + buf->ptr = CALLOC(*stride * height, 1); + buf->tiling = *tiling; + buf->stride = *stride; - if (tile != I915_TILE_NONE) { - assert(buf->map_count == 0); - } + if (!buf->ptr) + goto err; - buf->tile = tile; + return (struct i915_winsys_buffer *)buf; - return 0; +err: + assert(0); + FREE(buf); + return NULL; } static void * @@ -108,7 +106,7 @@ void i915_sw_winsys_init_buffer_functions(struct i915_sw_winsys *isws) { isws->base.buffer_create = 
i915_sw_buffer_create; - isws->base.buffer_set_fence_reg = i915_sw_buffer_set_fence_reg; + isws->base.buffer_create_tiled = i915_sw_buffer_create_tiled; isws->base.buffer_map = i915_sw_buffer_map; isws->base.buffer_unmap = i915_sw_buffer_unmap; isws->base.buffer_write = i915_sw_buffer_write; diff --git a/src/gallium/winsys/i915/sw/i915_sw_winsys.h b/src/gallium/winsys/i915/sw/i915_sw_winsys.h index b7b43669f30..3af2548419e 100644 --- a/src/gallium/winsys/i915/sw/i915_sw_winsys.h +++ b/src/gallium/winsys/i915/sw/i915_sw_winsys.h @@ -43,8 +43,8 @@ struct i915_sw_buffer { void *ptr; unsigned map_count; enum i915_winsys_buffer_type type; - enum i915_winsys_buffer_tile tile; - const char *name; + enum i915_winsys_buffer_tile tiling; + unsigned stride; }; static INLINE struct i915_sw_buffer * diff --git a/src/gallium/winsys/i965/drm/i965_drm_buffer.c b/src/gallium/winsys/i965/drm/i965_drm_buffer.c index ed62db60bbb..a904179eeb9 100644 --- a/src/gallium/winsys/i965/drm/i965_drm_buffer.c +++ b/src/gallium/winsys/i965/drm/i965_drm_buffer.c @@ -325,7 +325,7 @@ i965_libdrm_bo_subdata(struct brw_winsys_buffer *buffer, brw_dump_data( idws->base.pci_id, data_type, buf->bo->offset + offset, - data, size ); + data, size, buffer->sws->gen ); /* XXX: use bo_map_gtt/memcpy/unmap_gtt under some circumstances??? */ @@ -464,7 +464,7 @@ i965_libdrm_bo_flush_range(struct brw_winsys_buffer *buffer, buf->data_type, buf->bo->offset + offset, (char*)buf->bo->virtual + offset, - length ); + length, buffer->sws->gen ); } static void diff --git a/src/gallium/winsys/i965/xlib/xlib_i965.c b/src/gallium/winsys/i965/xlib/xlib_i965.c index baadd6e89ca..c22df6643aa 100644 --- a/src/gallium/winsys/i965/xlib/xlib_i965.c +++ b/src/gallium/winsys/i965/xlib/xlib_i965.c @@ -42,6 +42,7 @@ #include "i965/brw_winsys.h" #include "i965/brw_screen.h" +#include "i965/brw_resource.h" #include "i965/brw_reg.h" #include "i965/brw_structs_dump.h" @@ -421,25 +422,28 @@ xlib_create_brw_winsys_screen( void ) static void xlib_i965_display_surface(struct xmesa_buffer *xm_buffer, - struct pipe_surface *surf) + struct pipe_resource *resource, + unsigned level, unsigned layer) { - struct brw_surface *surface = brw_surface(surf); - struct xlib_brw_buffer *bo = xlib_brw_buffer(surface->bo); - + struct brw_texture *tex = brw_texture(resource); + struct xlib_brw_buffer *bo = xlib_brw_buffer(tex->bo); + /* not sure if the resource is really useful here but + since it was never implemented anyway... 
*/ if (BRW_DEBUG & DEBUG_WINSYS) - debug_printf("%s offset %x+%x sz %dx%d\n", __FUNCTION__, + debug_printf("%s level %u layer %u offset %x base sz %dx%d\n", __FUNCTION__, + level, layer, bo->offset, - surface->draw_offset, - surf->width, - surf->height); + resource->width0, + resource->height0); } static void xlib_i965_flush_frontbuffer(struct pipe_screen *screen, - struct pipe_surface *surf, - void *context_private) + struct pipe_resource *resource, + unsigned level, unsigned layer, + void *context_private) { - xlib_i965_display_surface(NULL, surf); + xlib_i965_display_surface(NULL, resource, level, layer); } diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c index d4bf124ce6f..648d6c8a8e2 100644 --- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c +++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c @@ -50,6 +50,9 @@ nouveau_drm_screen_create(int fd) case 0xa0: init = nv50_screen_create; break; + case 0xc0: + init = nvc0_screen_create; + break; default: debug_printf("%s: unknown chipset nv%02x\n", __func__, dev->chipset); diff --git a/src/gallium/winsys/r600/drm/Makefile b/src/gallium/winsys/r600/drm/Makefile index a396205f897..7310734f051 100644 --- a/src/gallium/winsys/r600/drm/Makefile +++ b/src/gallium/winsys/r600/drm/Makefile @@ -8,12 +8,11 @@ C_SOURCES = \ bof.c \ evergreen_hw_context.c \ radeon_bo.c \ - radeon_bo_pb.c \ radeon_pciid.c \ - r600.c \ r600_bo.c \ r600_drm.c \ - r600_hw_context.c + r600_hw_context.c \ + r600_bomgr.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/r600 \ $(shell pkg-config libdrm --cflags-only-I) diff --git a/src/gallium/winsys/r600/drm/SConscript b/src/gallium/winsys/r600/drm/SConscript index cc053c06dd0..f97434e995d 100644 --- a/src/gallium/winsys/r600/drm/SConscript +++ b/src/gallium/winsys/r600/drm/SConscript @@ -6,12 +6,11 @@ r600_sources = [ 'bof.c', 'evergreen_hw_context.c', 'radeon_bo.c', - 'radeon_bo_pb.c', 'radeon_pciid.c', - 'r600.c', 'r600_bo.c', 'r600_drm.c', 'r600_hw_context.c', + 'r600_bomgr.c', ] env.ParseConfig('pkg-config --cflags libdrm_radeon') diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index e1f163eab62..3fdafc39283 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -36,7 +36,6 @@ #include "pipe/p_compiler.h" #include "util/u_inlines.h" #include "util/u_memory.h" -#include <pipebuffer/pb_bufmgr.h> #include "r600_priv.h" #define GROUP_FORCE_NEW_BLOCK 0 @@ -622,10 +621,7 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) /* save 16dwords space for fence mecanism */ ctx->pm4_ndwords -= 16; - r = r600_context_init_fence(ctx); - if (r) { - goto out_err; - } + LIST_INITHEAD(&ctx->fenced_bo); /* init dirty list */ LIST_INITHEAD(&ctx->dirty); @@ -881,59 +877,3 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr ctx->pm4_dirty_cdwords = 0; } -static inline void evergreen_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) -{ - struct r600_range *range; - struct r600_block *block; - - range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; - block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; - block->reg[0] = state->regs[0].value; - block->reg[1] = state->regs[1].value; - block->reg[2] = state->regs[2].value; - block->reg[3] = state->regs[3].value; - block->reg[4] = state->regs[4].value; - block->reg[5] = state->regs[5].value; - 
block->reg[6] = state->regs[6].value; - block->reg[7] = state->regs[7].value; - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL); - r600_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL); - if (state->regs[0].bo) { - /* VERTEX RESOURCE, we preted there is 2 bo to relocate so - * we have single case btw VERTEX & TEXTURE resource - */ - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo); - r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo); - } else { - /* TEXTURE RESOURCE */ - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo); - r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo); - } - if (!(block->status & R600_BLOCK_STATUS_DIRTY)) { - block->status |= R600_BLOCK_STATUS_ENABLED; - block->status |= R600_BLOCK_STATUS_DIRTY; - ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords; - LIST_ADDTAIL(&block->list,&ctx->dirty); - } -} - -void evergreen_ps_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) -{ - unsigned offset = R_030000_RESOURCE0_WORD0 + 0x20 * rid; - - evergreen_resource_set(ctx, state, offset); -} - -void evergreen_vs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) -{ - unsigned offset = R_030000_RESOURCE0_WORD0 + 0x1600 + 0x20 * rid; - - evergreen_resource_set(ctx, state, offset); -} - -void evergreen_fs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) -{ - unsigned offset = R_030000_RESOURCE0_WORD0 + 0x7C00 + 0x20 * rid; - - evergreen_resource_set(ctx, state, offset); -} diff --git a/src/gallium/winsys/r600/drm/r600.c b/src/gallium/winsys/r600/drm/r600.c deleted file mode 100644 index f5e53e21f51..00000000000 --- a/src/gallium/winsys/r600/drm/r600.c +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Jerome Glisse - */ -#include "xf86drm.h" -#include "radeon_drm.h" -#include "pipe/p_compiler.h" -#include "util/u_inlines.h" -#include <pipebuffer/pb_bufmgr.h> -#include "r600_priv.h" - -enum radeon_family r600_get_family(struct radeon *r600) -{ - return r600->family; -} - -enum chip_class r600_get_family_class(struct radeon *radeon) -{ - return radeon->chip_class; -} - -struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon) -{ - return &radeon->tiling_info; -} - -static int r600_get_device(struct radeon *r600) -{ - struct drm_radeon_info info; - - r600->device = 0; - info.request = RADEON_INFO_DEVICE_ID; - info.value = (uintptr_t)&r600->device; - return drmCommandWriteRead(r600->fd, DRM_RADEON_INFO, &info, sizeof(struct drm_radeon_info)); -} - -struct radeon *r600_new(int fd, unsigned device) -{ - struct radeon *r600; - int r; - - r600 = calloc(1, sizeof(*r600)); - if (r600 == NULL) { - return NULL; - } - r600->fd = fd; - r600->device = device; - if (fd >= 0) { - r = r600_get_device(r600); - if (r) { - R600_ERR("Failed to get device id\n"); - r600_delete(r600); - return NULL; - } - } - r600->family = radeon_family_from_device(r600->device); - if (r600->family == CHIP_UNKNOWN) { - R600_ERR("Unknown chipset 0x%04X\n", r600->device); - r600_delete(r600); - return NULL; - } - switch (r600->family) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - case CHIP_PALM: - break; - case CHIP_R100: - case CHIP_RV100: - case CHIP_RS100: - case CHIP_RV200: - case CHIP_RS200: - case CHIP_R200: - case CHIP_RV250: - case CHIP_RS300: - case CHIP_RV280: - case CHIP_R300: - case CHIP_R350: - case CHIP_RV350: - case CHIP_RV380: - case CHIP_R420: - case CHIP_R423: - case CHIP_RV410: - case CHIP_RS400: - case CHIP_RS480: - case CHIP_RS600: - case CHIP_RS690: - case CHIP_RS740: - case CHIP_RV515: - case CHIP_R520: - case CHIP_RV530: - case CHIP_RV560: - case CHIP_RV570: - case CHIP_R580: - default: - R600_ERR("unknown or unsupported chipset 0x%04X\n", r600->device); - break; - } - - /* setup class */ - switch (r600->family) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - r600->chip_class = R600; - break; - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - r600->chip_class = R700; - break; - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - case CHIP_PALM: - r600->chip_class = EVERGREEN; - break; - default: - R600_ERR("unknown or unsupported chipset 0x%04X\n", r600->device); - break; - } - - return r600; -} - -void r600_delete(struct radeon *r600) -{ - if (r600 == NULL) - return; - drmClose(r600->fd); - free(r600); -} diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index 251f009a6b0..6a3737f0a4a 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -36,142 +36,153 @@ struct r600_bo *r600_bo(struct radeon *radeon, unsigned size, unsigned alignment, unsigned binding, unsigned usage) { - struct r600_bo *ws_bo = calloc(1, sizeof(struct r600_bo)); - struct pb_desc desc; - struct pb_manager *man; + struct r600_bo *bo; + struct radeon_bo 
*rbo; - desc.alignment = alignment; - desc.usage = (PB_USAGE_CPU_READ_WRITE | PB_USAGE_GPU_READ_WRITE); - ws_bo->size = size; + if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) { + bo = r600_bomgr_bo_create(radeon->bomgr, size, alignment, *radeon->cfence); + if (bo) { + return bo; + } + } - if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) - man = radeon->cman; - else - man = radeon->kman; + rbo = radeon_bo(radeon, 0, size, alignment); + if (rbo == NULL) { + return NULL; + } + + bo = calloc(1, sizeof(struct r600_bo)); + bo->size = size; + bo->alignment = alignment; + bo->bo = rbo; + if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) { + r600_bomgr_bo_init(radeon->bomgr, bo); + } /* Staging resources particpate in transfers and blits only * and are used for uploads and downloads from regular * resources. We generate them internally for some transfers. */ if (usage == PIPE_USAGE_STAGING) - ws_bo->domains = RADEON_GEM_DOMAIN_CPU | RADEON_GEM_DOMAIN_GTT; - else - ws_bo->domains = (RADEON_GEM_DOMAIN_CPU | - RADEON_GEM_DOMAIN_GTT | - RADEON_GEM_DOMAIN_VRAM); - - - ws_bo->pb = man->create_buffer(man, size, &desc); - if (ws_bo->pb == NULL) { - free(ws_bo); - return NULL; - } + bo->domains = RADEON_GEM_DOMAIN_CPU | RADEON_GEM_DOMAIN_GTT; + else + bo->domains = (RADEON_GEM_DOMAIN_CPU | + RADEON_GEM_DOMAIN_GTT | + RADEON_GEM_DOMAIN_VRAM); - pipe_reference_init(&ws_bo->reference, 1); - return ws_bo; + pipe_reference_init(&bo->reference, 1); + return bo; } struct r600_bo *r600_bo_handle(struct radeon *radeon, unsigned handle, unsigned *array_mode) { - struct r600_bo *ws_bo = calloc(1, sizeof(struct r600_bo)); - struct radeon_bo *bo; + struct r600_bo *bo = calloc(1, sizeof(struct r600_bo)); + struct radeon_bo *rbo; - ws_bo->pb = radeon_bo_pb_create_buffer_from_handle(radeon->kman, handle); - if (!ws_bo->pb) { - free(ws_bo); + rbo = bo->bo = radeon_bo(radeon, handle, 0, 0); + if (rbo == NULL) { + free(bo); return NULL; } - bo = radeon_bo_pb_get_bo(ws_bo->pb); - ws_bo->size = bo->size; - ws_bo->domains = (RADEON_GEM_DOMAIN_CPU | - RADEON_GEM_DOMAIN_GTT | - RADEON_GEM_DOMAIN_VRAM); + bo->size = rbo->size; + bo->domains = (RADEON_GEM_DOMAIN_CPU | + RADEON_GEM_DOMAIN_GTT | + RADEON_GEM_DOMAIN_VRAM); - pipe_reference_init(&ws_bo->reference, 1); + pipe_reference_init(&bo->reference, 1); - radeon_bo_get_tiling_flags(radeon, bo, &ws_bo->tiling_flags, - &ws_bo->kernel_pitch); + radeon_bo_get_tiling_flags(radeon, rbo, &bo->tiling_flags, &bo->kernel_pitch); if (array_mode) { - if (ws_bo->tiling_flags) { - if (ws_bo->tiling_flags & RADEON_TILING_MICRO) + if (bo->tiling_flags) { + if (bo->tiling_flags & RADEON_TILING_MICRO) *array_mode = V_0280A0_ARRAY_1D_TILED_THIN1; - if ((ws_bo->tiling_flags & (RADEON_TILING_MICRO | RADEON_TILING_MACRO)) == + if ((bo->tiling_flags & (RADEON_TILING_MICRO | RADEON_TILING_MACRO)) == (RADEON_TILING_MICRO | RADEON_TILING_MACRO)) *array_mode = V_0280A0_ARRAY_2D_TILED_THIN1; } else { *array_mode = 0; } } - return ws_bo; + return bo; } void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx) { - return pb_map(bo->pb, usage, ctx); + struct pipe_context *pctx = ctx; + + if (usage & PB_USAGE_UNSYNCHRONIZED) { + radeon_bo_map(radeon, bo->bo); + return (uint8_t *) bo->bo->data + bo->offset; + } + + if (p_atomic_read(&bo->bo->reference.count) > 1) { + if (usage & PB_USAGE_DONTBLOCK) { + return NULL; + } + if (ctx) { + 
pctx->flush(pctx, 0, NULL); + } + } + + if (usage & PB_USAGE_DONTBLOCK) { + uint32_t domain; + + if (radeon_bo_busy(radeon, bo->bo, &domain)) + return NULL; + if (radeon_bo_map(radeon, bo->bo)) { + return NULL; + } + goto out; + } + + radeon_bo_map(radeon, bo->bo); + if (radeon_bo_wait(radeon, bo->bo)) { + radeon_bo_unmap(radeon, bo->bo); + return NULL; + } + +out: + return (uint8_t *) bo->bo->data + bo->offset; } void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo) { - pb_unmap(bo->pb); + radeon_bo_unmap(radeon, bo->bo); } -static void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo) +void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo) { - if (bo->pb) - pb_reference(&bo->pb, NULL); + if (bo->manager_id) { + if (!r600_bomgr_bo_destroy(radeon->bomgr, bo)) { + /* destroy is delayed by buffer manager */ + return; + } + } + radeon_bo_reference(radeon, &bo->bo, NULL); free(bo); } -void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, - struct r600_bo *src) +void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, struct r600_bo *src) { struct r600_bo *old = *dst; - + if (pipe_reference(&(*dst)->reference, &src->reference)) { r600_bo_destroy(radeon, old); } *dst = src; } -unsigned r600_bo_get_handle(struct r600_bo *pb_bo) -{ - struct radeon_bo *bo; - - bo = radeon_bo_pb_get_bo(pb_bo->pb); - if (!bo) - return 0; - - return bo->handle; -} - -unsigned r600_bo_get_size(struct r600_bo *pb_bo) -{ - struct radeon_bo *bo; - - bo = radeon_bo_pb_get_bo(pb_bo->pb); - if (!bo) - return 0; - - return bo->size; -} - -boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *pb_bo, +boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *bo, unsigned stride, struct winsys_handle *whandle) { - struct radeon_bo *bo; - - bo = radeon_bo_pb_get_bo(pb_bo->pb); - if (!bo) - return FALSE; - whandle->stride = stride; switch(whandle->type) { case DRM_API_HANDLE_TYPE_KMS: - whandle->handle = r600_bo_get_handle(pb_bo); + whandle->handle = r600_bo_get_handle(bo); break; case DRM_API_HANDLE_TYPE_SHARED: - if (radeon_bo_get_name(radeon, bo, &whandle->handle)) + if (radeon_bo_get_name(radeon, bo->bo, &whandle->handle)) return FALSE; break; default: diff --git a/src/gallium/winsys/r600/drm/r600_bomgr.c b/src/gallium/winsys/r600/drm/r600_bomgr.c new file mode 100644 index 00000000000..446ef0f9cfc --- /dev/null +++ b/src/gallium/winsys/r600/drm/r600_bomgr.c @@ -0,0 +1,161 @@ +/* + * Copyright 2010 VMWare. + * Copyright 2010 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
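The rewritten r600_bo_map() above maps through the plain radeon_bo path and layers three policies on top of it: PB_USAGE_UNSYNCHRONIZED returns a pointer immediately, PB_USAGE_DONTBLOCK fails rather than stalls when the buffer is still referenced or busy, and the default path flushes the supplied context and waits for the hardware before returning. A condensed sketch of that decision tree, with the winsys calls replaced by local stubs so the shape stands on its own (the flag names are simplified; the helpers are stand-ins, not the real radeon_bo API):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Local stubs; the real calls live in radeon_bo.c. */
static bool bo_referenced_by_cs(void *bo) { (void)bo; return false; }
static bool bo_busy(void *bo)             { (void)bo; return false; }
static bool bo_wait(void *bo)             { (void)bo; return false; } /* true = error */
static void ctx_flush(void *ctx)          { (void)ctx; }

enum { MAP_UNSYNCHRONIZED = 1 << 0, MAP_DONTBLOCK = 1 << 1 };

/* Returns the CPU pointer, or NULL when a non-blocking map cannot be honoured. */
static void *map_bo(void *bo, uint8_t *cpu_ptr, unsigned usage, void *ctx)
{
    if (usage & MAP_UNSYNCHRONIZED)
        return cpu_ptr;               /* caller promises not to race the GPU */

    if (bo_referenced_by_cs(bo)) {
        if (usage & MAP_DONTBLOCK)
            return NULL;              /* would have to flush and stall: give up */
        if (ctx)
            ctx_flush(ctx);           /* submit the commands that still use bo */
    }

    if (usage & MAP_DONTBLOCK)
        return bo_busy(bo) ? NULL : cpu_ptr;

    return bo_wait(bo) ? NULL : cpu_ptr;  /* block until idle */
}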
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jose Fonseca <jrfonseca-at-vmware-dot-com> + * Thomas Hellström <thomas-at-vmware-dot-com> + * Jerome Glisse <[email protected]> + */ +#include <util/u_memory.h> +#include <util/u_double_list.h> +#include <util/u_time.h> +#include <pipebuffer/pb_bufmgr.h> +#include "r600_priv.h" + +static void r600_bomgr_timeout_flush(struct r600_bomgr *mgr) +{ + struct r600_bo *bo, *tmp; + int64_t now; + + now = os_time_get(); + LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) { + if(!os_time_timeout(bo->start, bo->end, now)) + break; + + mgr->num_delayed--; + bo->manager_id = 0; + LIST_DEL(&bo->list); + r600_bo_destroy(mgr->radeon, bo); + } +} + +static INLINE int r600_bo_is_compat(struct r600_bomgr *mgr, + struct r600_bo *bo, + unsigned size, + unsigned alignment, + unsigned cfence) +{ + if(bo->size < size) { + return 0; + } + + /* be lenient with size */ + if(bo->size >= 2*size) { + return 0; + } + + if(!pb_check_alignment(alignment, bo->alignment)) { + return 0; + } + + if (!fence_is_after(cfence, bo->fence)) { + return 0; + } + + return 1; +} + +struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr, + unsigned size, + unsigned alignment, + unsigned cfence) +{ + struct r600_bo *bo, *tmp; + int64_t now; + + + pipe_mutex_lock(mgr->mutex); + + now = os_time_get(); + LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) { + if(r600_bo_is_compat(mgr, bo, size, alignment, cfence)) { + LIST_DEL(&bo->list); + --mgr->num_delayed; + r600_bomgr_timeout_flush(mgr); + pipe_mutex_unlock(mgr->mutex); + LIST_INITHEAD(&bo->list); + pipe_reference_init(&bo->reference, 1); + return bo; + } + + if(os_time_timeout(bo->start, bo->end, now)) { + mgr->num_delayed--; + bo->manager_id = 0; + LIST_DEL(&bo->list); + r600_bo_destroy(mgr->radeon, bo); + } + } + + pipe_mutex_unlock(mgr->mutex); + return NULL; +} + +void r600_bomgr_bo_init(struct r600_bomgr *mgr, struct r600_bo *bo) +{ + LIST_INITHEAD(&bo->list); + bo->manager_id = 1; +} + +bool r600_bomgr_bo_destroy(struct r600_bomgr *mgr, struct r600_bo *bo) +{ + bo->start = os_time_get(); + bo->end = bo->start + mgr->usecs; + pipe_mutex_lock(mgr->mutex); + LIST_ADDTAIL(&bo->list, &mgr->delayed); + ++mgr->num_delayed; + pipe_mutex_unlock(mgr->mutex); + return FALSE; +} + +void r600_bomgr_destroy(struct r600_bomgr *mgr) +{ + struct r600_bo *bo, *tmp; + + pipe_mutex_lock(mgr->mutex); + LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) { + mgr->num_delayed--; + bo->manager_id = 0; + LIST_DEL(&bo->list); + r600_bo_destroy(mgr->radeon, bo); + } + pipe_mutex_unlock(mgr->mutex); + + FREE(mgr); +} + +struct r600_bomgr *r600_bomgr_create(struct radeon *radeon, unsigned usecs) +{ + struct r600_bomgr *mgr; + + mgr = CALLOC_STRUCT(r600_bomgr); + if (mgr == NULL) + return NULL; + + mgr->radeon = radeon; + mgr->usecs = usecs; + LIST_INITHEAD(&mgr->delayed); + mgr->num_delayed = 0; + pipe_mutex_init(mgr->mutex); + + return mgr; +} diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 8b1d88aed7d..ee262c3ea52 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -40,6 +40,24 @@ #ifndef RADEON_INFO_TILING_CONFIG #define RADEON_INFO_TILING_CONFIG 0x6 #endif + +static struct radeon *radeon_new(int 
fd, unsigned device); + +enum radeon_family r600_get_family(struct radeon *r600) +{ + return r600->family; +} + +enum chip_class r600_get_family_class(struct radeon *radeon) +{ + return radeon->chip_class; +} + +struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon) +{ + return &radeon->tiling_info; +} + static int radeon_get_device(struct radeon *radeon) { struct drm_radeon_info info; @@ -108,7 +126,19 @@ static int radeon_drm_get_tiling(struct radeon *radeon) return 0; } -struct radeon *radeon_new(int fd, unsigned device) +static int radeon_init_fence(struct radeon *radeon) +{ + radeon->fence = 1; + radeon->fence_bo = r600_bo(radeon, 4096, 0, 0, 0); + if (radeon->fence_bo == NULL) { + return -ENOMEM; + } + radeon->cfence = r600_bo_map(radeon, radeon->fence_bo, PB_USAGE_UNSYNCHRONIZED, NULL); + *radeon->cfence = 0; + return 0; +} + +static struct radeon *radeon_new(int fd, unsigned device) { struct radeon *radeon; int r; @@ -132,59 +162,6 @@ struct radeon *radeon_new(int fd, unsigned device) fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->device); return radeon_decref(radeon); } - switch (radeon->family) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - case CHIP_PALM: - break; - case CHIP_R100: - case CHIP_RV100: - case CHIP_RS100: - case CHIP_RV200: - case CHIP_RS200: - case CHIP_R200: - case CHIP_RV250: - case CHIP_RS300: - case CHIP_RV280: - case CHIP_R300: - case CHIP_R350: - case CHIP_RV350: - case CHIP_RV380: - case CHIP_R420: - case CHIP_R423: - case CHIP_RV410: - case CHIP_RS400: - case CHIP_RS480: - case CHIP_RS600: - case CHIP_RS690: - case CHIP_RS740: - case CHIP_RV515: - case CHIP_R520: - case CHIP_RV530: - case CHIP_RV560: - case CHIP_RV570: - case CHIP_R580: - default: - fprintf(stderr, "%s unknown or unsupported chipset 0x%04X\n", - __func__, radeon->device); - break; - } - /* setup class */ switch (radeon->family) { case CHIP_R600: @@ -213,6 +190,9 @@ struct radeon *radeon_new(int fd, unsigned device) case CHIP_CYPRESS: case CHIP_HEMLOCK: case CHIP_PALM: + case CHIP_BARTS: + case CHIP_TURKS: + case CHIP_CAICOS: radeon->chip_class = EVERGREEN; /* set default group bytes, overridden by tiling info ioctl */ radeon->tiling_info.group_bytes = 512; @@ -227,12 +207,15 @@ struct radeon *radeon_new(int fd, unsigned device) if (radeon_drm_get_tiling(radeon)) return NULL; } - radeon->kman = radeon_bo_pbmgr_create(radeon); - if (!radeon->kman) + radeon->bomgr = r600_bomgr_create(radeon, 1000000); + if (radeon->bomgr == NULL) { return NULL; - radeon->cman = pb_cache_manager_create(radeon->kman, 100000); - if (!radeon->cman) + } + r = radeon_init_fence(radeon); + if (r) { + radeon_decref(radeon); return NULL; + } return radeon; } @@ -249,14 +232,15 @@ struct radeon *radeon_decref(struct radeon *radeon) return NULL; } - if (radeon->cman) - radeon->cman->destroy(radeon->cman); + if (radeon->fence_bo) { + r600_bo_reference(radeon, &radeon->fence_bo, NULL); + } - if (radeon->kman) - radeon->kman->destroy(radeon->kman); + if (radeon->bomgr) + r600_bomgr_destroy(radeon->bomgr); - if (radeon->fd >= 0) - drmClose(radeon->fd); + if (radeon->fd >= 0) + drmClose(radeon->fd); free(radeon); return NULL; diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c 
b/src/gallium/winsys/r600/drm/r600_hw_context.c index de228918953..f10e2fda6f2 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -28,39 +28,25 @@ #include <string.h> #include <stdlib.h> #include <assert.h> +#include <pipe/p_compiler.h> +#include <util/u_inlines.h> +#include <util/u_memory.h> +#include <pipebuffer/pb_bufmgr.h> #include "xf86drm.h" -#include "r600.h" -#include "r600d.h" #include "radeon_drm.h" -#include "bof.h" -#include "pipe/p_compiler.h" -#include "util/u_inlines.h" -#include "util/u_memory.h" -#include <pipebuffer/pb_bufmgr.h> #include "r600_priv.h" +#include "bof.h" +#include "r600d.h" #define GROUP_FORCE_NEW_BLOCK 0 -int r600_context_init_fence(struct r600_context *ctx) -{ - ctx->fence = 1; - ctx->fence_bo = r600_bo(ctx->radeon, 4096, 0, 0, 0); - if (ctx->fence_bo == NULL) { - return -ENOMEM; - } - ctx->cfence = r600_bo_map(ctx->radeon, ctx->fence_bo, PB_USAGE_UNSYNCHRONIZED, NULL); - *ctx->cfence = 0; - LIST_INITHEAD(&ctx->fenced_bo); - return 0; -} - static void INLINE r600_context_update_fenced_list(struct r600_context *ctx) { for (int i = 0; i < ctx->creloc; i++) { if (!LIST_IS_EMPTY(&ctx->bo[i]->fencedlist)) LIST_DELINIT(&ctx->bo[i]->fencedlist); LIST_ADDTAIL(&ctx->bo[i]->fencedlist, &ctx->fenced_bo); - ctx->bo[i]->fence = ctx->fence; + ctx->bo[i]->fence = ctx->radeon->fence; ctx->bo[i]->ctx = ctx; } } @@ -71,7 +57,7 @@ static void INLINE r600_context_fence_wraparound(struct r600_context *ctx, unsig struct radeon_bo *tmp; LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &ctx->fenced_bo, fencedlist) { - if (bo->fence <= *ctx->cfence) { + if (bo->fence <= *ctx->radeon->cfence) { LIST_DELINIT(&bo->fencedlist); bo->fence = 0; } else { @@ -618,6 +604,9 @@ void r600_context_fini(struct r600_context *ctx) range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; range->blocks[CTX_BLOCK_ID(ctx, offset)] = NULL; } + for (int k = 1; k <= block->nbo; k++) { + r600_bo_reference(ctx->radeon, &block->reloc[k].bo, NULL); + } free(block); } } @@ -629,9 +618,6 @@ void r600_context_fini(struct r600_context *ctx) free(ctx->pm4); r600_context_clear_fenced_bo(ctx); - if (ctx->fence_bo) { - r600_bo_reference(ctx->radeon, &ctx->fence_bo, NULL); - } memset(ctx, 0, sizeof(struct r600_context)); } @@ -760,10 +746,7 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) /* save 16dwords space for fence mecanism */ ctx->pm4_ndwords -= 16; - r = r600_context_init_fence(ctx); - if (r) { - goto out_err; - } + LIST_INITHEAD(&ctx->fenced_bo); /* init dirty list */ LIST_INITHEAD(&ctx->dirty); @@ -811,6 +794,7 @@ void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *r ctx->reloc[ctx->creloc].write_domain = rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM); ctx->reloc[ctx->creloc].flags = 0; radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->creloc], bo); + rbo->fence = ctx->radeon->fence; ctx->creloc++; /* set PKT3 to point to proper reloc */ *pm4 = bo->reloc_id; @@ -833,6 +817,7 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat /* find relocation */ id = block->pm4_bo_index[id]; r600_bo_reference(ctx->radeon, &block->reloc[id].bo, state->regs[i].bo); + state->regs[i].bo->fence = ctx->radeon->fence; } if (!(block->status & R600_BLOCK_STATUS_DIRTY)) { block->status |= R600_BLOCK_STATUS_ENABLED; @@ -872,10 +857,13 @@ static inline void r600_context_pipe_state_set_resource(struct r600_context *ctx */ r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo); 
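The relocation paths above now stamp each buffer with the fence value the next flush will emit (bo->fence = ctx->radeon->fence). That stamp is what the new buffer manager consults when r600_bo() asks for a recycled buffer: r600_bomgr_bo_create() is handed the last completed fence (*radeon->cfence) and only returns a cached BO whose stamp has retired and whose size and alignment fit. A small self-contained sketch of that reuse test, with pb_check_alignment() approximated locally and the wraparound-aware fence test reduced to a plain comparison:

#include <stdbool.h>

struct cached_bo {
    unsigned size;
    unsigned alignment;
    unsigned fence;          /* fence stamped at relocation time */
};

/* pb_check_alignment()-style test: the cached alignment must satisfy the
 * requested one (power-of-two alignments assumed, as in pipebuffer). */
static bool alignment_ok(unsigned requested, unsigned provided)
{
    return requested == 0 || (provided >= requested && provided % requested == 0);
}

/* Reuse policy of r600_bomgr_bo_create(): big enough but not wastefully big,
 * correctly aligned, and already retired by the GPU. */
static bool can_reuse(const struct cached_bo *bo, unsigned size,
                      unsigned alignment, unsigned cfence)
{
    if (bo->size < size)
        return false;                  /* too small */
    if (bo->size >= 2 * size)
        return false;                  /* would waste a much larger buffer */
    if (!alignment_ok(alignment, bo->alignment))
        return false;
    /* The real check is wraparound-aware (fence_is_after() in r600_priv.h);
     * a plain comparison is enough for this sketch. */
    return cfence >= bo->fence;
}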
r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo); + state->regs[0].bo->fence = ctx->radeon->fence; } else { /* TEXTURE RESOURCE */ r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo); r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo); + state->regs[2].bo->fence = ctx->radeon->fence; + state->regs[3].bo->fence = ctx->radeon->fence; } if (!(block->status & R600_BLOCK_STATUS_DIRTY)) { block->status |= R600_BLOCK_STATUS_ENABLED; @@ -1108,18 +1096,18 @@ void r600_context_flush(struct r600_context *ctx) /* suspend queries */ r600_context_queries_suspend(ctx); - radeon_bo_pbmgr_flush_maps(ctx->radeon->kman); - /* emit fence */ + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24); - ctx->pm4[ctx->pm4_cdwords++] = ctx->fence; + ctx->pm4[ctx->pm4_cdwords++] = ctx->radeon->fence; ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], ctx->fence_bo); + r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], ctx->radeon->fence_bo); #if 1 /* emit cs */ @@ -1136,18 +1124,18 @@ void r600_context_flush(struct r600_context *ctx) r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib, sizeof(struct drm_radeon_cs)); #else - *ctx->cfence = ctx->fence; + *ctx->radeon->cfence = ctx->radeon->fence; #endif r600_context_update_fenced_list(ctx); - fence = ctx->fence + 1; - if (fence < ctx->fence) { + fence = ctx->radeon->fence + 1; + if (fence < ctx->radeon->fence) { /* wrap around */ fence = 1; r600_context_fence_wraparound(ctx, fence); } - ctx->fence = fence; + ctx->radeon->fence = fence; /* restart */ for (int i = 0; i < ctx->creloc; i++) { diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 9fd77b71c77..a38a6481b4f 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -30,24 +30,27 @@ #include <stdint.h> #include <stdlib.h> #include <assert.h> -#include <pipebuffer/pb_bufmgr.h> -#include "util/u_double_list.h" +#include <util/u_double_list.h> +#include <util/u_inlines.h> +#include <os/os_thread.h> #include "r600.h" +struct r600_bomgr; +struct r600_bo; + struct radeon { int fd; int refcount; unsigned device; unsigned family; enum chip_class chip_class; - struct pb_manager *kman; /* kernel bo manager */ - struct pb_manager *cman; /* cached bo manager */ - struct r600_tiling_info tiling_info; + struct r600_tiling_info tiling_info; + struct r600_bomgr *bomgr; + unsigned fence; + unsigned *cfence; + struct r600_bo *fence_bo; }; -struct radeon *r600_new(int fd, unsigned device); -void r600_delete(struct radeon *r600); - struct r600_reg { unsigned opcode; unsigned offset_base; @@ -75,28 +78,49 @@ struct radeon_bo { struct r600_bo { struct pipe_reference reference; - struct pb_buffer *pb; unsigned size; unsigned tiling_flags; - unsigned kernel_pitch; + unsigned kernel_pitch; unsigned domains; + struct radeon_bo *bo; + unsigned fence; + /* manager data */ + struct list_head list; + unsigned manager_id; + unsigned alignment; + unsigned offset; + int64_t start; + int64_t end; }; +struct 
r600_bomgr { + struct radeon *radeon; + unsigned usecs; + pipe_mutex mutex; + struct list_head delayed; + unsigned num_delayed; +}; -/* radeon_pciid.c */ -unsigned radeon_family_from_device(unsigned device); +/* + * r600_drm.c + */ +struct radeon *r600_new(int fd, unsigned device); +void r600_delete(struct radeon *r600); -/* r600_drm.c */ -struct radeon *radeon_decref(struct radeon *radeon); +/* + * radeon_pciid.c + */ +unsigned radeon_family_from_device(unsigned device); -/* radeon_bo.c */ +/* + * radeon_bo.c + */ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, unsigned size, unsigned alignment); void radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst, struct radeon_bo *src); int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain); -void radeon_bo_pbmgr_flush_maps(struct pb_manager *_mgr); int radeon_bo_fencelist(struct radeon *radeon, struct radeon_bo **bolist, uint32_t num_bo); int radeon_bo_get_tiling_flags(struct radeon *radeon, struct radeon_bo *bo, @@ -106,13 +130,9 @@ int radeon_bo_get_name(struct radeon *radeon, struct radeon_bo *bo, uint32_t *name); -/* radeon_bo_pb.c */ -struct radeon_bo *radeon_bo_pb_get_bo(struct pb_buffer *_buf); -struct pb_manager *radeon_bo_pbmgr_create(struct radeon *radeon); -struct pb_buffer *radeon_bo_pb_create_buffer_from_handle(struct pb_manager *_mgr, - uint32_t handle); - -/* r600_hw_context.c */ +/* + * r600_hw_context.c + */ int r600_context_init_fence(struct r600_context *ctx); void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *rbo); void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, @@ -120,14 +140,27 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset); int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg); -/* r600_bo.c */ -unsigned r600_bo_get_handle(struct r600_bo *bo); -unsigned r600_bo_get_size(struct r600_bo *bo); -static INLINE struct radeon_bo *r600_bo_get_bo(struct r600_bo *bo) -{ - return radeon_bo_pb_get_bo(bo->pb); -} +/* + * r600_bo.c + */ +void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo); + +/* + * r600_bomgr.c + */ +struct r600_bomgr *r600_bomgr_create(struct radeon *radeon, unsigned usecs); +void r600_bomgr_destroy(struct r600_bomgr *mgr); +bool r600_bomgr_bo_destroy(struct r600_bomgr *mgr, struct r600_bo *bo); +void r600_bomgr_bo_init(struct r600_bomgr *mgr, struct r600_bo *bo); +struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr, + unsigned size, + unsigned alignment, + unsigned cfence); + +/* + * helpers + */ #define CTX_RANGE_ID(ctx, offset) (((offset) >> (ctx)->hash_shift) & 255) #define CTX_BLOCK_ID(ctx, offset) ((offset) & ((1 << (ctx)->hash_shift) - 1)) @@ -175,6 +208,9 @@ static inline void r600_context_block_emit_dirty(struct r600_context *ctx, struc LIST_DELINIT(&block->list); } +/* + * radeon_bo.c + */ static inline int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo) { bo->map_count++; @@ -187,4 +223,35 @@ static inline void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo) assert(bo->map_count >= 0); } +/* + * r600_bo + */ +static inline struct radeon_bo *r600_bo_get_bo(struct r600_bo *bo) +{ + return bo->bo; +} + +static unsigned inline r600_bo_get_handle(struct r600_bo *bo) +{ + return bo->bo->handle; +} + +static unsigned inline 
r600_bo_get_size(struct r600_bo *bo) +{ + return bo->size; +} + +/* + * fence + */ +static inline bool fence_is_after(unsigned fence, unsigned ofence) +{ + /* handle wrap around */ + if (fence < 0x80000000 && ofence > 0x80000000) + return TRUE; + if (fence > ofence) + return TRUE; + return FALSE; +} + #endif diff --git a/src/gallium/winsys/r600/drm/r600d.h b/src/gallium/winsys/r600/drm/r600d.h index 4a08d504aab..1c1ac76fe69 100644 --- a/src/gallium/winsys/r600/drm/r600d.h +++ b/src/gallium/winsys/r600/drm/r600d.h @@ -91,6 +91,7 @@ #define PKT3_SET_CTL_CONST 0x6F #define PKT3_SURFACE_BASE_UPDATE 0x73 +#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10 #define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14 #define EVENT_TYPE_ZPASS_DONE 0x15 #define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c index 557cfb95970..7e5f392efae 100644 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ b/src/gallium/winsys/r600/drm/radeon_bo.c @@ -156,7 +156,7 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo) if (!bo->shared) { if (!bo->fence) return 0; - if (bo->fence <= *bo->ctx->cfence) { + if (bo->fence <= *radeon->cfence) { LIST_DELINIT(&bo->fencedlist); bo->fence = 0; return 0; @@ -181,7 +181,7 @@ int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain if (!bo->shared) { if (!bo->fence) return 0; - if (bo->fence <= *bo->ctx->cfence) { + if (bo->fence <= *radeon->cfence) { LIST_DELINIT(&bo->fencedlist); bo->fence = 0; return 0; diff --git a/src/gallium/winsys/r600/drm/radeon_bo_pb.c b/src/gallium/winsys/r600/drm/radeon_bo_pb.c deleted file mode 100644 index 312552f0758..00000000000 --- a/src/gallium/winsys/r600/drm/radeon_bo_pb.c +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Copyright 2010 Dave Airlie - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
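fence_is_after() above is the wraparound-aware ordering test the buffer manager relies on: a fence that has wrapped into the low half of the 32-bit range is still treated as newer than one stuck in the high half, and the flush path additionally restarts the counter at 1 when it wraps. A tiny standalone check of how the comparison behaves; the body is copied from r600_priv.h, with bool/true/false standing in for the gallium boolean macros:

#include <assert.h>
#include <stdbool.h>

static bool fence_is_after(unsigned fence, unsigned ofence)
{
    /* handle wrap around */
    if (fence < 0x80000000u && ofence > 0x80000000u)
        return true;
    if (fence > ofence)
        return true;
    return false;
}

int main(void)
{
    assert(fence_is_after(10, 5));            /* ordinary ordering */
    assert(!fence_is_after(5, 10));
    assert(fence_is_after(3, 0xF0000000u));   /* counter wrapped: 3 is newer */
    return 0;
}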
- * - * Authors: - * Dave Airlie - */ -#include <util/u_inlines.h> -#include <util/u_memory.h> -#include <util/u_double_list.h> -#include <pipebuffer/pb_buffer.h> -#include <pipebuffer/pb_bufmgr.h> -#include "r600_priv.h" - -struct radeon_bo_pb { - struct pb_buffer b; - struct radeon_bo *bo; - - struct radeon_bo_pbmgr *mgr; - struct list_head maplist; -}; - -extern const struct pb_vtbl radeon_bo_pb_vtbl; - -static INLINE struct radeon_bo_pb *radeon_bo_pb(struct pb_buffer *buf) -{ - assert(buf); - assert(buf->vtbl == &radeon_bo_pb_vtbl); - return (struct radeon_bo_pb *)buf; -} - -struct radeon_bo_pbmgr { - struct pb_manager b; - struct radeon *radeon; - struct list_head buffer_map_list; -}; - -static INLINE struct radeon_bo_pbmgr *radeon_bo_pbmgr(struct pb_manager *mgr) -{ - assert(mgr); - return (struct radeon_bo_pbmgr *)mgr; -} - -static void radeon_bo_pb_destroy(struct pb_buffer *_buf) -{ - struct radeon_bo_pb *buf = radeon_bo_pb(_buf); - - /* If this buffer is on the list of buffers to unmap, - * do the unmapping now. - */ - if (!LIST_IS_EMPTY(&buf->maplist)) - radeon_bo_unmap(buf->mgr->radeon, buf->bo); - - LIST_DEL(&buf->maplist); - radeon_bo_reference(buf->mgr->radeon, &buf->bo, NULL); - FREE(buf); -} - -static void * -radeon_bo_pb_map_internal(struct pb_buffer *_buf, - unsigned flags, void *ctx) -{ - struct radeon_bo_pb *buf = radeon_bo_pb(_buf); - struct pipe_context *pctx = ctx; - - if (flags & PB_USAGE_UNSYNCHRONIZED) { - if (radeon_bo_map(buf->mgr->radeon, buf->bo)) { - return NULL; - } - LIST_DELINIT(&buf->maplist); - return buf->bo->data; - } - - if (p_atomic_read(&buf->bo->reference.count) > 1) { - if (flags & PB_USAGE_DONTBLOCK) { - return NULL; - } - if (ctx) { - pctx->flush(pctx, 0, NULL); - } - } - - if (flags & PB_USAGE_DONTBLOCK) { - uint32_t domain; - if (radeon_bo_busy(buf->mgr->radeon, buf->bo, &domain)) - return NULL; - if (radeon_bo_map(buf->mgr->radeon, buf->bo)) { - return NULL; - } - goto out; - } - - if (radeon_bo_map(buf->mgr->radeon, buf->bo)) { - return NULL; - } - if (radeon_bo_wait(buf->mgr->radeon, buf->bo)) { - radeon_bo_unmap(buf->mgr->radeon, buf->bo); - return NULL; - } -out: - LIST_DELINIT(&buf->maplist); - return buf->bo->data; -} - -static void radeon_bo_pb_unmap_internal(struct pb_buffer *_buf) -{ - struct radeon_bo_pb *buf = radeon_bo_pb(_buf); - LIST_ADDTAIL(&buf->maplist, &buf->mgr->buffer_map_list); -} - -static void -radeon_bo_pb_get_base_buffer(struct pb_buffer *buf, - struct pb_buffer **base_buf, - unsigned *offset) -{ - *base_buf = buf; - *offset = 0; -} - -static enum pipe_error -radeon_bo_pb_validate(struct pb_buffer *_buf, - struct pb_validate *vl, - unsigned flags) -{ - /* Always pinned */ - return PIPE_OK; -} - -static void -radeon_bo_pb_fence(struct pb_buffer *buf, - struct pipe_fence_handle *fence) -{ -} - -const struct pb_vtbl radeon_bo_pb_vtbl = { - radeon_bo_pb_destroy, - radeon_bo_pb_map_internal, - radeon_bo_pb_unmap_internal, - radeon_bo_pb_validate, - radeon_bo_pb_fence, - radeon_bo_pb_get_base_buffer, -}; - -struct pb_buffer * -radeon_bo_pb_create_buffer_from_handle(struct pb_manager *_mgr, - uint32_t handle) -{ - struct radeon_bo_pbmgr *mgr = radeon_bo_pbmgr(_mgr); - struct radeon *radeon = mgr->radeon; - struct radeon_bo_pb *bo; - struct radeon_bo *hw_bo; - - hw_bo = radeon_bo(radeon, handle, 0, 0); - if (hw_bo == NULL) - return NULL; - - bo = CALLOC_STRUCT(radeon_bo_pb); - if (!bo) { - radeon_bo_reference(radeon, &hw_bo, NULL); - return NULL; - } - - LIST_INITHEAD(&bo->maplist); - 
pipe_reference_init(&bo->b.base.reference, 1); - bo->b.base.alignment = 0; - bo->b.base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ; - bo->b.base.size = hw_bo->size; - bo->b.vtbl = &radeon_bo_pb_vtbl; - bo->mgr = mgr; - - bo->bo = hw_bo; - - return &bo->b; -} - -static struct pb_buffer * -radeon_bo_pb_create_buffer(struct pb_manager *_mgr, - pb_size size, - const struct pb_desc *desc) -{ - struct radeon_bo_pbmgr *mgr = radeon_bo_pbmgr(_mgr); - struct radeon *radeon = mgr->radeon; - struct radeon_bo_pb *bo; - - bo = CALLOC_STRUCT(radeon_bo_pb); - if (!bo) - goto error1; - - pipe_reference_init(&bo->b.base.reference, 1); - bo->b.base.alignment = desc->alignment; - bo->b.base.usage = desc->usage; - bo->b.base.size = size; - bo->b.vtbl = &radeon_bo_pb_vtbl; - bo->mgr = mgr; - - LIST_INITHEAD(&bo->maplist); - - bo->bo = radeon_bo(radeon, 0, size, desc->alignment); - if (bo->bo == NULL) - goto error2; - return &bo->b; - -error2: - FREE(bo); -error1: - return NULL; -} - -static void -radeon_bo_pbmgr_flush(struct pb_manager *mgr) -{ - /* NOP */ -} - -static void -radeon_bo_pbmgr_destroy(struct pb_manager *_mgr) -{ - struct radeon_bo_pbmgr *mgr = radeon_bo_pbmgr(_mgr); - FREE(mgr); -} - -struct pb_manager *radeon_bo_pbmgr_create(struct radeon *radeon) -{ - struct radeon_bo_pbmgr *mgr; - - mgr = CALLOC_STRUCT(radeon_bo_pbmgr); - if (!mgr) - return NULL; - - mgr->b.destroy = radeon_bo_pbmgr_destroy; - mgr->b.create_buffer = radeon_bo_pb_create_buffer; - mgr->b.flush = radeon_bo_pbmgr_flush; - - mgr->radeon = radeon; - LIST_INITHEAD(&mgr->buffer_map_list); - return &mgr->b; -} - -void radeon_bo_pbmgr_flush_maps(struct pb_manager *_mgr) -{ - struct radeon_bo_pbmgr *mgr = radeon_bo_pbmgr(_mgr); - struct radeon_bo_pb *rpb = NULL; - struct radeon_bo_pb *t_rpb; - - LIST_FOR_EACH_ENTRY_SAFE(rpb, t_rpb, &mgr->buffer_map_list, maplist) { - radeon_bo_unmap(mgr->radeon, rpb->bo); - LIST_DELINIT(&rpb->maplist); - } - - LIST_INITHEAD(&mgr->buffer_map_list); -} - -struct radeon_bo *radeon_bo_pb_get_bo(struct pb_buffer *_buf) -{ - struct radeon_bo_pb *buf; - if (_buf->vtbl == &radeon_bo_pb_vtbl) { - buf = radeon_bo_pb(_buf); - return buf->bo; - } else { - struct pb_buffer *base_buf; - pb_size offset; - pb_get_base_buffer(_buf, &base_buf, &offset); - if (base_buf->vtbl == &radeon_bo_pb_vtbl) { - buf = radeon_bo_pb(base_buf); - return buf->bo; - } - } - return NULL; -} diff --git a/src/gallium/winsys/r600/drm/radeon_pciid.c b/src/gallium/winsys/r600/drm/radeon_pciid.c index 18bddc1a7ab..e2622abd468 100644 --- a/src/gallium/winsys/r600/drm/radeon_pciid.c +++ b/src/gallium/winsys/r600/drm/radeon_pciid.c @@ -24,7 +24,7 @@ * Jerome Glisse */ #include <stdlib.h> -#include "r600.h" +#include "r600_priv.h" struct pci_id { unsigned vendor; @@ -445,6 +445,42 @@ struct pci_id radeon_pci_id[] = { {0x1002, 0x9803, CHIP_PALM}, {0x1002, 0x9804, CHIP_PALM}, {0x1002, 0x9805, CHIP_PALM}, + {0x1002, 0x6720, CHIP_BARTS}, + {0x1002, 0x6721, CHIP_BARTS}, + {0x1002, 0x6722, CHIP_BARTS}, + {0x1002, 0x6723, CHIP_BARTS}, + {0x1002, 0x6724, CHIP_BARTS}, + {0x1002, 0x6725, CHIP_BARTS}, + {0x1002, 0x6726, CHIP_BARTS}, + {0x1002, 0x6727, CHIP_BARTS}, + {0x1002, 0x6728, CHIP_BARTS}, + {0x1002, 0x6729, CHIP_BARTS}, + {0x1002, 0x6738, CHIP_BARTS}, + {0x1002, 0x6739, CHIP_BARTS}, + {0x1002, 0x6740, CHIP_TURKS}, + {0x1002, 0x6741, CHIP_TURKS}, + {0x1002, 0x6742, CHIP_TURKS}, + {0x1002, 0x6743, CHIP_TURKS}, + {0x1002, 0x6744, CHIP_TURKS}, + {0x1002, 0x6745, CHIP_TURKS}, + {0x1002, 0x6746, CHIP_TURKS}, + {0x1002, 0x6747, CHIP_TURKS}, + {0x1002, 
0x6748, CHIP_TURKS}, + {0x1002, 0x6749, CHIP_TURKS}, + {0x1002, 0x6750, CHIP_TURKS}, + {0x1002, 0x6758, CHIP_TURKS}, + {0x1002, 0x6759, CHIP_TURKS}, + {0x1002, 0x6760, CHIP_CAICOS}, + {0x1002, 0x6761, CHIP_CAICOS}, + {0x1002, 0x6762, CHIP_CAICOS}, + {0x1002, 0x6763, CHIP_CAICOS}, + {0x1002, 0x6764, CHIP_CAICOS}, + {0x1002, 0x6765, CHIP_CAICOS}, + {0x1002, 0x6766, CHIP_CAICOS}, + {0x1002, 0x6767, CHIP_CAICOS}, + {0x1002, 0x6768, CHIP_CAICOS}, + {0x1002, 0x6770, CHIP_CAICOS}, + {0x1002, 0x6779, CHIP_CAICOS}, {0, 0}, }; @@ -460,80 +496,3 @@ unsigned radeon_family_from_device(unsigned device) } return CHIP_UNKNOWN; } - -int radeon_is_family_compatible(unsigned family1, unsigned family2) -{ - switch (family1) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - case CHIP_PALM: - switch (family2) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - case CHIP_PALM: - return 1; - default: - return 0; - } - break; - case CHIP_R100: - case CHIP_RV100: - case CHIP_RS100: - case CHIP_RV200: - case CHIP_RS200: - case CHIP_R200: - case CHIP_RV250: - case CHIP_RS300: - case CHIP_RV280: - case CHIP_R300: - case CHIP_R350: - case CHIP_RV350: - case CHIP_RV380: - case CHIP_R420: - case CHIP_R423: - case CHIP_RV410: - case CHIP_RS400: - case CHIP_RS480: - case CHIP_RS600: - case CHIP_RS690: - case CHIP_RS740: - case CHIP_RV515: - case CHIP_R520: - case CHIP_RV530: - case CHIP_RV560: - case CHIP_RV570: - case CHIP_R580: - default: - return 0; - } -} diff --git a/src/gallium/winsys/radeon/drm/Makefile b/src/gallium/winsys/radeon/drm/Makefile index 7f69e392735..7e339a2ecfe 100644 --- a/src/gallium/winsys/radeon/drm/Makefile +++ b/src/gallium/winsys/radeon/drm/Makefile @@ -6,7 +6,8 @@ LIBNAME = radeonwinsys C_SOURCES = \ radeon_drm_buffer.c \ - radeon_drm.c \ + radeon_drm_cs.c \ + radeon_drm_common.c \ radeon_r300.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/r300 \ diff --git a/src/gallium/winsys/radeon/drm/SConscript b/src/gallium/winsys/radeon/drm/SConscript index 60e409fe10f..80816621848 100644 --- a/src/gallium/winsys/radeon/drm/SConscript +++ b/src/gallium/winsys/radeon/drm/SConscript @@ -4,7 +4,8 @@ env = env.Clone() radeon_sources = [ 'radeon_drm_buffer.c', - 'radeon_drm.c', + 'radeon_drm_cs.c', + 'radeon_drm_common.c', 'radeon_r300.c', ] diff --git a/src/gallium/winsys/radeon/drm/radeon_buffer.h b/src/gallium/winsys/radeon/drm/radeon_buffer.h deleted file mode 100644 index a8137d85e83..00000000000 --- a/src/gallium/winsys/radeon/drm/radeon_buffer.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright © 2008 Jérôme Glisse - * All Rights Reserved. 
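The table additions above teach radeon_pciid.c about the Northern Islands parts (BARTS, TURKS, CAICOS). Only the tail of radeon_family_from_device() (the CHIP_UNKNOWN fallback) is visible in the hunk, so the scan below is an assumption about its shape: walk the {vendor, device, family} entries until the zero sentinel. The ids are copied from the hunk; the enum is a local stand-in for the real radeon_family values:

#include <stdio.h>

enum chip_family { CHIP_UNKNOWN = 0, CHIP_BARTS, CHIP_TURKS, CHIP_CAICOS };

struct pci_id {
    unsigned vendor;
    unsigned device;
    enum chip_family family;
};

/* A few entries from the table in radeon_pciid.c; the real table is several
 * hundred entries long and ends with a {0, 0} sentinel. */
static const struct pci_id sample_ids[] = {
    {0x1002, 0x6720, CHIP_BARTS},
    {0x1002, 0x6740, CHIP_TURKS},
    {0x1002, 0x6760, CHIP_CAICOS},
    {0, 0, CHIP_UNKNOWN},
};

/* Assumed shape of radeon_family_from_device(): linear scan to the sentinel. */
static enum chip_family family_from_device(unsigned device)
{
    for (unsigned i = 0; sample_ids[i].vendor; i++) {
        if (sample_ids[i].device == device)
            return sample_ids[i].family;
    }
    return CHIP_UNKNOWN;
}

int main(void)
{
    printf("0x6740 -> %d (expect CHIP_TURKS)\n", family_from_device(0x6740));
    return 0;
}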
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ -/* - * Authors: - * Jérôme Glisse <[email protected]> - */ -#ifndef RADEON_BUFFER_H -#define RADEON_BUFFER_H - -#include <stdio.h> - -#include "pipe/p_defines.h" -#include "util/u_inlines.h" - -#include "pipebuffer/pb_buffer.h" -#include "pipebuffer/pb_bufmgr.h" - -#include "radeon_bo.h" -#include "radeon_cs.h" - -#include "radeon_winsys.h" - -#define RADEON_PB_USAGE_VERTEX (1 << 28) -#define RADEON_PB_USAGE_DOMAIN_GTT (1 << 29) -#define RADEON_PB_USAGE_DOMAIN_VRAM (1 << 30) - -static INLINE struct pb_buffer * -radeon_pb_buffer(struct r300_winsys_buffer *buffer) -{ - return (struct pb_buffer *)buffer; -} - -static INLINE struct r300_winsys_buffer * -radeon_libdrm_winsys_buffer(struct pb_buffer *buffer) -{ - return (struct r300_winsys_buffer *)buffer; -} - -struct pb_manager * -radeon_drm_bufmgr_create(struct radeon_libdrm_winsys *rws); - -void radeon_drm_bufmgr_add_buffer(struct r300_winsys_cs *cs, - struct r300_winsys_buffer *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd); - -void radeon_drm_bufmgr_write_reloc(struct r300_winsys_cs *cs, - struct r300_winsys_buffer *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd); - -struct pb_buffer *radeon_drm_bufmgr_create_buffer_from_handle(struct pb_manager *_mgr, - uint32_t handle); - -void radeon_drm_bufmgr_get_tiling(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, - enum r300_buffer_tiling *microtiled, - enum r300_buffer_tiling *macrotiled); - -void radeon_drm_bufmgr_set_tiling(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, - enum r300_buffer_tiling microtiled, - enum r300_buffer_tiling macrotiled, - uint32_t pitch); - -void radeon_drm_bufmgr_flush_maps(struct pb_manager *_mgr); - -boolean radeon_drm_bufmgr_get_handle(struct pb_buffer *_buf, - struct winsys_handle *whandle); - -boolean radeon_drm_bufmgr_is_buffer_referenced(struct r300_winsys_cs *cs, - struct r300_winsys_buffer *buf, - enum r300_reference_domain domain); - -void radeon_drm_bufmgr_wait(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf); - -void *radeon_drm_buffer_map(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, - struct r300_winsys_cs *cs, - enum pipe_transfer_usage usage); - -void radeon_drm_buffer_unmap(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf); - -#endif diff --git a/src/gallium/winsys/radeon/drm/radeon_drm.h 
b/src/gallium/winsys/radeon/drm/radeon_drm.h deleted file mode 100644 index 061229f1615..00000000000 --- a/src/gallium/winsys/radeon/drm/radeon_drm.h +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright © 2009 Corbin Simpson - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ -/* - * Authors: - * Corbin Simpson <[email protected]> - */ -#ifndef RADEON_DRM_H -#define RADEON_DRM_H - -#include "state_tracker/drm_driver.h" - -/* Guess at whether this chipset should use r300g. - * - * I believe that this check is valid, but I haven't been exhaustive. */ -static INLINE boolean is_r3xx(int pciid) -{ - switch (pciid) { - case 0x4144: /* PCI_CHIP_R300_AD */ - case 0x4145: /* PCI_CHIP_R300_AE */ - case 0x4146: /* PCI_CHIP_R300_AF */ - case 0x4147: /* PCI_CHIP_R300_AG */ - case 0x4E44: /* PCI_CHIP_R300_ND */ - case 0x4E45: /* PCI_CHIP_R300_NE */ - case 0x4E46: /* PCI_CHIP_R300_NF */ - case 0x4E47: /* PCI_CHIP_R300_NG */ - case 0x4E48: /* PCI_CHIP_R350_NH */ - case 0x4E49: /* PCI_CHIP_R350_NI */ - case 0x4E4B: /* PCI_CHIP_R350_NK */ - case 0x4148: /* PCI_CHIP_R350_AH */ - case 0x4149: /* PCI_CHIP_R350_AI */ - case 0x414A: /* PCI_CHIP_R350_AJ */ - case 0x414B: /* PCI_CHIP_R350_AK */ - case 0x4E4A: /* PCI_CHIP_R360_NJ */ - case 0x4150: /* PCI_CHIP_RV350_AP */ - case 0x4151: /* PCI_CHIP_RV350_AQ */ - case 0x4152: /* PCI_CHIP_RV350_AR */ - case 0x4153: /* PCI_CHIP_RV350_AS */ - case 0x4154: /* PCI_CHIP_RV350_AT */ - case 0x4155: /* PCI_CHIP_RV350_AU */ - case 0x4156: /* PCI_CHIP_RV350_AV */ - case 0x4E50: /* PCI_CHIP_RV350_NP */ - case 0x4E51: /* PCI_CHIP_RV350_NQ */ - case 0x4E52: /* PCI_CHIP_RV350_NR */ - case 0x4E53: /* PCI_CHIP_RV350_NS */ - case 0x4E54: /* PCI_CHIP_RV350_NT */ - case 0x4E56: /* PCI_CHIP_RV350_NV */ - case 0x5460: /* PCI_CHIP_RV370_5460 */ - case 0x5462: /* PCI_CHIP_RV370_5462 */ - case 0x5464: /* PCI_CHIP_RV370_5464 */ - case 0x5B60: /* PCI_CHIP_RV370_5B60 */ - case 0x5B62: /* PCI_CHIP_RV370_5B62 */ - case 0x5B63: /* PCI_CHIP_RV370_5B63 */ - case 0x5B64: /* PCI_CHIP_RV370_5B64 */ - case 0x5B65: /* PCI_CHIP_RV370_5B65 */ - case 0x3150: /* PCI_CHIP_RV380_3150 */ - case 0x3152: /* PCI_CHIP_RV380_3152 */ - case 0x3154: /* PCI_CHIP_RV380_3154 */ - case 0x3155: /* PCI_CHIP_RV380_3155 */ - case 0x3E50: /* PCI_CHIP_RV380_3E50 */ - case 0x3E54: /* PCI_CHIP_RV380_3E54 */ - case 0x4A48: /* PCI_CHIP_R420_JH */ - case 0x4A49: /* PCI_CHIP_R420_JI */ - case 0x4A4A: /* PCI_CHIP_R420_JJ */ 
- case 0x4A4B: /* PCI_CHIP_R420_JK */ - case 0x4A4C: /* PCI_CHIP_R420_JL */ - case 0x4A4D: /* PCI_CHIP_R420_JM */ - case 0x4A4E: /* PCI_CHIP_R420_JN */ - case 0x4A4F: /* PCI_CHIP_R420_JO */ - case 0x4A50: /* PCI_CHIP_R420_JP */ - case 0x4A54: /* PCI_CHIP_R420_JT */ - case 0x5548: /* PCI_CHIP_R423_UH */ - case 0x5549: /* PCI_CHIP_R423_UI */ - case 0x554A: /* PCI_CHIP_R423_UJ */ - case 0x554B: /* PCI_CHIP_R423_UK */ - case 0x5550: /* PCI_CHIP_R423_5550 */ - case 0x5551: /* PCI_CHIP_R423_UQ */ - case 0x5552: /* PCI_CHIP_R423_UR */ - case 0x5554: /* PCI_CHIP_R423_UT */ - case 0x5D57: /* PCI_CHIP_R423_5D57 */ - case 0x554C: /* PCI_CHIP_R430_554C */ - case 0x554D: /* PCI_CHIP_R430_554D */ - case 0x554E: /* PCI_CHIP_R430_554E */ - case 0x554F: /* PCI_CHIP_R430_554F */ - case 0x5D48: /* PCI_CHIP_R430_5D48 */ - case 0x5D49: /* PCI_CHIP_R430_5D49 */ - case 0x5D4A: /* PCI_CHIP_R430_5D4A */ - case 0x5D4C: /* PCI_CHIP_R480_5D4C */ - case 0x5D4D: /* PCI_CHIP_R480_5D4D */ - case 0x5D4E: /* PCI_CHIP_R480_5D4E */ - case 0x5D4F: /* PCI_CHIP_R480_5D4F */ - case 0x5D50: /* PCI_CHIP_R480_5D50 */ - case 0x5D52: /* PCI_CHIP_R480_5D52 */ - case 0x4B49: /* PCI_CHIP_R481_4B49 */ - case 0x4B4A: /* PCI_CHIP_R481_4B4A */ - case 0x4B4B: /* PCI_CHIP_R481_4B4B */ - case 0x4B4C: /* PCI_CHIP_R481_4B4C */ - case 0x564A: /* PCI_CHIP_RV410_564A */ - case 0x564B: /* PCI_CHIP_RV410_564B */ - case 0x564F: /* PCI_CHIP_RV410_564F */ - case 0x5652: /* PCI_CHIP_RV410_5652 */ - case 0x5653: /* PCI_CHIP_RV410_5653 */ - case 0x5657: /* PCI_CHIP_RV410_5657 */ - case 0x5E48: /* PCI_CHIP_RV410_5E48 */ - case 0x5E4A: /* PCI_CHIP_RV410_5E4A */ - case 0x5E4B: /* PCI_CHIP_RV410_5E4B */ - case 0x5E4C: /* PCI_CHIP_RV410_5E4C */ - case 0x5E4D: /* PCI_CHIP_RV410_5E4D */ - case 0x5E4F: /* PCI_CHIP_RV410_5E4F */ - case 0x5A41: /* PCI_CHIP_RS400_5A41 */ - case 0x5A42: /* PCI_CHIP_RS400_5A42 */ - case 0x5A61: /* PCI_CHIP_RC410_5A61 */ - case 0x5A62: /* PCI_CHIP_RC410_5A62 */ - case 0x5954: /* PCI_CHIP_RS480_5954 */ - case 0x5955: /* PCI_CHIP_RS480_5955 */ - case 0x5974: /* PCI_CHIP_RS482_5974 */ - case 0x5975: /* PCI_CHIP_RS482_5975 */ - case 0x7100: /* PCI_CHIP_R520_7100 */ - case 0x7101: /* PCI_CHIP_R520_7101 */ - case 0x7102: /* PCI_CHIP_R520_7102 */ - case 0x7103: /* PCI_CHIP_R520_7103 */ - case 0x7104: /* PCI_CHIP_R520_7104 */ - case 0x7105: /* PCI_CHIP_R520_7105 */ - case 0x7106: /* PCI_CHIP_R520_7106 */ - case 0x7108: /* PCI_CHIP_R520_7108 */ - case 0x7109: /* PCI_CHIP_R520_7109 */ - case 0x710A: /* PCI_CHIP_R520_710A */ - case 0x710B: /* PCI_CHIP_R520_710B */ - case 0x710C: /* PCI_CHIP_R520_710C */ - case 0x710E: /* PCI_CHIP_R520_710E */ - case 0x710F: /* PCI_CHIP_R520_710F */ - case 0x7140: /* PCI_CHIP_RV515_7140 */ - case 0x7141: /* PCI_CHIP_RV515_7141 */ - case 0x7142: /* PCI_CHIP_RV515_7142 */ - case 0x7143: /* PCI_CHIP_RV515_7143 */ - case 0x7144: /* PCI_CHIP_RV515_7144 */ - case 0x7145: /* PCI_CHIP_RV515_7145 */ - case 0x7146: /* PCI_CHIP_RV515_7146 */ - case 0x7147: /* PCI_CHIP_RV515_7147 */ - case 0x7149: /* PCI_CHIP_RV515_7149 */ - case 0x714A: /* PCI_CHIP_RV515_714A */ - case 0x714B: /* PCI_CHIP_RV515_714B */ - case 0x714C: /* PCI_CHIP_RV515_714C */ - case 0x714D: /* PCI_CHIP_RV515_714D */ - case 0x714E: /* PCI_CHIP_RV515_714E */ - case 0x714F: /* PCI_CHIP_RV515_714F */ - case 0x7151: /* PCI_CHIP_RV515_7151 */ - case 0x7152: /* PCI_CHIP_RV515_7152 */ - case 0x7153: /* PCI_CHIP_RV515_7153 */ - case 0x715E: /* PCI_CHIP_RV515_715E */ - case 0x715F: /* PCI_CHIP_RV515_715F */ - case 0x7180: /* PCI_CHIP_RV515_7180 */ - case 0x7181: /* 
PCI_CHIP_RV515_7181 */ - case 0x7183: /* PCI_CHIP_RV515_7183 */ - case 0x7186: /* PCI_CHIP_RV515_7186 */ - case 0x7187: /* PCI_CHIP_RV515_7187 */ - case 0x7188: /* PCI_CHIP_RV515_7188 */ - case 0x718A: /* PCI_CHIP_RV515_718A */ - case 0x718B: /* PCI_CHIP_RV515_718B */ - case 0x718C: /* PCI_CHIP_RV515_718C */ - case 0x718D: /* PCI_CHIP_RV515_718D */ - case 0x718F: /* PCI_CHIP_RV515_718F */ - case 0x7193: /* PCI_CHIP_RV515_7193 */ - case 0x7196: /* PCI_CHIP_RV515_7196 */ - case 0x719B: /* PCI_CHIP_RV515_719B */ - case 0x719F: /* PCI_CHIP_RV515_719F */ - case 0x7200: /* PCI_CHIP_RV515_7200 */ - case 0x7210: /* PCI_CHIP_RV515_7210 */ - case 0x7211: /* PCI_CHIP_RV515_7211 */ - case 0x71C0: /* PCI_CHIP_RV530_71C0 */ - case 0x71C1: /* PCI_CHIP_RV530_71C1 */ - case 0x71C2: /* PCI_CHIP_RV530_71C2 */ - case 0x71C3: /* PCI_CHIP_RV530_71C3 */ - case 0x71C4: /* PCI_CHIP_RV530_71C4 */ - case 0x71C5: /* PCI_CHIP_RV530_71C5 */ - case 0x71C6: /* PCI_CHIP_RV530_71C6 */ - case 0x71C7: /* PCI_CHIP_RV530_71C7 */ - case 0x71CD: /* PCI_CHIP_RV530_71CD */ - case 0x71CE: /* PCI_CHIP_RV530_71CE */ - case 0x71D2: /* PCI_CHIP_RV530_71D2 */ - case 0x71D4: /* PCI_CHIP_RV530_71D4 */ - case 0x71D5: /* PCI_CHIP_RV530_71D5 */ - case 0x71D6: /* PCI_CHIP_RV530_71D6 */ - case 0x71DA: /* PCI_CHIP_RV530_71DA */ - case 0x71DE: /* PCI_CHIP_RV530_71DE */ - case 0x7281: /* PCI_CHIP_RV560_7281 */ - case 0x7283: /* PCI_CHIP_RV560_7283 */ - case 0x7287: /* PCI_CHIP_RV560_7287 */ - case 0x7290: /* PCI_CHIP_RV560_7290 */ - case 0x7291: /* PCI_CHIP_RV560_7291 */ - case 0x7293: /* PCI_CHIP_RV560_7293 */ - case 0x7297: /* PCI_CHIP_RV560_7297 */ - case 0x7280: /* PCI_CHIP_RV570_7280 */ - case 0x7288: /* PCI_CHIP_RV570_7288 */ - case 0x7289: /* PCI_CHIP_RV570_7289 */ - case 0x728B: /* PCI_CHIP_RV570_728B */ - case 0x728C: /* PCI_CHIP_RV570_728C */ - case 0x7240: /* PCI_CHIP_R580_7240 */ - case 0x7243: /* PCI_CHIP_R580_7243 */ - case 0x7244: /* PCI_CHIP_R580_7244 */ - case 0x7245: /* PCI_CHIP_R580_7245 */ - case 0x7246: /* PCI_CHIP_R580_7246 */ - case 0x7247: /* PCI_CHIP_R580_7247 */ - case 0x7248: /* PCI_CHIP_R580_7248 */ - case 0x7249: /* PCI_CHIP_R580_7249 */ - case 0x724A: /* PCI_CHIP_R580_724A */ - case 0x724B: /* PCI_CHIP_R580_724B */ - case 0x724C: /* PCI_CHIP_R580_724C */ - case 0x724D: /* PCI_CHIP_R580_724D */ - case 0x724E: /* PCI_CHIP_R580_724E */ - case 0x724F: /* PCI_CHIP_R580_724F */ - case 0x7284: /* PCI_CHIP_R580_7284 */ - case 0x793F: /* PCI_CHIP_RS600_793F */ - case 0x7941: /* PCI_CHIP_RS600_7941 */ - case 0x7942: /* PCI_CHIP_RS600_7942 */ - case 0x791E: /* PCI_CHIP_RS690_791E */ - case 0x791F: /* PCI_CHIP_RS690_791F */ - case 0x796C: /* PCI_CHIP_RS740_796C */ - case 0x796D: /* PCI_CHIP_RS740_796D */ - case 0x796E: /* PCI_CHIP_RS740_796E */ - case 0x796F: /* PCI_CHIP_RS740_796F */ - return TRUE; - default: - return FALSE; - } -} - -#endif diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c index 78723948d41..5e14287ec2d 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c @@ -1,19 +1,17 @@ - -#include <sys/ioctl.h> -#include "radeon_drm.h" -#include "radeon_bo_gem.h" -#include "radeon_cs_gem.h" -#include "radeon_buffer.h" +#include "radeon_drm_buffer.h" +#include "radeon_drm_cs.h" #include "util/u_hash_table.h" -#include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_simple_list.h" -#include "pipebuffer/pb_buffer.h" #include "pipebuffer/pb_bufmgr.h" #include "os/os_thread.h" 
-#include "radeon_winsys.h" +#include "state_tracker/drm_driver.h" + +#include <radeon_drm.h> +#include <radeon_bo_gem.h> +#include <sys/ioctl.h> struct radeon_drm_bufmgr; @@ -45,7 +43,7 @@ struct radeon_drm_bufmgr { struct pb_manager base; /* Winsys. */ - struct radeon_libdrm_winsys *rws; + struct radeon_drm_winsys *rws; /* List of mapped buffers and its mutex. */ struct radeon_drm_buffer buffer_map_list; @@ -115,7 +113,7 @@ radeon_drm_buffer_map_internal(struct pb_buffer *_buf, unsigned flags, void *flush_ctx) { struct radeon_drm_buffer *buf = radeon_drm_buffer(_buf); - struct radeon_libdrm_cs *cs = flush_ctx; + struct radeon_drm_cs *cs = flush_ctx; int write = 0; /* Note how we use radeon_bo_is_referenced_by_cs here. There are @@ -157,7 +155,7 @@ radeon_drm_buffer_map_internal(struct pb_buffer *_buf, * we cannot flush. */ assert(cs || !radeon_bo_is_referenced_by_cs(buf->bo, NULL)); - if (cs && radeon_bo_is_referenced_by_cs(buf->bo, cs->cs)) { + if (cs && radeon_bo_is_referenced_by_cs(buf->bo, NULL)) { cs->flush_cs(cs->flush_data); } @@ -225,7 +223,7 @@ radeon_drm_bufmgr_create_buffer_from_handle_unsafe(struct pb_manager *_mgr, uint32_t handle) { struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); - struct radeon_libdrm_winsys *rws = mgr->rws; + struct radeon_drm_winsys *rws = mgr->rws; struct radeon_drm_buffer *buf; struct radeon_bo *bo; @@ -284,7 +282,7 @@ radeon_drm_bufmgr_create_buffer(struct pb_manager *_mgr, const struct pb_desc *desc) { struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); - struct radeon_libdrm_winsys *rws = mgr->rws; + struct radeon_drm_winsys *rws = mgr->rws; struct radeon_drm_buffer *buf; uint32_t domain; @@ -345,7 +343,7 @@ static int handle_compare(void *key1, void *key2) } struct pb_manager * -radeon_drm_bufmgr_create(struct radeon_libdrm_winsys *rws) +radeon_drm_bufmgr_create(struct radeon_drm_winsys *rws) { struct radeon_drm_bufmgr *mgr; @@ -383,18 +381,18 @@ static struct radeon_drm_buffer *get_drm_buffer(struct pb_buffer *_buf) return buf; } -void *radeon_drm_buffer_map(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, - struct r300_winsys_cs *cs, - enum pipe_transfer_usage usage) +static void *radeon_drm_buffer_map(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf, + struct r300_winsys_cs *cs, + enum pipe_transfer_usage usage) { struct pb_buffer *_buf = radeon_pb_buffer(buf); - return pb_map(_buf, get_pb_usage_from_transfer_flags(usage), radeon_libdrm_cs(cs)); + return pb_map(_buf, get_pb_usage_from_transfer_flags(usage), radeon_drm_cs(cs)); } -void radeon_drm_buffer_unmap(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf) +static void radeon_drm_buffer_unmap(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf) { struct pb_buffer *_buf = radeon_pb_buffer(buf); @@ -425,10 +423,10 @@ boolean radeon_drm_bufmgr_get_handle(struct pb_buffer *_buf, return TRUE; } -void radeon_drm_bufmgr_get_tiling(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *_buf, - enum r300_buffer_tiling *microtiled, - enum r300_buffer_tiling *macrotiled) +static void radeon_drm_buffer_get_tiling(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *_buf, + enum r300_buffer_tiling *microtiled, + enum r300_buffer_tiling *macrotiled) { struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); uint32_t flags = 0, pitch; @@ -444,11 +442,11 @@ void radeon_drm_bufmgr_get_tiling(struct r300_winsys_screen *ws, *macrotiled = R300_BUFFER_TILED; } -void radeon_drm_bufmgr_set_tiling(struct 
r300_winsys_screen *ws, - struct r300_winsys_buffer *_buf, - enum r300_buffer_tiling microtiled, - enum r300_buffer_tiling macrotiled, - uint32_t pitch) +static void radeon_drm_buffer_set_tiling(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *_buf, + enum r300_buffer_tiling microtiled, + enum r300_buffer_tiling macrotiled, + uint32_t pitch) { struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); uint32_t flags = 0; @@ -465,66 +463,30 @@ void radeon_drm_bufmgr_set_tiling(struct r300_winsys_screen *ws, radeon_bo_set_tiling(buf->bo, flags, pitch); } -static uint32_t get_gem_domain(enum r300_buffer_domain domain) -{ - uint32_t res = 0; - - if (domain & R300_DOMAIN_GTT) - res |= RADEON_GEM_DOMAIN_GTT; - if (domain & R300_DOMAIN_VRAM) - res |= RADEON_GEM_DOMAIN_VRAM; - return res; -} - -void radeon_drm_bufmgr_add_buffer(struct r300_winsys_cs *rcs, - struct r300_winsys_buffer *_buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd) -{ - struct radeon_libdrm_cs *cs = radeon_libdrm_cs(rcs); - struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); - uint32_t gem_rd = get_gem_domain(rd); - uint32_t gem_wd = get_gem_domain(wd); - - radeon_cs_space_add_persistent_bo(cs->cs, buf->bo, gem_rd, gem_wd); -} - -void radeon_drm_bufmgr_write_reloc(struct r300_winsys_cs *rcs, - struct r300_winsys_buffer *_buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd) +static struct r300_winsys_cs_buffer *radeon_drm_get_cs_handle( + struct r300_winsys_screen *rws, + struct r300_winsys_buffer *_buf) { - struct radeon_libdrm_cs *cs = radeon_libdrm_cs(rcs); - struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); - int retval; - uint32_t gem_rd = get_gem_domain(rd); - uint32_t gem_wd = get_gem_domain(wd); - - cs->cs->cdw = cs->base.cdw; - retval = radeon_cs_write_reloc(cs->cs, buf->bo, gem_rd, gem_wd, 0); - cs->base.cdw = cs->cs->cdw; - if (retval) { - fprintf(stderr, "radeon: Relocation of %p (%d, %d, %d) failed!\n", - buf, gem_rd, gem_wd, 0); - } + /* return pure radeon_bo. 
*/ + return (struct r300_winsys_cs_buffer*) + get_drm_buffer(radeon_pb_buffer(_buf))->bo; } -boolean radeon_drm_bufmgr_is_buffer_referenced(struct r300_winsys_cs *rcs, - struct r300_winsys_buffer *_buf, +static boolean radeon_drm_is_buffer_referenced(struct r300_winsys_cs *rcs, + struct r300_winsys_cs_buffer *_buf, enum r300_reference_domain domain) { - struct radeon_libdrm_cs *cs = radeon_libdrm_cs(rcs); - struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); + struct radeon_bo *bo = (struct radeon_bo*)_buf; uint32_t tmp; if (domain & R300_REF_CS) { - if (radeon_bo_is_referenced_by_cs(buf->bo, cs->cs)) { + if (radeon_bo_is_referenced_by_cs(bo, NULL)) { return TRUE; } } if (domain & R300_REF_HW) { - if (radeon_bo_is_busy(buf->bo, &tmp)) { + if (radeon_bo_is_busy(bo, &tmp)) { return TRUE; } } @@ -550,10 +512,21 @@ void radeon_drm_bufmgr_flush_maps(struct pb_manager *_mgr) pipe_mutex_unlock(mgr->buffer_map_list_mutex); } -void radeon_drm_bufmgr_wait(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *_buf) +static void radeon_drm_buffer_wait(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *_buf) { struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); radeon_bo_wait(buf->bo); } + +void radeon_drm_bufmgr_init_functions(struct radeon_drm_winsys *ws) +{ + ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle; + ws->base.buffer_set_tiling = radeon_drm_buffer_set_tiling; + ws->base.buffer_get_tiling = radeon_drm_buffer_get_tiling; + ws->base.buffer_map = radeon_drm_buffer_map; + ws->base.buffer_unmap = radeon_drm_buffer_unmap; + ws->base.buffer_wait = radeon_drm_buffer_wait; + ws->base.cs_is_buffer_referenced = radeon_drm_is_buffer_referenced; +} diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.h b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.h new file mode 100644 index 00000000000..494abdc0b48 --- /dev/null +++ b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.h @@ -0,0 +1,53 @@ +/* + * Copyright © 2008 Jérôme Glisse + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ */ +/* + * Authors: + * Jérôme Glisse <[email protected]> + */ +#ifndef RADEON_DRM_BUFFER_H +#define RADEON_DRM_BUFFER_H + +#include "radeon_winsys.h" + +#define RADEON_PB_USAGE_VERTEX (1 << 28) +#define RADEON_PB_USAGE_DOMAIN_GTT (1 << 29) +#define RADEON_PB_USAGE_DOMAIN_VRAM (1 << 30) + +static INLINE struct pb_buffer * +radeon_pb_buffer(struct r300_winsys_buffer *buffer) +{ + return (struct pb_buffer *)buffer; +} + +struct pb_manager *radeon_drm_bufmgr_create(struct radeon_drm_winsys *rws); +struct pb_buffer *radeon_drm_bufmgr_create_buffer_from_handle(struct pb_manager *_mgr, + uint32_t handle); +void radeon_drm_bufmgr_flush_maps(struct pb_manager *_mgr); +boolean radeon_drm_bufmgr_get_handle(struct pb_buffer *_buf, + struct winsys_handle *whandle); +void radeon_drm_bufmgr_init_functions(struct radeon_drm_winsys *ws); + +#endif diff --git a/src/gallium/winsys/radeon/drm/radeon_drm.c b/src/gallium/winsys/radeon/drm/radeon_drm_common.c index 86d4f949697..fe71f080592 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_common.c @@ -29,30 +29,20 @@ * Joakim Sindholt <[email protected]> */ -#include "radeon_drm.h" -#include "radeon_r300.h" -#include "radeon_buffer.h" +#include "radeon_winsys.h" +#include "radeon_drm_buffer.h" +#include "radeon_drm_cs.h" #include "radeon_drm_public.h" -#include "r300_winsys.h" - +#include "pipebuffer/pb_bufmgr.h" #include "util/u_memory.h" -#include "xf86drm.h" - -static struct radeon_libdrm_winsys * -radeon_winsys_create(int fd) -{ - struct radeon_libdrm_winsys *rws; +#include <radeon_drm.h> +#include <radeon_bo_gem.h> +#include <radeon_cs_gem.h> +#include <xf86drm.h> +#include <stdio.h> - rws = CALLOC_STRUCT(radeon_libdrm_winsys); - if (rws == NULL) { - return NULL; - } - - rws->fd = fd; - return rws; -} /* Enable/disable Hyper-Z access. Return TRUE on success. */ static boolean radeon_set_hyperz_access(int fd, boolean enable) @@ -80,7 +70,7 @@ static boolean radeon_set_hyperz_access(int fd, boolean enable) } /* Helper function to do the ioctls needed for setup and init. */ -static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys) +static void do_ioctls(struct radeon_drm_winsys *winsys) { struct drm_radeon_gem_info gem_info = {0}; struct drm_radeon_info info = {0}; @@ -107,7 +97,7 @@ static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys) * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because * we don't actually use the info for anything yet. 
*/ - version = drmGetVersion(fd); + version = drmGetVersion(winsys->fd); if (version->version_major != 2) { fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " "only compatible with 2.x.x\n", __FUNCTION__, @@ -131,8 +121,12 @@ static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys) (version->version_major == 2 && version->version_minor >= 6); + winsys->drm_2_8_0 = version->version_major > 2 || + (version->version_major == 2 && + version->version_minor >= 8); + info.request = RADEON_INFO_DEVICE_ID; - retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); + retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info)); if (retval) { fprintf(stderr, "%s: Failed to get PCI ID, " "error number %d\n", __FUNCTION__, retval); @@ -141,7 +135,7 @@ static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys) winsys->pci_id = target; info.request = RADEON_INFO_NUM_GB_PIPES; - retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); + retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info)); if (retval) { fprintf(stderr, "%s: Failed to get GB pipe count, " "error number %d\n", __FUNCTION__, retval); @@ -150,7 +144,7 @@ static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys) winsys->gb_pipes = target; info.request = RADEON_INFO_NUM_Z_PIPES; - retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); + retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info)); if (retval) { fprintf(stderr, "%s: Failed to get Z pipe count, " "error number %d\n", __FUNCTION__, retval); @@ -158,9 +152,9 @@ static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys) } winsys->z_pipes = target; - winsys->hyperz = radeon_set_hyperz_access(fd, TRUE); + winsys->hyperz = radeon_set_hyperz_access(winsys->fd, TRUE); - retval = drmCommandWriteRead(fd, DRM_RADEON_GEM_INFO, + retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_GEM_INFO, &gem_info, sizeof(gem_info)); if (retval) { fprintf(stderr, "%s: Failed to get MM info, error number %d\n", @@ -184,29 +178,60 @@ static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys) drmFreeVersion(version); } -/* Create a pipe_screen. */ -struct r300_winsys_screen* r300_drm_winsys_screen_create(int drmFB) +static void radeon_winsys_destroy(struct r300_winsys_screen *rws) +{ + struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; + + ws->cman->destroy(ws->cman); + ws->kman->destroy(ws->kman); + + radeon_bo_manager_gem_dtor(ws->bom); + FREE(rws); +} + +struct r300_winsys_screen *r300_drm_winsys_screen_create(int fd) { - struct radeon_libdrm_winsys* rws; - boolean ret; - - rws = radeon_winsys_create(drmFB); - if (!rws) - return NULL; - - do_ioctls(drmFB, rws); - - /* The state tracker can organize a softpipe fallback if no hw - * driver is found. - */ - if (is_r3xx(rws->pci_id)) { - ret = radeon_setup_winsys(drmFB, rws); - if (ret == FALSE) - goto fail; - return &rws->base; + struct radeon_drm_winsys *ws = CALLOC_STRUCT(radeon_drm_winsys); + if (!ws) { + return NULL; } + ws->fd = fd; + do_ioctls(ws); + + if (!is_r3xx(ws->pci_id)) { + goto fail; + } + + /* Create managers. */ + ws->bom = radeon_bo_manager_gem_ctor(fd); + if (!ws->bom) + goto fail; + ws->kman = radeon_drm_bufmgr_create(ws); + if (!ws->kman) + goto fail; + ws->cman = pb_cache_manager_create(ws->kman, 1000000); + if (!ws->cman) + goto fail; + + /* Set functions. 
*/ + ws->base.destroy = radeon_winsys_destroy; + + radeon_drm_bufmgr_init_functions(ws); + radeon_drm_cs_init_functions(ws); + radeon_winsys_init_functions(ws); + + return &ws->base; + fail: - FREE(rws); + if (ws->bom) + radeon_bo_manager_gem_dtor(ws->bom); + + if (ws->cman) + ws->cman->destroy(ws->cman); + if (ws->kman) + ws->kman->destroy(ws->kman); + + FREE(ws); return NULL; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c new file mode 100644 index 00000000000..60bc36b0929 --- /dev/null +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -0,0 +1,360 @@ +/* + * Copyright © 2008 Jérôme Glisse + * Copyright © 2010 Marek Olšák <[email protected]> + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ +/* + * Authors: + * Marek Olšák <[email protected]> + * + * Based on work from libdrm_radeon by: + * Aapo Tahkola <[email protected]> + * Nicolai Haehnle <[email protected]> + * Jérôme Glisse <[email protected]> + */ + +/* + This file replaces libdrm's radeon_cs_gem with our own implemention. + It's optimized specifically for r300g, but r600g could use it as well. + Reloc writes and space checking are faster and simpler than their + counterparts in libdrm (the time complexity of all the functions + is O(1) in nearly all scenarios, thanks to hashing). + + It works like this: + + cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and + also adds the size of 'buf' to the used_gart and used_vram winsys variables + based on the domains, which are simply or'd for the accounting purposes. + The adding is skipped if the reloc is already present in the list, but it + accounts any newly-referenced domains. + + cs_validate is then called, which just checks: + used_vram/gart < vram/gart_size * 0.8 + The 0.8 number allows for some memory fragmentation. If the validation + fails, the pipe driver flushes CS and tries do the validation again, + i.e. it validates only that one operation. If it fails again, it drops + the operation on the floor and prints some nasty message to stderr. + (done in the pipe driver) + + cs_write_reloc(cs, buf) just writes a reloc that has been added using + cs_add_reloc. The read_domain and write_domain parameters have been removed, + because we already specify them in cs_add_reloc. 
+*/ + +#include "radeon_drm_cs.h" +#include "radeon_drm_buffer.h" + +#include "util/u_memory.h" + +#include <stdlib.h> +#include <stdint.h> +#include <radeon_bo.h> +#include <xf86drm.h> + +#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t)) + +static struct r300_winsys_cs *radeon_drm_cs_create(struct r300_winsys_screen *rws) +{ + struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); + struct radeon_drm_cs *cs; + + cs = CALLOC_STRUCT(radeon_drm_cs); + if (!cs) { + return NULL; + } + + cs->ws = ws; + cs->nrelocs = 256; + cs->relocs_bo = (struct radeon_bo**) + CALLOC(1, cs->nrelocs * sizeof(struct radeon_bo*)); + if (!cs->relocs_bo) { + FREE(cs); + return NULL; + } + + cs->relocs = (struct drm_radeon_cs_reloc*) + CALLOC(1, cs->nrelocs * sizeof(struct drm_radeon_cs_reloc)); + if (!cs->relocs) { + FREE(cs->relocs_bo); + FREE(cs); + return NULL; + } + + cs->chunks[0].chunk_id = RADEON_CHUNK_ID_IB; + cs->chunks[0].length_dw = 0; + cs->chunks[0].chunk_data = (uint64_t)(uintptr_t)cs->base.buf; + cs->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; + cs->chunks[1].length_dw = 0; + cs->chunks[1].chunk_data = (uint64_t)(uintptr_t)cs->relocs; + return &cs->base; +} + +#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value) + +static inline void update_domains(struct drm_radeon_cs_reloc *reloc, + enum r300_buffer_domain rd, + enum r300_buffer_domain wd, + enum r300_buffer_domain *added_domains) +{ + *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain); + + if (reloc->read_domains & wd) { + reloc->read_domains = rd; + reloc->write_domain = wd; + } else if (rd & reloc->write_domain) { + reloc->read_domains = rd; + reloc->write_domain |= wd; + } else { + reloc->read_domains |= rd; + reloc->write_domain |= wd; + } +} + +static int radeon_get_reloc(struct radeon_drm_cs *cs, + struct radeon_bo *bo) +{ + struct drm_radeon_cs_reloc *reloc; + unsigned i; + unsigned hash = bo->handle & (sizeof(cs->is_handle_added)-1); + + if (cs->is_handle_added[hash]) { + reloc = cs->relocs_hashlist[hash]; + if (reloc->handle == bo->handle) { + return cs->reloc_indices_hashlist[hash]; + } + + /* Hash collision, look for the BO in the list of relocs linearly. */ + for (i = cs->crelocs; i != 0;) { + --i; + reloc = &cs->relocs[i]; + if (reloc->handle == bo->handle) { + /* Put this reloc in the hash list. + * This will prevent additional hash collisions if there are + * several subsequent get_reloc calls of the same buffer. + * + * Example: Assuming buffers A,B,C collide in the hash list, + * the following sequence of relocs: + * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC + * will collide here: ^ and here: ^, + * meaning that we should get very few collisions in the end. */ + cs->relocs_hashlist[hash] = reloc; + cs->reloc_indices_hashlist[hash] = i; + /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/ + return i; + } + } + } + + return -1; +} + +static void radeon_add_reloc(struct radeon_drm_cs *cs, + struct radeon_bo *bo, + enum r300_buffer_domain rd, + enum r300_buffer_domain wd, + enum r300_buffer_domain *added_domains) +{ + struct drm_radeon_cs_reloc *reloc; + unsigned i; + unsigned hash = bo->handle & (sizeof(cs->is_handle_added)-1); + + if (cs->is_handle_added[hash]) { + reloc = cs->relocs_hashlist[hash]; + if (reloc->handle == bo->handle) { + update_domains(reloc, rd, wd, added_domains); + return; + } + + /* Hash collision, look for the BO in the list of relocs linearly. 
*/ + for (i = cs->crelocs; i != 0;) { + --i; + reloc = &cs->relocs[i]; + if (reloc->handle == bo->handle) { + update_domains(reloc, rd, wd, added_domains); + + cs->relocs_hashlist[hash] = reloc; + cs->reloc_indices_hashlist[hash] = i; + /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/ + return; + } + } + } + + /* New relocation, check if the backing array is large enough. */ + if (cs->crelocs >= cs->nrelocs) { + uint32_t size; + cs->nrelocs += 10; + + size = cs->nrelocs * sizeof(struct radeon_bo*); + cs->relocs_bo = (struct radeon_bo**)realloc(cs->relocs_bo, size); + + size = cs->nrelocs * sizeof(struct drm_radeon_cs_reloc); + cs->relocs = (struct drm_radeon_cs_reloc*)realloc(cs->relocs, size); + + cs->chunks[1].chunk_data = (uint64_t)(uintptr_t)cs->relocs; + } + + /* Initialize the new relocation. */ + radeon_bo_ref(bo); + cs->relocs_bo[cs->crelocs] = bo; + reloc = &cs->relocs[cs->crelocs]; + reloc->handle = bo->handle; + reloc->read_domains = rd; + reloc->write_domain = wd; + reloc->flags = 0; + + cs->is_handle_added[hash] = TRUE; + cs->relocs_hashlist[hash] = reloc; + cs->reloc_indices_hashlist[hash] = cs->crelocs; + + cs->chunks[1].length_dw += RELOC_DWORDS; + cs->crelocs++; + + *added_domains = rd | wd; +} + +static void radeon_drm_cs_add_reloc(struct r300_winsys_cs *rcs, + struct r300_winsys_cs_buffer *buf, + enum r300_buffer_domain rd, + enum r300_buffer_domain wd) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + struct radeon_bo *bo = (struct radeon_bo*)buf; + enum r300_buffer_domain added_domains; + + radeon_add_reloc(cs, bo, rd, wd, &added_domains); + + if (!added_domains) + return; + + if (added_domains & R300_DOMAIN_GTT) + cs->used_gart += bo->size; + if (added_domains & R300_DOMAIN_VRAM) + cs->used_vram += bo->size; +} + +static boolean radeon_drm_cs_validate(struct r300_winsys_cs *rcs) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + + return cs->used_gart < cs->ws->gart_size * 0.8 && + cs->used_vram < cs->ws->vram_size * 0.8; +} + +static void radeon_drm_cs_write_reloc(struct r300_winsys_cs *rcs, + struct r300_winsys_cs_buffer *buf) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + struct radeon_bo *bo = (struct radeon_bo*)buf; + + unsigned index = radeon_get_reloc(cs, bo); + + if (index == -1) { + fprintf(stderr, "r300: Cannot get a relocation in %s.\n", __func__); + return; + } + + OUT_CS(&cs->base, 0xc0001000); + OUT_CS(&cs->base, index * RELOC_DWORDS); +} + +static void radeon_drm_cs_emit(struct r300_winsys_cs *rcs) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + uint64_t chunk_array[2]; + unsigned i; + int r; + + if (cs->base.cdw) { + /* Unmap buffers. */ + radeon_drm_bufmgr_flush_maps(cs->ws->kman); + + /* Prepare the arguments. */ + cs->chunks[0].length_dw = cs->base.cdw; + + chunk_array[0] = (uint64_t)(uintptr_t)&cs->chunks[0]; + chunk_array[1] = (uint64_t)(uintptr_t)&cs->chunks[1]; + + cs->cs.num_chunks = 2; + cs->cs.chunks = (uint64_t)(uintptr_t)chunk_array; + + /* Emit. 
*/ + r = drmCommandWriteRead(cs->ws->fd, DRM_RADEON_CS, + &cs->cs, sizeof(struct drm_radeon_cs)); + if (r) { + if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) { + fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n"); + fprintf(stderr, "VENDORID:DEVICEID 0x%04X:0x%04X\n", 0x1002, + cs->ws->pci_id); + for (i = 0; i < cs->base.cdw; i++) { + fprintf(stderr, "0x%08X\n", cs->base.buf[i]); + } + } else { + fprintf(stderr, "radeon: The kernel rejected CS, " + "see dmesg for more information.\n"); + } + } + } + + /* Unreference buffers, cleanup. */ + for (i = 0; i < cs->crelocs; i++) { + radeon_bo_unref((struct radeon_bo*)cs->relocs_bo[i]); + cs->relocs_bo[i] = NULL; + } + + cs->base.cdw = 0; + cs->crelocs = 0; + cs->chunks[0].length_dw = 0; + cs->chunks[1].length_dw = 0; + cs->used_gart = 0; + cs->used_vram = 0; + memset(cs->is_handle_added, 0, sizeof(cs->is_handle_added)); +} + +static void radeon_drm_cs_destroy(struct r300_winsys_cs *rcs) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + FREE(cs->relocs_bo); + FREE(cs->relocs); + FREE(cs); +} + +static void radeon_drm_cs_set_flush(struct r300_winsys_cs *rcs, + void (*flush)(void *), void *user) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + cs->flush_cs = flush; + cs->flush_data = user; +} + +void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) +{ + ws->base.cs_create = radeon_drm_cs_create; + ws->base.cs_destroy = radeon_drm_cs_destroy; + ws->base.cs_add_reloc = radeon_drm_cs_add_reloc; + ws->base.cs_validate = radeon_drm_cs_validate; + ws->base.cs_write_reloc = radeon_drm_cs_write_reloc; + ws->base.cs_flush = radeon_drm_cs_emit; + ws->base.cs_set_flush = radeon_drm_cs_set_flush; +} diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h new file mode 100644 index 00000000000..76046534b65 --- /dev/null +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -0,0 +1,42 @@ +#ifndef RADEON_DRM_CS_H +#define RADEON_DRM_CS_H + +#include "radeon_winsys.h" +#include <radeon_drm.h> + +struct radeon_drm_cs { + struct r300_winsys_cs base; + + /* The winsys. */ + struct radeon_drm_winsys *ws; + + /* Flush CS. */ + void (*flush_cs)(void *); + void *flush_data; + + /* Relocs. 
*/ + unsigned crelocs; + unsigned nrelocs; + struct drm_radeon_cs_reloc *relocs; + struct radeon_bo **relocs_bo; + struct drm_radeon_cs cs; + struct drm_radeon_cs_chunk chunks[2]; + + unsigned used_vram; + unsigned used_gart; + + /* 0 = BO not added, 1 = BO added */ + char is_handle_added[256]; + struct drm_radeon_cs_reloc *relocs_hashlist[256]; + unsigned reloc_indices_hashlist[256]; +}; + +static INLINE struct radeon_drm_cs * +radeon_drm_cs(struct r300_winsys_cs *base) +{ + return (struct radeon_drm_cs*)base; +} + +void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws); + +#endif diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_public.h b/src/gallium/winsys/radeon/drm/radeon_drm_public.h index 0d96ae8c470..3a208cdd4c4 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_public.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_public.h @@ -1,9 +1,222 @@ - #ifndef RADEON_DRM_PUBLIC_H #define RADEON_DRM_PUBLIC_H +#include "pipe/p_defines.h" + struct r300_winsys_screen; -struct r300_winsys_screen *r300_drm_winsys_screen_create(int drmFD); +struct r300_winsys_screen *r300_drm_winsys_screen_create(int fd); + +static INLINE boolean is_r3xx(int pciid) +{ + switch (pciid) { + case 0x4144: /* PCI_CHIP_R300_AD */ + case 0x4145: /* PCI_CHIP_R300_AE */ + case 0x4146: /* PCI_CHIP_R300_AF */ + case 0x4147: /* PCI_CHIP_R300_AG */ + case 0x4E44: /* PCI_CHIP_R300_ND */ + case 0x4E45: /* PCI_CHIP_R300_NE */ + case 0x4E46: /* PCI_CHIP_R300_NF */ + case 0x4E47: /* PCI_CHIP_R300_NG */ + case 0x4E48: /* PCI_CHIP_R350_NH */ + case 0x4E49: /* PCI_CHIP_R350_NI */ + case 0x4E4B: /* PCI_CHIP_R350_NK */ + case 0x4148: /* PCI_CHIP_R350_AH */ + case 0x4149: /* PCI_CHIP_R350_AI */ + case 0x414A: /* PCI_CHIP_R350_AJ */ + case 0x414B: /* PCI_CHIP_R350_AK */ + case 0x4E4A: /* PCI_CHIP_R360_NJ */ + case 0x4150: /* PCI_CHIP_RV350_AP */ + case 0x4151: /* PCI_CHIP_RV350_AQ */ + case 0x4152: /* PCI_CHIP_RV350_AR */ + case 0x4153: /* PCI_CHIP_RV350_AS */ + case 0x4154: /* PCI_CHIP_RV350_AT */ + case 0x4155: /* PCI_CHIP_RV350_AU */ + case 0x4156: /* PCI_CHIP_RV350_AV */ + case 0x4E50: /* PCI_CHIP_RV350_NP */ + case 0x4E51: /* PCI_CHIP_RV350_NQ */ + case 0x4E52: /* PCI_CHIP_RV350_NR */ + case 0x4E53: /* PCI_CHIP_RV350_NS */ + case 0x4E54: /* PCI_CHIP_RV350_NT */ + case 0x4E56: /* PCI_CHIP_RV350_NV */ + case 0x5460: /* PCI_CHIP_RV370_5460 */ + case 0x5462: /* PCI_CHIP_RV370_5462 */ + case 0x5464: /* PCI_CHIP_RV370_5464 */ + case 0x5B60: /* PCI_CHIP_RV370_5B60 */ + case 0x5B62: /* PCI_CHIP_RV370_5B62 */ + case 0x5B63: /* PCI_CHIP_RV370_5B63 */ + case 0x5B64: /* PCI_CHIP_RV370_5B64 */ + case 0x5B65: /* PCI_CHIP_RV370_5B65 */ + case 0x3150: /* PCI_CHIP_RV380_3150 */ + case 0x3152: /* PCI_CHIP_RV380_3152 */ + case 0x3154: /* PCI_CHIP_RV380_3154 */ + case 0x3155: /* PCI_CHIP_RV380_3155 */ + case 0x3E50: /* PCI_CHIP_RV380_3E50 */ + case 0x3E54: /* PCI_CHIP_RV380_3E54 */ + case 0x4A48: /* PCI_CHIP_R420_JH */ + case 0x4A49: /* PCI_CHIP_R420_JI */ + case 0x4A4A: /* PCI_CHIP_R420_JJ */ + case 0x4A4B: /* PCI_CHIP_R420_JK */ + case 0x4A4C: /* PCI_CHIP_R420_JL */ + case 0x4A4D: /* PCI_CHIP_R420_JM */ + case 0x4A4E: /* PCI_CHIP_R420_JN */ + case 0x4A4F: /* PCI_CHIP_R420_JO */ + case 0x4A50: /* PCI_CHIP_R420_JP */ + case 0x4A54: /* PCI_CHIP_R420_JT */ + case 0x5548: /* PCI_CHIP_R423_UH */ + case 0x5549: /* PCI_CHIP_R423_UI */ + case 0x554A: /* PCI_CHIP_R423_UJ */ + case 0x554B: /* PCI_CHIP_R423_UK */ + case 0x5550: /* PCI_CHIP_R423_5550 */ + case 0x5551: /* PCI_CHIP_R423_UQ */ + case 0x5552: /* PCI_CHIP_R423_UR */ + case 0x5554: /* 
PCI_CHIP_R423_UT */ + case 0x5D57: /* PCI_CHIP_R423_5D57 */ + case 0x554C: /* PCI_CHIP_R430_554C */ + case 0x554D: /* PCI_CHIP_R430_554D */ + case 0x554E: /* PCI_CHIP_R430_554E */ + case 0x554F: /* PCI_CHIP_R430_554F */ + case 0x5D48: /* PCI_CHIP_R430_5D48 */ + case 0x5D49: /* PCI_CHIP_R430_5D49 */ + case 0x5D4A: /* PCI_CHIP_R430_5D4A */ + case 0x5D4C: /* PCI_CHIP_R480_5D4C */ + case 0x5D4D: /* PCI_CHIP_R480_5D4D */ + case 0x5D4E: /* PCI_CHIP_R480_5D4E */ + case 0x5D4F: /* PCI_CHIP_R480_5D4F */ + case 0x5D50: /* PCI_CHIP_R480_5D50 */ + case 0x5D52: /* PCI_CHIP_R480_5D52 */ + case 0x4B49: /* PCI_CHIP_R481_4B49 */ + case 0x4B4A: /* PCI_CHIP_R481_4B4A */ + case 0x4B4B: /* PCI_CHIP_R481_4B4B */ + case 0x4B4C: /* PCI_CHIP_R481_4B4C */ + case 0x564A: /* PCI_CHIP_RV410_564A */ + case 0x564B: /* PCI_CHIP_RV410_564B */ + case 0x564F: /* PCI_CHIP_RV410_564F */ + case 0x5652: /* PCI_CHIP_RV410_5652 */ + case 0x5653: /* PCI_CHIP_RV410_5653 */ + case 0x5657: /* PCI_CHIP_RV410_5657 */ + case 0x5E48: /* PCI_CHIP_RV410_5E48 */ + case 0x5E4A: /* PCI_CHIP_RV410_5E4A */ + case 0x5E4B: /* PCI_CHIP_RV410_5E4B */ + case 0x5E4C: /* PCI_CHIP_RV410_5E4C */ + case 0x5E4D: /* PCI_CHIP_RV410_5E4D */ + case 0x5E4F: /* PCI_CHIP_RV410_5E4F */ + case 0x5A41: /* PCI_CHIP_RS400_5A41 */ + case 0x5A42: /* PCI_CHIP_RS400_5A42 */ + case 0x5A61: /* PCI_CHIP_RC410_5A61 */ + case 0x5A62: /* PCI_CHIP_RC410_5A62 */ + case 0x5954: /* PCI_CHIP_RS480_5954 */ + case 0x5955: /* PCI_CHIP_RS480_5955 */ + case 0x5974: /* PCI_CHIP_RS482_5974 */ + case 0x5975: /* PCI_CHIP_RS482_5975 */ + case 0x7100: /* PCI_CHIP_R520_7100 */ + case 0x7101: /* PCI_CHIP_R520_7101 */ + case 0x7102: /* PCI_CHIP_R520_7102 */ + case 0x7103: /* PCI_CHIP_R520_7103 */ + case 0x7104: /* PCI_CHIP_R520_7104 */ + case 0x7105: /* PCI_CHIP_R520_7105 */ + case 0x7106: /* PCI_CHIP_R520_7106 */ + case 0x7108: /* PCI_CHIP_R520_7108 */ + case 0x7109: /* PCI_CHIP_R520_7109 */ + case 0x710A: /* PCI_CHIP_R520_710A */ + case 0x710B: /* PCI_CHIP_R520_710B */ + case 0x710C: /* PCI_CHIP_R520_710C */ + case 0x710E: /* PCI_CHIP_R520_710E */ + case 0x710F: /* PCI_CHIP_R520_710F */ + case 0x7140: /* PCI_CHIP_RV515_7140 */ + case 0x7141: /* PCI_CHIP_RV515_7141 */ + case 0x7142: /* PCI_CHIP_RV515_7142 */ + case 0x7143: /* PCI_CHIP_RV515_7143 */ + case 0x7144: /* PCI_CHIP_RV515_7144 */ + case 0x7145: /* PCI_CHIP_RV515_7145 */ + case 0x7146: /* PCI_CHIP_RV515_7146 */ + case 0x7147: /* PCI_CHIP_RV515_7147 */ + case 0x7149: /* PCI_CHIP_RV515_7149 */ + case 0x714A: /* PCI_CHIP_RV515_714A */ + case 0x714B: /* PCI_CHIP_RV515_714B */ + case 0x714C: /* PCI_CHIP_RV515_714C */ + case 0x714D: /* PCI_CHIP_RV515_714D */ + case 0x714E: /* PCI_CHIP_RV515_714E */ + case 0x714F: /* PCI_CHIP_RV515_714F */ + case 0x7151: /* PCI_CHIP_RV515_7151 */ + case 0x7152: /* PCI_CHIP_RV515_7152 */ + case 0x7153: /* PCI_CHIP_RV515_7153 */ + case 0x715E: /* PCI_CHIP_RV515_715E */ + case 0x715F: /* PCI_CHIP_RV515_715F */ + case 0x7180: /* PCI_CHIP_RV515_7180 */ + case 0x7181: /* PCI_CHIP_RV515_7181 */ + case 0x7183: /* PCI_CHIP_RV515_7183 */ + case 0x7186: /* PCI_CHIP_RV515_7186 */ + case 0x7187: /* PCI_CHIP_RV515_7187 */ + case 0x7188: /* PCI_CHIP_RV515_7188 */ + case 0x718A: /* PCI_CHIP_RV515_718A */ + case 0x718B: /* PCI_CHIP_RV515_718B */ + case 0x718C: /* PCI_CHIP_RV515_718C */ + case 0x718D: /* PCI_CHIP_RV515_718D */ + case 0x718F: /* PCI_CHIP_RV515_718F */ + case 0x7193: /* PCI_CHIP_RV515_7193 */ + case 0x7196: /* PCI_CHIP_RV515_7196 */ + case 0x719B: /* PCI_CHIP_RV515_719B */ + case 0x719F: /* PCI_CHIP_RV515_719F */ 
+ case 0x7200: /* PCI_CHIP_RV515_7200 */ + case 0x7210: /* PCI_CHIP_RV515_7210 */ + case 0x7211: /* PCI_CHIP_RV515_7211 */ + case 0x71C0: /* PCI_CHIP_RV530_71C0 */ + case 0x71C1: /* PCI_CHIP_RV530_71C1 */ + case 0x71C2: /* PCI_CHIP_RV530_71C2 */ + case 0x71C3: /* PCI_CHIP_RV530_71C3 */ + case 0x71C4: /* PCI_CHIP_RV530_71C4 */ + case 0x71C5: /* PCI_CHIP_RV530_71C5 */ + case 0x71C6: /* PCI_CHIP_RV530_71C6 */ + case 0x71C7: /* PCI_CHIP_RV530_71C7 */ + case 0x71CD: /* PCI_CHIP_RV530_71CD */ + case 0x71CE: /* PCI_CHIP_RV530_71CE */ + case 0x71D2: /* PCI_CHIP_RV530_71D2 */ + case 0x71D4: /* PCI_CHIP_RV530_71D4 */ + case 0x71D5: /* PCI_CHIP_RV530_71D5 */ + case 0x71D6: /* PCI_CHIP_RV530_71D6 */ + case 0x71DA: /* PCI_CHIP_RV530_71DA */ + case 0x71DE: /* PCI_CHIP_RV530_71DE */ + case 0x7281: /* PCI_CHIP_RV560_7281 */ + case 0x7283: /* PCI_CHIP_RV560_7283 */ + case 0x7287: /* PCI_CHIP_RV560_7287 */ + case 0x7290: /* PCI_CHIP_RV560_7290 */ + case 0x7291: /* PCI_CHIP_RV560_7291 */ + case 0x7293: /* PCI_CHIP_RV560_7293 */ + case 0x7297: /* PCI_CHIP_RV560_7297 */ + case 0x7280: /* PCI_CHIP_RV570_7280 */ + case 0x7288: /* PCI_CHIP_RV570_7288 */ + case 0x7289: /* PCI_CHIP_RV570_7289 */ + case 0x728B: /* PCI_CHIP_RV570_728B */ + case 0x728C: /* PCI_CHIP_RV570_728C */ + case 0x7240: /* PCI_CHIP_R580_7240 */ + case 0x7243: /* PCI_CHIP_R580_7243 */ + case 0x7244: /* PCI_CHIP_R580_7244 */ + case 0x7245: /* PCI_CHIP_R580_7245 */ + case 0x7246: /* PCI_CHIP_R580_7246 */ + case 0x7247: /* PCI_CHIP_R580_7247 */ + case 0x7248: /* PCI_CHIP_R580_7248 */ + case 0x7249: /* PCI_CHIP_R580_7249 */ + case 0x724A: /* PCI_CHIP_R580_724A */ + case 0x724B: /* PCI_CHIP_R580_724B */ + case 0x724C: /* PCI_CHIP_R580_724C */ + case 0x724D: /* PCI_CHIP_R580_724D */ + case 0x724E: /* PCI_CHIP_R580_724E */ + case 0x724F: /* PCI_CHIP_R580_724F */ + case 0x7284: /* PCI_CHIP_R580_7284 */ + case 0x793F: /* PCI_CHIP_RS600_793F */ + case 0x7941: /* PCI_CHIP_RS600_7941 */ + case 0x7942: /* PCI_CHIP_RS600_7942 */ + case 0x791E: /* PCI_CHIP_RS690_791E */ + case 0x791F: /* PCI_CHIP_RS690_791F */ + case 0x796C: /* PCI_CHIP_RS740_796C */ + case 0x796D: /* PCI_CHIP_RS740_796D */ + case 0x796E: /* PCI_CHIP_RS740_796E */ + case 0x796F: /* PCI_CHIP_RS740_796F */ + return TRUE; + default: + return FALSE; + } +} #endif diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index 420522f5c1f..bacf181b47c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -20,14 +20,12 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include "radeon_r300.h" -#include "radeon_buffer.h" - -#include "radeon_bo_gem.h" -#include "radeon_cs_gem.h" -#include "state_tracker/drm_driver.h" +#include "radeon_drm_buffer.h" #include "util/u_memory.h" +#include "pipebuffer/pb_bufmgr.h" + +#include "state_tracker/drm_driver.h" static unsigned get_pb_usage_from_create_flags(unsigned bind, unsigned usage, enum r300_buffer_domain domain) @@ -73,7 +71,7 @@ radeon_r300_winsys_buffer_create(struct r300_winsys_screen *rws, unsigned usage, enum r300_buffer_domain domain) { - struct radeon_libdrm_winsys *ws = radeon_libdrm_winsys(rws); + struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); struct pb_desc desc; struct pb_manager *provider; struct pb_buffer *buffer; @@ -92,7 +90,7 @@ radeon_r300_winsys_buffer_create(struct r300_winsys_screen *rws, if (!buffer) return NULL; - return radeon_libdrm_winsys_buffer(buffer); + return (struct r300_winsys_buffer*)buffer; } static void radeon_r300_winsys_buffer_reference(struct r300_winsys_screen *rws, @@ -104,7 +102,7 @@ static void radeon_r300_winsys_buffer_reference(struct r300_winsys_screen *rws, pb_reference(&_dst, _src); - *pdst = radeon_libdrm_winsys_buffer(_dst); + *pdst = (struct r300_winsys_buffer*)_dst; } static struct r300_winsys_buffer *radeon_r300_winsys_buffer_from_handle(struct r300_winsys_screen *rws, @@ -112,7 +110,7 @@ static struct r300_winsys_buffer *radeon_r300_winsys_buffer_from_handle(struct r unsigned *stride, unsigned *size) { - struct radeon_libdrm_winsys *ws = radeon_libdrm_winsys(rws); + struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); struct pb_buffer *_buf; _buf = radeon_drm_bufmgr_create_buffer_from_handle(ws->kman, whandle->handle); @@ -122,7 +120,7 @@ static struct r300_winsys_buffer *radeon_r300_winsys_buffer_from_handle(struct r if (size) *size = _buf->base.size; - return radeon_libdrm_winsys_buffer(_buf); + return (struct r300_winsys_buffer*)_buf; } static boolean radeon_r300_winsys_buffer_get_handle(struct r300_winsys_screen *rws, @@ -135,70 +133,10 @@ static boolean radeon_r300_winsys_buffer_get_handle(struct r300_winsys_screen *r return radeon_drm_bufmgr_get_handle(_buf, whandle); } -static void radeon_r300_winsys_cs_set_flush(struct r300_winsys_cs *rcs, - void (*flush)(void *), - void *user) -{ - struct radeon_libdrm_cs *cs = radeon_libdrm_cs(rcs); - cs->flush_cs = flush; - cs->flush_data = user; - radeon_cs_space_set_flush(cs->cs, flush, user); -} - -static boolean radeon_r300_winsys_cs_validate(struct r300_winsys_cs *rcs) -{ - struct radeon_libdrm_cs *cs = radeon_libdrm_cs(rcs); - - return radeon_cs_space_check(cs->cs) >= 0; -} - -static void radeon_r300_winsys_cs_reset_buffers(struct r300_winsys_cs *rcs) -{ - struct radeon_libdrm_cs *cs = radeon_libdrm_cs(rcs); - radeon_cs_space_reset_bos(cs->cs); -} - -static void radeon_r300_winsys_cs_flush(struct r300_winsys_cs *rcs) -{ - struct radeon_libdrm_cs *cs = radeon_libdrm_cs(rcs); - int retval; - - /* Don't flush a zero-sized CS. */ - if (!cs->base.cdw) { - return; - } - - cs->cs->cdw = cs->base.cdw; - - radeon_drm_bufmgr_flush_maps(cs->ws->kman); - - /* Emit the CS. */ - retval = radeon_cs_emit(cs->cs); - if (retval) { - if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) { - fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n"); - radeon_cs_print(cs->cs, stderr); - } else { - fprintf(stderr, "radeon: The kernel rejected CS, " - "see dmesg for more information.\n"); - } - } - - /* Reset CS. 
- * Someday, when we care about performance, we should really find a way - * to rotate between two or three CS objects so that the GPU can be - * spinning through one CS while another one is being filled. */ - radeon_cs_erase(cs->cs); - - cs->base.ptr = cs->cs->packets; - cs->base.cdw = cs->cs->cdw; - cs->base.ndw = cs->cs->ndw; -} - static uint32_t radeon_get_value(struct r300_winsys_screen *rws, enum r300_value_id id) { - struct radeon_libdrm_winsys *ws = (struct radeon_libdrm_winsys *)rws; + struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)rws; switch(id) { case R300_VID_PCI_ID: @@ -213,110 +151,19 @@ static uint32_t radeon_get_value(struct r300_winsys_screen *rws, return ws->drm_2_3_0; case R300_VID_DRM_2_6_0: return ws->drm_2_6_0; + case R300_VID_DRM_2_8_0: + return ws->drm_2_8_0; case R300_CAN_HYPERZ: return ws->hyperz; } return 0; } -static struct r300_winsys_cs *radeon_r300_winsys_cs_create(struct r300_winsys_screen *rws) +void radeon_winsys_init_functions(struct radeon_drm_winsys *ws) { - struct radeon_libdrm_winsys *ws = radeon_libdrm_winsys(rws); - struct radeon_libdrm_cs *cs = CALLOC_STRUCT(radeon_libdrm_cs); - - if (!cs) - return NULL; - - /* Size limit on IBs is 64 kibibytes. */ - cs->cs = radeon_cs_create(ws->csm, 1024 * 64 / 4); - if (!cs->cs) { - FREE(cs); - return NULL; - } - - radeon_cs_set_limit(cs->cs, - RADEON_GEM_DOMAIN_GTT, ws->gart_size); - radeon_cs_set_limit(cs->cs, - RADEON_GEM_DOMAIN_VRAM, ws->vram_size); - - cs->ws = ws; - cs->base.ptr = cs->cs->packets; - cs->base.cdw = cs->cs->cdw; - cs->base.ndw = cs->cs->ndw; - return &cs->base; -} - -static void radeon_r300_winsys_cs_destroy(struct r300_winsys_cs *rcs) -{ - struct radeon_libdrm_cs *cs = radeon_libdrm_cs(rcs); - radeon_cs_destroy(cs->cs); - FREE(cs); -} - -static void radeon_winsys_destroy(struct r300_winsys_screen *rws) -{ - struct radeon_libdrm_winsys *ws = (struct radeon_libdrm_winsys *)rws; - - ws->cman->destroy(ws->cman); - ws->kman->destroy(ws->kman); - - radeon_bo_manager_gem_dtor(ws->bom); - radeon_cs_manager_gem_dtor(ws->csm); - - FREE(rws); -} - -boolean radeon_setup_winsys(int fd, struct radeon_libdrm_winsys* ws) -{ - ws->csm = radeon_cs_manager_gem_ctor(fd); - if (!ws->csm) - goto fail; - ws->bom = radeon_bo_manager_gem_ctor(fd); - if (!ws->bom) - goto fail; - ws->kman = radeon_drm_bufmgr_create(ws); - if (!ws->kman) - goto fail; - - ws->cman = pb_cache_manager_create(ws->kman, 100000); - if (!ws->cman) - goto fail; - - ws->base.destroy = radeon_winsys_destroy; ws->base.get_value = radeon_get_value; - ws->base.buffer_create = radeon_r300_winsys_buffer_create; - ws->base.buffer_set_tiling = radeon_drm_bufmgr_set_tiling; - ws->base.buffer_get_tiling = radeon_drm_bufmgr_get_tiling; - ws->base.buffer_map = radeon_drm_buffer_map; - ws->base.buffer_unmap = radeon_drm_buffer_unmap; - ws->base.buffer_wait = radeon_drm_bufmgr_wait; ws->base.buffer_reference = radeon_r300_winsys_buffer_reference; ws->base.buffer_from_handle = radeon_r300_winsys_buffer_from_handle; ws->base.buffer_get_handle = radeon_r300_winsys_buffer_get_handle; - - ws->base.cs_create = radeon_r300_winsys_cs_create; - ws->base.cs_destroy = radeon_r300_winsys_cs_destroy; - ws->base.cs_add_buffer = radeon_drm_bufmgr_add_buffer; - ws->base.cs_validate = radeon_r300_winsys_cs_validate; - ws->base.cs_write_reloc = radeon_drm_bufmgr_write_reloc; - ws->base.cs_flush = radeon_r300_winsys_cs_flush; - ws->base.cs_reset_buffers = radeon_r300_winsys_cs_reset_buffers; - ws->base.cs_set_flush = radeon_r300_winsys_cs_set_flush; - 
ws->base.cs_is_buffer_referenced = radeon_drm_bufmgr_is_buffer_referenced; - return TRUE; - -fail: - if (ws->csm) - radeon_cs_manager_gem_dtor(ws->csm); - - if (ws->bom) - radeon_bo_manager_gem_dtor(ws->bom); - - if (ws->cman) - ws->cman->destroy(ws->cman); - if (ws->kman) - ws->kman->destroy(ws->kman); - - return FALSE; } diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.h b/src/gallium/winsys/radeon/drm/radeon_r300.h deleted file mode 100644 index 2703464ad8f..00000000000 --- a/src/gallium/winsys/radeon/drm/radeon_r300.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef RADEON_R300_H -#define RADEON_R300_H - -#include "radeon_winsys.h" - -boolean radeon_setup_winsys(int fd, struct radeon_libdrm_winsys* winsys); - -#endif /* RADEON_R300_H */ diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 6f4aa4bce30..492edfef8c3 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -32,82 +32,37 @@ #include "r300_winsys.h" -struct radeon_libdrm_winsys { - /* Parent class. */ +struct radeon_drm_winsys { struct r300_winsys_screen base; - struct pb_manager *kman; + int fd; /* DRM file descriptor */ + struct radeon_bo_manager *bom; /* Radeon BO manager. */ + struct pb_manager *kman; struct pb_manager *cman; - /* PCI ID */ - uint32_t pci_id; - - /* GB pipe count */ - uint32_t gb_pipes; - - /* Z pipe count (rv530 only) */ - uint32_t z_pipes; - - /* GART size. */ - uint32_t gart_size; - - /* VRAM size. */ - uint32_t vram_size; - - /* Square tiling support. */ - boolean squaretiling; - - /* DRM 2.3.0 - * - R500 VAP regs - * - MSPOS regs - * - Fixed texture 3D size calculation - */ + uint32_t pci_id; /* PCI ID */ + uint32_t gb_pipes; /* GB pipe count */ + uint32_t z_pipes; /* Z pipe count (rv530 only) */ + uint32_t gart_size; /* GART size. */ + uint32_t vram_size; /* VRAM size. */ + boolean squaretiling; /* Square tiling support. 
*/ + /* DRM 2.3.0 (R500 VAP regs, MSPOS regs, fixed tex3D size checking) */ boolean drm_2_3_0; - - /* DRM 2.6.0 - * - Hyper-Z - * - GB_Z_PEQ_CONFIG allowed on rv350->r4xx, we should initialize it - */ + /* DRM 2.6.0 (Hyper-Z, GB_Z_PEQ_CONFIG allowed on rv350->r4xx, FG_ALPHA_VALUE) */ boolean drm_2_6_0; - - /* hyperz user */ + /* DRM 2.8.0 (US_FORMAT regs, ARGB2101010 colorbuffer) */ + boolean drm_2_8_0; + /* Hyper-Z user */ boolean hyperz; - - /* DRM FD */ - int fd; - - /* Radeon BO manager. */ - struct radeon_bo_manager *bom; - - /* Radeon CS manager. */ - struct radeon_cs_manager *csm; -}; - -struct radeon_libdrm_cs { - struct r300_winsys_cs base; - - /* The winsys. */ - struct radeon_libdrm_winsys *ws; - - /* The libdrm command stream. */ - struct radeon_cs *cs; - - /* Flush CS. */ - void (*flush_cs)(void *); - void *flush_data; }; -static INLINE struct radeon_libdrm_cs * -radeon_libdrm_cs(struct r300_winsys_cs *base) +static INLINE struct radeon_drm_winsys * +radeon_drm_winsys(struct r300_winsys_screen *base) { - return (struct radeon_libdrm_cs*)base; + return (struct radeon_drm_winsys*)base; } -static INLINE struct radeon_libdrm_winsys * -radeon_libdrm_winsys(struct r300_winsys_screen *base) -{ - return (struct radeon_libdrm_winsys*)base; -} +void radeon_winsys_init_functions(struct radeon_drm_winsys *ws); #endif diff --git a/src/gallium/winsys/svga/drm/vmw_screen.c b/src/gallium/winsys/svga/drm/vmw_screen.c index 51a4c55e5a2..cc3003d252f 100644 --- a/src/gallium/winsys/svga/drm/vmw_screen.c +++ b/src/gallium/winsys/svga/drm/vmw_screen.c @@ -45,8 +45,6 @@ vmw_winsys_create( int fd, boolean use_old_scanout_flag ) vws->ioctl.drm_fd = fd; vws->use_old_scanout_flag = use_old_scanout_flag; - debug_printf("%s: use_old_scanout_flag == %s\n", __FUNCTION__, - use_old_scanout_flag ? 
"true" : "false"); if (!vmw_ioctl_init(vws)) goto out_no_ioctl; diff --git a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c index bc2623e7b77..4ecfdbf3915 100644 --- a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c +++ b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c @@ -61,10 +61,8 @@ struct wrapper_sw_displaytarget struct pipe_resource *tex; struct pipe_transfer *transfer; - unsigned width; - unsigned height; unsigned map_count; - unsigned stride; /**< because we give stride at create */ + unsigned stride; /**< because we get stride at create */ void *ptr; }; @@ -93,9 +91,9 @@ wsw_dt_get_stride(struct wrapper_sw_displaytarget *wdt, unsigned *stride) struct pipe_resource *tex = wdt->tex; struct pipe_transfer *tr; - tr = pipe_get_transfer(pipe, tex, 0, 0, 0, - PIPE_TRANSFER_READ_WRITE, - 0, 0, wdt->width, wdt->height); + tr = pipe_get_transfer(pipe, tex, 0, 0, + PIPE_TRANSFER_READ_WRITE, + 0, 0, wdt->tex->width0, wdt->tex->height0); if (!tr) return FALSE; @@ -149,6 +147,8 @@ wsw_dt_create(struct sw_winsys *ws, templ.target = wsw->target; templ.width0 = width; templ.height0 = height; + templ.depth0 = 1; + templ.array_size = 1; templ.format = format; templ.bind = bind; @@ -204,9 +204,9 @@ wsw_dt_map(struct sw_winsys *ws, assert(!wdt->transfer); - tr = pipe_get_transfer(pipe, tex, 0, 0, 0, - PIPE_TRANSFER_READ_WRITE, - 0, 0, wdt->width, wdt->height); + tr = pipe_get_transfer(pipe, tex, 0, 0, + PIPE_TRANSFER_READ_WRITE, + 0, 0, wdt->tex->width0, wdt->tex->height0); if (!tr) return NULL; @@ -246,6 +246,7 @@ wsw_dt_unmap(struct sw_winsys *ws, pipe->transfer_unmap(pipe, wdt->transfer); pipe->transfer_destroy(pipe, wdt->transfer); + pipe->flush(pipe, 0, NULL); wdt->transfer = NULL; } |