diff options
Diffstat (limited to 'src/gallium')
272 files changed, 14928 insertions, 3713 deletions
diff --git a/src/gallium/SConscript b/src/gallium/SConscript index da2e4dd5ded..b61eba0390e 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -33,8 +33,11 @@ if env['drm']: SConscript([ 'drivers/r300/SConscript', 'drivers/r600/SConscript', - 'drivers/radeonsi/SConscript', ]) + if env['llvm']: + SConscript([ + 'drivers/radeonsi/SConscript', + ]) # XXX: nouveau drivers have a tight dependency on libdrm, so to enable # we need some version logic before we enable them. Also, ATM there is # no nouveau target in scons @@ -55,7 +58,7 @@ SConscript('winsys/sw/null/SConscript') if not env['embedded']: SConscript('state_trackers/vega/SConscript') - if env['platform'] not in ['darwin', 'haiku']: + if env['platform'] not in ['darwin', 'haiku', 'sunos']: SConscript('state_trackers/egl/SConscript') if env['x11']: @@ -121,7 +124,7 @@ SConscript([ ]) if not env['embedded']: - if env['platform'] not in ['darwin', 'haiku']: + if env['platform'] not in ['darwin', 'haiku', 'sunos']: SConscript([ 'targets/egl-static/SConscript' ]) @@ -153,8 +156,11 @@ if not env['embedded']: SConscript([ 'targets/dri-r300/SConscript', 'targets/dri-r600/SConscript', - 'targets/dri-radeonsi/SConscript', ]) + if env['llvm']: + SConscript([ + 'targets/dri-radeonsi/SConscript', + ]) if env['xorg'] and env['drm']: SConscript([ diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 896c058fde9..a70ae7384fb 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -18,6 +18,14 @@ endif include ../Makefile.template +default install clean: %: subdirs-% + +subdirs-%: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE) $*) || exit 1; \ + fi \ + done indices/u_indices_gen.c: indices/u_indices_gen.py $(PYTHON2) $< > $@ diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index 44cf2d326de..277428b38be 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ 
b/src/gallium/auxiliary/Makefile.sources @@ -1,3 +1,5 @@ +SUBDIRS := pipe-loader + C_SOURCES := \ cso_cache/cso_cache.c \ cso_cache/cso_context.c \ diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index ea0a4fb3f25..d6b981195b2 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -237,12 +237,13 @@ aa_transform_inst(struct tgsi_transform_context *ctx, decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; /* XXX this could be linear... */ - decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.Declaration.Interpolate = 1; decl.Declaration.Semantic = 1; decl.Semantic.Name = TGSI_SEMANTIC_GENERIC; decl.Semantic.Index = aactx->maxGeneric + 1; decl.Range.First = decl.Range.Last = aactx->maxInput + 1; + decl.Interp.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; ctx->emit_declaration(ctx, &decl); /* declare new sampler */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index a900dd3ab54..ec703d0b394 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -201,12 +201,13 @@ aa_transform_inst(struct tgsi_transform_context *ctx, decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; /* XXX this could be linear... 
*/ - decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.Declaration.Interpolate = 1; decl.Declaration.Semantic = 1; decl.Semantic.Name = TGSI_SEMANTIC_GENERIC; decl.Semantic.Index = aactx->maxGeneric + 1; decl.Range.First = decl.Range.Last = texInput; + decl.Interp.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; ctx->emit_declaration(ctx, &decl); /* declare new temp regs */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index cfb6ef4453a..842f6eeba22 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -234,12 +234,13 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, /* declare new position input reg */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */ + decl.Declaration.Interpolate = 1; decl.Declaration.Semantic = 1; decl.Semantic.Name = TGSI_SEMANTIC_POSITION; decl.Semantic.Index = 0; decl.Range.First = decl.Range.Last = wincoordInput; + decl.Interp.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? 
*/ ctx->emit_declaration(ctx, &decl); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 4ba4aa54596..3a74790e89f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -926,7 +926,7 @@ lp_build_sgn(struct lp_build_context *bld, } else { - LLVMValueRef minus_one = lp_build_const_vec(bld->gallivm, type, -1.0); + LLVMValueRef minus_one = lp_build_const_int_vec(bld->gallivm, type, -1.0); cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero); res = lp_build_select(bld, cond, bld->one, minus_one); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.c b/src/gallium/auxiliary/gallivm/lp_bld_assert.c index 9de5e8e7b51..37c142bd2ae 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_assert.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.c @@ -29,6 +29,7 @@ #include "util/u_memory.h" #include "lp_bld_assert.h" #include "lp_bld_init.h" +#include "lp_bld_const.h" #include "lp_bld_printf.h" @@ -55,48 +56,37 @@ lp_assert(int condition, const char *msg) * \param condition should be an 'i1' or 'i32' value * \param msg a string to print if the assertion fails. 
*/ -LLVMValueRef +void lp_build_assert(struct gallivm_state *gallivm, LLVMValueRef condition, const char *msg) { LLVMBuilderRef builder = gallivm->builder; LLVMContextRef context = gallivm->context; - LLVMModuleRef module = gallivm->module; LLVMTypeRef arg_types[2]; - LLVMValueRef msg_string, assert_func, params[2], r; + LLVMTypeRef ret_type; + LLVMValueRef function; + LLVMValueRef args[2]; + LLVMValueRef msg_string; - msg_string = lp_build_const_string_variable(module, context, - msg, strlen(msg) + 1); + msg_string = lp_build_const_string(gallivm, msg); + ret_type = LLVMVoidTypeInContext(context); arg_types[0] = LLVMInt32TypeInContext(context); arg_types[1] = LLVMPointerType(LLVMInt8TypeInContext(context), 0); - /* lookup the lp_assert function */ - assert_func = LLVMGetNamedFunction(module, "lp_assert"); - - /* Create the assertion function if not found */ - if (!assert_func) { - LLVMTypeRef func_type = - LLVMFunctionType(LLVMVoidTypeInContext(context), arg_types, 2, 0); - - assert_func = LLVMAddFunction(module, "lp_assert", func_type); - LLVMSetFunctionCallConv(assert_func, LLVMCCallConv); - LLVMSetLinkage(assert_func, LLVMExternalLinkage); - LLVMAddGlobalMapping(gallivm->engine, assert_func, - func_to_pointer((func_pointer)lp_assert)); - } - assert(assert_func); + function = lp_build_const_func_pointer(gallivm, + func_to_pointer((func_pointer)lp_assert), + ret_type, arg_types, Elements(arg_types), + "assert"); /* build function call param list */ - params[0] = LLVMBuildZExt(builder, condition, arg_types[0], ""); - params[1] = LLVMBuildBitCast(builder, msg_string, arg_types[1], ""); + args[0] = LLVMBuildZExt(builder, condition, arg_types[0], ""); + args[1] = msg_string; /* check arg types */ - assert(LLVMTypeOf(params[0]) == arg_types[0]); - assert(LLVMTypeOf(params[1]) == arg_types[1]); - - r = LLVMBuildCall(builder, assert_func, params, 2, ""); + assert(LLVMTypeOf(args[0]) == arg_types[0]); + assert(LLVMTypeOf(args[1]) == arg_types[1]); - return r; + 
LLVMBuildCall(builder, function, args, Elements(args), ""); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.h b/src/gallium/auxiliary/gallivm/lp_bld_assert.h index 1d2baab30a2..e377b59bbed 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_assert.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.h @@ -33,7 +33,7 @@ #include "lp_bld_init.h" -LLVMValueRef +void lp_build_assert(struct gallivm_state *gallivm, LLVMValueRef condition, const char *msg); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c index 6d8b7c26fc8..f0611b158d9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c @@ -409,3 +409,69 @@ lp_build_const_mask_aos(struct gallivm_state *gallivm, return LLVMConstVector(masks, type.length); } + + +/** + * Performs lp_build_const_mask_aos, but first swizzles the mask + */ +LLVMValueRef +lp_build_const_mask_aos_swizzled(struct gallivm_state *gallivm, + struct lp_type type, + unsigned mask, + const unsigned char *swizzle) +{ + mask = + ((mask & (1 << swizzle[0])) >> swizzle[0]) + | (((mask & (1 << swizzle[1])) >> swizzle[1]) << 1) + | (((mask & (1 << swizzle[2])) >> swizzle[2]) << 2) + | (((mask & (1 << swizzle[3])) >> swizzle[3]) << 3); + + return lp_build_const_mask_aos(gallivm, type, mask); +} + + +/** + * Build a zero-terminated constant string. + */ +LLVMValueRef +lp_build_const_string(struct gallivm_state *gallivm, + const char *str) +{ + unsigned len = strlen(str) + 1; + LLVMTypeRef i8 = LLVMInt8TypeInContext(gallivm->context); + LLVMValueRef string = LLVMAddGlobal(gallivm->module, LLVMArrayType(i8, len), ""); + LLVMSetGlobalConstant(string, TRUE); + LLVMSetLinkage(string, LLVMInternalLinkage); + LLVMSetInitializer(string, LLVMConstStringInContext(gallivm->context, str, len, TRUE)); + string = LLVMConstBitCast(string, LLVMPointerType(i8, 0)); + return string; +} + + +/** + * Build a callable function pointer. 
+ * + * We this casts instead of LLVMAddGlobalMapping() + * to work around a bug in LLVM 2.6, and for efficiency/simplicity. + */ +LLVMValueRef +lp_build_const_func_pointer(struct gallivm_state *gallivm, + const void *ptr, + LLVMTypeRef ret_type, + LLVMTypeRef *arg_types, + unsigned num_args, + const char *name) +{ + LLVMTypeRef function_type; + LLVMValueRef function; + + function_type = LLVMFunctionType(ret_type, arg_types, num_args, 0); + + function = lp_build_const_int_pointer(gallivm, ptr); + + function = LLVMBuildBitCast(gallivm->builder, function, + LLVMPointerType(function_type, 0), + name); + + return function; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h index 69718eb4b3d..2205616274f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h @@ -111,6 +111,13 @@ lp_build_const_mask_aos(struct gallivm_state *gallivm, unsigned mask); +LLVMValueRef +lp_build_const_mask_aos_swizzled(struct gallivm_state *gallivm, + struct lp_type type, + unsigned mask, + const unsigned char *swizzle); + + static INLINE LLVMValueRef lp_build_const_int32(struct gallivm_state *gallivm, int i) { @@ -142,5 +149,18 @@ lp_build_const_int_pointer(struct gallivm_state *gallivm, const void *ptr) } +LLVMValueRef +lp_build_const_string(struct gallivm_state *gallivm, + const char *str); + + +LLVMValueRef +lp_build_const_func_pointer(struct gallivm_state *gallivm, + const void *ptr, + LLVMTypeRef ret_type, + LLVMTypeRef *arg_types, + unsigned num_args, + const char *name); + #endif /* !LP_BLD_CONST_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index a9c9c7af10c..d2b3713ed2d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -265,6 +265,73 @@ lp_build_loop_end(struct lp_build_loop_state *state, lp_build_loop_end_cond(state, end, step, LLVMIntNE); } +/** + * 
Creates a c-style for loop, + * contrasts lp_build_loop as this checks condition on entry + * e.g. for(i = start; i cmp_op end; i += step) + * \param state the for loop state, initialized here + * \param gallivm the gallivm state + * \param start starting value of iterator + * \param cmp_op comparison operator used for comparing current value with end value + * \param end value used to compare against iterator + * \param step value added to iterator at end of each loop + */ +void +lp_build_for_loop_begin(struct lp_build_for_loop_state *state, + struct gallivm_state *gallivm, + LLVMValueRef start, + LLVMIntPredicate cmp_op, + LLVMValueRef end, + LLVMValueRef step) +{ + LLVMBuilderRef builder = gallivm->builder; + + assert(LLVMTypeOf(start) == LLVMTypeOf(end)); + assert(LLVMTypeOf(start) == LLVMTypeOf(step)); + + state->begin = lp_build_insert_new_block(gallivm, "loop_begin"); + state->step = step; + state->counter_var = lp_build_alloca(gallivm, LLVMTypeOf(start), "loop_counter"); + state->gallivm = gallivm; + state->cond = cmp_op; + state->end = end; + + LLVMBuildStore(builder, start, state->counter_var); + LLVMBuildBr(builder, state->begin); + + LLVMPositionBuilderAtEnd(builder, state->begin); + state->counter = LLVMBuildLoad(builder, state->counter_var, ""); + + state->body = lp_build_insert_new_block(gallivm, "loop_body"); + LLVMPositionBuilderAtEnd(builder, state->body); +} + +/** + * End the for loop. 
+ */ +void +lp_build_for_loop_end(struct lp_build_for_loop_state *state) +{ + LLVMValueRef next, cond; + LLVMBuilderRef builder = state->gallivm->builder; + + next = LLVMBuildAdd(builder, state->counter, state->step, ""); + LLVMBuildStore(builder, next, state->counter_var); + LLVMBuildBr(builder, state->begin); + + state->exit = lp_build_insert_new_block(state->gallivm, "loop_exit"); + + /* + * We build the comparison for the begin block here, + * if we build it earlier the output llvm ir is not human readable + * as the code produced is not in the standard begin -> body -> end order. + */ + LLVMPositionBuilderAtEnd(builder, state->begin); + cond = LLVMBuildICmp(builder, state->cond, state->counter, state->end, ""); + LLVMBuildCondBr(builder, cond, state->body, state->exit); + + LLVMPositionBuilderAtEnd(builder, state->exit); +} /* diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index 3cd5a9f42a5..0da849bfe0c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -131,6 +131,33 @@ lp_build_loop_end_cond(struct lp_build_loop_state *state, LLVMIntPredicate cond); +/** + * Implementation of simple C-style for loops + */ +struct lp_build_for_loop_state +{ + LLVMBasicBlockRef begin; + LLVMBasicBlockRef body; + LLVMBasicBlockRef exit; + LLVMValueRef counter_var; + LLVMValueRef counter; + LLVMValueRef step; + LLVMIntPredicate cond; + LLVMValueRef end; + struct gallivm_state *gallivm; +}; + +void +lp_build_for_loop_begin(struct lp_build_for_loop_state *state, + struct gallivm_state *gallivm, + LLVMValueRef start, + LLVMIntPredicate llvm_cond, + LLVMValueRef end, + LLVMValueRef step); + +void +lp_build_for_loop_end(struct lp_build_for_loop_state *state); + /** * if/else/endif. 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index 82ab19eda14..e4b8da6bcfd 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -643,28 +643,18 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, */ LLVMTypeRef ret_type; LLVMTypeRef arg_types[4]; - LLVMTypeRef function_type; ret_type = LLVMVoidTypeInContext(gallivm->context); arg_types[0] = pf32t; arg_types[1] = pi8t; arg_types[2] = i32t; arg_types[3] = i32t; - function_type = LLVMFunctionType(ret_type, arg_types, - Elements(arg_types), 0); - /* Note: we're using this casting here instead of LLVMAddGlobalMapping() - * to work around a bug in LLVM 2.6, and for efficiency/simplicity. - */ - - /* make const pointer for the C fetch_rgba_float function */ - function = lp_build_const_int_pointer(gallivm, - func_to_pointer((func_pointer) format_desc->fetch_rgba_float)); - - /* cast the callee pointer to the function's type */ - function = LLVMBuildBitCast(builder, function, - LLVMPointerType(function_type, 0), - "cast callee"); + function = lp_build_const_func_pointer(gallivm, + func_to_pointer((func_pointer) format_desc->fetch_rgba_float), + ret_type, + arg_types, Elements(arg_types), + format_desc->short_name); } tmp_ptr = lp_build_alloca(gallivm, f32x4t, ""); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h index f68bf75a851..5fc0f996c64 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h @@ -81,5 +81,12 @@ extern LLVMValueRef lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal, const char *Name); +void +lp_set_load_alignment(LLVMValueRef Inst, + unsigned Align); + +void +lp_set_store_alignment(LLVMValueRef Inst, + unsigned Align); #endif /* !LP_BLD_INIT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 68f8808f3ef..6c4586c4212 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -165,3 +165,18 @@ lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal, return llvm::wrap(llvm::unwrap(B)->CreateLoad(llvm::unwrap(PointerVal), true, Name)); } +extern "C" +void +lp_set_load_alignment(LLVMValueRef Inst, + unsigned Align) +{ + llvm::unwrap<llvm::LoadInst>(Inst)->setAlignment(Align); +} + +extern "C" +void +lp_set_store_alignment(LLVMValueRef Inst, + unsigned Align) +{ + llvm::unwrap<llvm::StoreInst>(Inst)->setAlignment(Align); +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.c b/src/gallium/auxiliary/gallivm/lp_bld_printf.c index 56ff4269588..5e359ceaa20 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_printf.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.c @@ -66,19 +66,6 @@ lp_get_printf_arg_count(const char *fmt) return count; } -LLVMValueRef -lp_build_const_string_variable(LLVMModuleRef module, - LLVMContextRef context, - const char *str, int len) -{ - LLVMValueRef string = LLVMAddGlobal(module, LLVMArrayType(LLVMInt8TypeInContext(context), len + 1), ""); - LLVMSetGlobalConstant(string, TRUE); - LLVMSetLinkage(string, LLVMInternalLinkage); - LLVMSetInitializer(string, LLVMConstStringInContext(context, str, len + 1, TRUE)); - return string; -} - - /** * lp_build_printf. * @@ -94,24 +81,22 @@ lp_build_printf(struct gallivm_state *gallivm, const char *fmt, ...) 
int argcount = lp_get_printf_arg_count(fmt); LLVMBuilderRef builder = gallivm->builder; LLVMContextRef context = gallivm->context; - LLVMModuleRef module = gallivm->module; LLVMValueRef params[50]; - LLVMValueRef fmtarg = lp_build_const_string_variable(module, context, - fmt, strlen(fmt) + 1); - LLVMValueRef int0 = lp_build_const_int32(gallivm, 0); - LLVMValueRef index[2]; - LLVMValueRef func_printf = LLVMGetNamedFunction(module, "printf"); + LLVMValueRef fmtarg = lp_build_const_string(gallivm, fmt); + LLVMTypeRef printf_type; + LLVMValueRef func_printf; assert(Elements(params) >= argcount + 1); - index[0] = index[1] = int0; + printf_type = LLVMFunctionType(LLVMIntTypeInContext(context, 32), NULL, 0, 1); - if (!func_printf) { - LLVMTypeRef printf_type = LLVMFunctionType(LLVMIntTypeInContext(context, 32), NULL, 0, 1); - func_printf = LLVMAddFunction(module, "printf", printf_type); - } + func_printf = lp_build_const_int_pointer(gallivm, func_to_pointer((func_pointer)debug_printf)); + + func_printf = LLVMBuildBitCast(gallivm->builder, func_printf, + LLVMPointerType(printf_type, 0), + "debug_printf"); - params[0] = LLVMBuildGEP(builder, fmtarg, index, 2, ""); + params[0] = fmtarg; va_start(arglist, fmt); for (i = 1; i <= argcount; i++) { @@ -170,3 +155,30 @@ lp_build_print_ivec4(struct gallivm_state *gallivm, util_snprintf(format, sizeof(format), "%s %%i %%i %%i %%i\n", msg); return lp_build_printf(gallivm, format, x, y, z, w); } + + +/** + * Print a uint8[16] vector. 
+ */ +LLVMValueRef +lp_build_print_uvec16(struct gallivm_state *gallivm, + const char *msg, LLVMValueRef vec) +{ + LLVMBuilderRef builder = gallivm->builder; + char format[1000]; + LLVMValueRef args[16]; + int i; + + for (i = 0; i < 16; ++i) { + args[i] = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(gallivm, i), ""); + } + + util_snprintf(format, sizeof(format), "%s %%u %%u %%u %%u %%u %%u %%u %%u %%u %%u %%u %%u %%u %%u %%u %%u\n", msg); + + return lp_build_printf( + gallivm, format, + args[ 0], args[ 1], args[ 2], args[ 3], + args[ 4], args[ 5], args[ 6], args[ 7], + args[ 8], args[ 9], args[10], args[11], + args[12], args[13], args[14], args[15]); +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.h b/src/gallium/auxiliary/gallivm/lp_bld_printf.h index 79db74d8886..7a2b26d41f4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_printf.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.h @@ -34,12 +34,9 @@ #include "lp_bld_init.h" -LLVMValueRef lp_build_const_string_variable(LLVMModuleRef module, - LLVMContextRef context, - const char *str, int len); - -LLVMValueRef lp_build_printf(struct gallivm_state *gallivm, - const char *fmt, ...); +LLVMValueRef +lp_build_printf(struct gallivm_state *gallivm, + const char *fmt, ...); LLVMValueRef lp_build_print_vec4(struct gallivm_state *gallivm, @@ -49,5 +46,9 @@ LLVMValueRef lp_build_print_ivec4(struct gallivm_state *gallivm, const char *msg, LLVMValueRef vec); +LLVMValueRef +lp_build_print_uvec16(struct gallivm_state *gallivm, + const char *msg, LLVMValueRef vec); + #endif diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 07f4f3bf6b8..c6d4f1bcc28 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -349,7 +349,7 @@ lp_build_brilinear_lod(struct lp_build_context *bld, * Combined log2 and brilinear lod computation. 
* * It's in all identical to calling lp_build_fast_log2() and - * lp_build_brilinear_lod() above, but by combining we can compute the interger + * lp_build_brilinear_lod() above, but by combining we can compute the integer * and fractional part independently. */ static void diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.c b/src/gallium/auxiliary/gallivm/lp_bld_struct.c index 0dc2f24d10a..cc248d15e97 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_struct.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.c @@ -146,6 +146,25 @@ lp_build_pointer_get(LLVMBuilderRef builder, } +LLVMValueRef +lp_build_pointer_get_unaligned(LLVMBuilderRef builder, + LLVMValueRef ptr, + LLVMValueRef index, + unsigned alignment) +{ + LLVMValueRef element_ptr; + LLVMValueRef res; + assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind); + element_ptr = LLVMBuildGEP(builder, ptr, &index, 1, ""); + res = LLVMBuildLoad(builder, element_ptr, ""); + lp_set_load_alignment(res, alignment); +#ifdef DEBUG + lp_build_name(res, "%s[%s]", LLVMGetValueName(ptr), LLVMGetValueName(index)); +#endif + return res; +} + + void lp_build_pointer_set(LLVMBuilderRef builder, LLVMValueRef ptr, @@ -156,3 +175,18 @@ lp_build_pointer_set(LLVMBuilderRef builder, element_ptr = LLVMBuildGEP(builder, ptr, &index, 1, ""); LLVMBuildStore(builder, value, element_ptr); } + + +void +lp_build_pointer_set_unaligned(LLVMBuilderRef builder, + LLVMValueRef ptr, + LLVMValueRef index, + LLVMValueRef value, + unsigned alignment) +{ + LLVMValueRef element_ptr; + LLVMValueRef instr; + element_ptr = LLVMBuildGEP(builder, ptr, &index, 1, ""); + instr = LLVMBuildStore(builder, value, element_ptr); + lp_set_store_alignment(instr, alignment); +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.h b/src/gallium/auxiliary/gallivm/lp_bld_struct.h index 11605c685f0..6b7b4f2a6bf 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_struct.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.h @@ -105,6 +105,18 @@ 
lp_build_pointer_get(LLVMBuilderRef builder, LLVMValueRef index); /** + * Get the value of an array element, with explicit alignment. + * + * If the element size is different from the alignment this will + * cause llvm to emit an unaligned load + */ +LLVMValueRef +lp_build_pointer_get_unaligned(LLVMBuilderRef builder, + LLVMValueRef ptr, + LLVMValueRef index, + unsigned alignment); + +/** * Set the value of an array element. */ void @@ -113,4 +125,17 @@ lp_build_pointer_set(LLVMBuilderRef builder, LLVMValueRef index, LLVMValueRef value); +/** + * Set the value of an array element, with explicit alignment. + * + * If the element size is different from the alignment this will + * cause llvm to emit an unaligned store + */ +void +lp_build_pointer_set_unaligned(LLVMBuilderRef builder, + LLVMValueRef ptr, + LLVMValueRef index, + LLVMValueRef value, + unsigned alignment); + #endif /* !LP_BLD_STRUCT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c index 45bbf81eb29..680c85f843c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c @@ -215,8 +215,6 @@ lp_build_tgsi_inst_llvm( case TGSI_OPCODE_PUSHA: case TGSI_OPCODE_POPA: case TGSI_OPCODE_SAD: - case TGSI_OPCODE_TXF: - case TGSI_OPCODE_TXQ: /* deprecated? */ assert(0); return FALSE; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 888221d4d64..773c679a4d8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -122,6 +122,11 @@ struct lp_tgsi_info unsigned indirect_textures:1; /* + * Whether any immediate values are outside the range of 0 and 1 + */ + unsigned unclamped_immediates:1; + + /* * Texture opcode description. Aimed at detecting and described direct * texture opcodes. 
*/ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c index 7f011563264..d278444ce90 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c @@ -1007,6 +1007,17 @@ i2f_emit_cpu( emit_data->args[0]); } +/* TGSI_OPCODE_IABS (CPU Only) */ +static void +iabs_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->int_bld, + emit_data->args[0]); +} + /* TGSI_OPCODE_IDIV (CPU Only) */ static void idiv_emit_cpu( @@ -1101,6 +1112,18 @@ islt_emit_cpu( iset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS); } + +/* TGSI_OPCODE_ISSG (CPU Only) */ +static void +issg_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->int_bld, + emit_data->args[0]); +} + /* TGSI_OPCODE_LG2 (CPU Only) */ static void lg2_emit_cpu( @@ -1541,6 +1564,7 @@ lp_set_default_actions_cpu( bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu; bld_base->op_actions[TGSI_OPCODE_I2F].emit = i2f_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_IABS].emit = iabs_emit_cpu; bld_base->op_actions[TGSI_OPCODE_IDIV].emit = idiv_emit_cpu; bld_base->op_actions[TGSI_OPCODE_INEG].emit = ineg_emit_cpu; bld_base->op_actions[TGSI_OPCODE_IMAX].emit = imax_emit_cpu; @@ -1548,6 +1572,7 @@ lp_set_default_actions_cpu( bld_base->op_actions[TGSI_OPCODE_ISGE].emit = isge_emit_cpu; bld_base->op_actions[TGSI_OPCODE_ISHR].emit = ishr_emit_cpu; bld_base->op_actions[TGSI_OPCODE_ISLT].emit = islt_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_ISSG].emit = issg_emit_cpu; bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu; bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu; diff --git 
a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index 80c148124ee..24bc13a9be8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -325,8 +325,10 @@ lp_emit_store_aos( if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) { LLVMValueRef writemask; - writemask = lp_build_const_mask_aos(bld->bld_base.base.gallivm, bld->bld_base.base.type, - reg->Register.WriteMask); + writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm, + bld->bld_base.base.type, + reg->Register.WriteMask, + bld->swizzles); if (mask) { mask = LLVMBuildAnd(builder, mask, writemask, ""); @@ -1089,6 +1091,7 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm, debug_printf("2222222222222222222222222222 \n"); } tgsi_parse_free(&parse); + FREE(bld.bld_base.instructions); if (0) { LLVMModuleRef module = LLVMGetGlobalParent( diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c index 3373ed4426d..ab393ed942a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c @@ -442,8 +442,12 @@ lp_build_tgsi_info(const struct tgsi_token *tokens, assert(size <= 4); if (ctx.num_imms < Elements(ctx.imm)) { for (chan = 0; chan < size; ++chan) { - ctx.imm[ctx.num_imms][chan] = - parse.FullToken.FullImmediate.u[chan].Float; + float value = parse.FullToken.FullImmediate.u[chan].Float; + ctx.imm[ctx.num_imms][chan] = value; + + if (value < 0.0f || value > 1.0f) { + info->unclamped_immediates = TRUE; + } } ++ctx.num_imms; } diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h index 3e1c273027b..3773be945dd 100644 --- a/src/gallium/auxiliary/os/os_thread.h +++ b/src/gallium/auxiliary/os/os_thread.h @@ -44,6 +44,7 @@ #include <pthread.h> /* POSIX threads headers */ #include <stdio.h> /* for perror() */ +#include <signal.h> /* pipe_thread diff 
--git a/src/gallium/auxiliary/pipe-loader/Makefile.am b/src/gallium/auxiliary/pipe-loader/Makefile.am new file mode 100644 index 00000000000..c63dce3fe4a --- /dev/null +++ b/src/gallium/auxiliary/pipe-loader/Makefile.am @@ -0,0 +1,38 @@ +AUTOMAKE_OPTIONS = subdir-objects + +AM_CPPFLAGS = $(DEFINES) \ + $(GALLIUM_PIPE_LOADER_DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/winsys + +AM_CFLAGS = $(PIC_FLAGS) + +noinst_LTLIBRARIES = + +if HAVE_LOADER_GALLIUM +noinst_LTLIBRARIES += libpipe_loader.la + +libpipe_loader_la_SOURCES = \ + pipe_loader.h \ + pipe_loader_priv.h \ + pipe_loader.c \ + pipe_loader_sw.c + +if HAVE_DRM_LOADER_GALLIUM +libpipe_loader_la_SOURCES += pipe_loader_drm.c +AM_CFLAGS += $(LIBDRM_CFLAGS) +endif + +# Provide compatibility with scripts for the old Mesa build system for +# a while by putting a link to the library in the current directory. +all-local: libpipe_loader.la + ln -f .libs/libpipe_loader.a . + +clean-local: + rm -f libpipe_loader.a +endif + +# FIXME: Remove when the rest of Gallium is converted to automake. +default: all diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.c b/src/gallium/auxiliary/pipe-loader/pipe_loader.c new file mode 100644 index 00000000000..6a10ac3998d --- /dev/null +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.c @@ -0,0 +1,102 @@ +/************************************************************************** + * + * Copyright 2012 Francisco Jerez + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "pipe_loader_priv.h" + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_string.h" +#include "util/u_dl.h" + +#define MODULE_PREFIX "pipe_" + +static int (*backends[])(struct pipe_loader_device **, int) = { +#ifdef HAVE_PIPE_LOADER_DRM + &pipe_loader_drm_probe, +#endif + &pipe_loader_sw_probe +}; + +int +pipe_loader_probe(struct pipe_loader_device **devs, int ndev) +{ + int i, n = 0; + + for (i = 0; i < Elements(backends); i++) + n += backends[i](&devs[n], MAX2(0, ndev - n)); + + return n; +} + +void +pipe_loader_release(struct pipe_loader_device **devs, int ndev) +{ + int i; + + for (i = 0; i < ndev; i++) + devs[i]->ops->release(&devs[i]); +} + +struct pipe_screen * +pipe_loader_create_screen(struct pipe_loader_device *dev, + const char *library_paths) +{ + return dev->ops->create_screen(dev, library_paths); +} + +struct util_dl_library * +pipe_loader_find_module(struct pipe_loader_device *dev, + const char *library_paths) +{ + struct util_dl_library *lib; + const char *next; + char path[PATH_MAX]; + int len, ret; + + for (next = library_paths; *next; library_paths = next + 1) { + next = util_strchrnul(library_paths, ':'); + len = next - library_paths; + + if (len) + ret = util_snprintf(path, sizeof(path), "%.*s/%s%s%s", + len, library_paths, + MODULE_PREFIX, dev->driver_name, UTIL_DL_EXT); + else + ret = util_snprintf(path, sizeof(path), "%s%s%s", + MODULE_PREFIX, dev->driver_name, UTIL_DL_EXT); + + if (ret > 0 && ret < sizeof(path)) { + lib = util_dl_open(path); + if (lib) { + debug_printf("loaded %s\n", path); + return lib; + } + } + } + + return NULL; +} diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.h b/src/gallium/auxiliary/pipe-loader/pipe_loader.h new file mode 100644 index 00000000000..e41969458dd --- /dev/null +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.h @@ -0,0 +1,144 @@ 
+/************************************************************************** + * + * Copyright 2012 Francisco Jerez + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file Library that provides device enumeration and creation of + * winsys/pipe_screen instances. + */ + +#ifndef PIPE_LOADER_H +#define PIPE_LOADER_H + +#include "pipe/p_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct pipe_screen; + +enum pipe_loader_device_type { + PIPE_LOADER_DEVICE_SOFTWARE, + PIPE_LOADER_DEVICE_PCI, + NUM_PIPE_LOADER_DEVICE_TYPES +}; + +/** + * A device known to the pipe loader. 
+ */ +struct pipe_loader_device { + enum pipe_loader_device_type type; + + union { + struct { + int vendor_id; + int chip_id; + } pci; + }; /**< Discriminated by \a type */ + + const char *driver_name; + const struct pipe_loader_ops *ops; +}; + +/** + * Get a list of known devices. + * + * \param devs Array that will be filled with pointers to the devices + * available in the system. + * \param ndev Maximum number of devices to return. + * \return Number of devices available in the system. + */ +int +pipe_loader_probe(struct pipe_loader_device **devs, int ndev); + +/** + * Create a pipe_screen for the specified device. + * + * \param dev Device the screen will be created for. + * \param library_paths Colon-separated list of filesystem paths that + * will be used to look for the pipe driver + * module that handles this device. + */ +struct pipe_screen * +pipe_loader_create_screen(struct pipe_loader_device *dev, + const char *library_paths); + +/** + * Release resources allocated for a list of devices. + * + * Should be called when the specified devices are no longer in use to + * release any resources allocated by pipe_loader_probe. + * + * \param devs Devices to release. + * \param ndev Number of devices to release. + */ +void +pipe_loader_release(struct pipe_loader_device **devs, int ndev); + +#ifdef HAVE_PIPE_LOADER_SW + +/** + * Get a list of known software devices. + * + * This function is platform-specific. + * + * \sa pipe_loader_probe + */ +int +pipe_loader_sw_probe(struct pipe_loader_device **devs, int ndev); + +#endif + +#ifdef HAVE_PIPE_LOADER_DRM + +/** + * Get a list of known DRM devices. + * + * This function is platform-specific. + * + * \sa pipe_loader_probe + */ +int +pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev); + +/** + * Initialize a DRM device in an already opened fd. + * + * This function is platform-specific. 
+ * + * \sa pipe_loader_probe + */ +boolean +pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd); + +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* PIPE_LOADER_H */ diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c new file mode 100644 index 00000000000..7a7e9942f3b --- /dev/null +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c @@ -0,0 +1,218 @@ +/************************************************************************** + * + * Copyright 2011 Intel Corporation + * Copyright 2012 Francisco Jerez + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Kristian Høgsberg <[email protected]> + * Benjamin Franzke <[email protected]> + * + **************************************************************************/ + +#include <fcntl.h> +#include <stdio.h> +#include <libudev.h> +#include <xf86drm.h> + +#include "state_tracker/drm_driver.h" +#include "pipe_loader_priv.h" + +#include "util/u_memory.h" +#include "util/u_dl.h" +#include "util/u_debug.h" + +#define DRIVER_MAP_GALLIUM_ONLY +#include "pci_ids/pci_id_driver_map.h" + +struct pipe_loader_drm_device { + struct pipe_loader_device base; + struct util_dl_library *lib; + int fd; +}; + +#define pipe_loader_drm_device(dev) ((struct pipe_loader_drm_device *)dev) + +static boolean +find_drm_pci_id(struct pipe_loader_drm_device *ddev) +{ + struct udev *udev = NULL; + struct udev_device *parent, *device = NULL; + struct stat stat; + const char *pci_id; + + if (fstat(ddev->fd, &stat) < 0) + goto fail; + + udev = udev_new(); + if (!udev) + goto fail; + + device = udev_device_new_from_devnum(udev, 'c', stat.st_rdev); + if (!device) + goto fail; + + parent = udev_device_get_parent(device); + if (!parent) + goto fail; + + pci_id = udev_device_get_property_value(parent, "PCI_ID"); + if (!pci_id || + sscanf(pci_id, "%x:%x", &ddev->base.pci.vendor_id, + &ddev->base.pci.chip_id) != 2) + goto fail; + + return TRUE; + + fail: + if (device) + udev_device_unref(device); + if (udev) + udev_unref(udev); + + debug_printf("pci id for fd %d not found\n", ddev->fd); + return FALSE; +} + +static boolean +find_drm_driver_name(struct pipe_loader_drm_device *ddev) +{ + struct pipe_loader_device *dev = &ddev->base; + int i, j; + + for (i = 0; driver_map[i].driver; i++) { + if (dev->pci.vendor_id != driver_map[i].vendor_id) + continue; + + if (driver_map[i].num_chips_ids == -1) { + dev->driver_name = driver_map[i].driver; + goto found; + } + + for (j = 0; j < driver_map[i].num_chips_ids; j++) { + if (dev->pci.chip_id == driver_map[i].chip_ids[j]) { + dev->driver_name = 
driver_map[i].driver; + goto found; + } + } + } + + return FALSE; + + found: + debug_printf("driver for %04x:%04x: %s\n", dev->pci.vendor_id, + dev->pci.chip_id, dev->driver_name); + return TRUE; +} + +static struct pipe_loader_ops pipe_loader_drm_ops; + +boolean +pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd) +{ + struct pipe_loader_drm_device *ddev = CALLOC_STRUCT(pipe_loader_drm_device); + + ddev->base.type = PIPE_LOADER_DEVICE_PCI; + ddev->base.ops = &pipe_loader_drm_ops; + ddev->fd = fd; + + if (!find_drm_pci_id(ddev)) + goto fail; + + if (!find_drm_driver_name(ddev)) + goto fail; + + *dev = &ddev->base; + return TRUE; + + fail: + FREE(ddev); + return FALSE; +} + +static int +open_drm_minor(int minor) +{ + char path[PATH_MAX]; + snprintf(path, sizeof(path), DRM_DEV_NAME, DRM_DIR_NAME, minor); + return open(path, O_RDWR, 0); +} + +int +pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev) +{ + int i, j, fd; + + for (i = 0, j = 0; i < DRM_MAX_MINOR; i++) { + fd = open_drm_minor(i); + if (fd < 0) + continue; + + if (j >= ndev || !pipe_loader_drm_probe_fd(&devs[j], fd)) + close(fd); + + j++; + } + + return j; +} + +static void +pipe_loader_drm_release(struct pipe_loader_device **dev) +{ + struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(*dev); + + if (ddev->lib) + util_dl_close(ddev->lib); + + close(ddev->fd); + FREE(ddev); + *dev = NULL; +} + +static struct pipe_screen * +pipe_loader_drm_create_screen(struct pipe_loader_device *dev, + const char *library_paths) +{ + struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(dev); + const struct drm_driver_descriptor *dd; + + if (!ddev->lib) + ddev->lib = pipe_loader_find_module(dev, library_paths); + if (!ddev->lib) + return NULL; + + dd = (const struct drm_driver_descriptor *) + util_dl_get_proc_address(ddev->lib, "driver_descriptor"); + + /* sanity check on the name */ + if (!dd || strcmp(dd->name, ddev->base.driver_name) != 0) + return NULL; + + return 
dd->create_screen(ddev->fd); +} + +static struct pipe_loader_ops pipe_loader_drm_ops = { + .create_screen = pipe_loader_drm_create_screen, + .release = pipe_loader_drm_release +}; diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h b/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h new file mode 100644 index 00000000000..0be833a0f3f --- /dev/null +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2012 Francisco Jerez + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef PIPE_LOADER_PRIV_H +#define PIPE_LOADER_PRIV_H + +#include "pipe_loader.h" + +struct pipe_loader_ops { + struct pipe_screen *(*create_screen)(struct pipe_loader_device *dev, + const char *library_paths); + + void (*release)(struct pipe_loader_device **dev); +}; + +/** + * Open the pipe driver module that handles a specified device. + */ +struct util_dl_library * +pipe_loader_find_module(struct pipe_loader_device *dev, + const char *library_paths); + +#endif /* PIPE_LOADER_PRIV_H */ diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c new file mode 100644 index 00000000000..c2b78c636a7 --- /dev/null +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c @@ -0,0 +1,107 @@ +/************************************************************************** + * + * Copyright 2012 Francisco Jerez + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe_loader_priv.h" + +#include "util/u_memory.h" +#include "util/u_dl.h" +#include "sw/null/null_sw_winsys.h" +#include "target-helpers/inline_sw_helper.h" +#include "state_tracker/xlib_sw_winsys.h" + +struct pipe_loader_sw_device { + struct pipe_loader_device base; + struct util_dl_library *lib; + struct sw_winsys *ws; +}; + +#define pipe_loader_sw_device(dev) ((struct pipe_loader_sw_device *)dev) + +static struct pipe_loader_ops pipe_loader_sw_ops; + +static struct sw_winsys *(*backends[])() = { +#ifdef HAVE_WINSYS_XLIB + x11_sw_create, +#endif + null_sw_create +}; + +int +pipe_loader_sw_probe(struct pipe_loader_device **devs, int ndev) +{ + int i; + + for (i = 0; i < Elements(backends); i++) { + if (i < ndev) { + struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device); + + sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE; + sdev->base.driver_name = "swrast"; + sdev->base.ops = &pipe_loader_sw_ops; + sdev->ws = backends[i](); + devs[i] = &sdev->base; + } + } + + return i; +} + +static void +pipe_loader_sw_release(struct pipe_loader_device **dev) +{ + struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(*dev); + + if (sdev->lib) + util_dl_close(sdev->lib); + + FREE(sdev); + *dev = NULL; +} + +static struct pipe_screen * +pipe_loader_sw_create_screen(struct pipe_loader_device *dev, + const char *library_paths) +{ + struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(dev); + struct pipe_screen *(*init)(struct sw_winsys *); + + if (!sdev->lib) + sdev->lib = pipe_loader_find_module(dev, library_paths); + if (!sdev->lib) + return NULL; + + init = (void 
*)util_dl_get_proc_address(sdev->lib, "swrast_create_screen"); + if (!init) + return NULL; + + return init(sdev->ws); +} + +static struct pipe_loader_ops pipe_loader_sw_ops = { + .create_screen = pipe_loader_sw_create_screen, + .release = pipe_loader_sw_release +}; diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 6ec2b0d8f21..1c24b9bdbed 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -104,12 +104,11 @@ tgsi_default_declaration( void ) declaration.NrTokens = 1; declaration.File = TGSI_FILE_NULL; declaration.UsageMask = TGSI_WRITEMASK_XYZW; - declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT; + declaration.Interpolate = 0; declaration.Dimension = 0; declaration.Semantic = 0; - declaration.Centroid = 0; declaration.Invariant = 0; - declaration.CylindricalWrap = 0; + declaration.Local = 0; return declaration; } @@ -121,9 +120,8 @@ tgsi_build_declaration( unsigned interpolate, unsigned dimension, unsigned semantic, - unsigned centroid, unsigned invariant, - unsigned cylindrical_wrap, + unsigned local, struct tgsi_header *header ) { struct tgsi_declaration declaration; @@ -137,9 +135,8 @@ tgsi_build_declaration( declaration.Interpolate = interpolate; declaration.Dimension = dimension; declaration.Semantic = semantic; - declaration.Centroid = centroid; declaration.Invariant = invariant; - declaration.CylindricalWrap = cylindrical_wrap; + declaration.Local = local; header_bodysize_grow( header ); @@ -194,6 +191,36 @@ tgsi_build_declaration_dimension(unsigned index_2d, return dd; } +static struct tgsi_declaration_interp +tgsi_default_declaration_interp( void ) +{ + struct tgsi_declaration_interp di; + + di.Interpolate = TGSI_INTERPOLATE_CONSTANT; + di.Centroid = 0; + di.CylindricalWrap = 0; + + return di; +} + +static struct tgsi_declaration_interp +tgsi_build_declaration_interp(unsigned interpolate, + unsigned centroid, + unsigned cylindrical_wrap, + struct 
tgsi_declaration *declaration, + struct tgsi_header *header) +{ + struct tgsi_declaration_interp di; + + di.Interpolate = interpolate; + di.Centroid = centroid; + di.CylindricalWrap = cylindrical_wrap; + + declaration_grow(declaration, header); + + return di; +} + static struct tgsi_declaration_semantic tgsi_default_declaration_semantic( void ) { @@ -227,42 +254,72 @@ tgsi_build_declaration_semantic( return ds; } - static struct tgsi_declaration_resource tgsi_default_declaration_resource(void) { - struct tgsi_declaration_resource declaration_resource; + struct tgsi_declaration_resource dr; - declaration_resource.Resource = TGSI_TEXTURE_UNKNOWN; - declaration_resource.ReturnTypeX = PIPE_TYPE_UNORM; - declaration_resource.ReturnTypeY = PIPE_TYPE_UNORM; - declaration_resource.ReturnTypeZ = PIPE_TYPE_UNORM; - declaration_resource.ReturnTypeW = PIPE_TYPE_UNORM; + dr.Resource = TGSI_TEXTURE_BUFFER; + dr.Raw = 0; + dr.Writable = 0; - return declaration_resource; + return dr; } static struct tgsi_declaration_resource tgsi_build_declaration_resource(unsigned texture, - unsigned return_type_x, - unsigned return_type_y, - unsigned return_type_z, - unsigned return_type_w, + unsigned raw, + unsigned writable, struct tgsi_declaration *declaration, struct tgsi_header *header) { - struct tgsi_declaration_resource declaration_resource; + struct tgsi_declaration_resource dr; + + dr = tgsi_default_declaration_resource(); + dr.Resource = texture; + dr.Raw = raw; + dr.Writable = writable; + + declaration_grow(declaration, header); + + return dr; +} + +static struct tgsi_declaration_sampler_view +tgsi_default_declaration_sampler_view(void) +{ + struct tgsi_declaration_sampler_view dsv; - declaration_resource = tgsi_default_declaration_resource(); - declaration_resource.Resource = texture; - declaration_resource.ReturnTypeX = return_type_x; - declaration_resource.ReturnTypeY = return_type_y; - declaration_resource.ReturnTypeZ = return_type_z; - declaration_resource.ReturnTypeW = 
return_type_w; + dsv.Resource = TGSI_TEXTURE_BUFFER; + dsv.ReturnTypeX = PIPE_TYPE_UNORM; + dsv.ReturnTypeY = PIPE_TYPE_UNORM; + dsv.ReturnTypeZ = PIPE_TYPE_UNORM; + dsv.ReturnTypeW = PIPE_TYPE_UNORM; + + return dsv; +} + +static struct tgsi_declaration_sampler_view +tgsi_build_declaration_sampler_view(unsigned texture, + unsigned return_type_x, + unsigned return_type_y, + unsigned return_type_z, + unsigned return_type_w, + struct tgsi_declaration *declaration, + struct tgsi_header *header) +{ + struct tgsi_declaration_sampler_view dsv; + + dsv = tgsi_default_declaration_sampler_view(); + dsv.Resource = texture; + dsv.ReturnTypeX = return_type_x; + dsv.ReturnTypeY = return_type_y; + dsv.ReturnTypeZ = return_type_z; + dsv.ReturnTypeW = return_type_w; declaration_grow(declaration, header); - return declaration_resource; + return dsv; } @@ -274,8 +331,10 @@ tgsi_default_full_declaration( void ) full_declaration.Declaration = tgsi_default_declaration(); full_declaration.Range = tgsi_default_declaration_range(); full_declaration.Semantic = tgsi_default_declaration_semantic(); + full_declaration.Interp = tgsi_default_declaration_interp(); full_declaration.ImmediateData.u = NULL; full_declaration.Resource = tgsi_default_declaration_resource(); + full_declaration.SamplerView = tgsi_default_declaration_sampler_view(); return full_declaration; } @@ -302,9 +361,8 @@ tgsi_build_full_declaration( full_decl->Declaration.Interpolate, full_decl->Declaration.Dimension, full_decl->Declaration.Semantic, - full_decl->Declaration.Centroid, full_decl->Declaration.Invariant, - full_decl->Declaration.CylindricalWrap, + full_decl->Declaration.Local, header ); if (maxsize <= size) @@ -332,6 +390,22 @@ tgsi_build_full_declaration( header); } + if (full_decl->Declaration.Interpolate) { + struct tgsi_declaration_interp *di; + + if (maxsize <= size) { + return 0; + } + di = (struct tgsi_declaration_interp *)&tokens[size]; + size++; + + *di = 
tgsi_build_declaration_interp(full_decl->Interp.Interpolate, + full_decl->Interp.Centroid, + full_decl->Interp.CylindricalWrap, + declaration, + header); + } + if( full_decl->Declaration.Semantic ) { struct tgsi_declaration_semantic *ds; @@ -375,14 +449,31 @@ tgsi_build_full_declaration( size++; *dr = tgsi_build_declaration_resource(full_decl->Resource.Resource, - full_decl->Resource.ReturnTypeX, - full_decl->Resource.ReturnTypeY, - full_decl->Resource.ReturnTypeZ, - full_decl->Resource.ReturnTypeW, + full_decl->Resource.Raw, + full_decl->Resource.Writable, declaration, header); } + if (full_decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { + struct tgsi_declaration_sampler_view *dsv; + + if (maxsize <= size) { + return 0; + } + dsv = (struct tgsi_declaration_sampler_view *)&tokens[size]; + size++; + + *dsv = tgsi_build_declaration_sampler_view( + full_decl->SamplerView.Resource, + full_decl->SamplerView.ReturnTypeX, + full_decl->SamplerView.ReturnTypeY, + full_decl->SamplerView.ReturnTypeZ, + full_decl->SamplerView.ReturnTypeW, + declaration, + header); + } + return size; } @@ -405,11 +496,13 @@ tgsi_default_immediate( void ) static struct tgsi_immediate tgsi_build_immediate( - struct tgsi_header *header ) + struct tgsi_header *header, + unsigned type ) { struct tgsi_immediate immediate; immediate = tgsi_default_immediate(); + immediate.DataType = type; header_bodysize_grow( header ); @@ -442,21 +535,6 @@ immediate_grow( header_bodysize_grow( header ); } -static union tgsi_immediate_data -tgsi_build_immediate_float32( - float value, - struct tgsi_immediate *immediate, - struct tgsi_header *header ) -{ - union tgsi_immediate_data immediate_data; - - immediate_data.Float = value; - - immediate_grow( immediate, header ); - - return immediate_data; -} - unsigned tgsi_build_full_immediate( const struct tgsi_full_immediate *full_imm, @@ -472,7 +550,7 @@ tgsi_build_full_immediate( immediate = (struct tgsi_immediate *) &tokens[size]; size++; - *immediate = 
tgsi_build_immediate( header ); + *immediate = tgsi_build_immediate( header, full_imm->Immediate.DataType ); assert( full_imm->Immediate.NrTokens <= 4 + 1 ); @@ -481,13 +559,12 @@ tgsi_build_full_immediate( if( maxsize <= size ) return 0; + data = (union tgsi_immediate_data *) &tokens[size]; - size++; + *data = full_imm->u[i]; - *data = tgsi_build_immediate_float32( - full_imm->u[i].Float, - immediate, - header ); + immediate_grow( immediate, header ); + size++; } return size; diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 635212b7e86..383c54590af 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -271,6 +271,9 @@ iter_declaration( ctx, decl->Declaration.UsageMask ); + if (decl->Declaration.Local) + TXT( ", LOCAL" ); + if (decl->Declaration.Semantic) { TXT( ", " ); ENM( decl->Semantic.Name, tgsi_semantic_names ); @@ -285,53 +288,64 @@ iter_declaration( if (decl->Declaration.File == TGSI_FILE_RESOURCE) { TXT(", "); ENM(decl->Resource.Resource, tgsi_texture_names); + if (decl->Resource.Writable) + TXT(", WR"); + if (decl->Resource.Raw) + TXT(", RAW"); + } + + if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { TXT(", "); - if ((decl->Resource.ReturnTypeX == decl->Resource.ReturnTypeY) && - (decl->Resource.ReturnTypeX == decl->Resource.ReturnTypeZ) && - (decl->Resource.ReturnTypeX == decl->Resource.ReturnTypeW)) { - ENM(decl->Resource.ReturnTypeX, tgsi_type_names); + ENM(decl->SamplerView.Resource, tgsi_texture_names); + TXT(", "); + if ((decl->SamplerView.ReturnTypeX == decl->SamplerView.ReturnTypeY) && + (decl->SamplerView.ReturnTypeX == decl->SamplerView.ReturnTypeZ) && + (decl->SamplerView.ReturnTypeX == decl->SamplerView.ReturnTypeW)) { + ENM(decl->SamplerView.ReturnTypeX, tgsi_type_names); } else { - ENM(decl->Resource.ReturnTypeX, tgsi_type_names); + ENM(decl->SamplerView.ReturnTypeX, tgsi_type_names); TXT(", "); - ENM(decl->Resource.ReturnTypeY, 
tgsi_type_names); + ENM(decl->SamplerView.ReturnTypeY, tgsi_type_names); TXT(", "); - ENM(decl->Resource.ReturnTypeZ, tgsi_type_names); + ENM(decl->SamplerView.ReturnTypeZ, tgsi_type_names); TXT(", "); - ENM(decl->Resource.ReturnTypeW, tgsi_type_names); + ENM(decl->SamplerView.ReturnTypeW, tgsi_type_names); } - } - if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT && - decl->Declaration.File == TGSI_FILE_INPUT) - { - TXT( ", " ); - ENM( decl->Declaration.Interpolate, tgsi_interpolate_names ); - } + if (decl->Declaration.Interpolate) { + if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT && + decl->Declaration.File == TGSI_FILE_INPUT) + { + TXT( ", " ); + ENM( decl->Interp.Interpolate, tgsi_interpolate_names ); + } + + if (decl->Interp.Centroid) { + TXT( ", CENTROID" ); + } - if (decl->Declaration.Centroid) { - TXT( ", CENTROID" ); + if (decl->Interp.CylindricalWrap) { + TXT(", CYLWRAP_"); + if (decl->Interp.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_X) { + CHR('X'); + } + if (decl->Interp.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_Y) { + CHR('Y'); + } + if (decl->Interp.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_Z) { + CHR('Z'); + } + if (decl->Interp.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_W) { + CHR('W'); + } + } } if (decl->Declaration.Invariant) { TXT( ", INVARIANT" ); } - if (decl->Declaration.CylindricalWrap) { - TXT(", CYLWRAP_"); - if (decl->Declaration.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_X) { - CHR('X'); - } - if (decl->Declaration.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_Y) { - CHR('Y'); - } - if (decl->Declaration.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_Z) { - CHR('Z'); - } - if (decl->Declaration.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_W) { - CHR('W'); - } - } if (decl->Declaration.File == TGSI_FILE_IMMEDIATE_ARRAY) { unsigned i; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index c4ad34b1e61..5e23f5da65b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c 
@@ -2121,7 +2121,7 @@ exec_sample(struct tgsi_exec_machine *mach, control = tgsi_sampler_lod_bias; } - switch (mach->Resources[resource_unit].Resource) { + switch (mach->SamplerViews[resource_unit].Resource) { case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: FETCH(&r[0], 0, TGSI_CHAN_X); @@ -2215,7 +2215,7 @@ exec_sample_d(struct tgsi_exec_machine *mach, * XXX: This is fake SAMPLE_D -- the derivatives are not taken into account, yet. */ - switch (mach->Resources[resource_unit].Resource) { + switch (mach->SamplerViews[resource_unit].Resource) { case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: @@ -2338,8 +2338,8 @@ static void exec_declaration(struct tgsi_exec_machine *mach, const struct tgsi_full_declaration *decl) { - if (decl->Declaration.File == TGSI_FILE_RESOURCE) { - mach->Resources[decl->Range.First] = decl->Resource; + if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { + mach->SamplerViews[decl->Range.First] = decl->SamplerView; return; } @@ -2371,7 +2371,7 @@ exec_declaration(struct tgsi_exec_machine *mach, eval_coef_func eval; uint i, j; - switch (decl->Declaration.Interpolate) { + switch (decl->Interp.Interpolate) { case TGSI_INTERPOLATE_CONSTANT: eval = eval_constant_coef; break; @@ -4154,11 +4154,11 @@ exec_instruction( exec_endswitch(mach); break; - case TGSI_OPCODE_LOAD: + case TGSI_OPCODE_SAMPLE_I: assert(0); break; - case TGSI_OPCODE_LOAD_MS: + case TGSI_OPCODE_SAMPLE_I_MS: assert(0); break; @@ -4190,7 +4190,7 @@ exec_instruction( assert(0); break; - case TGSI_OPCODE_RESINFO: + case TGSI_OPCODE_SVIEWINFO: assert(0); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index d9e93ce138d..0ecb4e952bb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -331,7 +331,8 @@ struct tgsi_exec_machine struct tgsi_full_declaration *Declarations; uint NumDeclarations; - struct tgsi_declaration_resource Resources[PIPE_MAX_SHADER_RESOURCES]; + struct 
tgsi_declaration_sampler_view + SamplerViews[PIPE_MAX_SHADER_SAMPLER_VIEWS]; boolean UsedGeometryShader; }; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 81df96b3c7a..8bf9aeb4284 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -183,22 +183,39 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 0, 0, 0, 0, 0, 0, NONE, "DEFAULT", TGSI_OPCODE_DEFAULT }, { 0, 0, 0, 0, 0, 0, NONE, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH }, - { 1, 2, 0, 0, 0, 0, OTHR, "LOAD", TGSI_OPCODE_LOAD }, - { 1, 2, 0, 0, 0, 0, OTHR, "LOAD_MS", TGSI_OPCODE_LOAD_MS }, { 1, 3, 0, 0, 0, 0, OTHR, "SAMPLE", TGSI_OPCODE_SAMPLE }, + { 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_I", TGSI_OPCODE_SAMPLE_I }, + { 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_I_MS", TGSI_OPCODE_SAMPLE_I_MS }, { 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_B", TGSI_OPCODE_SAMPLE_B }, { 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_C", TGSI_OPCODE_SAMPLE_C }, { 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_C_LZ", TGSI_OPCODE_SAMPLE_C_LZ }, { 1, 5, 0, 0, 0, 0, OTHR, "SAMPLE_D", TGSI_OPCODE_SAMPLE_D }, { 1, 3, 0, 0, 0, 0, OTHR, "SAMPLE_L", TGSI_OPCODE_SAMPLE_L }, { 1, 3, 0, 0, 0, 0, OTHR, "GATHER4", TGSI_OPCODE_GATHER4 }, - { 1, 2, 0, 0, 0, 0, OTHR, "RESINFO", TGSI_OPCODE_RESINFO }, + { 1, 2, 0, 0, 0, 0, OTHR, "SVIEWINFO", TGSI_OPCODE_SVIEWINFO }, { 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_POS", TGSI_OPCODE_SAMPLE_POS }, { 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO }, { 1, 1, 0, 0, 0, 0, COMP, "UARL", TGSI_OPCODE_UARL }, { 1, 3, 0, 0, 0, 0, COMP, "UCMP", TGSI_OPCODE_UCMP }, { 1, 1, 0, 0, 0, 0, COMP, "IABS", TGSI_OPCODE_IABS }, { 1, 1, 0, 0, 0, 0, COMP, "ISSG", TGSI_OPCODE_ISSG }, + { 1, 2, 0, 0, 0, 0, OTHR, "LOAD", TGSI_OPCODE_LOAD }, + { 1, 2, 0, 0, 0, 0, OTHR, "STORE", TGSI_OPCODE_STORE }, + { 1, 0, 0, 0, 0, 0, OTHR, "MFENCE", TGSI_OPCODE_MFENCE }, + { 1, 0, 0, 0, 0, 0, OTHR, "LFENCE", TGSI_OPCODE_LFENCE }, + { 1, 0, 0, 0, 0, 0, OTHR, "SFENCE", TGSI_OPCODE_SFENCE 
}, + { 0, 0, 0, 0, 0, 0, OTHR, "BARRIER", TGSI_OPCODE_BARRIER }, + + { 1, 3, 0, 0, 0, 0, OTHR, "ATOMUADD", TGSI_OPCODE_ATOMUADD }, + { 1, 3, 0, 0, 0, 0, OTHR, "ATOMXCHG", TGSI_OPCODE_ATOMXCHG }, + { 1, 4, 0, 0, 0, 0, OTHR, "ATOMCAS", TGSI_OPCODE_ATOMCAS }, + { 1, 3, 0, 0, 0, 0, OTHR, "ATOMAND", TGSI_OPCODE_ATOMAND }, + { 1, 3, 0, 0, 0, 0, OTHR, "ATOMOR", TGSI_OPCODE_ATOMOR }, + { 1, 3, 0, 0, 0, 0, OTHR, "ATOMXOR", TGSI_OPCODE_ATOMXOR }, + { 1, 3, 0, 0, 0, 0, OTHR, "ATOMUMIN", TGSI_OPCODE_ATOMUMIN }, + { 1, 3, 0, 0, 0, 0, OTHR, "ATOMUMAX", TGSI_OPCODE_ATOMUMAX }, + { 1, 3, 0, 0, 0, 0, OTHR, "ATOMIMIN", TGSI_OPCODE_ATOMIMIN }, + { 1, 3, 0, 0, 0, 0, OTHR, "ATOMIMAX", TGSI_OPCODE_ATOMIMAX } }; const struct tgsi_opcode_info * diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index b5d4504425b..96b864f50d0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -168,16 +168,16 @@ OP01(CASE) OP00(DEFAULT) OP00(ENDSWITCH) -OP12(LOAD) -OP12(LOAD_MS) OP13(SAMPLE) +OP12(SAMPLE_I) +OP12(SAMPLE_I_MS) OP14(SAMPLE_B) OP14(SAMPLE_C) OP14(SAMPLE_C_LZ) OP15(SAMPLE_D) OP13(SAMPLE_L) OP13(GATHER4) -OP12(RESINFO) +OP12(SVIEWINFO) OP13(SAMPLE_POS) OP12(SAMPLE_INFO) diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index e1902eb1862..45c5c41ec82 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -113,6 +113,10 @@ tgsi_parse_token( next_token(ctx, &decl->Dim); } + if( decl->Declaration.Interpolate ) { + next_token( ctx, &decl->Interp ); + } + if( decl->Declaration.Semantic ) { next_token( ctx, &decl->Semantic ); } @@ -132,6 +136,10 @@ tgsi_parse_token( next_token(ctx, &decl->Resource); } + if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { + next_token(ctx, &decl->SamplerView); + } + break; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h 
b/src/gallium/auxiliary/tgsi/tgsi_parse.h index f7b7e6edc98..3f8bf99e3c1 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -67,9 +67,11 @@ struct tgsi_full_declaration struct tgsi_declaration Declaration; struct tgsi_declaration_range Range; struct tgsi_declaration_dimension Dim; + struct tgsi_declaration_interp Interp; struct tgsi_declaration_semantic Semantic; struct tgsi_immediate_array_data ImmediateData; struct tgsi_declaration_resource Resource; + struct tgsi_declaration_sampler_view SamplerView; }; struct tgsi_full_immediate diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 1e00e2e54ed..ce728ecee06 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -1170,7 +1170,7 @@ emit_declaration( for( i = first; i <= last; i++ ) { for( j = 0; j < NUM_CHANNELS; j++ ) { if( mask & (1 << j) ) { - switch( decl->Declaration.Interpolate ) { + switch( decl->Interp.Interpolate ) { case TGSI_INTERPOLATE_CONSTANT: emit_coef_a0( func, 0, i, j ); emit_inputs( func, 0, i, j ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index e4e9c032e02..df299baa9c1 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -68,7 +68,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens, procType = parse.FullHeader.Processor.Processor; assert(procType == TGSI_PROCESSOR_FRAGMENT || procType == TGSI_PROCESSOR_VERTEX || - procType == TGSI_PROCESSOR_GEOMETRY); + procType == TGSI_PROCESSOR_GEOMETRY || + procType == TGSI_PROCESSOR_COMPUTE); /** @@ -157,9 +158,9 @@ tgsi_scan_shader(const struct tgsi_token *tokens, if (file == TGSI_FILE_INPUT) { info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name; info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index; - info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate; - 
info->input_centroid[reg] = (ubyte)fulldecl->Declaration.Centroid; - info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Declaration.CylindricalWrap; + info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate; + info->input_centroid[reg] = (ubyte)fulldecl->Interp.Centroid; + info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap; info->num_inputs++; if (procType == TGSI_PROCESSOR_FRAGMENT && diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c index 520452ce1b8..b5fd1fc0a65 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c @@ -32,11 +32,12 @@ #include "tgsi_strings.h" -const char *tgsi_processor_type_names[3] = +const char *tgsi_processor_type_names[4] = { "FRAG", "VERT", - "GEOM" + "GEOM", + "COMP" }; const char *tgsi_file_names[TGSI_FILE_COUNT] = @@ -53,7 +54,8 @@ const char *tgsi_file_names[TGSI_FILE_COUNT] = "SV", "IMMX", "TEMPX", - "RES" + "RES", + "SVIEW" }; const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] = @@ -72,12 +74,16 @@ const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] = "VERTEXID", "STENCIL", "CLIPDIST", - "CLIPVERTEX" + "CLIPVERTEX", + "GRID_SIZE", + "BLOCK_ID", + "BLOCK_SIZE", + "THREAD_ID" }; const char *tgsi_texture_names[TGSI_TEXTURE_COUNT] = { - "UNKNOWN", + "BUFFER", "1D", "2D", "3D", @@ -90,7 +96,8 @@ const char *tgsi_texture_names[TGSI_TEXTURE_COUNT] = "2DARRAY", "SHADOW1DARRAY", "SHADOW2DARRAY", - "SHADOWCUBE" + "SHADOWCUBE", + "UNKNOWN" }; const char *tgsi_property_names[TGSI_PROPERTY_COUNT] = diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.h b/src/gallium/auxiliary/tgsi/tgsi_strings.h index 0946a58f135..5c57e229c28 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_strings.h +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.h @@ -38,7 +38,7 @@ extern "C" { #endif -extern const char *tgsi_processor_type_names[3]; +extern const char *tgsi_processor_type_names[4]; extern const char 
*tgsi_file_names[TGSI_FILE_COUNT]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 4b3d22c3072..279a046e202 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -132,6 +132,23 @@ static boolean parse_uint( const char **pcur, uint *val ) return FALSE; } +static boolean parse_int( const char **pcur, int *val ) +{ + const char *cur = *pcur; + int sign = (*cur == '-' ? -1 : 1); + + if (*cur == '+' || *cur == '-') + cur++; + + if (parse_uint(&cur, (uint *)val)) { + *val *= sign; + *pcur = cur; + return TRUE; + } + + return FALSE; +} + static boolean parse_identifier( const char **pcur, char *ret ) { const char *cur = *pcur; @@ -238,6 +255,8 @@ static boolean parse_header( struct translate_ctx *ctx ) processor = TGSI_PROCESSOR_VERTEX; else if (str_match_no_case( &ctx->cur, "GEOM" )) processor = TGSI_PROCESSOR_GEOMETRY; + else if (str_match_no_case( &ctx->cur, "COMP" )) + processor = TGSI_PROCESSOR_COMPUTE; else { report_error( ctx, "Unknown header" ); return FALSE; @@ -447,24 +466,10 @@ parse_register_bracket( eat_opt_white(&ctx->cur); } - if (*ctx->cur == '+' || *ctx->cur == '-') { - boolean negate; - - negate = *ctx->cur == '-'; - ctx->cur++; - eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, &uindex )) { - report_error( ctx, "Expected literal unsigned integer" ); - return FALSE; - } - if (negate) - brackets->index = -(int) uindex; - else - brackets->index = (int) uindex; - } - else { + if (*ctx->cur == '+' || *ctx->cur == '-') + parse_int( &ctx->cur, &brackets->index ); + else brackets->index = 0; - } } else { if (!parse_uint( &ctx->cur, &uindex )) { @@ -819,6 +824,7 @@ parse_instruction( uint saturate = TGSI_SAT_NONE; const struct tgsi_opcode_info *info; struct tgsi_full_instruction inst; + const char *cur; uint advance; inst = tgsi_default_full_instruction(); @@ -864,7 +870,7 @@ parse_instruction( */ eat_opt_white( &ctx->cur ); for (i = 0; i < TGSI_OPCODE_LAST; 
i++) { - const char *cur = ctx->cur; + cur = ctx->cur; info = tgsi_get_opcode_info( i ); if (match_inst_mnemonic(&cur, info)) { @@ -938,22 +944,20 @@ parse_instruction( } } - if (info->is_branch) { + cur = ctx->cur; + eat_opt_white( &cur ); + if (info->is_branch && *cur == ':') { uint target; - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ':') { - report_error( ctx, "Expected `:'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, &target )) { + cur++; + eat_opt_white( &cur ); + if (!parse_uint( &cur, &target )) { report_error( ctx, "Expected a label" ); return FALSE; } inst.Instruction.Label = 1; inst.Label.Label = target; + ctx->cur = cur; } advance = tgsi_build_full_instruction( @@ -970,10 +974,11 @@ parse_instruction( /* parses a 4-touple of the form {x, y, z, w} * where x, y, z, w are numbers */ -static boolean parse_immediate_data(struct translate_ctx *ctx, - float *values) +static boolean parse_immediate_data(struct translate_ctx *ctx, unsigned type, + union tgsi_immediate_data *values) { unsigned i; + int ret; eat_opt_white( &ctx->cur ); if (*ctx->cur != '{') { @@ -991,8 +996,21 @@ static boolean parse_immediate_data(struct translate_ctx *ctx, ctx->cur++; eat_opt_white( &ctx->cur ); } - if (!parse_float( &ctx->cur, &values[i] )) { - report_error( ctx, "Expected literal floating point" ); + + switch (type) { + case TGSI_IMM_FLOAT32: + ret = parse_float(&ctx->cur, &values[i].Float); + break; + case TGSI_IMM_UINT32: + ret = parse_uint(&ctx->cur, &values[i].Uint); + break; + case TGSI_IMM_INT32: + ret = parse_int(&ctx->cur, &values[i].Int); + break; + } + + if (!ret) { + report_error( ctx, "Expected immediate constant" ); return FALSE; } } @@ -1013,7 +1031,7 @@ static boolean parse_declaration( struct translate_ctx *ctx ) struct parsed_dcl_bracket brackets[2]; int num_brackets; uint writemask; - const char *cur; + const char *cur, *cur2; uint advance; boolean is_vs_input; boolean is_imm_array; @@ -1066,6 +1084,42 
@@ static boolean parse_declaration( struct translate_ctx *ctx ) report_error(ctx, "Expected texture target"); return FALSE; } + + cur2 = cur; + eat_opt_white(&cur2); + while (*cur2 == ',') { + cur2++; + eat_opt_white(&cur2); + if (str_match_no_case(&cur2, "RAW") && + !is_digit_alpha_underscore(cur2)) { + decl.Resource.Raw = 1; + + } else if (str_match_no_case(&cur2, "WR") && + !is_digit_alpha_underscore(cur2)) { + decl.Resource.Writable = 1; + + } else { + break; + } + cur = cur2; + eat_opt_white(&cur2); + } + + ctx->cur = cur; + + } else if (file == TGSI_FILE_SAMPLER_VIEW) { + for (i = 0; i < TGSI_TEXTURE_COUNT; i++) { + if (str_match_no_case(&cur, tgsi_texture_names[i])) { + if (!is_digit_alpha_underscore(cur)) { + decl.SamplerView.Resource = i; + break; + } + } + } + if (i == TGSI_TEXTURE_COUNT) { + report_error(ctx, "Expected texture target"); + return FALSE; + } eat_opt_white( &cur ); if (*cur != ',') { report_error( ctx, "Expected `,'" ); @@ -1079,16 +1133,16 @@ static boolean parse_declaration( struct translate_ctx *ctx ) if (!is_digit_alpha_underscore(cur)) { switch (j) { case 0: - decl.Resource.ReturnTypeX = i; + decl.SamplerView.ReturnTypeX = i; break; case 1: - decl.Resource.ReturnTypeY = i; + decl.SamplerView.ReturnTypeY = i; break; case 2: - decl.Resource.ReturnTypeZ = i; + decl.SamplerView.ReturnTypeZ = i; break; case 3: - decl.Resource.ReturnTypeW = i; + decl.SamplerView.ReturnTypeW = i; break; default: assert(0); @@ -1104,7 +1158,7 @@ static boolean parse_declaration( struct translate_ctx *ctx ) } break; } else { - const char *cur2 = cur; + cur2 = cur; eat_opt_white( &cur2 ); if (*cur2 == ',') { cur2++; @@ -1116,51 +1170,64 @@ static boolean parse_declaration( struct translate_ctx *ctx ) } } if (j < 4) { - decl.Resource.ReturnTypeY = - decl.Resource.ReturnTypeZ = - decl.Resource.ReturnTypeW = - decl.Resource.ReturnTypeX; + decl.SamplerView.ReturnTypeY = + decl.SamplerView.ReturnTypeZ = + decl.SamplerView.ReturnTypeW = + 
decl.SamplerView.ReturnTypeX; } ctx->cur = cur; } else { - for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) { - if (str_match_no_case( &cur, tgsi_semantic_names[i] )) { - const char *cur2 = cur; - uint index; + if (str_match_no_case(&cur, "LOCAL") && + !is_digit_alpha_underscore(cur)) { + decl.Declaration.Local = 1; + ctx->cur = cur; + } - if (is_digit_alpha_underscore( cur )) - continue; - eat_opt_white( &cur2 ); - if (*cur2 == '[') { - cur2++; - eat_opt_white( &cur2 ); - if (!parse_uint( &cur2, &index )) { - report_error( ctx, "Expected literal integer" ); - return FALSE; - } + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == ',') { + cur++; + eat_opt_white( &cur ); + + for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) { + if (str_match_no_case( &cur, tgsi_semantic_names[i] )) { + uint index; + + if (is_digit_alpha_underscore( cur )) + continue; + cur2 = cur; eat_opt_white( &cur2 ); - if (*cur2 != ']') { - report_error( ctx, "Expected `]'" ); - return FALSE; - } - cur2++; + if (*cur2 == '[') { + cur2++; + eat_opt_white( &cur2 ); + if (!parse_uint( &cur2, &index )) { + report_error( ctx, "Expected literal integer" ); + return FALSE; + } + eat_opt_white( &cur2 ); + if (*cur2 != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + cur2++; - decl.Semantic.Index = index; + decl.Semantic.Index = index; - cur = cur2; - } + cur = cur2; + } - decl.Declaration.Semantic = 1; - decl.Semantic.Name = i; + decl.Declaration.Semantic = 1; + decl.Semantic.Name = i; - ctx->cur = cur; - break; + ctx->cur = cur; + break; + } } } } } else if (is_imm_array) { unsigned i; - float *vals_itr; + union tgsi_immediate_data *vals_itr; /* we have our immediate data */ if (*cur != '{') { report_error( ctx, "Immediate array without data" ); @@ -1172,9 +1239,9 @@ static boolean parse_declaration( struct translate_ctx *ctx ) decl.ImmediateData.u = MALLOC(sizeof(union tgsi_immediate_data) * 4 * (decl.Range.Last + 1)); - vals_itr = (float*)decl.ImmediateData.u; + vals_itr = 
decl.ImmediateData.u; for (i = 0; i <= decl.Range.Last; ++i) { - if (!parse_immediate_data(ctx, vals_itr)) { + if (!parse_immediate_data(ctx, TGSI_IMM_FLOAT32, vals_itr)) { FREE(decl.ImmediateData.u); return FALSE; } @@ -1209,7 +1276,8 @@ static boolean parse_declaration( struct translate_ctx *ctx ) if (str_match_no_case( &cur, tgsi_interpolate_names[i] )) { if (is_digit_alpha_underscore( cur )) continue; - decl.Declaration.Interpolate = i; + decl.Declaration.Interpolate = 1; + decl.Interp.Interpolate = i; ctx->cur = cur; break; @@ -1240,28 +1308,27 @@ static boolean parse_declaration( struct translate_ctx *ctx ) static boolean parse_immediate( struct translate_ctx *ctx ) { struct tgsi_full_immediate imm; - float values[4]; uint advance; + int type; if (!eat_white( &ctx->cur )) { report_error( ctx, "Syntax error" ); return FALSE; } - if (!str_match_no_case( &ctx->cur, "FLT32" ) || - is_digit_alpha_underscore( ctx->cur )) { - report_error( ctx, "Expected `FLT32'" ); + for (type = 0; type < Elements(tgsi_immediate_type_names); ++type) { + if (str_match_no_case(&ctx->cur, tgsi_immediate_type_names[type]) && + !is_digit_alpha_underscore(ctx->cur)) + break; + } + if (type == Elements(tgsi_immediate_type_names)) { + report_error( ctx, "Expected immediate type" ); return FALSE; } - parse_immediate_data(ctx, values); - imm = tgsi_default_full_immediate(); imm.Immediate.NrTokens += 4; - imm.Immediate.DataType = TGSI_IMM_FLOAT32; - imm.u[0].Float = values[0]; - imm.u[1].Float = values[1]; - imm.u[2].Float = values[2]; - imm.u[3].Float = values[3]; + imm.Immediate.DataType = type; + parse_immediate_data(ctx, type, imm.u); advance = tgsi_build_full_immediate( &imm, diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 0f9aa3ab43a..e427585db19 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -36,6 +36,7 @@ #include "util/u_debug.h" #include "util/u_memory.h" #include "util/u_math.h" 
+#include "util/u_bitmask.h" union tgsi_any_token { struct tgsi_header header; @@ -46,8 +47,9 @@ union tgsi_any_token { struct tgsi_declaration decl; struct tgsi_declaration_range decl_range; struct tgsi_declaration_dimension decl_dim; + struct tgsi_declaration_interp decl_interp; struct tgsi_declaration_semantic decl_semantic; - struct tgsi_declaration_resource decl_resource; + struct tgsi_declaration_sampler_view decl_sampler_view; struct tgsi_immediate imm; union tgsi_immediate_data imm_data; struct tgsi_instruction insn; @@ -74,7 +76,6 @@ struct ureg_tokens { #define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS #define UREG_MAX_CONSTANT_RANGE 32 #define UREG_MAX_IMMEDIATE 256 -#define UREG_MAX_TEMP 256 #define UREG_MAX_ADDR 2 #define UREG_MAX_PRED 1 @@ -147,10 +148,11 @@ struct ureg_program unsigned return_type_y; unsigned return_type_z; unsigned return_type_w; - } resource[PIPE_MAX_SHADER_RESOURCES]; - unsigned nr_resources; + } sampler_view[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + unsigned nr_sampler_views; - unsigned temps_active[UREG_MAX_TEMP / 32]; + struct util_bitmask *free_temps; + struct util_bitmask *local_temps; unsigned nr_temps; struct const_decl const_decls; @@ -529,43 +531,48 @@ out: return ureg_src_register(TGSI_FILE_CONSTANT, index); } - -/* Allocate a new temporary. Temporaries greater than UREG_MAX_TEMP - * are legal, but will not be released. - */ -struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg ) +static struct ureg_dst alloc_temporary( struct ureg_program *ureg, + boolean local ) { unsigned i; - for (i = 0; i < UREG_MAX_TEMP; i += 32) { - int bit = ffs(~ureg->temps_active[i/32]); - if (bit != 0) { - i += bit - 1; - goto out; - } + /* Look for a released temporary. 
+ */ + for (i = util_bitmask_get_first_index(ureg->free_temps); + i != UTIL_BITMASK_INVALID_INDEX; + i = util_bitmask_get_next_index(ureg->free_temps, i + 1)) { + if (util_bitmask_get(ureg->local_temps, i) == local) + break; } - /* No reusable temps, so allocate a new one: + /* Or allocate a new one. */ - i = ureg->nr_temps++; + if (i == UTIL_BITMASK_INVALID_INDEX) + i = ureg->nr_temps++; -out: - if (i < UREG_MAX_TEMP) - ureg->temps_active[i/32] |= 1 << (i % 32); + util_bitmask_clear(ureg->free_temps, i); - if (i >= ureg->nr_temps) - ureg->nr_temps = i + 1; + if (local) + util_bitmask_set(ureg->local_temps, i); return ureg_dst_register( TGSI_FILE_TEMPORARY, i ); } +struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg ) +{ + return alloc_temporary(ureg, FALSE); +} + +struct ureg_dst ureg_DECL_local_temporary( struct ureg_program *ureg ) +{ + return alloc_temporary(ureg, TRUE); +} void ureg_release_temporary( struct ureg_program *ureg, struct ureg_dst tmp ) { if(tmp.File == TGSI_FILE_TEMPORARY) - if (tmp.Index < UREG_MAX_TEMP) - ureg->temps_active[tmp.Index/32] &= ~(1 << (tmp.Index % 32)); + util_bitmask_set(ureg->free_temps, tmp.Index); } @@ -615,34 +622,34 @@ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, } /* - * Allocate a new shader resource. + * Allocate a new shader sampler view. 
*/ struct ureg_src -ureg_DECL_resource(struct ureg_program *ureg, - unsigned index, - unsigned target, - unsigned return_type_x, - unsigned return_type_y, - unsigned return_type_z, - unsigned return_type_w) +ureg_DECL_sampler_view(struct ureg_program *ureg, + unsigned index, + unsigned target, + unsigned return_type_x, + unsigned return_type_y, + unsigned return_type_z, + unsigned return_type_w) { - struct ureg_src reg = ureg_src_register(TGSI_FILE_RESOURCE, index); + struct ureg_src reg = ureg_src_register(TGSI_FILE_SAMPLER_VIEW, index); uint i; - for (i = 0; i < ureg->nr_resources; i++) { - if (ureg->resource[i].index == index) { + for (i = 0; i < ureg->nr_sampler_views; i++) { + if (ureg->sampler_view[i].index == index) { return reg; } } - if (i < PIPE_MAX_SHADER_RESOURCES) { - ureg->resource[i].index = index; - ureg->resource[i].target = target; - ureg->resource[i].return_type_x = return_type_x; - ureg->resource[i].return_type_y = return_type_y; - ureg->resource[i].return_type_z = return_type_z; - ureg->resource[i].return_type_w = return_type_w; - ureg->nr_resources++; + if (i < PIPE_MAX_SHADER_SAMPLER_VIEWS) { + ureg->sampler_view[i].index = index; + ureg->sampler_view[i].target = target; + ureg->sampler_view[i].return_type_x = return_type_x; + ureg->sampler_view[i].return_type_y = return_type_y; + ureg->sampler_view[i].return_type_z = return_type_z; + ureg->sampler_view[i].return_type_w = return_type_w; + ureg->nr_sampler_views++; return reg; } @@ -891,7 +898,7 @@ ureg_emit_dst( struct ureg_program *ureg, assert(dst.File != TGSI_FILE_CONSTANT); assert(dst.File != TGSI_FILE_INPUT); assert(dst.File != TGSI_FILE_SAMPLER); - assert(dst.File != TGSI_FILE_RESOURCE); + assert(dst.File != TGSI_FILE_SAMPLER_VIEW); assert(dst.File != TGSI_FILE_IMMEDIATE); assert(dst.File < TGSI_FILE_COUNT); @@ -1229,28 +1236,50 @@ emit_decl_fs(struct ureg_program *ureg, unsigned cylindrical_wrap, unsigned centroid) { - union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); + 
union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 4); out[0].value = 0; out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; - out[0].decl.NrTokens = 3; + out[0].decl.NrTokens = 4; out[0].decl.File = file; out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */ - out[0].decl.Interpolate = interpolate; + out[0].decl.Interpolate = 1; out[0].decl.Semantic = 1; - out[0].decl.CylindricalWrap = cylindrical_wrap; - out[0].decl.Centroid = centroid; out[1].value = 0; out[1].decl_range.First = index; out[1].decl_range.Last = index; out[2].value = 0; - out[2].decl_semantic.Name = semantic_name; - out[2].decl_semantic.Index = semantic_index; + out[2].decl_interp.Interpolate = interpolate; + out[2].decl_interp.CylindricalWrap = cylindrical_wrap; + out[2].decl_interp.Centroid = centroid; + + out[3].value = 0; + out[3].decl_semantic.Name = semantic_name; + out[3].decl_semantic.Index = semantic_index; } +static void emit_decl( struct ureg_program *ureg, + unsigned file, + unsigned index, + boolean local ) +{ + union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 ); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 2; + out[0].decl.File = file; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; + out[0].decl.Local = local; + + out[1].value = 0; + out[1].decl_range.First = index; + out[1].decl_range.Last = index; +} + static void emit_decl_range( struct ureg_program *ureg, unsigned file, unsigned first, @@ -1263,7 +1292,6 @@ static void emit_decl_range( struct ureg_program *ureg, out[0].decl.NrTokens = 2; out[0].decl.File = file; out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; - out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT; out[0].decl.Semantic = 0; out[1].value = 0; @@ -1285,7 +1313,6 @@ emit_decl_range2D(struct ureg_program *ureg, out[0].decl.NrTokens = 3; out[0].decl.File = file; out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; - out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT; out[0].decl.Dimension = 1; out[1].value = 
0; @@ -1297,33 +1324,32 @@ emit_decl_range2D(struct ureg_program *ureg, } static void -emit_decl_resource(struct ureg_program *ureg, - unsigned index, - unsigned target, - unsigned return_type_x, - unsigned return_type_y, - unsigned return_type_z, - unsigned return_type_w ) +emit_decl_sampler_view(struct ureg_program *ureg, + unsigned index, + unsigned target, + unsigned return_type_x, + unsigned return_type_y, + unsigned return_type_z, + unsigned return_type_w ) { union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); out[0].value = 0; out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; out[0].decl.NrTokens = 3; - out[0].decl.File = TGSI_FILE_RESOURCE; + out[0].decl.File = TGSI_FILE_SAMPLER_VIEW; out[0].decl.UsageMask = 0xf; - out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT; out[1].value = 0; out[1].decl_range.First = index; out[1].decl_range.Last = index; out[2].value = 0; - out[2].decl_resource.Resource = target; - out[2].decl_resource.ReturnTypeX = return_type_x; - out[2].decl_resource.ReturnTypeY = return_type_y; - out[2].decl_resource.ReturnTypeZ = return_type_z; - out[2].decl_resource.ReturnTypeW = return_type_w; + out[2].decl_sampler_view.Resource = target; + out[2].decl_sampler_view.ReturnTypeX = return_type_x; + out[2].decl_sampler_view.ReturnTypeY = return_type_y; + out[2].decl_sampler_view.ReturnTypeZ = return_type_z; + out[2].decl_sampler_view.ReturnTypeW = return_type_w; } static void @@ -1473,14 +1499,14 @@ static void emit_decls( struct ureg_program *ureg ) ureg->sampler[i].Index, 1 ); } - for (i = 0; i < ureg->nr_resources; i++) { - emit_decl_resource(ureg, - ureg->resource[i].index, - ureg->resource[i].target, - ureg->resource[i].return_type_x, - ureg->resource[i].return_type_y, - ureg->resource[i].return_type_z, - ureg->resource[i].return_type_w); + for (i = 0; i < ureg->nr_sampler_views; i++) { + emit_decl_sampler_view(ureg, + ureg->sampler_view[i].index, + ureg->sampler_view[i].target, + ureg->sampler_view[i].return_type_x, + 
ureg->sampler_view[i].return_type_y, + ureg->sampler_view[i].return_type_z, + ureg->sampler_view[i].return_type_w); } if (ureg->const_decls.nr_constant_ranges) { @@ -1508,10 +1534,9 @@ static void emit_decls( struct ureg_program *ureg ) } } - if (ureg->nr_temps) { - emit_decl_range( ureg, - TGSI_FILE_TEMPORARY, - 0, ureg->nr_temps ); + for (i = 0; i < ureg->nr_temps; i++) { + emit_decl( ureg, TGSI_FILE_TEMPORARY, i, + util_bitmask_get(ureg->local_temps, i) ); } if (ureg->nr_addrs) { @@ -1668,7 +1693,21 @@ struct ureg_program *ureg_create( unsigned processor ) ureg->property_gs_input_prim = ~0; ureg->property_gs_output_prim = ~0; ureg->property_gs_max_vertices = ~0; + + ureg->free_temps = util_bitmask_create(); + if (ureg->free_temps == NULL) + goto fail; + + ureg->local_temps = util_bitmask_create(); + if (ureg->local_temps == NULL) + goto fail; + return ureg; + +fail: + FREE(ureg->free_temps); + FREE(ureg); + return NULL; } @@ -1681,6 +1720,9 @@ void ureg_destroy( struct ureg_program *ureg ) ureg->domain[i].tokens != error_tokens) FREE(ureg->domain[i].tokens); } - + + util_bitmask_destroy(ureg->free_temps); + util_bitmask_destroy(ureg->local_temps); + FREE(ureg); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 07ab8cba0ba..e6131f25aae 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -272,6 +272,14 @@ ureg_DECL_constant( struct ureg_program *, struct ureg_dst ureg_DECL_temporary( struct ureg_program * ); +/** + * Emit a temporary with the LOCAL declaration flag set. For use when + * the register value is not required to be preserved across + * subroutine boundaries. 
+ */ +struct ureg_dst +ureg_DECL_local_temporary( struct ureg_program * ); + void ureg_release_temporary( struct ureg_program *ureg, struct ureg_dst tmp ); @@ -292,13 +300,13 @@ ureg_DECL_sampler( struct ureg_program *, unsigned index ); struct ureg_src -ureg_DECL_resource(struct ureg_program *, - unsigned index, - unsigned target, - unsigned return_type_x, - unsigned return_type_y, - unsigned return_type_z, - unsigned return_type_w ); +ureg_DECL_sampler_view(struct ureg_program *, + unsigned index, + unsigned target, + unsigned return_type_x, + unsigned return_type_y, + unsigned return_type_z, + unsigned return_type_w ); static INLINE struct ureg_src diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index bd4e51d279f..1718fb5e240 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -549,6 +549,19 @@ util_format_colormask(const struct util_format_description *desc) } +/** + * Checks if color mask covers every channel for the specified format + * + * @param desc a format description to check colormask with + * @param colormask a bit mask for channels, matches format of PIPE_MASK_RGBA + */ +static INLINE boolean +util_format_colormask_full(const struct util_format_description *desc, unsigned colormask) +{ + return (~colormask & util_format_colormask(desc)) == 0; +} + + boolean util_format_is_float(enum pipe_format format); diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c index ac0df8c1a9c..3a91b1da138 100644 --- a/src/gallium/auxiliary/util/u_pstipple.c +++ b/src/gallium/auxiliary/util/u_pstipple.c @@ -298,12 +298,13 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, /* declare new position input reg */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Interpolate = TGSI_INTERPOLATE_LINEAR; + decl.Declaration.Interpolate = 1; decl.Declaration.Semantic = 1; decl.Semantic.Name = 
TGSI_SEMANTIC_POSITION; decl.Semantic.Index = 0; decl.Range.First = decl.Range.Last = wincoordInput; + decl.Interp.Interpolate = TGSI_INTERPOLATE_LINEAR; ctx->emit_declaration(ctx, &decl); } diff --git a/src/gallium/auxiliary/util/u_string.h b/src/gallium/auxiliary/util/u_string.h index cc7992d7391..ed15981f1a5 100644 --- a/src/gallium/auxiliary/util/u_string.h +++ b/src/gallium/auxiliary/util/u_string.h @@ -48,6 +48,21 @@ extern "C" { #endif +#ifdef _GNU_SOURCE + +#define util_strchrnul strchrnul + +#else + +static INLINE char * +util_strchrnul(const char *s, char c) +{ + for (; *s && *s != c; ++s); + + return (char *)s; +} + +#endif #ifdef WIN32 @@ -72,12 +87,9 @@ util_sprintf(char *str, const char *format, ...) static INLINE char * util_strchr(const char *s, char c) { - while(*s) { - if(*s == c) - return (char *)s; - ++s; - } - return NULL; + char *p = util_strchrnul(s, c); + + return *p ? p : NULL; } static INLINE char* diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index b2872cd282f..d17ea4289da 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -120,6 +120,23 @@ to the array index which is used for sampling. * ``sampler_view_destroy`` destroys a sampler view and releases its reference to associated texture. +Shader Resources +^^^^^^^^^^^^^^^^ + +Shader resources are textures or buffers that may be read or written +from a shader without an associated sampler. This means that they +have no support for floating point coordinates, address wrap modes or +filtering. + +Shader resources are specified for all the shader stages at once using +the ``set_shader_resources`` method. When binding texture resources, +the ``level``, ``first_layer`` and ``last_layer`` pipe_surface fields +specify the mipmap level and the range of layers the texture will be +constrained to. 
In the case of buffers, ``first_element`` and +``last_element`` specify the range within the buffer that will be used +by the shader resource. Writes to a shader resource are only allowed +when the ``writable`` flag is set. + Surfaces ^^^^^^^^ @@ -542,3 +559,44 @@ These flags control the behavior of a transfer object. ``PIPE_TRANSFER_FLUSH_EXPLICIT`` Written ranges will be notified later with :ref:`transfer_flush_region`. Cannot be used with ``PIPE_TRANSFER_READ``. + + +Compute kernel execution +^^^^^^^^^^^^^^^^^^^^^^^^ + +A compute program can be defined, bound or destroyed using +``create_compute_state``, ``bind_compute_state`` or +``destroy_compute_state`` respectively. + +Any of the subroutines contained within the compute program can be +executed on the device using the ``launch_grid`` method. This method +will execute as many instances of the program as elements in the +specified N-dimensional grid, hopefully in parallel. + +The compute program has access to four special resources: + +* ``GLOBAL`` represents a memory space shared among all the threads + running on the device. An arbitrary buffer created with the + ``PIPE_BIND_GLOBAL`` flag can be mapped into it using the + ``set_global_binding`` method. + +* ``LOCAL`` represents a memory space shared among all the threads + running in the same working group. The initial contents of this + resource are undefined. + +* ``PRIVATE`` represents a memory space local to a single thread. + The initial contents of this resource are undefined. + +* ``INPUT`` represents a read-only memory space that can be + initialized at ``launch_grid`` time. + +These resources use a byte-based addressing scheme, and they can be +accessed from the compute program by means of the LOAD/STORE TGSI +opcodes. Additional resources to be accessed using the same opcodes +may be specified by the user with the ``set_compute_resources`` +method. 
+ +In addition, normal texture sampling is allowed from the compute +program: ``bind_compute_sampler_states`` may be used to set up texture +samplers for the compute stage and ``set_compute_sampler_views`` may +be used to bind a number of sampler views to it. diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index f6c6f3fd119..ff63ce83bea 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -110,6 +110,8 @@ The integer capabilities: * ``PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY``: This CAP describes a hw limitation. If true, pipe_vertex_element::src_offset must always be aligned to 4. If false, there are no restrictions on src_offset. +* ``PIPE_CAP_COMPUTE``: Whether the implementation supports the + compute entry points defined in pipe_context and pipe_screen. * ``PIPE_CAP_USER_INDEX_BUFFERS``: Whether user index buffers are supported. If not, the state tracker must upload all indices which are not in hw resources. @@ -192,8 +194,33 @@ to be 0. If unsupported, only float opcodes are supported. * ``PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS``: THe maximum number of texture samplers. +* ``PIPE_SHADER_CAP_PREFERRED_IR``: Preferred representation of the + program. It should be one of the ``pipe_shader_ir`` enum values. +.. _pipe_compute_cap: + +PIPE_COMPUTE_CAP_* +^^^^^^^^^^^^^^^^^^ + +Compute-specific capabilities. They can be queried using +pipe_screen::get_compute_param. + +* ``PIPE_COMPUTE_CAP_GRID_DIMENSION``: Number of supported dimensions + for grid and block coordinates. Value type: ``uint64_t``. +* ``PIPE_COMPUTE_CAP_MAX_GRID_SIZE``: Maximum grid size in block + units. Value type: ``uint64_t []``. +* ``PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE``: Maximum block size in thread + units. Value type: ``uint64_t []``. +* ``PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE``: Maximum size of the GLOBAL + resource. Value type: ``uint64_t``. 
+* ``PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE``: Maximum size of the LOCAL + resource. Value type: ``uint64_t``. +* ``PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE``: Maximum size of the PRIVATE + resource. Value type: ``uint64_t``. +* ``PIPE_COMPUTE_CAP_MAX_INPUT_SIZE``: Maximum size of the INPUT + resource. Value type: ``uint64_t``. + .. _pipe_bind: PIPE_BIND_* @@ -231,6 +258,12 @@ resources might be created and handled quite differently. * ``PIPE_BIND_SCANOUT``: A front color buffer or scanout buffer. * ``PIPE_BIND_SHARED``: A sharable buffer that can be given to another process. +* ``PIPE_BIND_GLOBAL``: A buffer that can be mapped into the global + address space of a compute program. +* ``PIPE_BIND_SHADER_RESOURCE``: A buffer or texture that can be + bound to the graphics pipeline as a shader resource. +* ``PIPE_BIND_COMPUTE_RESOURCE``: A buffer or texture that can be + bound to the compute program as a shader resource. .. _pipe_usage: diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 48e68968346..548a9a39855 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -1312,28 +1312,36 @@ This opcode is the inverse of :opcode:`DFRACEXP`. dst.zw = \sqrt{src.zw} -.. _resourceopcodes: +.. _samplingopcodes: -Resource Access Opcodes -^^^^^^^^^^^^^^^^^^^^^^^^ +Resource Sampling Opcodes +^^^^^^^^^^^^^^^^^^^^^^^^^ Those opcodes follow very closely semantics of the respective Direct3D instructions. If in doubt double check Direct3D documentation. -.. opcode:: LOAD - Simplified alternative to the "SAMPLE" instruction. - Using the provided integer address, LOAD fetches data - from the specified buffer/texture without any filtering. +.. opcode:: SAMPLE - Using provided address, sample data from the + specified texture using the filtering mode identified + by the gven sampler. The source data may come from + any resource type other than buffers. + SAMPLE dst, address, sampler_view, sampler + e.g. 
+ SAMPLE TEMP[0], TEMP[1], SVIEW[0], SAMP[0] + +.. opcode:: SAMPLE_I - Simplified alternative to the SAMPLE instruction. + Using the provided integer address, SAMPLE_I fetches data + from the specified sampler view without any filtering. The source data may come from any resource type other than CUBE. - LOAD dst, address, resource + SAMPLE_I dst, address, sampler_view e.g. - LOAD TEMP[0], TEMP[1], RES[0] + SAMPLE_I TEMP[0], TEMP[1], SVIEW[0] The 'address' is specified as unsigned integers. If the 'address' is out of range [0...(# texels - 1)] the result of the fetch is always 0 in all components. As such the instruction doesn't honor address wrap modes, in cases where that behavior is desirable - 'sample' instruction should be used. + 'SAMPLE' instruction should be used. address.w always provides an unsigned integer mipmap level. If the value is out of the range then the instruction always returns 0 in all components. @@ -1348,7 +1356,7 @@ instructions. If in doubt double check Direct3D documentation. For 2D texture arrays address.z provides the array index, otherwise it exhibits the same behavior as in the case for 1D texture arrays. - The exeact semantics of the source address are presented + The exact semantics of the source address are presented in the table below: resource type X Y Z W ------------- ------------------------ @@ -1364,25 +1372,16 @@ instructions. If in doubt double check Direct3D documentation. Where 'mpl' is a mipmap level and 'idx' is the array index. - -.. opcode:: LOAD_MS - Just like LOAD but allows fetch data from +.. opcode:: SAMPLE_I_MS - Just like SAMPLE_I but allows fetch data from multi-sampled surfaces. -.. opcode:: SAMPLE - Using provided address, sample data from the - specified texture using the filtering mode identified - by the gven sampler. The source data may come from - any resource type other than buffers. - SAMPLE dst, address, resource, sampler - e.g. - SAMPLE TEMP[0], TEMP[1], RES[0], SAMP[0] - .. 
opcode:: SAMPLE_B - Just like the SAMPLE instruction with the exception that an additiona bias is applied to the level of detail computed as part of the instruction execution. - SAMPLE_B dst, address, resource, sampler, lod_bias + SAMPLE_B dst, address, sampler_view, sampler, lod_bias e.g. - SAMPLE_B TEMP[0], TEMP[1], RES[0], SAMP[0], TEMP[2].x + SAMPLE_B TEMP[0], TEMP[1], SVIEW[0], SAMP[0], TEMP[2].x .. opcode:: SAMPLE_C - Similar to the SAMPLE instruction but it performs a comparison filter. The operands to SAMPLE_C @@ -1394,33 +1393,32 @@ instructions. If in doubt double check Direct3D documentation. reference value against the red component value for the surce resource at each texel that the currently configured texture filter covers based on the provided coordinates. - SAMPLE_C dst, address, resource.r, sampler, ref_value + SAMPLE_C dst, address, sampler_view.r, sampler, ref_value e.g. - SAMPLE_C TEMP[0], TEMP[1], RES[0].r, SAMP[0], TEMP[2].x + SAMPLE_C TEMP[0], TEMP[1], SVIEW[0].r, SAMP[0], TEMP[2].x .. opcode:: SAMPLE_C_LZ - Same as SAMPLE_C, but LOD is 0 and derivatives are ignored. The LZ stands for level-zero. - SAMPLE_C_LZ dst, address, resource.r, sampler, ref_value + SAMPLE_C_LZ dst, address, sampler_view.r, sampler, ref_value e.g. - SAMPLE_C_LZ TEMP[0], TEMP[1], RES[0].r, SAMP[0], TEMP[2].x + SAMPLE_C_LZ TEMP[0], TEMP[1], SVIEW[0].r, SAMP[0], TEMP[2].x .. opcode:: SAMPLE_D - SAMPLE_D is identical to the SAMPLE opcode except that the derivatives for the source address in the x direction and the y direction are provided by extra parameters. - SAMPLE_D dst, address, resource, sampler, der_x, der_y + SAMPLE_D dst, address, sampler_view, sampler, der_x, der_y e.g. - SAMPLE_D TEMP[0], TEMP[1], RES[0], SAMP[0], TEMP[2], TEMP[3] + SAMPLE_D TEMP[0], TEMP[1], SVIEW[0], SAMP[0], TEMP[2], TEMP[3] .. opcode:: SAMPLE_L - SAMPLE_L is identical to the SAMPLE opcode except that the LOD is provided directly as a scalar value, representing no anisotropy. 
Source addresses A channel is used as the LOD. - SAMPLE_L dst, address, resource, sampler + SAMPLE_L dst, address, sampler_view, sampler e.g. - SAMPLE_L TEMP[0], TEMP[1], RES[0], SAMP[0] - + SAMPLE_L TEMP[0], TEMP[1], SVIEW[0], SAMP[0] .. opcode:: GATHER4 - Gathers the four texels to be used in a bi-linear filtering operation and packs them into a single register. @@ -1435,18 +1433,18 @@ instructions. If in doubt double check Direct3D documentation. the magnitude of the deltas are half a texel. -.. opcode:: RESINFO - query the dimensions of a given input buffer. +.. opcode:: SVIEWINFO - query the dimensions of a given sampler view. dst receives width, height, depth or array size and number of mipmap levels. The dst can have a writemask which will specify what info is the caller interested in. - RESINFO dst, src_mip_level, resource + SVIEWINFO dst, src_mip_level, sampler_view e.g. - RESINFO TEMP[0], TEMP[1].x, RES[0] + SVIEWINFO TEMP[0], TEMP[1].x, SVIEW[0] src_mip_level is an unsigned integer scalar. If it's out of range then returns 0 for width, height and depth/array size but the total number of mipmap is - still returned correctly for the given resource. + still returned correctly for the given sampler view. The returned width, height and depth values are for the mipmap level selected by the src_mip_level and are in the number of texels. @@ -1463,6 +1461,272 @@ instructions. If in doubt double check Direct3D documentation. not a render target, the result is 0. +.. _resourceopcodes: + +Resource Access Opcodes +^^^^^^^^^^^^^^^^^^^^^^^ + +.. opcode:: LOAD - Fetch data from a shader resource + + Syntax: ``LOAD dst, resource, address`` + + Example: ``LOAD TEMP[0], RES[0], TEMP[1]`` + + Using the provided integer address, LOAD fetches data + from the specified buffer or texture without any + filtering. + + The 'address' is specified as a vector of unsigned + integers. If the 'address' is out of range the result + is unspecified. 
+ + Only the first mipmap level of a resource can be read + from using this instruction. + + For 1D or 2D texture arrays, the array index is + provided as an unsigned integer in address.y or + address.z, respectively. address.yz are ignored for + buffers and 1D textures. address.z is ignored for 1D + texture arrays and 2D textures. address.w is always + ignored. + +.. opcode:: STORE - Write data to a shader resource + + Syntax: ``STORE resource, address, src`` + + Example: ``STORE RES[0], TEMP[0], TEMP[1]`` + + Using the provided integer address, STORE writes data + to the specified buffer or texture. + + The 'address' is specified as a vector of unsigned + integers. If the 'address' is out of range the result + is unspecified. + + Only the first mipmap level of a resource can be + written to using this instruction. + + For 1D or 2D texture arrays, the array index is + provided as an unsigned integer in address.y or + address.z, respectively. address.yz are ignored for + buffers and 1D textures. address.z is ignored for 1D + texture arrays and 2D textures. address.w is always + ignored. + + +.. _threadsyncopcodes: + +Inter-thread synchronization opcodes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +These opcodes are intended for communication between threads running +within the same compute grid. For now they're only valid in compute +programs. + +.. opcode:: MFENCE - Memory fence + + Syntax: ``MFENCE resource`` + + Example: ``MFENCE RES[0]`` + + This opcode forces strong ordering between any memory access + operations that affect the specified resource. This means that + previous loads and stores (and only those) will be performed and + visible to other threads before the program execution continues. + + +.. opcode:: LFENCE - Load memory fence + + Syntax: ``LFENCE resource`` + + Example: ``LFENCE RES[0]`` + + Similar to MFENCE, but it only affects the ordering of memory loads. + + +.. 
opcode:: SFENCE - Store memory fence + + Syntax: ``SFENCE resource`` + + Example: ``SFENCE RES[0]`` + + Similar to MFENCE, but it only affects the ordering of memory stores. + + +.. opcode:: BARRIER - Thread group barrier + + ``BARRIER`` + + This opcode suspends the execution of the current thread until all + the remaining threads in the working group reach the same point of + the program. Results are unspecified if any of the remaining + threads terminates or never reaches an executed BARRIER instruction. + + +.. _atomopcodes: + +Atomic opcodes +^^^^^^^^^^^^^^ + +These opcodes provide atomic variants of some common arithmetic and +logical operations. In this context atomicity means that another +concurrent memory access operation that affects the same memory +location is guaranteed to be performed strictly before or after the +entire execution of the atomic operation. + +For the moment they're only valid in compute programs. + +.. opcode:: ATOMUADD - Atomic integer addition + + Syntax: ``ATOMUADD dst, resource, offset, src`` + + Example: ``ATOMUADD TEMP[0], RES[0], TEMP[1], TEMP[2]`` + + The following operation is performed atomically on each component: + +.. math:: + + dst_i = resource[offset]_i + + resource[offset]_i = dst_i + src_i + + +.. opcode:: ATOMXCHG - Atomic exchange + + Syntax: ``ATOMXCHG dst, resource, offset, src`` + + Example: ``ATOMXCHG TEMP[0], RES[0], TEMP[1], TEMP[2]`` + + The following operation is performed atomically on each component: + +.. math:: + + dst_i = resource[offset]_i + + resource[offset]_i = src_i + + +.. opcode:: ATOMCAS - Atomic compare-and-exchange + + Syntax: ``ATOMCAS dst, resource, offset, cmp, src`` + + Example: ``ATOMCAS TEMP[0], RES[0], TEMP[1], TEMP[2], TEMP[3]`` + + The following operation is performed atomically on each component: + +.. math:: + + dst_i = resource[offset]_i + + resource[offset]_i = (dst_i == cmp_i ? src_i : dst_i) + + +.. 
opcode:: ATOMAND - Atomic bitwise And + + Syntax: ``ATOMAND dst, resource, offset, src`` + + Example: ``ATOMAND TEMP[0], RES[0], TEMP[1], TEMP[2]`` + + The following operation is performed atomically on each component: + +.. math:: + + dst_i = resource[offset]_i + + resource[offset]_i = dst_i \& src_i + + +.. opcode:: ATOMOR - Atomic bitwise Or + + Syntax: ``ATOMOR dst, resource, offset, src`` + + Example: ``ATOMOR TEMP[0], RES[0], TEMP[1], TEMP[2]`` + + The following operation is performed atomically on each component: + +.. math:: + + dst_i = resource[offset]_i + + resource[offset]_i = dst_i | src_i + + +.. opcode:: ATOMXOR - Atomic bitwise Xor + + Syntax: ``ATOMXOR dst, resource, offset, src`` + + Example: ``ATOMXOR TEMP[0], RES[0], TEMP[1], TEMP[2]`` + + The following operation is performed atomically on each component: + +.. math:: + + dst_i = resource[offset]_i + + resource[offset]_i = dst_i \oplus src_i + + +.. opcode:: ATOMUMIN - Atomic unsigned minimum + + Syntax: ``ATOMUMIN dst, resource, offset, src`` + + Example: ``ATOMUMIN TEMP[0], RES[0], TEMP[1], TEMP[2]`` + + The following operation is performed atomically on each component: + +.. math:: + + dst_i = resource[offset]_i + + resource[offset]_i = (dst_i < src_i ? dst_i : src_i) + + +.. opcode:: ATOMUMAX - Atomic unsigned maximum + + Syntax: ``ATOMUMAX dst, resource, offset, src`` + + Example: ``ATOMUMAX TEMP[0], RES[0], TEMP[1], TEMP[2]`` + + The following operation is performed atomically on each component: + +.. math:: + + dst_i = resource[offset]_i + + resource[offset]_i = (dst_i > src_i ? dst_i : src_i) + + +.. opcode:: ATOMIMIN - Atomic signed minimum + + Syntax: ``ATOMIMIN dst, resource, offset, src`` + + Example: ``ATOMIMIN TEMP[0], RES[0], TEMP[1], TEMP[2]`` + + The following operation is performed atomically on each component: + +.. math:: + + dst_i = resource[offset]_i + + resource[offset]_i = (dst_i < src_i ? dst_i : src_i) + + +.. 
opcode:: ATOMIMAX - Atomic signed maximum + + Syntax: ``ATOMIMAX dst, resource, offset, src`` + + Example: ``ATOMIMAX TEMP[0], RES[0], TEMP[1], TEMP[2]`` + + The following operation is performed atomically on each component: + +.. math:: + + dst_i = resource[offset]_i + + resource[offset]_i = (dst_i > src_i ? dst_i : src_i) + + + Explanation of symbols used ------------------------------ @@ -1531,19 +1795,17 @@ of TGSI_FILE. UsageMask field specifies which of the register components can be accessed and is one of TGSI_WRITEMASK. -Interpolate field is only valid for fragment shader INPUT register files. -It specifes the way input is being interpolated by the rasteriser and is one -of TGSI_INTERPOLATE. +The Local flag specifies that a given value isn't intended for +subroutine parameter passing and, as a result, the implementation +isn't required to give any guarantees of it being preserved across +subroutine boundaries. As it's merely a compiler hint, the +implementation is free to ignore it. If Dimension flag is set to 1, a Declaration Dimension token follows. If Semantic flag is set to 1, a Declaration Semantic token follows. -CylindricalWrap bitfield is only valid for fragment shader INPUT register -files. It specifies which register components should be subject to cylindrical -wrapping when interpolating by the rasteriser. If TGSI_CYLINDRICAL_WRAP_X -is set to 1, the X component should be interpolated according to cylindrical -wrapping rules. +If Interpolate flag is set to 1, a Declaration Interpolate token follows. If file is TGSI_FILE_RESOURCE, a Declaration Resource token follows. @@ -1690,12 +1952,42 @@ is a writable stencil reference value. Only the Y component is writable. This allows the fragment shader to change the fragments stencilref value. -Declaration Resource +Declaration Interpolate +^^^^^^^^^^^^^^^^^^^^^^^ + +This token is only valid for fragment shader INPUT declarations. 
+ +The Interpolate field specifies the way input is being interpolated by +the rasteriser and is one of TGSI_INTERPOLATE_*. + +The CylindricalWrap bitfield specifies which register components +should be subject to cylindrical wrapping when interpolating by the +rasteriser. If TGSI_CYLINDRICAL_WRAP_X is set to 1, the X component +should be interpolated according to cylindrical wrapping rules. + + +Declaration Sampler View ^^^^^^^^^^^^^^^^^^^^^^^^ + Follows Declaration token if file is TGSI_FILE_SAMPLER_VIEW. + + DCL SVIEW[#], resource, type(s) + + Declares a shader input sampler view and assigns it to a SVIEW[#] + register. + + resource can be one of BUFFER, 1D, 2D, 3D, 1DArray and 2DArray. + + type must be 1 or 4 entries (if specifying on a per-component + level) out of UNORM, SNORM, SINT, UINT and FLOAT. + + +Declaration Resource +^^^^^^^^^^^^^^^^^^^^ + Follows Declaration token if file is TGSI_FILE_RESOURCE. - DCL RES[#], resource, type(s) + DCL RES[#], resource [, WR] [, RAW] Declares a shader input resource and assigns it to a RES[#] register. @@ -1703,8 +1995,21 @@ Declaration Resource resource can be one of BUFFER, 1D, 2D, 3D, CUBE, 1DArray and 2DArray. - type must be 1 or 4 entries (if specifying on a per-component - level) out of UNORM, SNORM, SINT, UINT and FLOAT. + If the RAW keyword is not specified, the texture data will be + subject to conversion, swizzling and scaling as required to yield + the specified data type from the physical data format of the bound + resource. + + If the RAW keyword is specified, no channel conversion will be + performed: the values read for each of the channels (X,Y,Z,W) will + correspond to consecutive words in the same order and format + they're found in memory. No element-to-address conversion will be + performed either: the value of the provided X coordinate will be + interpreted in byte units instead of texel units. The result of + accessing a misaligned address is undefined.
+ + Usage of the STORE opcode is only allowed if the WR (writable) flag + is set. Properties diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 94c0c69fcae..def9a03d377 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -512,6 +512,22 @@ i915_translate_instruction(struct i915_fp_compile *p, emit_simple_arith(p, inst, A0_ADD, 2, fs); break; + case TGSI_OPCODE_CEIL: + src0 = src_vector(p, &inst->Src[0], fs); + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + i915_emit_arith(p, + A0_FLR, + tmp, + flags & A0_DEST_CHANNEL_ALL, 0, + negate(src0, 1, 1, 1, 1), 0, 0); + i915_emit_arith(p, + A0_MOV, + get_result_vector(p, &inst->Dst[0]), + flags, 0, + negate(tmp, 1, 1, 1, 1), 0, 0); + break; + case TGSI_OPCODE_CMP: src0 = src_vector(p, &inst->Src[0], fs); src1 = src_vector(p, &inst->Src[1], fs); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h index f82ae30bb7d..c0c95a27129 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h @@ -64,12 +64,14 @@ lp_build_blend_func(struct lp_build_context *bld, LLVMValueRef lp_build_blend_aos(struct gallivm_state *gallivm, const struct pipe_blend_state *blend, + const enum pipe_format *cbuf_format, struct lp_type type, unsigned rt, LLVMValueRef src, LLVMValueRef dst, + LLVMValueRef mask, LLVMValueRef const_, - unsigned alpha_swizzle); + const unsigned char swizzle[4]); void diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index c342346a36e..59d5f545966 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -45,12 +45,14 @@ #include "pipe/p_state.h" #include "util/u_debug.h" +#include "util/u_format.h" #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_const.h" #include 
"gallivm/lp_bld_arit.h" #include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_swizzle.h" +#include "gallivm/lp_bld_bitarit.h" #include "gallivm/lp_bld_debug.h" #include "lp_bld_blend.h" @@ -300,25 +302,39 @@ lp_build_blend_func(struct lp_build_context *bld, } +/** + * Performs blending of src and dst pixels + * + * @param blend the blend state of the shader variant + * @param cbuf_format format of the colour buffer + * @param type data type of the pixel vector + * @param rt rt number + * @param src blend src + * @param dst blend dst + * @param mask optional mask to apply to the blending result + * @param const_ const blend color + * @param swizzle swizzle values for RGBA + * + * @return the result of blending src and dst + */ LLVMValueRef lp_build_blend_aos(struct gallivm_state *gallivm, const struct pipe_blend_state *blend, + const enum pipe_format *cbuf_format, struct lp_type type, unsigned rt, LLVMValueRef src, LLVMValueRef dst, + LLVMValueRef mask, LLVMValueRef const_, - unsigned alpha_swizzle) + const unsigned char swizzle[4]) { struct lp_build_blend_aos_context bld; LLVMValueRef src_term; LLVMValueRef dst_term; - - /* FIXME: color masking not implemented yet */ - assert(blend->rt[rt].colormask == 0xf); - - if(!blend->rt[rt].blend_enable) - return src; + LLVMValueRef result; + unsigned alpha_swizzle = swizzle[3]; + boolean fullcolormask; /* Setup build context */ memset(&bld, 0, sizeof bld); @@ -327,30 +343,59 @@ lp_build_blend_aos(struct gallivm_state *gallivm, bld.dst = dst; bld.const_ = const_; - /* TODO: There are still a few optimization opportunities here. For certain - * combinations it is possible to reorder the operations and therefore saving - * some instructions. */ + if (!blend->rt[rt].blend_enable) { + result = src; + } else { + + /* TODO: There are still a few optimization opportunities here. For certain + * combinations it is possible to reorder the operations and therefore saving + * some instructions. 
*/ + + src_term = lp_build_blend_factor(&bld, src, blend->rt[rt].rgb_src_factor, + blend->rt[rt].alpha_src_factor, alpha_swizzle); + dst_term = lp_build_blend_factor(&bld, dst, blend->rt[rt].rgb_dst_factor, + blend->rt[rt].alpha_dst_factor, alpha_swizzle); + + lp_build_name(src_term, "src_term"); + lp_build_name(dst_term, "dst_term"); - src_term = lp_build_blend_factor(&bld, src, blend->rt[rt].rgb_src_factor, - blend->rt[rt].alpha_src_factor, alpha_swizzle); - dst_term = lp_build_blend_factor(&bld, dst, blend->rt[rt].rgb_dst_factor, - blend->rt[rt].alpha_dst_factor, alpha_swizzle); + if(blend->rt[rt].rgb_func == blend->rt[rt].alpha_func) { + result = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term); + } + else { + /* Seperate RGB / A functions */ + + LLVMValueRef rgb; + LLVMValueRef alpha; - lp_build_name(src_term, "src_term"); - lp_build_name(dst_term, "dst_term"); + rgb = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term); + alpha = lp_build_blend_func(&bld.base, blend->rt[rt].alpha_func, src_term, dst_term); - if(blend->rt[rt].rgb_func == blend->rt[rt].alpha_func) { - return lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term); + result = lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); + } } - else { - /* Seperate RGB / A functions */ - LLVMValueRef rgb; - LLVMValueRef alpha; + /* Check if color mask is necessary */ + fullcolormask = util_format_colormask_full(util_format_description(cbuf_format[rt]), blend->rt[rt].colormask); + + if (!fullcolormask) { + LLVMValueRef color_mask; - rgb = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term); - alpha = lp_build_blend_func(&bld.base, blend->rt[rt].alpha_func, src_term, dst_term); + color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, blend->rt[rt].colormask, swizzle); + lp_build_name(color_mask, "color_mask"); - return lp_build_blend_swizzle(&bld, rgb, alpha, 
LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); + /* Combine with input mask if necessary */ + if (mask) { + mask = lp_build_and(&bld.base, color_mask, mask); + } else { + mask = color_mask; + } + } + + /* Apply mask, if one exists */ + if (mask) { + result = lp_build_select(&bld.base, mask, result, dst); } + + return result; } diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 71d0ddf5e75..230b80a945f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -290,6 +290,10 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); rej4 = _mm_slli_epi32(rej4, 2); + /* Adjust so we can just check the sign bit (< 0 comparison), instead of having to do a less efficient <= 0 comparison */ + c = _mm_sub_epi32(c, _mm_set1_epi32(1)); + rej4 = _mm_add_epi32(rej4, _mm_set1_epi32(1)); + dcdx2 = _mm_add_epi32(dcdx, dcdx); dcdx3 = _mm_add_epi32(dcdx2, dcdx); @@ -383,7 +387,7 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ __m128i unused; - + transpose4_epi32(&p0, &p1, &p2, &zero, &c, &dcdx, &dcdy, &unused); @@ -394,6 +398,9 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x))); c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); + /* Adjust so we can just check the sign bit (< 0 comparison), instead of having to do a less efficient <= 0 comparison */ + c = _mm_sub_epi32(c, _mm_set1_epi32(1)); + dcdx2 = _mm_add_epi32(dcdx, dcdx); dcdx3 = _mm_add_epi32(dcdx2, dcdx); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index b50c354fa9b..26d35debdaf 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -271,15 +271,13 @@ 
do_triangle_ccw(struct lp_setup_context *setup, */ int adj = (setup->pixel_offset != 0) ? 1 : 0; - bbox.x0 = (MIN3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; - bbox.x1 = (MAX3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; - bbox.y0 = (MIN3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; - bbox.y1 = (MAX3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + /* Inclusive x0, exclusive x1 */ + bbox.x0 = MIN3(x[0], x[1], x[2]) >> FIXED_ORDER; + bbox.x1 = (MAX3(x[0], x[1], x[2]) - 1) >> FIXED_ORDER; - /* Inclusive coordinates: - */ - bbox.x1--; - bbox.y1--; + /* Inclusive / exclusive depending upon adj (bottom-left or top-right) */ + bbox.y0 = (MIN3(y[0], y[1], y[2]) + adj) >> FIXED_ORDER; + bbox.y1 = (MAX3(y[0], y[1], y[2]) - 1 + adj) >> FIXED_ORDER; } if (bbox.x1 < bbox.x0 || diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index ec94190649c..2d2391e908c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -924,6 +924,7 @@ generate_variant(struct llvmpipe_context *lp, const struct lp_fragment_shader_variant_key *key) { struct lp_fragment_shader_variant *variant; + const struct util_format_description *cbuf0_format_desc; boolean fullcolormask; variant = CALLOC_STRUCT(lp_fragment_shader_variant); @@ -942,12 +943,8 @@ generate_variant(struct llvmpipe_context *lp, */ fullcolormask = FALSE; if (key->nr_cbufs == 1) { - const struct util_format_description *format_desc; - format_desc = util_format_description(key->cbuf_format[0]); - if ((~key->blend.rt[0].colormask & - util_format_colormask(format_desc)) == 0) { - fullcolormask = TRUE; - } + cbuf0_format_desc = util_format_description(key->cbuf_format[0]); + fullcolormask = util_format_colormask_full(cbuf0_format_desc, key->blend.rt[0].colormask); } variant->opaque = diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 
b3ca134131d..51324cbb6a3 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -179,7 +179,9 @@ add_blend_test(struct gallivm_state *gallivm, LLVMValueRef res_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; + const enum pipe_format format = PIPE_FORMAT_R8G8B8A8_UNORM; const unsigned rt = 0; + const unsigned char swizzle[4] = { 0, 1, 2, 3 }; vec_type = lp_build_vec_type(gallivm, type); @@ -205,7 +207,7 @@ add_blend_test(struct gallivm_state *gallivm, dst = LLVMBuildLoad(builder, dst_ptr, "dst"); con = LLVMBuildLoad(builder, const_ptr, "const"); - res = lp_build_blend_aos(gallivm, blend, type, rt, src, dst, con, 3); + res = lp_build_blend_aos(gallivm, blend, &format, type, rt, src, dst, NULL, con, swizzle); lp_build_name(res, "res"); diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c index ff199debd74..936e2bf246a 100644 --- a/src/gallium/drivers/nouveau/nouveau_buffer.c +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c @@ -523,8 +523,10 @@ nouveau_scratch_runout_release(struct nouveau_context *nv) { if (!nv->scratch.nr_runout) return; - while (nv->scratch.nr_runout--) + do { + --nv->scratch.nr_runout; nouveau_bo_ref(NULL, &nv->scratch.runout[nv->scratch.nr_runout]); + } while (nv->scratch.nr_runout); FREE(nv->scratch.runout); nv->scratch.end = 0; diff --git a/src/gallium/drivers/nv30/nvfx_fragprog.c b/src/gallium/drivers/nv30/nvfx_fragprog.c index e562b454f92..592ad21c6c8 100644 --- a/src/gallium/drivers/nv30/nvfx_fragprog.c +++ b/src/gallium/drivers/nv30/nvfx_fragprog.c @@ -535,6 +535,11 @@ nvfx_fragprog_parse_instruction(struct nv30_context* nvfx, struct nvfx_fpc *fpc, case TGSI_OPCODE_ADD: nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], src[1], none)); break; + case TGSI_OPCODE_CEIL: + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, neg(src[0]), none, none)); + nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, 
neg(tmp), none, none)); + break; case TGSI_OPCODE_CMP: insn = arith(0, MOV, none.reg, mask, src[0], none, none); insn.cc_update = 1; diff --git a/src/gallium/drivers/nv30/nvfx_vertprog.c b/src/gallium/drivers/nv30/nvfx_vertprog.c index d7eb9fb0a63..82972b3943c 100644 --- a/src/gallium/drivers/nv30/nvfx_vertprog.c +++ b/src/gallium/drivers/nv30/nvfx_vertprog.c @@ -550,6 +550,11 @@ nvfx_vertprog_parse_instruction(struct nv30_context *nv30, struct nvfx_vpc *vpc, case TGSI_OPCODE_ARL: nvfx_vp_emit(vpc, arith(0, VEC, ARL, dst, mask, src[0], none, none)); break; + case TGSI_OPCODE_CEIL: + tmp = nvfx_src(temp(vpc)); + nvfx_vp_emit(vpc, arith(0, VEC, FLR, tmp.reg, mask, neg(src[0]), none, none)); + nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, neg(tmp), none, none)); + break; case TGSI_OPCODE_CMP: insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none); insn.cc_update = 1; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp index f7dac25c116..f713e6391c6 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp @@ -278,6 +278,31 @@ BuildUtil::mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc) return mkOp2(OP_UNION, typeOfSize(dst->reg.size), dst, def0, def1); } +Instruction * +BuildUtil::mkSplit(Value *h[2], uint8_t halfSize, Value *val) +{ + Instruction *insn = NULL; + + const DataType fTy = typeOfSize(halfSize * 2); + + if (val->reg.file == FILE_IMMEDIATE) + val = mkMov(getSSA(halfSize * 2), val, fTy)->getDef(0); + + if (isMemoryFile(val->reg.file)) { + h[0] = cloneShallow(getFunction(), val); + h[1] = cloneShallow(getFunction(), val); + h[0]->reg.size = halfSize; + h[1]->reg.size = halfSize; + h[1]->reg.data.offset += halfSize; + } else { + h[0] = getSSA(halfSize, val->reg.file); + h[1] = getSSA(halfSize, val->reg.file); + insn = mkOp1(OP_SPLIT, fTy, h[0], val); + insn->setDef(1, h[1]); + } + return 
insn; +} + FlowInstruction * BuildUtil::mkFlow(operation op, void *targ, CondCode cc, Value *pred) { diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h index 9ee04dbcd12..dd7e491cb5c 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h @@ -81,6 +81,8 @@ public: Instruction *mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc); + Instruction *mkSplit(Value *half[2], uint8_t halfSize, Value *); + void mkClobber(DataFile file, uint32_t regMask, int regUnitLog2); ImmediateValue *mkImm(float); diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp index 82e23602ca0..16f191da159 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp @@ -347,6 +347,7 @@ static nv50_ir::TexTarget translateTexture(uint tex) NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW); NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW); NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW); + NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER); case TGSI_TEXTURE_UNKNOWN: default: @@ -548,7 +549,7 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(SAMPLE_D, TXD); NV50_IR_OPCODE_CASE(SAMPLE_L, TXL); NV50_IR_OPCODE_CASE(GATHER4, TXG); - NV50_IR_OPCODE_CASE(RESINFO, TXQ); + NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ); NV50_IR_OPCODE_CASE(END, EXIT); @@ -597,8 +598,8 @@ public: int clipVertexOutput; - uint8_t *resourceTargets; // TGSI_TEXTURE_* - unsigned resourceCount; + uint8_t *samplerViewTargets; // TGSI_TEXTURE_* + unsigned samplerViewCount; private: int inferSysValDirection(unsigned sn) const; @@ -617,7 +618,7 @@ Source::Source(struct nv50_ir_prog_info *prog) : info(prog) if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) tgsi_dump(tokens, 0); - resourceTargets = NULL; + samplerViewTargets = NULL; mainTempsInLMem = 
FALSE; } @@ -632,8 +633,8 @@ Source::~Source() if (info->immd.type) FREE(info->immd.type); - if (resourceTargets) - delete[] resourceTargets; + if (samplerViewTargets) + delete[] samplerViewTargets; } bool Source::scanSource() @@ -650,8 +651,8 @@ bool Source::scanSource() clipVertexOutput = -1; - resourceCount = scan.file_max[TGSI_FILE_RESOURCE] + 1; - resourceTargets = new uint8_t[resourceCount]; + samplerViewCount = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; + samplerViewTargets = new uint8_t[samplerViewCount]; info->immd.bufSize = 0; tempArrayCount = 0; @@ -805,7 +806,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) info->in[i].si = si; if (info->type == PIPE_SHADER_FRAGMENT) { // translate interpolation mode - switch (decl->Declaration.Interpolate) { + switch (decl->Interp.Interpolate) { case TGSI_INTERPOLATE_CONSTANT: info->in[i].flat = 1; break; @@ -818,7 +819,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) default: break; } - if (decl->Declaration.Centroid) + if (decl->Interp.Centroid) info->in[i].centroid = 1; } } @@ -874,9 +875,9 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) info->sv[i].input = inferSysValDirection(sn); } break; - case TGSI_FILE_RESOURCE: + case TGSI_FILE_SAMPLER_VIEW: for (i = first; i <= last; ++i) - resourceTargets[i] = decl->Resource.Resource; + samplerViewTargets[i] = decl->SamplerView.Resource; break; case TGSI_FILE_IMMEDIATE_ARRAY: { @@ -1000,13 +1001,15 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) nv50_ir::TexInstruction::Target Instruction::getTexture(const tgsi::Source *code, int s) const { - if (insn->Instruction.Texture) { - return translateTexture(insn->Texture.Texture); - } else { + switch (getSrc(s).getFile()) { + case TGSI_FILE_SAMPLER_VIEW: { // XXX: indirect access unsigned int r = getSrc(s).getIndex(0); - assert(r < code->resourceCount); - return translateTexture(code->resourceTargets[r]); + assert(r < 
code->samplerViewCount); + return translateTexture(code->samplerViewTargets[r]); + } + default: + return translateTexture(insn->Texture.Texture); } } @@ -2042,7 +2045,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) handleTXF(dst0, 1); break; case TGSI_OPCODE_TXQ: - case TGSI_OPCODE_RESINFO: + case TGSI_OPCODE_SVIEWINFO: handleTXQ(dst0, TXQ_DIMS); break; case TGSI_OPCODE_F2I: diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp index 27373b4cc47..16bba0e1723 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp @@ -57,15 +57,17 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul) Instruction *i[9]; - Value *a[2] = { bld->getSSA(halfSize), bld->getSSA(halfSize) }; - Value *b[2] = { bld->getSSA(halfSize), bld->getSSA(halfSize) }; + bld->setPosition(mul, true); + + Value *a[2], *b[2]; Value *c[2]; Value *t[4]; for (int j = 0; j < 4; ++j) t[j] = bld->getSSA(fullSize); - (i[0] = bld->mkOp1(OP_SPLIT, fTy, a[0], mul->getSrc(0)))->setDef(1, a[1]); - (i[1] = bld->mkOp1(OP_SPLIT, fTy, b[0], mul->getSrc(1)))->setDef(1, b[1]); + // split sources into halves + i[0] = bld->mkSplit(a, halfSize, mul->getSrc(0)); + i[1] = bld->mkSplit(b, halfSize, mul->getSrc(1)); i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]); i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]); @@ -96,7 +98,8 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul) delete_Instruction(bld->getProgram(), mul); for (int j = 2; j <= (highResult ? 
5 : 4); ++j) - i[j]->sType = hTy; + if (i[j]) + i[j]->sType = hTy; return true; } @@ -518,7 +521,6 @@ private: bool handleEXPORT(Instruction *); - bool handleMUL(Instruction *); bool handleDIV(Instruction *); bool handleSQRT(Instruction *); bool handlePOW(Instruction *); @@ -587,7 +589,8 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i) if (i->tex.target.isArray()) { Value *layer = i->getSrc(arg - 1); LValue *src = new_LValue(func, FILE_GPR); - bld.mkCvt(OP_CVT, TYPE_U16, src, TYPE_F32, layer); + bld.mkCvt(OP_CVT, TYPE_U32, src, TYPE_F32, layer); + bld.mkOp2(OP_MIN, TYPE_U32, src, src, bld.loadImm(NULL, 511)); i->setSrc(arg - 1, src); if (i->tex.target.isCube()) { @@ -941,14 +944,6 @@ NV50LoweringPreSSA::handleRDSV(Instruction *i) } bool -NV50LoweringPreSSA::handleMUL(Instruction *i) -{ - if (!isFloatType(i->dType) && typeSizeof(i->sType) > 2) - return expandIntegerMUL(&bld, i); - return true; -} - -bool NV50LoweringPreSSA::handleDIV(Instruction *i) { if (!isFloatType(i->dType)) @@ -1068,8 +1063,6 @@ NV50LoweringPreSSA::visit(Instruction *i) return handleSELP(i); case OP_POW: return handlePOW(i); - case OP_MUL: - return handleMUL(i); case OP_DIV: return handleDIV(i); case OP_SQRT: diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp index db5195cd582..10382d9cac6 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp @@ -564,7 +564,7 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2, insn = mul2->getSrc(t)->getInsn(); if (!mul2->src(t).mod && insn->op == OP_MUL && insn->dType == TYPE_F32) mul1 = insn; - if (mul1) { + if (mul1 && !mul1->saturate) { int s1; if (mul1->src(s1 = 0).getImmediate(imm1) || @@ -584,10 +584,11 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2, if (f < 0) mul1->src(0).mod *= Modifier(NV50_IR_MOD_NEG); } + mul1->saturate = mul2->saturate; return; } } - if 
(mul2->getDef(0)->refCount() == 1) { + if (mul2->getDef(0)->refCount() == 1 && !mul2->saturate) { // b = mul a, imm // d = mul b, c -> d = mul_x_imm a, c int s2, t2; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp index 77edaa6067a..726331e91e7 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp @@ -1819,8 +1819,8 @@ RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex) int n = tex->srcCount(0xff, true); if (n > 4) { condenseSrcs(tex, 0, 3); - if (n > 5) - condenseSrcs(tex, 4, n - 1); + if (n > 5) // NOTE: first call modified positions already + condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1)); } else if (n > 1) { condenseSrcs(tex, 0, n - 1); @@ -1850,8 +1850,8 @@ RegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction *tex) if (s > 1) condenseSrcs(tex, 0, s - 1); - if (n > 1) - condenseSrcs(tex, s, s + (n - 1)); + if (n > 1) // NOTE: first call modified positions already + condenseSrcs(tex, 1, n); condenseDefs(tex); } diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp index 5e541e514cb..8b11c6a2fdd 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp @@ -310,7 +310,22 @@ TargetNV50::insnCanLoad(const Instruction *i, int s, return false; } - if (ld->getSrc(0)->reg.data.offset > (int32_t)(127 * typeSizeof(ld->dType))) + uint8_t ldSize; + + if ((i->op == OP_MUL || i->op == OP_MAD) && !isFloatType(i->dType)) { + // 32-bit MUL will be split into 16-bit MULs + if (ld->src(0).isIndirect(0)) + return false; + if (sf == FILE_IMMEDIATE) + return false; + ldSize = 2; + } else { + ldSize = typeSizeof(ld->dType); + } + + if (ldSize < 4 && sf == FILE_SHADER_INPUT) // no < 4-byte aligned a[] access + return false; + if (ld->getSrc(0)->reg.data.offset > (int32_t)(127 * 
ldSize)) return false; if (ld->src(0).isIndirect(0)) { diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 1cee0e06c02..44a0ba0f561 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -48,6 +48,7 @@ #define NV50_NEW_CONSTBUF (1 << 18) #define NV50_NEW_TEXTURES (1 << 19) #define NV50_NEW_SAMPLERS (1 << 20) +#define NV50_NEW_STRMOUT (1 << 21) #define NV50_NEW_CONTEXT (1 << 31) #define NV50_BIND_FB 0 @@ -56,9 +57,10 @@ #define NV50_BIND_INDEX 3 #define NV50_BIND_TEXTURES 4 #define NV50_BIND_CB(s, i) (5 + 16 * (s) + (i)) -#define NV50_BIND_SCREEN 53 -#define NV50_BIND_TLS 54 -#define NV50_BIND_COUNT 55 +#define NV50_BIND_SO 53 +#define NV50_BIND_SCREEN 54 +#define NV50_BIND_TLS 55 +#define NV50_BIND_COUNT 56 #define NV50_BIND_2D 0 #define NV50_BIND_M2MF 0 #define NV50_BIND_FENCE 1 @@ -92,11 +94,13 @@ struct nv50_context { boolean point_sprite; boolean rt_serialize; boolean flushed; + boolean rasterizer_discard; uint8_t tls_required; uint8_t num_vtxbufs; uint8_t num_vtxelts; uint8_t num_textures[3]; uint8_t num_samplers[3]; + uint8_t prim_size; uint16_t scissor; } state; @@ -126,6 +130,10 @@ struct nv50_context { struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS]; unsigned num_samplers[3]; + uint8_t num_so_targets; + uint8_t so_targets_dirty; + struct pipe_stream_output_target *so_target[4]; + struct pipe_framebuffer_state framebuffer; struct pipe_blend_color blend_colour; struct pipe_stencil_ref stencil_ref; @@ -168,6 +176,14 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *); /* nv50_query.c */ void nv50_init_query_functions(struct nv50_context *); +void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, + struct pipe_query *, unsigned result_offset); +void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *); +void nva0_so_target_save_offset(struct pipe_context *, + struct pipe_stream_output_target *, + unsigned index, boolean 
seralize); + +#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) /* nv50_shader_state.c */ void nv50_vertprog_validate(struct nv50_context *); @@ -177,6 +193,7 @@ void nv50_fp_linkage_validate(struct nv50_context *); void nv50_gp_linkage_validate(struct nv50_context *); void nv50_constbufs_validate(struct nv50_context *); void nv50_validate_derived_rs(struct nv50_context *); +void nv50_stream_output_validate(struct nv50_context *); /* nv50_state.c */ extern void nv50_init_state_functions(struct nv50_context *); diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 1b2e2934b79..ca40ac2dd43 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -68,6 +68,17 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) break; } } + + /* + * Corner case: VP has no inputs, but we will still need to submit data to + * draw it. HW will shout at us and won't draw anything if we don't enable + * any input, so let's just pretend it's the first one. 
+ */ + if (prog->vp.attrs[0] == 0 && + prog->vp.attrs[1] == 0 && + prog->vp.attrs[2] == 0) + prog->vp.attrs[0] |= 0xf; + /* VertexID before InstanceID */ if (info->io.vertexId < info->numSysVals) info->sv[info->io.vertexId].slot[0] = n++; @@ -235,6 +246,59 @@ nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info) } } +static struct nv50_stream_output_state * +nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info, + const struct pipe_stream_output_info *pso) +{ + struct nv50_stream_output_state *so; + unsigned b, i, c; + unsigned base[4]; + + so = MALLOC_STRUCT(nv50_stream_output_state); + if (!so) + return NULL; + memset(so->map, 0xff, sizeof(so->map)); + + for (b = 0; b < 4; ++b) + so->num_attribs[b] = 0; + for (i = 0; i < pso->num_outputs; ++i) { + unsigned end = pso->output[i].dst_offset + pso->output[i].num_components; + b = pso->output[i].output_buffer; + assert(b < 4); + so->num_attribs[b] = MAX2(so->num_attribs[b], end); + } + + so->ctrl = NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED; + + so->stride[0] = pso->stride[0] * 4; + base[0] = 0; + for (b = 1; b < 4; ++b) { + assert(!so->num_attribs[b] || so->num_attribs[b] == pso->stride[b]); + so->stride[b] = so->num_attribs[b] * 4; + if (so->num_attribs[b]) + so->ctrl = (b + 1) << NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT; + base[b] = align(base[b - 1] + so->num_attribs[b - 1], 4); + } + if (so->ctrl & NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED) { + assert(so->stride[0] < NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX); + so->ctrl |= so->stride[0] << NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT; + } + + so->map_size = base[3] + so->num_attribs[3]; + + for (i = 0; i < pso->num_outputs; ++i) { + const unsigned s = pso->output[i].start_component; + const unsigned p = pso->output[i].dst_offset; + const unsigned r = pso->output[i].register_index; + b = pso->output[i].output_buffer; + + for (c = 0; c < pso->output[i].num_components; ++c) + so->map[base[b] + p + c] = info->out[r].slot[s + c]; + } + + 
return so; +} + boolean nv50_program_translate(struct nv50_program *prog, uint16_t chipset) { @@ -293,6 +357,10 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset) prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL; } + if (prog->pipe.stream_output.num_outputs) + prog->so = nv50_program_create_strmout_state(info, + &prog->pipe.stream_output); + out: FREE(info); return !ret; diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 92361ad9946..f56268b5439 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -42,6 +42,15 @@ struct nv50_varying { ubyte si; /* semantic index */ }; +struct nv50_stream_output_state +{ + uint32_t ctrl; + uint16_t stride[4]; + uint8_t num_attribs[4]; + uint8_t map_size; + uint8_t map[128]; +}; + struct nv50_program { struct pipe_shader_state pipe; @@ -88,6 +97,8 @@ struct nv50_program { void *fixups; /* relocation records */ struct nouveau_heap *mem; + + struct nv50_stream_output_state *so; }; boolean nv50_program_translate(struct nv50_program *, uint16_t chipset); diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c index 04e32b7e8b9..3abe189e7b5 100644 --- a/src/gallium/drivers/nv50/nv50_push.c +++ b/src/gallium/drivers/nv50/nv50_push.c @@ -210,7 +210,8 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info) { struct push_context ctx; unsigned i, index_size; - unsigned inst = info->instance_count; + unsigned inst_count = info->instance_count; + unsigned vert_count = info->count; boolean apply_bias = info->indexed && info->index_bias; ctx.push = nv50->base.pushbuf; @@ -242,6 +243,17 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info) ctx.primitive_restart = info->primitive_restart; ctx.restart_index = info->restart_index; } else { + if (unlikely(info->count_from_stream_output)) { + struct pipe_context *pipe = &nv50->base.pipe; + struct 
nv50_so_target *targ; + targ = nv50_so_target(info->count_from_stream_output); + if (!targ->pq) { + NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n"); + return; + } + pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count); + vert_count /= targ->stride; + } ctx.idxbuf = NULL; index_size = 0; ctx.primitive_restart = FALSE; @@ -262,21 +274,21 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info) } nv50->state.prim_restart = info->primitive_restart; - while (inst--) { + while (inst_count--) { BEGIN_NV04(ctx.push, NV50_3D(VERTEX_BEGIN_GL), 1); PUSH_DATA (ctx.push, ctx.prim); switch (index_size) { case 0: - emit_vertices_seq(&ctx, info->start, info->count); + emit_vertices_seq(&ctx, info->start, vert_count); break; case 1: - emit_vertices_i08(&ctx, info->start, info->count); + emit_vertices_i08(&ctx, info->start, vert_count); break; case 2: - emit_vertices_i16(&ctx, info->start, info->count); + emit_vertices_i16(&ctx, info->start, vert_count); break; case 4: - emit_vertices_i32(&ctx, info->start, info->count); + emit_vertices_i32(&ctx, info->start, vert_count); break; default: assert(0); diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 5275e74964a..8e62c5f11bc 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -36,7 +36,8 @@ struct nv50_query { uint32_t *data; - uint32_t type; + uint16_t type; + uint16_t index; uint32_t sequence; struct nouveau_bo *bo; uint32_t base; @@ -170,21 +171,15 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); PUSH_DATA (push, 1); break; - case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? 
*/ - PUSH_SPACE(push, 2); - BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1); - PUSH_DATA (push, NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES); + case PIPE_QUERY_PRIMITIVES_GENERATED: + nv50_query_get(push, q, 0x10, 0x06805002); break; case PIPE_QUERY_PRIMITIVES_EMITTED: - PUSH_SPACE(push, 2); - BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1); - PUSH_DATA (push, NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK); + nv50_query_get(push, q, 0x10, 0x05805002); break; case PIPE_QUERY_SO_STATISTICS: - PUSH_SPACE(push, 3); - BEGIN_NI04(push, NV50_3D(COUNTER_RESET), 2); - PUSH_DATA (push, NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK); - PUSH_DATA (push, NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES); + nv50_query_get(push, q, 0x20, 0x05805002); + nv50_query_get(push, q, 0x30, 0x06805002); break; case PIPE_QUERY_TIMESTAMP_DISJOINT: case PIPE_QUERY_TIME_ELAPSED: @@ -227,6 +222,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) case PIPE_QUERY_GPU_FINISHED: nv50_query_get(push, q, 0, 0x1000f010); break; + case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5)); + break; default: assert(0); break; @@ -247,6 +245,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, struct nv50_context *nv50 = nv50_context(pipe); struct nv50_query *q = nv50_query(pq); uint64_t *res64 = (uint64_t *)result; + uint32_t *res32 = (uint32_t *)result; boolean *res8 = (boolean *)result; uint64_t *data64 = (uint64_t *)q->data; @@ -275,11 +274,11 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, break; case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ - res64[0] = data64[0]; + res64[0] = data64[0] - data64[2]; break; case PIPE_QUERY_SO_STATISTICS: - res64[0] = data64[0]; - res64[1] = data64[1]; + res64[0] = data64[0] - data64[4]; + res64[1] = data64[2] - data64[6]; break; case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */ 
res64[0] = 1000000000; @@ -288,6 +287,9 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, case PIPE_QUERY_TIME_ELAPSED: res64[0] = data64[1] - data64[3]; break; + case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + res32[0] = q->data[1]; + break; default: return FALSE; } @@ -295,6 +297,21 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, return TRUE; } +void +nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq) +{ + struct nv50_query *q = nv50_query(pq); + unsigned offset = q->offset; + + PUSH_SPACE(push, 5); + PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4); + PUSH_DATAh(push, q->bo->offset + offset); + PUSH_DATA (push, q->bo->offset + offset); + PUSH_DATA (push, q->sequence); + PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); +} + static void nv50_render_condition(struct pipe_context *pipe, struct pipe_query *pq, uint mode) @@ -325,6 +342,38 @@ nv50_render_condition(struct pipe_context *pipe, } void +nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, + struct pipe_query *pq, unsigned result_offset) +{ + struct nv50_query *q = nv50_query(pq); + + /* XXX: does this exist ? 
*/ +#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8)) + + nouveau_pushbuf_space(push, 0, 0, 1); + nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 | + NV50_IB_ENTRY_1_NO_PREFETCH); +} + +void +nva0_so_target_save_offset(struct pipe_context *pipe, + struct pipe_stream_output_target *ptarg, + unsigned index, boolean serialize) +{ + struct nv50_so_target *targ = nv50_so_target(ptarg); + + if (serialize) { + struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf; + PUSH_SPACE(push, 2); + BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + } + + nv50_query(targ->pq)->index = index; + nv50_query_end(pipe, targ->pq); +} + +void nv50_init_query_functions(struct nv50_context *nv50) { struct pipe_context *pipe = &nv50->base.pipe; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index a6dfbedf299..c96e028b2a2 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -73,6 +73,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, static int nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) { + const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; + switch (param) { case PIPE_CAP_MAX_COMBINED_SAMPLERS: return 64; @@ -82,8 +84,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 12; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 14; - case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: /* shader support missing */ - return 0; + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return 512; case PIPE_CAP_MIN_TEXEL_OFFSET: return -8; case PIPE_CAP_MAX_TEXEL_OFFSET: @@ -95,7 +97,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_ANISOTROPIC_FILTER: case PIPE_CAP_SCALED_RESOLVE: return 1; - case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: case PIPE_CAP_SEAMLESS_CUBE_MAP: return nv50_screen(pscreen)->tesla->oclass >= NVA0_3D_CLASS; case 
PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: @@ -121,11 +122,12 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_OCCLUSION_QUERY: return 1; case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: - return 0; + return 4; case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: - return 128; case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: - return 32; + return 64; + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + return (class_3d >= NVA0_3D_CLASS) ? 1 : 0; case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_INDEP_BLEND_ENABLE: return 1; diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c index aef3f129c81..d070f07bbbc 100644 --- a/src/gallium/drivers/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nv50/nv50_shader_state.c @@ -207,6 +207,8 @@ nv50_gmtyprog_validate(struct nv50_context *nv50) PUSH_DATA (push, gp->gp.vert_count); BEGIN_NV04(push, NV50_3D(GP_START_ID), 1); PUSH_DATA (push, gp->code_base); + + nv50->state.prim_size = gp->gp.prim_type; /* enum matches vertex count */ } nv50_program_update_context_state(nv50, gp, 2); @@ -278,6 +280,12 @@ nv50_validate_derived_rs(struct nv50_context *nv50) nv50_sprite_coords_validate(nv50); + if (nv50->state.rasterizer_discard != nv50->rast->pipe.rasterizer_discard) { + nv50->state.rasterizer_discard = nv50->rast->pipe.rasterizer_discard; + BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1); + PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard); + } + if (nv50->dirty & NV50_NEW_FRAGPROG) return; psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK; @@ -343,6 +351,7 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) uint32_t colors = fp->fp.colors; uint32_t lin[4]; uint8_t map[64]; + uint8_t so_map[64]; if (!(nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG | @@ -411,6 +420,30 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) if (nv50->rast->pipe.clamp_vertex_color) colors |= NV50_3D_SEMANTIC_COLOR_CLMP_EN; + if 
(unlikely(vp->so)) { + /* Slot i in STRMOUT_MAP specifies the offset where slot i in RESULT_MAP + * gets written. + * + * TODO: + * Inverting vp->so->map (output -> offset) would probably speed this up. + */ + memset(so_map, 0, sizeof(so_map)); + for (i = 0; i < vp->so->map_size; ++i) { + if (vp->so->map[i] == 0xff) + continue; + for (c = 0; c < m; ++c) + if (map[c] == vp->so->map[i] && !so_map[c]) + break; + if (c == m) { + c = m; + map[m++] = vp->so->map[i]; + } + so_map[c] = 0x80 | i; + } + for (c = m; c & 3; ++c) + so_map[c] = 0; + } + n = (m + 3) / 4; assert(m <= 64); @@ -451,6 +484,11 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) BEGIN_NV04(push, NV50_3D(GP_ENABLE), 1); PUSH_DATA (push, nv50->gmtyprog ? 1 : 0); + + if (vp->so) { + BEGIN_NV04(push, NV50_3D(STRMOUT_MAP(0)), n); + PUSH_DATAp(push, so_map, n); + } } static int @@ -509,3 +547,75 @@ nv50_gp_linkage_validate(struct nv50_context *nv50) BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n); PUSH_DATAp(push, map, n); } + +void +nv50_stream_output_validate(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_stream_output_state *so; + uint32_t ctrl; + unsigned i; + unsigned prims = ~0; + + so = nv50->gmtyprog ? 
nv50->gmtyprog->so : nv50->vertprog->so; + + if (!so || !nv50->num_so_targets) { + BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1); + PUSH_DATA (push, 0); + if (nv50->screen->base.class_3d < NVA0_3D_CLASS) { + BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1); + PUSH_DATA (push, 0); + } + BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1); + PUSH_DATA (push, 1); + return; + } + + ctrl = so->ctrl; + if (nv50->screen->base.class_3d >= NVA0_3D_CLASS) + ctrl |= NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET; + + BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1); + PUSH_DATA (push, ctrl); + + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO); + + for (i = 0; i < nv50->num_so_targets; ++i) { + struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]); + struct nv04_resource *buf = nv04_resource(targ->pipe.buffer); + + const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3; + + if (n == 4 && !targ->clean) + nv84_query_fifo_wait(push, targ->pq); + BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n); + PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset); + PUSH_DATA (push, buf->address + targ->pipe.buffer_offset); + PUSH_DATA (push, so->num_attribs[i]); + if (n == 4) { + PUSH_DATA(push, targ->pipe.buffer_size); + + BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1); + if (!targ->clean) { + assert(targ->pq); + nv50_query_pushbuf_submit(push, targ->pq, 0x4); + } else { + PUSH_DATA(push, 0); + targ->clean = FALSE; + } + } else { + const unsigned limit = targ->pipe.buffer_size / + (so->stride[i] * nv50->state.prim_size); + prims = MIN2(prims, limit); + } + BCTX_REFN(nv50->bufctx_3d, SO, buf, WR); + } + if (prims != ~0) { + BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1); + PUSH_DATA (push, prims); + } + BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1); + PUSH_DATA (push, 1); +} diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c 
index a17540a1492..7f840e2b42e 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -680,6 +680,9 @@ nv50_sp_state_create(struct pipe_context *pipe, prog->type = type; prog->pipe.tokens = tgsi_dup_tokens(cso->tokens); + if (cso->stream_output.num_outputs) + prog->pipe.stream_output = cso->stream_output; + return (void *)prog; } @@ -920,6 +923,90 @@ nv50_vertex_state_bind(struct pipe_context *pipe, void *hwcso) nv50->dirty |= NV50_NEW_VERTEX; } +static struct pipe_stream_output_target * +nv50_so_target_create(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, unsigned size) +{ + struct nv50_so_target *targ = MALLOC_STRUCT(nv50_so_target); + if (!targ) + return NULL; + + if (nouveau_context(pipe)->screen->class_3d >= NVA0_3D_CLASS) { + targ->pq = pipe->create_query(pipe, + NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET); + if (!targ->pq) { + FREE(targ); + return NULL; + } + } else { + targ->pq = NULL; + } + targ->clean = TRUE; + + targ->pipe.buffer_size = size; + targ->pipe.buffer_offset = offset; + targ->pipe.context = pipe; + targ->pipe.buffer = NULL; + pipe_resource_reference(&targ->pipe.buffer, res); + pipe_reference_init(&targ->pipe.reference, 1); + + return &targ->pipe; +} + +static void +nv50_so_target_destroy(struct pipe_context *pipe, + struct pipe_stream_output_target *ptarg) +{ + struct nv50_so_target *targ = nv50_so_target(ptarg); + if (targ->pq) + pipe->destroy_query(pipe, targ->pq); + FREE(targ); +} + +static void +nv50_set_stream_output_targets(struct pipe_context *pipe, + unsigned num_targets, + struct pipe_stream_output_target **targets, + unsigned append_mask) +{ + struct nv50_context *nv50 = nv50_context(pipe); + unsigned i; + boolean serialize = TRUE; + const boolean can_resume = nv50->screen->base.class_3d >= NVA0_3D_CLASS; + + assert(num_targets <= 4); + + for (i = 0; i < num_targets; ++i) { + const boolean changed = nv50->so_target[i] != targets[i]; + if (!changed && (append_mask & (1 
<< i))) + continue; + nv50->so_targets_dirty |= 1 << i; + + if (can_resume && changed && nv50->so_target[i]) { + nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize); + serialize = FALSE; + } + + if (targets[i] && !(append_mask & (1 << i))) + nv50_so_target(targets[i])->clean = TRUE; + + pipe_so_target_reference(&nv50->so_target[i], targets[i]); + } + for (; i < nv50->num_so_targets; ++i) { + if (can_resume && nv50->so_target[i]) { + nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize); + serialize = FALSE; + } + pipe_so_target_reference(&nv50->so_target[i], NULL); + nv50->so_targets_dirty |= 1 << i; + } + nv50->num_so_targets = num_targets; + + if (nv50->so_targets_dirty) + nv50->dirty |= NV50_NEW_STRMOUT; +} + void nv50_init_state_functions(struct nv50_context *nv50) { @@ -975,5 +1062,8 @@ nv50_init_state_functions(struct nv50_context *nv50) pipe->set_vertex_buffers = nv50_set_vertex_buffers; pipe->set_index_buffer = nv50_set_index_buffer; -} + pipe->create_stream_output_target = nv50_so_target_create; + pipe->stream_output_target_destroy = nv50_so_target_destroy; + pipe->set_stream_output_targets = nv50_set_stream_output_targets; +} diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index c19acf6c426..a95e96d3c51 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -360,6 +360,8 @@ static struct state_validate { { nv50_constbufs_validate, NV50_NEW_CONSTBUF }, { nv50_validate_textures, NV50_NEW_TEXTURES }, { nv50_validate_samplers, NV50_NEW_SAMPLERS }, + { nv50_stream_output_validate, NV50_NEW_STRMOUT | + NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS } }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nv50/nv50_stateobj.h b/src/gallium/drivers/nv50/nv50_stateobj.h index 188406da600..8a9260c937e 
100644 --- a/src/gallium/drivers/nv50/nv50_stateobj.h +++ b/src/gallium/drivers/nv50/nv50_stateobj.h @@ -51,4 +51,17 @@ struct nv50_vertex_stateobj { struct nv50_vertex_element element[0]; }; +struct nv50_so_target { + struct pipe_stream_output_target pipe; + struct pipe_query *pq; + unsigned stride; + boolean clean; +}; + +static INLINE struct nv50_so_target * +nv50_so_target(struct pipe_stream_output_target *ptarg) +{ + return (struct nv50_so_target *)ptarg; +} + #endif diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index b38e49ffcc1..15c88d5316d 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -711,7 +711,7 @@ nv50_blit_set_src(struct nv50_context *nv50, templ.swizzle_a = PIPE_SWIZZLE_ALPHA; nv50->textures[2][0] = nv50_create_sampler_view(pipe, res, &templ); - nv50->textures[2][0] = NULL; + nv50->textures[2][1] = NULL; nv50_blit_fixup_tic_entry(nv50->textures[2][0]); diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index bc01e69decf..323677eaf80 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -405,6 +405,25 @@ nv50_prim_gl(unsigned prim) } } +/* For pre-nva0 transform feedback. 
*/ +static const uint8_t nv50_pipe_prim_to_prim_size[PIPE_PRIM_MAX + 1] = +{ + [PIPE_PRIM_POINTS] = 1, + [PIPE_PRIM_LINES] = 2, + [PIPE_PRIM_LINE_LOOP] = 2, + [PIPE_PRIM_LINE_STRIP] = 2, + [PIPE_PRIM_TRIANGLES] = 3, + [PIPE_PRIM_TRIANGLE_STRIP] = 3, + [PIPE_PRIM_TRIANGLE_FAN] = 3, + [PIPE_PRIM_QUADS] = 3, + [PIPE_PRIM_QUAD_STRIP] = 3, + [PIPE_PRIM_POLYGON] = 3, + [PIPE_PRIM_LINES_ADJACENCY] = 2, + [PIPE_PRIM_LINE_STRIP_ADJACENCY] = 2, + [PIPE_PRIM_TRIANGLES_ADJACENCY] = 3, + [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = 3 +}; + static void nv50_draw_arrays(struct nv50_context *nv50, unsigned mode, unsigned start, unsigned count, @@ -624,6 +643,51 @@ nv50_draw_elements(struct nv50_context *nv50, boolean shorten, } static void +nva0_draw_stream_output(struct nv50_context *nv50, + const struct pipe_draw_info *info) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_so_target *so = nv50_so_target(info->count_from_stream_output); + struct nv04_resource *res = nv04_resource(so->pipe.buffer); + unsigned num_instances = info->instance_count; + unsigned mode = nv50_prim_gl(info->mode); + + if (unlikely(nv50->screen->base.class_3d < NVA0_3D_CLASS)) { + /* A proper implementation without waiting doesn't seem possible, + * so don't bother. 
+ */ + NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n"); + return; + } + + if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { + res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; + PUSH_SPACE(push, 4); + BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1); + PUSH_DATA (push, 0); + } + + assert(num_instances); + do { + PUSH_SPACE(push, 8); + BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1); + PUSH_DATA (push, mode); + BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1); + nv50_query_pushbuf_submit(push, so->pq, 0x4); + BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1); + PUSH_DATA (push, 0); + + mode |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } while (--num_instances); +} + +static void nv50_draw_vbo_kick_notify(struct nouveau_pushbuf *chan) { struct nv50_screen *screen = chan->user_priv; @@ -655,6 +719,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_VERTEX | NV50_NEW_ARRAYS))) nv50_update_user_vbufs(nv50); + if (unlikely(nv50->num_so_targets && !nv50->gmtyprog)) + nv50->state.prim_size = nv50_pipe_prim_to_prim_size[info->mode]; + nv50_state_validate(nv50, ~0, 8); /* 8 as minimum, we use flush_notify */ push->kick_notify = nv50_draw_vbo_kick_notify; @@ -679,11 +746,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nv50->base.vbo_dirty = FALSE; } - if (!info->indexed) { - nv50_draw_arrays(nv50, - info->mode, info->start, info->count, - info->instance_count); - } else { + if (info->indexed) { boolean shorten = info->max_index <= 65535; assert(nv50->idxbuf.buffer); @@ -713,6 +776,13 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nv50_draw_elements(nv50, shorten, info->mode, info->start, info->count, 
info->instance_count, info->index_bias); + } else + if (unlikely(info->count_from_stream_output)) { + nva0_draw_stream_output(nv50, info); + } else { + nv50_draw_arrays(nv50, + info->mode, info->start, info->count, + info->instance_count); } push->kick_notify = nv50_default_kick_notify; diff --git a/src/gallium/drivers/nv50/nv50_winsys.h b/src/gallium/drivers/nv50/nv50_winsys.h index b36898dabe6..145ee70cb9f 100644 --- a/src/gallium/drivers/nv50/nv50_winsys.h +++ b/src/gallium/drivers/nv50/nv50_winsys.h @@ -49,6 +49,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) #define SUBC_3D(m) 3, (m) #define NV50_3D(n) SUBC_3D(NV50_3D_##n) +#define NVA0_3D(n) SUBC_3D(NVA0_3D_##n) #define SUBC_2D(m) 4, (m) #define NV50_2D(n) SUBC_2D(NV50_2D_##n) diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp index 2ca4979dc74..fbd1aa5dfc9 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp @@ -1020,7 +1020,7 @@ CodeEmitterNVC0::emitTEX(const TexInstruction *i) code[1] |= 0x02000000; } - if (i->tex.derivAll) + if (i->op != OP_TXD && i->tex.derivAll) code[1] |= 1 << 13; defId(i->def(0), 14); diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp index 02ae9fd5d0e..900e998df8d 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp @@ -749,21 +749,22 @@ bool NVC0LoweringPass::handleTXD(TexInstruction *txd) { int dim = txd->tex.target.getDim(); - int arg = txd->tex.target.getDim() + txd->tex.target.isArray(); + int arg = txd->tex.target.getArgCount(); handleTEX(txd); - while (txd->src(arg).exists()) + while (txd->srcExists(arg)) ++arg; txd->tex.derivAll = true; - if (dim > 2 || txd->tex.target.isShadow()) + if (dim > 2 || + txd->tex.target.isCube() || + 
arg > 4 || + txd->tex.target.isShadow()) return handleManualTXD(txd); - assert(arg <= 4); // at most s/t/array, x, y, offset - for (int c = 0; c < dim; ++c) { - txd->src(arg + c * 2 + 0).set(txd->dPdx[c]); - txd->src(arg + c * 2 + 1).set(txd->dPdy[c]); + txd->setSrc(arg + c * 2 + 0, txd->dPdx[c]); + txd->setSrc(arg + c * 2 + 1, txd->dPdy[c]); txd->dPdx[c].set(NULL); txd->dPdy[c].set(NULL); } diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp index 10c2d09d657..e4b9dc18311 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp @@ -223,6 +223,9 @@ static const struct opProperties _initProps[] = { OP_ABS, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 }, { OP_NEG, 0x0, 0x1, 0x0, 0x0, 0x1, 0x0 }, { OP_CVT, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 }, + { OP_CEIL, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 }, + { OP_FLOOR, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 }, + { OP_TRUNC, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 }, { OP_AND, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 }, { OP_OR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 }, { OP_XOR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 }, diff --git a/src/gallium/drivers/r300/r300_vs_draw.c b/src/gallium/drivers/r300/r300_vs_draw.c index 69d67585d8b..b9e73dd514b 100644 --- a/src/gallium/drivers/r300/r300_vs_draw.c +++ b/src/gallium/drivers/r300/r300_vs_draw.c @@ -94,11 +94,12 @@ static void emit_output(struct tgsi_transform_context *ctx, decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Interpolate = interp; + decl.Declaration.Interpolate = 1; decl.Declaration.Semantic = TRUE; decl.Semantic.Name = name; decl.Semantic.Index = index; decl.Range.First = decl.Range.Last = reg; + decl.Interp.Interpolate = interp; ctx->emit_declaration(ctx, &decl); ++vsctx->num_outputs; } diff --git a/src/gallium/drivers/r600/Makefile.am b/src/gallium/drivers/r600/Makefile.am index 3089a829e53..77d2674d262 100644 --- 
a/src/gallium/drivers/r600/Makefile.am +++ b/src/gallium/drivers/r600/Makefile.am @@ -29,7 +29,7 @@ libr600_a_SOURCES += \ $(LLVM_C_SOURCES) libr600_a_LIBADD = \ - $(top_srcdir)/src/gallium/drivers/radeon/libradeon.a + $(top_builddir)/src/gallium/drivers/radeon/libradeon.a AM_CFLAGS += \ $(LLVM_CFLAGS) \ diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index b6d03ef37de..d2c1679796a 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -133,6 +133,10 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) S_SQ_CF_WORD1_COND(cf->cond) | S_SQ_CF_WORD1_POP_COUNT(cf->pop_count); break; + case CF_NATIVE: + bc->bytecode[id++] = cf->isa[0]; + bc->bytecode[id++] = cf->isa[1]; + break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); return -EINVAL; diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 517121dc288..81aedb5c0ac 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -796,11 +796,11 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx, alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); alpha_ref = fui(state->alpha.ref_value); } + dsa->sx_alpha_test_control = alpha_test_control & 0xff; dsa->alpha_ref = alpha_ref; /* misc */ db_render_control = 0; - r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control); r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control); r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control); return rstate; @@ -1428,6 +1428,11 @@ static void evergreen_cb(struct r600_context *rctx, struct r600_pipe_state *rsta blend_bypass = 1; } + if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) + rctx->sx_alpha_test_control |= S_028410_ALPHA_TEST_BYPASS(1); + else + rctx->sx_alpha_test_control &= C_028410_ALPHA_TEST_BYPASS; + 
color_info |= S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) | diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 4009e91d4fc..105d80f061d 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -32,20 +32,16 @@ #define EVERGREEN_CONTEXT_REG_OFFSET 0X00028000 #define EVERGREEN_CONTEXT_REG_END 0X00029000 #define EVERGREEN_RESOURCE_OFFSET 0x00030000 -#define EVERGREEN_RESOURCE_END 0x00034000 -#define CAYMAN_RESOURCE_END 0x00038000 +#define EVERGREEN_RESOURCE_END 0x00038000 #define EVERGREEN_LOOP_CONST_OFFSET 0x0003A200 -#define EVERGREEN_LOOP_CONST_END 0x0003A26C +#define EVERGREEN_LOOP_CONST_END 0x0003A500 #define EVERGREEN_BOOL_CONST_OFFSET 0x0003A500 -#define EVERGREEN_BOOL_CONST_END 0x0003A506 -#define CAYMAN_BOOL_CONST_END 0x0003A518 +#define EVERGREEN_BOOL_CONST_END 0x0003A518 #define EVERGREEN_SAMPLER_OFFSET 0X0003C000 -#define EVERGREEN_SAMPLER_END 0X0003CFF0 -#define CAYMAN_SAMPLER_END 0X0003C600 +#define EVERGREEN_SAMPLER_END 0X0003C600 #define EVERGREEN_CTL_CONST_OFFSET 0x0003CFF0 -#define EVERGREEN_CTL_CONST_END 0x0003E200 -#define CAYMAN_CTL_CONST_END 0x0003FF0C +#define EVERGREEN_CTL_CONST_END 0x0003FF0C #define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10 #define EVENT_TYPE_ZPASS_DONE 0x15 diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 651933bf37c..5a10bd90776 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -94,6 +94,7 @@ static inline unsigned int r600_bytecode_get_num_operands(struct r600_bytecode * case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE: @@ -153,6 +154,7 @@ static inline 
unsigned int r600_bytecode_get_num_operands(struct r600_bytecode * case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE: @@ -171,6 +173,7 @@ static inline unsigned int r600_bytecode_get_num_operands(struct r600_bytecode * case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE: @@ -1927,6 +1930,7 @@ int r600_bytecode_build(struct r600_bytecode *bc) case EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN: case CM_V_SQ_CF_WORD1_SQ_CF_INST_END: + case CF_NATIVE: break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); @@ -2025,13 +2029,12 @@ int r600_bytecode_build(struct r600_bytecode *bc) } break; case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX: - if (bc->chip_class == CAYMAN) { - LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { - r = r600_bytecode_vtx_build(bc, vtx, addr); - if (r) - return r; - addr += 4; - } + LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { + assert(bc->chip_class >= EVERGREEN); + r = r600_bytecode_vtx_build(bc, vtx, addr); + if (r) + return r; + addr += 4; } LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { r = r600_bytecode_tex_build(bc, tex, addr); @@ -2069,6 +2072,8 @@ int r600_bytecode_build(struct r600_bytecode *bc) case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN: case CM_V_SQ_CF_WORD1_SQ_CF_INST_END: break; + case CF_NATIVE: + break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); return -EINVAL; @@ -2341,6 +2346,10 @@ void 
r600_bytecode_dump(struct r600_bytecode *bc) fprintf(stderr, "COND:%X ", cf->cond); fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count); break; + case CF_NATIVE: + fprintf(stderr, "%04d %08X CF NATIVE\n", id, bc->bytecode[id]); + fprintf(stderr, "%04d %08X CF NATIVE\n", id + 1, bc->bytecode[id + 1]); + break; default: R600_ERR("Unknown instruction %0x\n", cf->inst); } @@ -2477,7 +2486,8 @@ void r600_bytecode_dump(struct r600_bytecode *bc) if (alu->last) { for (i = 0; i < nliteral; i++, id++) { float *f = (float*)(bc->bytecode + id); - fprintf(stderr, "%04d %08X\t%f\n", id, bc->bytecode[id], *f); + fprintf(stderr, "%04d %08X\t%f (%d)\n", id, bc->bytecode[id], *f, + *(bc->bytecode + id)); } id += nliteral & 1; nliteral = 0; diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 5790ead991f..a8a157b79e4 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -135,6 +135,14 @@ struct r600_bytecode_kcache { unsigned addr; }; +/* A value of CF_NATIVE in r600_bytecode_cf::inst means that this instruction + * has already been encoded, and the encoding has been stored in + * r600_bytecode::isa. This is used by the LLVM backend to emit CF instructions + * e.g. RAT_WRITE_* that can't be properly represented by struct + * r600_bytecode_cf. 
+ */ +#define CF_NATIVE ~0 + struct r600_bytecode_cf { struct list_head list; @@ -157,6 +165,7 @@ struct r600_bytecode_cf { struct r600_bytecode_alu *curr_bs_head; struct r600_bytecode_alu *prev_bs_head; struct r600_bytecode_alu *prev2_bs_head; + unsigned isa[2]; }; #define FC_NONE 0 diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index d467baf60fb..f916604db7b 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -21,10 +21,44 @@ static LLVMValueRef llvm_fetch_const( enum tgsi_opcode_type type, unsigned swizzle) { - return lp_build_intrinsic_unary(bld_base->base.gallivm->builder, + LLVMValueRef cval = lp_build_intrinsic_unary(bld_base->base.gallivm->builder, "llvm.AMDGPU.load.const", bld_base->base.elem_type, lp_build_const_int32(bld_base->base.gallivm, radeon_llvm_reg_index_soa(reg->Register.Index, swizzle))); + + return bitcast(bld_base, type, cval); +} + +static void llvm_load_system_value( + struct radeon_llvm_context * ctx, + unsigned index, + const struct tgsi_full_declaration *decl) +{ + unsigned chan; + + switch (decl->Semantic.Name) { + case TGSI_SEMANTIC_INSTANCEID: chan = 3; break; + case TGSI_SEMANTIC_VERTEXID: chan = 0; break; + default: assert(!"unknown system value"); + } + + LLVMValueRef reg = lp_build_const_int32( + ctx->soa.bld_base.base.gallivm, chan); + ctx->system_values[index] = lp_build_intrinsic_unary( + ctx->soa.bld_base.base.gallivm->builder, + "llvm.R600.load.input", + ctx->soa.bld_base.base.elem_type, reg); +} + +static LLVMValueRef llvm_fetch_system_value( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_src_register *reg, + enum tgsi_opcode_type type, + unsigned swizzle) +{ + struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); + LLVMValueRef cval = ctx->system_values[reg->Register.Index]; + return bitcast(bld_base, type, cval); } static void llvm_load_input( @@ -59,17 +93,13 @@ static void llvm_emit_prologue(struct 
lp_build_tgsi_context * bld_base) for (i = 0; i < ctx->reserved_reg_count; i++) { unsigned chan; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - LLVMValueRef reg; LLVMValueRef reg_index = lp_build_const_int32( base->gallivm, radeon_llvm_reg_index_soa(i, chan)); - reg = lp_build_intrinsic_unary(base->gallivm->builder, - "llvm.AMDGPU.reserve.reg", - base->elem_type, reg_index); lp_build_intrinsic_unary(base->gallivm->builder, - "llvm.AMDGPU.export.reg", + "llvm.AMDGPU.reserve.reg", LLVMVoidTypeInContext(base->gallivm->context), - reg); + reg_index); } } } @@ -85,7 +115,6 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) unsigned chan; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { LLVMValueRef output; - LLVMValueRef store_output; unsigned adjusted_reg_idx = i + ctx->reserved_reg_count; LLVMValueRef reg_index = lp_build_const_int32( @@ -95,16 +124,11 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) output = LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][chan], ""); - store_output = lp_build_intrinsic_binary( + lp_build_intrinsic_binary( base->gallivm->builder, "llvm.AMDGPU.store.output", - base->elem_type, - output, reg_index); - - lp_build_intrinsic_unary(base->gallivm->builder, - "llvm.AMDGPU.export.reg", LLVMVoidTypeInContext(base->gallivm->context), - store_output); + output, reg_index); } } } @@ -169,28 +193,7 @@ static struct lp_build_tgsi_action dot_action = { .intr_name = "llvm.AMDGPU.dp4" }; -static void txp_fetch_args( - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - LLVMValueRef src_w; - unsigned chan; - LLVMValueRef coords[4]; - - emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); - src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); - for (chan = 0; chan < 3; chan++ ) { - LLVMValueRef arg = lp_build_emit_fetch(bld_base, - emit_data->inst, 0, chan); - coords[chan] = lp_build_emit_llvm_binary(bld_base, - 
TGSI_OPCODE_DIV, arg, src_w); - } - coords[3] = bld_base->base.one; - emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, - coords, 4); - emit_data->arg_count = 1; -} LLVMModuleRef r600_tgsi_llvm( struct radeon_llvm_context * ctx, @@ -204,20 +207,25 @@ LLVMModuleRef r600_tgsi_llvm( bld_base->info = &shader_info; bld_base->userdata = ctx; bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = llvm_fetch_const; + bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = llvm_fetch_system_value; bld_base->emit_prologue = llvm_emit_prologue; bld_base->emit_epilogue = llvm_emit_epilogue; ctx->userdata = ctx; ctx->load_input = llvm_load_input; + ctx->load_system_value = llvm_load_system_value; bld_base->op_actions[TGSI_OPCODE_DP2] = dot_action; bld_base->op_actions[TGSI_OPCODE_DP3] = dot_action; bld_base->op_actions[TGSI_OPCODE_DP4] = dot_action; bld_base->op_actions[TGSI_OPCODE_DPH] = dot_action; + bld_base->op_actions[TGSI_OPCODE_DDX].emit = llvm_emit_tex; + bld_base->op_actions[TGSI_OPCODE_DDY].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TEX].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TXB].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TXD].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TXL].emit = llvm_emit_tex; - bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex; + bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex; lp_build_tgsi_llvm(bld_base, tokens); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 63fc27564d7..db455f021ad 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -161,6 +161,7 @@ struct r600_pipe_dsa { ubyte valuemask[2]; ubyte writemask[2]; bool is_flush; + unsigned sx_alpha_test_control; }; struct r600_vertex_element @@ -250,6 +251,7 @@ struct r600_context { struct 
pipe_framebuffer_state framebuffer; unsigned cb_target_mask; unsigned fb_cb_shader_mask; + unsigned sx_alpha_test_control; unsigned cb_shader_mask; unsigned cb_color_control; unsigned pa_sc_line_stipple; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 5e22b35ba48..cd78104a010 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -287,6 +287,7 @@ static void llvm_if(struct r600_shader_ctx *ctx, struct r600_bytecode_alu * alu, { alu->inst = pred_inst; alu->predicate = 1; + alu->dst.write = 0; alu->src[1].sel = V_SQ_ALU_SRC_0; alu->src[1].chan = 0; alu->last = 1; @@ -362,6 +363,10 @@ static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx, tgsi_loop_brk_cont(ctx); } break; + case 8: + r600_break_from_byte_stream(ctx, &alu, + CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT)); + break; } return bytes_read; @@ -401,10 +406,43 @@ static unsigned r600_tex_from_byte_stream(struct r600_shader_ctx *ctx, return bytes_read; } +static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx, + unsigned char * bytes, unsigned bytes_read) +{ + struct r600_bytecode_vtx vtx; + memset(&vtx, 0, sizeof(vtx)); + vtx.inst = bytes[bytes_read++]; + vtx.fetch_type = bytes[bytes_read++]; + vtx.buffer_id = bytes[bytes_read++]; + vtx.src_gpr = bytes[bytes_read++]; + vtx.src_sel_x = bytes[bytes_read++]; + vtx.mega_fetch_count = bytes[bytes_read++]; + vtx.dst_gpr = bytes[bytes_read++]; + vtx.dst_sel_x = bytes[bytes_read++]; + vtx.dst_sel_y = bytes[bytes_read++]; + vtx.dst_sel_z = bytes[bytes_read++]; + vtx.dst_sel_w = bytes[bytes_read++]; + vtx.use_const_fields = bytes[bytes_read++]; + vtx.data_format = bytes[bytes_read++]; + vtx.num_format_all = bytes[bytes_read++]; + vtx.format_comp_all = bytes[bytes_read++]; + vtx.srf_mode_all = bytes[bytes_read++]; + vtx.offset = bytes[bytes_read++]; + vtx.endian = bytes[bytes_read++]; + + if (r600_bytecode_add_vtx(ctx->bc, &vtx)) { + 
fprintf(stderr, "Error adding vtx\n"); + } + /* Use the Texture Cache */ + ctx->bc->cf_last->inst = EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX; + return bytes_read; +} + static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx, unsigned char * bytes, unsigned num_bytes) { unsigned bytes_read = 0; + unsigned i, byte; while (bytes_read < num_bytes) { char inst_type = bytes[bytes_read++]; switch (inst_type) { @@ -420,6 +458,20 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx, bytes_read = r600_fc_from_byte_stream(ctx, bytes, bytes_read); break; + case 3: + r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE); + for (i = 0; i < 2; i++) { + for (byte = 0 ; byte < 4; byte++) { + ctx->bc->cf_last->isa[i] |= + (bytes[bytes_read++] << (byte * 8)); + } + } + break; + + case 4: + bytes_read = r600_vtx_from_byte_stream(ctx, bytes, + bytes_read); + break; default: /* XXX: Error here */ break; @@ -670,8 +722,8 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->input[i].name = d->Semantic.Name; ctx->shader->input[i].sid = d->Semantic.Index; ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); - ctx->shader->input[i].interpolate = d->Declaration.Interpolate; - ctx->shader->input[i].centroid = d->Declaration.Centroid; + ctx->shader->input[i].interpolate = d->Interp.Interpolate; + ctx->shader->input[i].centroid = d->Interp.Centroid; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First; if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { switch (ctx->shader->input[i].name) { @@ -697,7 +749,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->output[i].sid = d->Semantic.Index; ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First; - ctx->shader->output[i].interpolate = d->Declaration.Interpolate; + ctx->shader->output[i].interpolate = d->Interp.Interpolate; ctx->shader->output[i].write_mask 
= d->Declaration.UsageMask; if (ctx->type == TGSI_PROCESSOR_VERTEX) { switch (d->Semantic.Name) { @@ -5102,7 +5154,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2}, {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans}, {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, @@ -5168,16 +5220,16 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl}, @@ -5276,7 +5328,7 @@ static struct 
r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2}, {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans}, {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, @@ -5342,16 +5394,16 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl}, @@ -5450,7 +5502,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2}, {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2}, {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, @@ -5516,16 +5568,16 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl}, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 3a83b613e58..acf59f80bf4 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -805,9 +805,9 @@ static void 
*r600_create_dsa_state(struct pipe_context *ctx, alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); alpha_ref = fui(state->alpha.ref_value); } + dsa->sx_alpha_test_control = alpha_test_control & 0xff; dsa->alpha_ref = alpha_ref; - r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control); r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control); return rstate; } @@ -1466,6 +1466,11 @@ static void r600_cb(struct r600_context *rctx, struct r600_pipe_state *rstate, blend_bypass = 1; } + if (ntype == V_0280A0_NUMBER_UINT || ntype == V_0280A0_NUMBER_SINT) + rctx->sx_alpha_test_control |= S_028410_ALPHA_TEST_BYPASS(1); + else + rctx->sx_alpha_test_control &= C_028410_ALPHA_TEST_BYPASS; + color_info |= S_0280A0_FORMAT(format) | S_0280A0_COMP_SWAP(swap) | S_0280A0_BLEND_BYPASS(blend_bypass) | diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index ccae7d91d43..d47383558d9 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -244,6 +244,8 @@ void r600_bind_dsa_state(struct pipe_context *ctx, void *state) return; rstate = &dsa->rstate; rctx->states[rstate->id] = rstate; + rctx->sx_alpha_test_control &= ~0xff; + rctx->sx_alpha_test_control |= dsa->sx_alpha_test_control; rctx->alpha_ref = dsa->alpha_ref; rctx->alpha_ref_dirty = true; r600_context_pipe_state_set(rctx, rstate); @@ -796,6 +798,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo) r600_pipe_state_add_reg(&rctx->vgt, R_02823C_CB_SHADER_MASK, 0); r600_pipe_state_add_reg(&rctx->vgt, R_028408_VGT_INDX_OFFSET, info.index_bias); r600_pipe_state_add_reg(&rctx->vgt, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, info.restart_index); + r600_pipe_state_add_reg(&rctx->vgt, R_028410_SX_ALPHA_TEST_CONTROL, 0); r600_pipe_state_add_reg(&rctx->vgt, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, info.primitive_restart); r600_pipe_state_add_reg(&rctx->vgt, 
R_03CFF4_SQ_VTX_START_INST_LOC, info.start_instance); r600_pipe_state_add_reg(&rctx->vgt, R_028A0C_PA_SC_LINE_STIPPLE, 0); @@ -817,6 +820,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo) r600_pipe_state_mod_reg(&rctx->vgt, rctx->cb_shader_mask); r600_pipe_state_mod_reg(&rctx->vgt, info.index_bias); r600_pipe_state_mod_reg(&rctx->vgt, info.restart_index); + r600_pipe_state_mod_reg(&rctx->vgt, rctx->sx_alpha_test_control); r600_pipe_state_mod_reg(&rctx->vgt, info.primitive_restart); r600_pipe_state_mod_reg(&rctx->vgt, info.start_instance); diff --git a/src/gallium/drivers/radeon/AMDGPU.h b/src/gallium/drivers/radeon/AMDGPU.h index eff002a5eae..0f42cb744d3 100644 --- a/src/gallium/drivers/radeon/AMDGPU.h +++ b/src/gallium/drivers/radeon/AMDGPU.h @@ -1,4 +1,4 @@ -//===-- AMDGPU.h - TODO: Add brief description -------===// +//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -6,10 +6,6 @@ // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// #ifndef AMDGPU_H #define AMDGPU_H @@ -19,29 +15,24 @@ #include "llvm/Target/TargetMachine.h" namespace llvm { - class FunctionPass; - class AMDGPUTargetMachine; - - FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS); - FunctionPass *createR600LowerShaderInstructionsPass(TargetMachine &tm); - FunctionPass *createR600LowerInstructionsPass(TargetMachine &tm); - - FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); - FunctionPass *createSIInitMachineFunctionInfoPass(TargetMachine &tm); - FunctionPass *createSILowerShaderInstructionsPass(TargetMachine &tm); - FunctionPass *createSIPropagateImmReadsPass(TargetMachine &tm); - FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); - FunctionPass *createAMDGPUReorderPreloadInstructionsPass(TargetMachine &tm); +class FunctionPass; +class AMDGPUTargetMachine; - FunctionPass *createAMDGPULowerInstructionsPass(TargetMachine &tm); - FunctionPass *createAMDGPULowerShaderInstructionsPass(TargetMachine &tm); +// R600 Passes +FunctionPass* createR600KernelParametersPass(const TargetData* TD); +FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS); +FunctionPass *createR600LowerInstructionsPass(TargetMachine &tm); - FunctionPass *createAMDGPUDelimitInstGroupsPass(TargetMachine &tm); +// SI Passes +FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); +FunctionPass *createSIPropagateImmReadsPass(TargetMachine &tm); +FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); - FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm); +// Passes common to R600 and SI +FunctionPass *createAMDGPULowerInstructionsPass(TargetMachine &tm); +FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm); - FunctionPass *createAMDGPUFixRegClassesPass(TargetMachine &tm); +} 
// End namespace llvm -} /* End namespace llvm */ -#endif /* AMDGPU_H */ +#endif // AMDGPU_H diff --git a/src/gallium/drivers/radeon/AMDGPUConstants.pm b/src/gallium/drivers/radeon/AMDGPUConstants.pm deleted file mode 100644 index b64ff49c187..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUConstants.pm +++ /dev/null @@ -1,44 +0,0 @@ -#===-- AMDGPUConstants.pm - TODO: Add brief description -------===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===----------------------------------------------------------------------===# -# -# TODO: Add full description -# -#===----------------------------------------------------------------------===# - -package AMDGPUConstants; - -use base 'Exporter'; - -use constant CONST_REG_COUNT => 256; -use constant TEMP_REG_COUNT => 128; - -our @EXPORT = ('TEMP_REG_COUNT', 'CONST_REG_COUNT', 'get_hw_index', 'get_chan_str'); - -sub get_hw_index { - my ($index) = @_; - return int($index / 4); -} - -sub get_chan_str { - my ($index) = @_; - my $chan = $index % 4; - if ($chan == 0 ) { - return 'X'; - } elsif ($chan == 1) { - return 'Y'; - } elsif ($chan == 2) { - return 'Z'; - } elsif ($chan == 3) { - return 'W'; - } else { - die("Unknown chan value: $chan"); - } -} - -1; diff --git a/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp b/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp index ce947f8ff78..8e82b8438bb 100644 --- a/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp +++ b/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp @@ -34,7 +34,7 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); }; -} /* End anonymous namespace */ +} // End anonymous namespace char AMDGPUConvertToISAPass::ID = 0; diff --git a/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl b/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl index 1fd4fb04b3e..130eaac72bc 100644 --- a/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl +++ 
b/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl @@ -1,15 +1,32 @@ -#===-- AMDGPUGenInstrEnums.pl - TODO: Add brief description -------===# +#===-- AMDGPUGenInstrEnums.pl - Script for generating instruction enums ----===# # # The LLVM Compiler Infrastructure # # This file is distributed under the University of Illinois Open Source # License. See LICENSE.TXT for details. # -#===----------------------------------------------------------------------===# +#===-----------------------------------------------------------------------===# # -# TODO: Add full description +# This perl script is used to generate the following files: # -#===----------------------------------------------------------------------===# +# 1. perl AMDGPUGenInstrEnums.pl td > AMDGPUInstrEnums.td +# +# This file contains Tablegen constants used for matching hw instructions +# from R600 and SI with functionally similar AMDIL instruction. It also +# contains definitions of floating point constants like pi (in hex notation) +# that are used in some of the shader patterns. +# +# 2. perl AMDGPUGenInstrEnums.pl h > AMDGPUInstrEnums.h +# +# This file contains cpp enums that match the constant values in +# AMDGPUInstrEnums.td +# +# 3. perl AMDGPUGenInstrEnums.pl inc > AMDGPUInstrEnums.include +# +# This file contains a function called GetRealAMDILOpcode which maps the +# constant values defined in AMDGPUInstrEnums.h to the corresponding AMDIL +# instructions. 
+#===-----------------------------------------------------------------------===# use warnings; use strict; @@ -41,7 +58,7 @@ my $FILE_TYPE = $ARGV[0]; open AMDIL, '<', 'AMDILInstructions.td'; -my @INST_ENUMS = ('NONE', 'FEQ', 'FGE', 'FLT', 'FNE', 'MOVE_f32', 'MOVE_i32', 'FTOI', 'ITOF', 'CMOVLOG_f32', 'UGT', 'IGE', 'INE', 'UGE', 'IEQ'); +my @INST_ENUMS = ('NONE', 'FEQ', 'FGE', 'FLT', 'FNE', 'MOVE_f32', 'MOVE_i32', 'FTOI', 'ITOF', 'CMOVLOG_f32', 'UGT', 'IGE', 'INE', 'UGE', 'IEQ', 'BINARY_OR_i32', 'BINARY_NOT_i32'); while (<AMDIL>) { if ($_ =~ /defm\s+([A-Z_]+)\s+:\s+([A-Za-z0-9]+)</) { diff --git a/src/gallium/drivers/radeon/AMDGPUGenShaderPatterns.pl b/src/gallium/drivers/radeon/AMDGPUGenShaderPatterns.pl deleted file mode 100644 index 60523a7b48f..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUGenShaderPatterns.pl +++ /dev/null @@ -1,30 +0,0 @@ -#===-- AMDGPUGenShaderPatterns.pl - TODO: Add brief description -------===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. 
-# -#===----------------------------------------------------------------------===# -# -# TODO: Add full description -# -#===----------------------------------------------------------------------===# - -use strict; -use warnings; - -use AMDGPUConstants; - -my $reg_prefix = $ARGV[0]; - -for (my $i = 0; $i < CONST_REG_COUNT * 4; $i++) { - my $index = get_hw_index($i); - my $chan = get_chan_str($i); -print <<STRING; -def : Pat < - (int_AMDGPU_load_const $i), - (f32 (MOV (f32 $reg_prefix$index\_$chan))) ->; -STRING -} diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp index 2c1052fd8ea..2bdc8a759f2 100644 --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp @@ -1,4 +1,4 @@ -//===-- AMDGPUISelLowering.cpp - TODO: Add brief description -------===// +//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This is the parent TargetLowering class for hardware code gen targets. 
// //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h b/src/gallium/drivers/radeon/AMDGPUISelLowering.h index 3c5beb1cdae..1b3f71006e2 100644 --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.h +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.h @@ -1,4 +1,4 @@ -//===-- AMDGPUISelLowering.h - TODO: Add brief description -------===// +//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This file contains the interface definition of the TargetLowering class +// that is common to all AMD GPUs. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp index 4742283f688..ecd8ac90526 100644 --- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp +++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp @@ -108,9 +108,4 @@ unsigned AMDGPUInstrInfo::getISAOpcode(unsigned opcode) const } } -bool AMDGPUInstrInfo::isRegPreload(const MachineInstr &MI) const -{ - return (get(MI.getOpcode()).TSFlags >> AMDGPU_TFLAG_SHIFTS::PRELOAD_REG) & 0x1; -} - #include "AMDGPUInstrEnums.include" diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h index fa009bc6302..930b41e7191 100644 --- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h +++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h @@ -1,4 +1,4 @@ -//===-- AMDGPUInstrInfo.h - TODO: Add brief description -------===// +//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full 
description +// This file contains the definition of a TargetInstrInfo class that is common +// to all AMD GPUs. // //===----------------------------------------------------------------------===// @@ -21,17 +22,17 @@ namespace llvm { - class AMDGPUTargetMachine; - class MachineFunction; - class MachineInstr; - class MachineInstrBuilder; +class AMDGPUTargetMachine; +class MachineFunction; +class MachineInstr; +class MachineInstrBuilder; - class AMDGPUInstrInfo : public AMDILInstrInfo { - private: +class AMDGPUInstrInfo : public AMDILInstrInfo { +private: AMDGPUTargetMachine & TM; std::map<unsigned, unsigned> amdilToISA; - public: +public: explicit AMDGPUInstrInfo(AMDGPUTargetMachine &tm); virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0; @@ -41,19 +42,9 @@ namespace llvm { virtual MachineInstr * convertToISA(MachineInstr & MI, MachineFunction &MF, DebugLoc DL) const; - bool isRegPreload(const MachineInstr &MI) const; - #include "AMDGPUInstrEnums.h.include" - }; +}; } // End llvm namespace -/* AMDGPU target flags are stored in bits 32-39 */ -namespace AMDGPU_TFLAG_SHIFTS { - enum TFLAGS { - PRELOAD_REG = 32 - }; -} - - #endif // AMDGPUINSTRINFO_H_ diff --git a/src/gallium/drivers/radeon/AMDGPUInstructions.td b/src/gallium/drivers/radeon/AMDGPUInstructions.td index 0433c8dcd95..f689356e488 100644 --- a/src/gallium/drivers/radeon/AMDGPUInstructions.td +++ b/src/gallium/drivers/radeon/AMDGPUInstructions.td @@ -1,4 +1,4 @@ -//===-- AMDGPUInstructions.td - TODO: Add brief description -------===// +//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This file contains instruction defs that are common to all hw codegen +// targets. 
// //===----------------------------------------------------------------------===// @@ -16,14 +17,12 @@ include "AMDGPUInstrEnums.td" class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction { field bits<16> AMDILOp = 0; field bits<3> Gen = 0; - field bit PreloadReg = 0; let Namespace = "AMDIL"; let OutOperandList = outs; let InOperandList = ins; let AsmString = asm; let Pattern = pattern; - let TSFlags{32} = PreloadReg; let TSFlags{42-40} = Gen; let TSFlags{63-48} = AMDILOp; } @@ -37,42 +36,12 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern> let isCodeGenOnly = 1 in { - def EXPORT_REG : AMDGPUShaderInst < - (outs), - (ins GPRF32:$src), - "EXPORT_REG $src", - [(int_AMDGPU_export_reg GPRF32:$src)] - >; - - def LOAD_INPUT : AMDGPUShaderInst < - (outs GPRF32:$dst), - (ins i32imm:$src), - "LOAD_INPUT $dst, $src", - [] >{ - let PreloadReg = 1; - } - def MASK_WRITE : AMDGPUShaderInst < (outs), (ins GPRF32:$src), "MASK_WRITE $src", [] >; - - def RESERVE_REG : AMDGPUShaderInst < - (outs GPRF32:$dst), - (ins i32imm:$src), - "RESERVE_REG $dst, $src", - [(set GPRF32:$dst, (int_AMDGPU_reserve_reg imm:$src))]> { - let PreloadReg = 1; - } - - def STORE_OUTPUT: AMDGPUShaderInst < - (outs GPRF32:$dst), - (ins GPRF32:$src0, i32imm:$src1), - "STORE_OUTPUT $dst, $src0, $src1", - [(set GPRF32:$dst, (int_AMDGPU_store_output GPRF32:$src0, imm:$src1))] - >; } /* Generic helper patterns for intrinsics */ diff --git a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td index d2cda0db936..398fd11431f 100644 --- a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td +++ b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td @@ -1,4 +1,4 @@ -//===-- AMDGPUIntrinsics.td - TODO: Add brief description -------===// +//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===// // // The LLVM Compiler Infrastructure // @@ -7,17 +7,16 @@ // 
//===----------------------------------------------------------------------===// // -// TODO: Add full description +// This file defines intrinsics that are used by all hw codegen targets. // //===----------------------------------------------------------------------===// let TargetPrefix = "AMDGPU", isTarget = 1 in { - def int_AMDGPU_export_reg : Intrinsic<[], [llvm_float_ty], []>; def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], []>; def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], []>; - def int_AMDGPU_reserve_reg : Intrinsic<[llvm_float_ty], [llvm_i32_ty], []>; - def int_AMDGPU_store_output : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty], []>; + def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], []>; + def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], []>; def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], []>; @@ -26,7 +25,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], []>; def int_AMDGPU_floor : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>; - def int_AMDGPU_kill : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>; + def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>; def int_AMDGPU_kilp : Intrinsic<[], [], []>; def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; @@ -35,7 +34,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>; def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; - def 
int_AMDGPU_sge : BinaryIntFloat; + def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; def int_AMDGPU_sin : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>; def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; @@ -43,9 +42,18 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>; + def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], []>; } let TargetPrefix = "TGSI", isTarget = 1 in { diff --git a/src/gallium/drivers/radeon/AMDGPULowerInstructions.cpp b/src/gallium/drivers/radeon/AMDGPULowerInstructions.cpp index b49d0dddf65..2e455fea8ab 100644 --- 
a/src/gallium/drivers/radeon/AMDGPULowerInstructions.cpp +++ b/src/gallium/drivers/radeon/AMDGPULowerInstructions.cpp @@ -1,4 +1,4 @@ -//===-- AMDGPULowerInstructions.cpp - TODO: Add brief description -------===// +//===-- AMDGPULowerInstructions.cpp - AMDGPU lowering pass ----------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This pass lowers unsupported AMDIL MachineInstrs to LLVM pseudo +// MachineInstrs for hw codegen targets. // //===----------------------------------------------------------------------===// @@ -27,7 +28,7 @@ namespace { private: static char ID; TargetMachine &TM; - void lowerVCREATE_v4f32(MachineInstr &MI, MachineBasicBlock::iterator I, + void lowerVCREATE_v4(MachineInstr &MI, MachineBasicBlock::iterator I, MachineBasicBlock &MBB, MachineFunction &MF); public: @@ -56,8 +57,9 @@ bool AMDGPULowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) switch (MI.getOpcode()) { default: continue; - case AMDIL::VCREATE_v4f32: lowerVCREATE_v4f32(MI, I, MBB, MF); break; - + case AMDIL::VCREATE_v4f32: + case AMDIL::VCREATE_v4i32: + lowerVCREATE_v4(MI, I, MBB, MF); break; } MI.eraseFromParent(); } @@ -65,7 +67,7 @@ bool AMDGPULowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) return false; } -void AMDGPULowerInstructionsPass::lowerVCREATE_v4f32(MachineInstr &MI, +void AMDGPULowerInstructionsPass::lowerVCREATE_v4(MachineInstr &MI, MachineBasicBlock::iterator I, MachineBasicBlock &MBB, MachineFunction &MF) { MachineRegisterInfo & MRI = MF.getRegInfo(); diff --git a/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.cpp b/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.cpp deleted file mode 100644 index d33055ccb87..00000000000 --- a/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//===-- AMDGPULowerShaderInstructions.cpp - TODO: Add 
brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// - - -#include "AMDGPULowerShaderInstructions.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" - -using namespace llvm; - -void AMDGPULowerShaderInstructionsPass::preloadRegister(MachineFunction * MF, - const TargetInstrInfo * TII, unsigned physReg, unsigned virtReg) const -{ - if (!MRI->isLiveIn(physReg)) { - MRI->addLiveIn(physReg, virtReg); - MachineBasicBlock &EntryMBB = MF->front(); - BuildMI(MF->front(), EntryMBB.begin(), DebugLoc(), TII->get(TargetOpcode::COPY), - virtReg) - .addReg(physReg); - } else { - /* We can't mark the same register as preloaded twice, but we still must - * associate virtReg with the correct preloaded register. */ - unsigned newReg = MRI->getLiveInVirtReg(physReg); - MRI->replaceRegWith(virtReg, newReg); - } -} diff --git a/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.h b/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.h deleted file mode 100644 index 5ee77fafe2b..00000000000 --- a/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.h +++ /dev/null @@ -1,40 +0,0 @@ -//===-- AMDGPULowerShaderInstructions.h - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// - - -#ifndef AMDGPU_LOWER_SHADER_INSTRUCTIONS -#define AMDGPU_LOWER_SHADER_INSTRUCTIONS - -namespace llvm { - -class MachineFunction; -class MachineRegisterInfo; -class TargetInstrInfo; - -class AMDGPULowerShaderInstructionsPass { - - protected: - MachineRegisterInfo * MRI; - /** - * @param physReg The physical register that will be preloaded. - * @param virtReg The virtual register that currently holds the - * preloaded value. - */ - void preloadRegister(MachineFunction * MF, const TargetInstrInfo * TII, - unsigned physReg, unsigned virtReg) const; -}; - -} // end namespace llvm - - -#endif // AMDGPU_LOWER_SHADER_INSTRUCTIONS diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp b/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp index 162a49116a0..ad48335fd33 100644 --- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp +++ b/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp @@ -1,4 +1,4 @@ -//===-- AMDGPURegisterInfo.cpp - TODO: Add brief description -------===// +//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Parent TargetRegisterInfo class common to all hw codegen targets. 
// //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.h b/src/gallium/drivers/radeon/AMDGPURegisterInfo.h index f4492e9795d..d545c06f69e 100644 --- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.h +++ b/src/gallium/drivers/radeon/AMDGPURegisterInfo.h @@ -1,4 +1,4 @@ -//===-- AMDGPURegisterInfo.h - TODO: Add brief description -------===// +//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This file contains the TargetRegisterInfo interface that is implemented +// by all hw codegen targets. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.td b/src/gallium/drivers/radeon/AMDGPURegisterInfo.td index 173d6622569..1707903ae7e 100644 --- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.td +++ b/src/gallium/drivers/radeon/AMDGPURegisterInfo.td @@ -1,4 +1,4 @@ -//===-- AMDGPURegisterInfo.td - TODO: Add brief description -------===// +//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Tablegen register definitions common to all hw codegen targets. 
// //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/AMDGPUReorderPreloadInstructions.cpp b/src/gallium/drivers/radeon/AMDGPUReorderPreloadInstructions.cpp deleted file mode 100644 index c923f19c39f..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUReorderPreloadInstructions.cpp +++ /dev/null @@ -1,66 +0,0 @@ -//===-- AMDGPUReorderPreloadInstructions.cpp - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "AMDIL.h" -#include "AMDILInstrInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Function.h" - -using namespace llvm; - -namespace { - class AMDGPUReorderPreloadInstructionsPass : public MachineFunctionPass { - - private: - static char ID; - TargetMachine &TM; - - public: - AMDGPUReorderPreloadInstructionsPass(TargetMachine &tm) : - MachineFunctionPass(ID), TM(tm) { } - - bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { return "AMDGPU Reorder Preload Instructions"; } - }; -} /* End anonymous namespace */ - -char AMDGPUReorderPreloadInstructionsPass::ID = 0; - -FunctionPass *llvm::createAMDGPUReorderPreloadInstructionsPass(TargetMachine &tm) { - return new AMDGPUReorderPreloadInstructionsPass(tm); -} - -/* This pass moves instructions that represent preloaded registers to the - * start of the program. 
*/ -bool AMDGPUReorderPreloadInstructionsPass::runOnMachineFunction(MachineFunction &MF) -{ - const AMDGPUInstrInfo * TII = - static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo()); - - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - MachineBasicBlock &MBB = *BB; - for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); - I != MBB.end(); I = Next, Next = llvm::next(I) ) { - MachineInstr &MI = *I; - if (TII->isRegPreload(MI)) { - MF.front().insert(MF.front().begin(), MI.removeFromParent()); - } - } - } - return false; -} diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp index 313349ce01b..c1c21abc9c1 100644 --- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp +++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp @@ -1,4 +1,4 @@ -//===-- AMDGPUTargetMachine.cpp - TODO: Add brief description -------===// +//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// The AMDGPU target machine contains all of the hardware specific information +// needed to emit code for R600 and SI GPUs. 
// //===----------------------------------------------------------------------===// @@ -16,7 +17,6 @@ #include "AMDILTargetMachine.h" #include "R600ISelLowering.h" #include "R600InstrInfo.h" -#include "R600KernelParameters.h" #include "SIISelLowering.h" #include "SIInstrInfo.h" #include "llvm/Analysis/Passes.h" @@ -112,31 +112,28 @@ AMDGPUPassConfig::addPreISel() { const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>(); if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) { - PM.add(createR600KernelParametersPass( + PM->add(createR600KernelParametersPass( getAMDGPUTargetMachine().getTargetData())); } return false; } bool AMDGPUPassConfig::addInstSelector() { - PM.add(createAMDILPeepholeOpt(*TM)); - PM.add(createAMDILISelDag(getAMDGPUTargetMachine())); + PM->add(createAMDILPeepholeOpt(*TM)); + PM->add(createAMDILISelDag(getAMDGPUTargetMachine())); return false; } bool AMDGPUPassConfig::addPreRegAlloc() { const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>(); - PM.add(createAMDGPUReorderPreloadInstructionsPass(*TM)); if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) { - PM.add(createR600LowerShaderInstructionsPass(*TM)); - PM.add(createR600LowerInstructionsPass(*TM)); + PM->add(createR600LowerInstructionsPass(*TM)); } else { - PM.add(createSILowerShaderInstructionsPass(*TM)); - PM.add(createSIAssignInterpRegsPass(*TM)); + PM->add(createSIAssignInterpRegsPass(*TM)); } - PM.add(createAMDGPULowerInstructionsPass(*TM)); - PM.add(createAMDGPUConvertToISAPass(*TM)); + PM->add(createAMDGPULowerInstructionsPass(*TM)); + PM->add(createAMDGPUConvertToISAPass(*TM)); return false; } @@ -150,10 +147,10 @@ bool AMDGPUPassConfig::addPreSched2() { bool AMDGPUPassConfig::addPreEmitPass() { const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>(); - PM.add(createAMDILCFGPreparationPass(*TM)); - PM.add(createAMDILCFGStructurizerPass(*TM)); + PM->add(createAMDILCFGPreparationPass(*TM)); + PM->add(createAMDILCFGStructurizerPass(*TM)); if 
(ST.device()->getGeneration() == AMDILDeviceInfo::HD7XXX) { - PM.add(createSIPropagateImmReadsPass(*TM)); + PM->add(createSIPropagateImmReadsPass(*TM)); } return false; diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.h b/src/gallium/drivers/radeon/AMDGPUTargetMachine.h index d4165b09e84..2428fe638a7 100644 --- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.h +++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.h @@ -1,4 +1,4 @@ -//===-- AMDGPUTargetMachine.h - TODO: Add brief description -------===// +//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// The AMDGPU TargetMachine interface definition for hw codgen targets. // //===----------------------------------------------------------------------===// @@ -52,9 +52,6 @@ public: formatted_raw_ostream &Out, CodeGenFileType FileType, bool DisableVerify); -public: - void dumpCode() { mDump = true; } - bool shouldDumpCode() const { return mDump; } }; } /* End namespace llvm */ diff --git a/src/gallium/drivers/radeon/AMDGPUUtil.cpp b/src/gallium/drivers/radeon/AMDGPUUtil.cpp index a5045436ab4..bd8f5eef697 100644 --- a/src/gallium/drivers/radeon/AMDGPUUtil.cpp +++ b/src/gallium/drivers/radeon/AMDGPUUtil.cpp @@ -1,4 +1,4 @@ -//===-- AMDGPUUtil.cpp - TODO: Add brief description -------===// +//===-- AMDGPUUtil.cpp - AMDGPU Utility functions -------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,39 +7,39 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Common utility functions used by hw codegen targets // //===----------------------------------------------------------------------===// #include "AMDGPUUtil.h" #include "AMDGPURegisterInfo.h" #include "AMDIL.h" +#include "llvm/CodeGen/MachineFunction.h" 
#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -/* Some instructions act as place holders to emulate operations that the GPU - * hardware does automatically. This function can be used to check if - * an opcode falls into this category. */ -bool llvm::isPlaceHolderOpcode(unsigned opcode) +// Some instructions act as place holders to emulate operations that the GPU +// hardware does automatically. This function can be used to check if +// an opcode falls into this category. +bool AMDGPU::isPlaceHolderOpcode(unsigned opcode) { switch (opcode) { default: return false; - case AMDIL::EXPORT_REG: case AMDIL::RETURN: case AMDIL::LOAD_INPUT: case AMDIL::LAST: + case AMDIL::MASK_WRITE: case AMDIL::RESERVE_REG: return true; } } -bool llvm::isTransOp(unsigned opcode) +bool AMDGPU::isTransOp(unsigned opcode) { switch(opcode) { default: return false; @@ -67,10 +67,12 @@ bool llvm::isTransOp(unsigned opcode) } } -bool llvm::isTexOp(unsigned opcode) +bool AMDGPU::isTexOp(unsigned opcode) { switch(opcode) { default: return false; + case AMDIL::TEX_LD: + case AMDIL::TEX_GET_TEXTURE_RESINFO: case AMDIL::TEX_SAMPLE: case AMDIL::TEX_SAMPLE_C: case AMDIL::TEX_SAMPLE_L: @@ -79,11 +81,13 @@ bool llvm::isTexOp(unsigned opcode) case AMDIL::TEX_SAMPLE_C_LB: case AMDIL::TEX_SAMPLE_G: case AMDIL::TEX_SAMPLE_C_G: + case AMDIL::TEX_GET_GRADIENTS_H: + case AMDIL::TEX_GET_GRADIENTS_V: return true; } } -bool llvm::isReductionOp(unsigned opcode) +bool AMDGPU::isReductionOp(unsigned opcode) { switch(opcode) { default: return false; @@ -93,13 +97,25 @@ bool llvm::isReductionOp(unsigned opcode) } } -bool llvm::isFCOp(unsigned opcode) +bool AMDGPU::isCubeOp(unsigned opcode) +{ + switch(opcode) { + default: return false; + case AMDIL::CUBE_r600: + case AMDIL::CUBE_eg: + return 
true; + } +} + + +bool AMDGPU::isFCOp(unsigned opcode) { switch(opcode) { default: return false; case AMDIL::BREAK_LOGICALZ_f32: case AMDIL::BREAK_LOGICALNZ_i32: case AMDIL::BREAK_LOGICALZ_i32: + case AMDIL::BREAK_LOGICALNZ_f32: case AMDIL::CONTINUE_LOGICALNZ_f32: case AMDIL::IF_LOGICALNZ_i32: case AMDIL::IF_LOGICALZ_f32: @@ -112,11 +128,14 @@ bool llvm::isFCOp(unsigned opcode) } } -void AMDGPU::utilAddLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI, - const struct TargetInstrInfo * TII, unsigned physReg, unsigned virtReg) +void AMDGPU::utilAddLiveIn(llvm::MachineFunction * MF, + llvm::MachineRegisterInfo & MRI, + const struct llvm::TargetInstrInfo * TII, + unsigned physReg, unsigned virtReg) { if (!MRI.isLiveIn(physReg)) { MRI.addLiveIn(physReg, virtReg); + MF->front().addLiveIn(physReg); BuildMI(MF->front(), MF->front().begin(), DebugLoc(), TII->get(TargetOpcode::COPY), virtReg) .addReg(physReg); diff --git a/src/gallium/drivers/radeon/AMDGPUUtil.h b/src/gallium/drivers/radeon/AMDGPUUtil.h index 299146e1ba7..15f2ce57af9 100644 --- a/src/gallium/drivers/radeon/AMDGPUUtil.h +++ b/src/gallium/drivers/radeon/AMDGPUUtil.h @@ -1,4 +1,4 @@ -//===-- AMDGPUUtil.h - TODO: Add brief description -------===// +//===-- AMDGPUUtil.h - AMDGPU Utility function declarations -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,43 +7,40 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Declarations for utility functions common to all hw codegen targets. 
// //===----------------------------------------------------------------------===// #ifndef AMDGPU_UTIL_H #define AMDGPU_UTIL_H -#include "AMDGPURegisterInfo.h" -#include "llvm/Support/DataTypes.h" - namespace llvm { -class AMDILMachineFunctionInfo; +class MachineFunction; +class MachineRegisterInfo; +class TargetInstrInfo; + +} -class TargetMachine; -class TargetRegisterInfo; +namespace AMDGPU { bool isPlaceHolderOpcode(unsigned opcode); bool isTransOp(unsigned opcode); bool isTexOp(unsigned opcode); bool isReductionOp(unsigned opcode); +bool isCubeOp(unsigned opcode); bool isFCOp(unsigned opcode); -/* XXX: Move these to AMDGPUInstrInfo.h */ +// XXX: Move these to AMDGPUInstrInfo.h #define MO_FLAG_CLAMP (1 << 0) #define MO_FLAG_NEG (1 << 1) #define MO_FLAG_ABS (1 << 2) #define MO_FLAG_MASK (1 << 3) -} /* End namespace llvm */ - -namespace AMDGPU { - void utilAddLiveIn(llvm::MachineFunction * MF, llvm::MachineRegisterInfo & MRI, const struct llvm::TargetInstrInfo * TII, unsigned physReg, unsigned virtReg); } // End namespace AMDGPU -#endif /* AMDGPU_UTIL_H */ +#endif // AMDGPU_UTIL_H diff --git a/src/gallium/drivers/radeon/AMDIL.h b/src/gallium/drivers/radeon/AMDIL.h index 317ea124f66..6759ccd9527 100644 --- a/src/gallium/drivers/radeon/AMDIL.h +++ b/src/gallium/drivers/radeon/AMDIL.h @@ -137,11 +137,6 @@ enum AddressSpaces { LAST_ADDRESS = 8 }; -// We are piggybacking on the CommentFlag enum in MachineInstr.h to -// set bits in AsmPrinterFlags of the MachineInstruction. We will -// start at bit 16 and allocate down while LLVM will start at bit -// 1 and allocate up. - // This union/struct combination is an easy way to read out the // exact bits that are needed. typedef union ResourceRec { @@ -181,26 +176,6 @@ typedef union ResourceRec { } // namespace AMDILAS -// The OpSwizzle encodes a subset of all possible -// swizzle combinations into a number of bits using -// only the combinations utilized by the backend. 
-// The lower 128 are for source swizzles and the -// upper 128 or for destination swizzles. -// The valid mappings can be found in the -// getSrcSwizzle and getDstSwizzle functions of -// AMDILUtilityFunctions.cpp. -typedef union SwizzleRec { - struct { -#ifdef __BIG_ENDIAN__ - unsigned char dst : 1; - unsigned char swizzle : 7; -#else - unsigned char swizzle : 7; - unsigned char dst : 1; -#endif - } bits; - unsigned char u8all; -} OpSwizzle; // Enums corresponding to AMDIL condition codes for IL. These // values must be kept in sync with the ones in the .td file. namespace AMDILCC { diff --git a/src/gallium/drivers/radeon/AMDIL.td b/src/gallium/drivers/radeon/AMDIL.td index 9bcccac2411..deee290fad5 100644 --- a/src/gallium/drivers/radeon/AMDIL.td +++ b/src/gallium/drivers/radeon/AMDIL.td @@ -1,4 +1,4 @@ -//===-- AMDIL.td - TODO: Add brief description -------===// +//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp index 6625dd77d5f..d7c96573a15 100644 --- a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp +++ b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp @@ -1,4 +1,4 @@ -//===-- AMDIL7XXDevice.cpp - TODO: Add brief description -------===// +//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILBase.td b/src/gallium/drivers/radeon/AMDILBase.td index 2706b211f2d..31ebed31d72 100644 --- a/src/gallium/drivers/radeon/AMDILBase.td +++ b/src/gallium/drivers/radeon/AMDILBase.td @@ -60,6 +60,11 @@ def FeatureDebug : SubtargetFeature<"debug", "CapsOverride[AMDILDeviceInfo::Debug]", "true", "Debug mode is enabled, so disable hardware accelerated address spaces.">; +def FeatureDumpCode : SubtargetFeature <"DumpCode", + "mDumpCode", + "true", + "Dump MachineInstrs in the CodeEmitter">; + 
//===----------------------------------------------------------------------===// // Register File, Calling Conv, Instruction Descriptions diff --git a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp index 289af6f210e..cdcd5e89880 100644 --- a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp +++ b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp @@ -7,22 +7,22 @@ // //==-----------------------------------------------------------------------===// -#define DEBUG_TYPE "structcfg" -#ifdef DEBUG -#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) -#else #define DEBUGME 0 -#endif +#define DEBUG_TYPE "structcfg" #include "AMDILTargetMachine.h" #include "AMDILUtilityFunctions.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DominatorInternals.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -30,8 +30,6 @@ #define FirstNonDebugInstr(A) A->begin() using namespace llvm; -// bixia TODO: move this out to analysis lib. Make this work for both target -// AMDIL and CBackend. // TODO: move-begin. 
//===----------------------------------------------------------------------===// @@ -109,23 +107,6 @@ void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) { // //===----------------------------------------------------------------------===// -#include "AMDILTargetMachine.h" -#include "AMDILUtilityFunctions.h" -#include "llvm/ADT/SCCIterator.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/DominatorInternals.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" - namespace llvm { /// PostDominatorTree Class - Concrete subclass of DominatorTree that is used @@ -3156,10 +3137,6 @@ struct CFGStructTraits<AMDILCFGStructurizer> iterEnd = srcBlk->end(); iter != iterEnd; ++iter) { MachineInstr *instr = func->CloneMachineInstr(iter); - // This is a workaround for LLVM bugzilla 8420 because CloneMachineInstr - // does not clone the AsmPrinterFlags. - instr->setAsmPrinterFlag( - (llvm::MachineInstr::CommentFlag)iter->getAsmPrinterFlags()); newBlk->push_back(instr); } return newBlk; diff --git a/src/gallium/drivers/radeon/AMDILCodeEmitter.h b/src/gallium/drivers/radeon/AMDILCodeEmitter.h index b0ea1455cf9..fa46cbd203d 100644 --- a/src/gallium/drivers/radeon/AMDILCodeEmitter.h +++ b/src/gallium/drivers/radeon/AMDILCodeEmitter.h @@ -1,23 +1,21 @@ -// The LLVM Compiler Infrastructure +//===-- AMDILCodeEmitter.h - AMDIL Code Emitter interface -----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// -//===-- AMDILCodeEmitter.h - TODO: Add brief description -------===// -//===-- AMDILCodeEmitter.h - TODO: Add brief description -------===// -//===-- AMDILCodeEmitter.h - TODO: Add brief description -------===// +//===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure +// CodeEmitter interface for R600 and SI codegen. // +//===----------------------------------------------------------------------===// #ifndef AMDILCODEEMITTER_H #define AMDILCODEEMITTER_H namespace llvm { - /* XXX: Temp HACK to work around tablegen name generation */ class AMDILCodeEmitter { public: uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; diff --git a/src/gallium/drivers/radeon/AMDILConversions.td b/src/gallium/drivers/radeon/AMDILConversions.td index 0db66ae8475..1bc5e4ddf37 100644 --- a/src/gallium/drivers/radeon/AMDILConversions.td +++ b/src/gallium/drivers/radeon/AMDILConversions.td @@ -1,4 +1,4 @@ -//===-- AMDILConversions.td - TODO: Add brief description -------===// +//==- AMDILConversions.td - Type conversion tablegen patterns -*-tablegen -*-=// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILDevice.cpp b/src/gallium/drivers/radeon/AMDILDevice.cpp index aa6d8af7012..4294a8bef0c 100644 --- a/src/gallium/drivers/radeon/AMDILDevice.cpp +++ b/src/gallium/drivers/radeon/AMDILDevice.cpp @@ -1,4 +1,4 @@ -//===-- AMDILDevice.cpp - TODO: Add brief description -------===// +//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp b/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp index 89b8312c294..cbf5b512471 100644 --- a/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp +++ b/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp @@ -1,4 +1,4 @@ -//===-- AMDILDeviceInfo.cpp - TODO: Add brief description -------===// +//===-- AMDILDeviceInfo.cpp - 
AMDILDeviceInfo class -----------------------===// // // The LLVM Compiler Infrastructure // @@ -6,11 +6,16 @@ // License. See LICENSE.TXT for details. // //==-----------------------------------------------------------------------===// +// +// Function that creates DeviceInfo from a device name and other information. +// +//==-----------------------------------------------------------------------===// #include "AMDILDevices.h" #include "AMDILSubtarget.h" using namespace llvm; namespace llvm { +namespace AMDILDeviceInfo { AMDILDevice* getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64bit, bool is64on32bit) { @@ -84,4 +89,5 @@ getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64b return new AMDIL7XXDevice(ptr); } } -} +} // End namespace AMDILDeviceInfo +} // End namespace llvm diff --git a/src/gallium/drivers/radeon/AMDILDeviceInfo.h b/src/gallium/drivers/radeon/AMDILDeviceInfo.h index c4acf9145ae..06ac4322d0f 100644 --- a/src/gallium/drivers/radeon/AMDILDeviceInfo.h +++ b/src/gallium/drivers/radeon/AMDILDeviceInfo.h @@ -1,4 +1,4 @@ -//===-- AMDILDeviceInfo.h - TODO: Add brief description -------===// +//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===// // // The LLVM Compiler Infrastructure // @@ -82,8 +82,8 @@ namespace llvm }; + AMDILDevice* + getDeviceFromName(const std::string &name, AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false); } // namespace AMDILDeviceInfo - llvm::AMDILDevice* - getDeviceFromName(const std::string &name, llvm::AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false); } // namespace llvm #endif // _AMDILDEVICEINFO_H_ diff --git a/src/gallium/drivers/radeon/AMDILDevices.h b/src/gallium/drivers/radeon/AMDILDevices.h index 3fc5fa05669..cfcc3304b4b 100644 --- a/src/gallium/drivers/radeon/AMDILDevices.h +++ b/src/gallium/drivers/radeon/AMDILDevices.h @@ -1,4 +1,4 @@ -//===-- AMDILDevices.h - TODO: Add brief description 
-------===// +//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td b/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td index 445fd608bbb..f10936b8c6c 100644 --- a/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td +++ b/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td @@ -1,4 +1,4 @@ -//===-- AMDILEnumeratedTypes.td - TODO: Add brief description -------===// +//===-- AMDILEnumeratedTypes.td - IL Type definitions --*- tablegen -*-----===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp index 7b5c52345d2..779b2d3df2f 100644 --- a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp +++ b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp @@ -1,4 +1,4 @@ -//===-- AMDILEvergreenDevice.cpp - TODO: Add brief description -------===// +//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp b/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp index ff04d9d55bf..b8898828dd6 100644 --- a/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp +++ b/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp @@ -13,9 +13,12 @@ #include "AMDILDevices.h" #include "AMDILTargetMachine.h" #include "AMDILUtilityFunctions.h" +#include "llvm/ADT/ValueMap.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Compiler.h" +#include <list> +#include <queue> using namespace llvm; @@ -35,13 +38,21 @@ class AMDILDAGToDAGISel : public SelectionDAGISel { public: AMDILDAGToDAGISel(AMDILTargetMachine &TM AMDIL_OPT_LEVEL_DECL); virtual ~AMDILDAGToDAGISel(); - inline SDValue getSmallIPtrImm(unsigned Imm); SDNode *Select(SDNode *N); + virtual const char *getPassName() const; + +private: 
+ inline SDValue getSmallIPtrImm(unsigned Imm); + // Complex pattern selectors bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2); bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2); bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2); + + static bool checkType(const Value *ptr, unsigned int addrspace); + static const Value *getBasePointerValue(const Value *V); + static bool isGlobalStore(const StoreSDNode *N); static bool isPrivateStore(const StoreSDNode *N); static bool isLocalStore(const StoreSDNode *N); @@ -54,8 +65,6 @@ public: static bool isLocalLoad(const LoadSDNode *N); static bool isRegionLoad(const LoadSDNode *N); - virtual const char *getPassName() const; -private: SDNode *xformAtomicInst(SDNode *N); // Include the pieces autogenerated from the target description. @@ -165,26 +174,75 @@ SDNode *AMDILDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } +bool AMDILDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) { + if (!ptr) { + return false; + } + Type *ptrType = ptr->getType(); + return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace; +} + +const Value * AMDILDAGToDAGISel::getBasePointerValue(const Value *V) +{ + if (!V) { + return NULL; + } + const Value *ret = NULL; + ValueMap<const Value *, bool> ValueBitMap; + std::queue<const Value *, std::list<const Value *> > ValueQueue; + ValueQueue.push(V); + while (!ValueQueue.empty()) { + V = ValueQueue.front(); + if (ValueBitMap.find(V) == ValueBitMap.end()) { + ValueBitMap[V] = true; + if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) { + ret = V; + break; + } else if (dyn_cast<GlobalVariable>(V)) { + ret = V; + break; + } else if (dyn_cast<Constant>(V)) { + const ConstantExpr *CE = dyn_cast<ConstantExpr>(V); + if (CE) { + ValueQueue.push(CE->getOperand(0)); + } + } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + ret = AI; + break; + } else if (const Instruction *I = dyn_cast<Instruction>(V)) { + uint32_t numOps = 
I->getNumOperands(); + for (uint32_t x = 0; x < numOps; ++x) { + ValueQueue.push(I->getOperand(x)); + } + } else { + // assert(0 && "Found a Value that we didn't know how to handle!"); + } + } + ValueQueue.pop(); + } + return ret; +} + bool AMDILDAGToDAGISel::isGlobalStore(const StoreSDNode *N) { - return check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS); + return checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS); } bool AMDILDAGToDAGISel::isPrivateStore(const StoreSDNode *N) { - return (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS) - && !check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS) - && !check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS)); + return (!checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS)); } bool AMDILDAGToDAGISel::isLocalStore(const StoreSDNode *N) { - return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS); + return checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS); } bool AMDILDAGToDAGISel::isRegionStore(const StoreSDNode *N) { - return check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS); + return checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS); } bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) { - if (check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)) { + if (checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)) { return true; } MachineMemOperand *MMO = N->getMemOperand(); @@ -195,27 +253,27 @@ bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) { && ((V && dyn_cast<GlobalValue>(V)) || (BV && dyn_cast<GlobalValue>( getBasePointerValue(MMO->getValue()))))) { - return check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS); + return checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS); } else { return false; } } bool AMDILDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) { - return check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS); + return 
checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS); } bool AMDILDAGToDAGISel::isLocalLoad(const LoadSDNode *N) { - return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS); + return checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS); } bool AMDILDAGToDAGISel::isRegionLoad(const LoadSDNode *N) { - return check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS); + return checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS); } bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) { MachineMemOperand *MMO = N->getMemOperand(); - if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) { + if (checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) { if (MMO) { const Value *V = MMO->getValue(); const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V); @@ -228,19 +286,19 @@ bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) { } bool AMDILDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) { - if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) { + if (checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) { // Check to make sure we are not a constant pool load or a constant load // that is marked as a private load if (isCPLoad(N) || isConstantLoad(N, -1)) { return false; } } - if (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS) - && !check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS) - && !check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS) - && !check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS) - && !check_type(N->getSrcValue(), AMDILAS::PARAM_D_ADDRESS) - && !check_type(N->getSrcValue(), AMDILAS::PARAM_I_ADDRESS)) + if (!checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::PARAM_D_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::PARAM_I_ADDRESS)) { return true; } diff --git 
a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp index 54c6ea65065..19b12fcf72b 100644 --- a/src/gallium/drivers/radeon/AMDILISelLowering.cpp +++ b/src/gallium/drivers/radeon/AMDILISelLowering.cpp @@ -623,6 +623,48 @@ translateToOpcode(uint64_t CCCode, unsigned int regClass) assert(0 && "Unknown opcode retrieved"); return 0; } + +/// Helper function used by LowerFormalArguments +static const TargetRegisterClass* +getRegClassFromType(unsigned int type) { + switch (type) { + default: + assert(0 && "Passed in type does not match any register classes."); + case MVT::i8: + return &AMDIL::GPRI8RegClass; + case MVT::i16: + return &AMDIL::GPRI16RegClass; + case MVT::i32: + return &AMDIL::GPRI32RegClass; + case MVT::f32: + return &AMDIL::GPRF32RegClass; + case MVT::i64: + return &AMDIL::GPRI64RegClass; + case MVT::f64: + return &AMDIL::GPRF64RegClass; + case MVT::v4f32: + return &AMDIL::GPRV4F32RegClass; + case MVT::v4i8: + return &AMDIL::GPRV4I8RegClass; + case MVT::v4i16: + return &AMDIL::GPRV4I16RegClass; + case MVT::v4i32: + return &AMDIL::GPRV4I32RegClass; + case MVT::v2f32: + return &AMDIL::GPRV2F32RegClass; + case MVT::v2i8: + return &AMDIL::GPRV2I8RegClass; + case MVT::v2i16: + return &AMDIL::GPRV2I16RegClass; + case MVT::v2i32: + return &AMDIL::GPRV2I32RegClass; + case MVT::v2f64: + return &AMDIL::GPRV2F64RegClass; + case MVT::v2i64: + return &AMDIL::GPRV2I64RegClass; + } +} + SDValue AMDILTargetLowering::LowerMemArgument( SDValue Chain, @@ -2189,6 +2231,7 @@ AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32); return Result; } + /// LowerFORMAL_ARGUMENTS - transform physical registers into /// virtual registers and generate load operations for /// arguments places on the stack. 
@@ -3191,7 +3234,7 @@ AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const amdtm = reinterpret_cast<const AMDILTargetMachine*> (&this->getTargetMachine()); const AMDILSubtarget* - stm = dynamic_cast<const AMDILSubtarget*>( + stm = static_cast<const AMDILSubtarget*>( amdtm->getSubtargetImpl()); if (RST == MVT::f64 && RHSVT.isVector() && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { @@ -3248,7 +3291,7 @@ AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const amdtm = reinterpret_cast<const AMDILTargetMachine*> (&this->getTargetMachine()); const AMDILSubtarget* - stm = dynamic_cast<const AMDILSubtarget*>( + stm = static_cast<const AMDILSubtarget*>( amdtm->getSubtargetImpl()); if (RST == MVT::f64 && RHSVT.isVector() && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { @@ -3314,7 +3357,7 @@ AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT, amdtm = reinterpret_cast<const AMDILTargetMachine*> (&this->getTargetMachine()); const AMDILSubtarget* - stm = dynamic_cast<const AMDILSubtarget*>( + stm = static_cast<const AMDILSubtarget*>( amdtm->getSubtargetImpl()); if (stm->calVersion() >= CAL_VERSION_SC_135) { // unsigned x = RHS; @@ -3489,7 +3532,7 @@ AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const amdtm = reinterpret_cast<const AMDILTargetMachine*> (&this->getTargetMachine()); const AMDILSubtarget* - stm = dynamic_cast<const AMDILSubtarget*>( + stm = static_cast<const AMDILSubtarget*>( amdtm->getSubtargetImpl()); if (LST == MVT::f64 && LHSVT.isVector() && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { @@ -3543,7 +3586,7 @@ AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const amdtm = reinterpret_cast<const AMDILTargetMachine*> (&this->getTargetMachine()); const AMDILSubtarget* - stm = dynamic_cast<const AMDILSubtarget*>( + stm = static_cast<const AMDILSubtarget*>( amdtm->getSubtargetImpl()); if (LST == MVT::f64 && LHSVT.isVector() && 
stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { @@ -3843,7 +3886,6 @@ SDValue AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const { EVT VT = Op.getValueType(); - //printSDValue(Op, 1); SDValue Nodes1; SDValue second; SDValue third; @@ -3965,7 +4007,6 @@ AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); - //printSDValue(Op, 1); const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1)); uint64_t swizzleNum = 0; DebugLoc DL = Op.getDebugLoc(); @@ -4782,7 +4823,7 @@ uint32_t AMDILTargetLowering::genVReg(uint32_t regType) const { return mBB->getParent()->getRegInfo().createVirtualRegister( - getRegClassFromID(regType)); + getTargetMachine().getRegisterInfo()->getRegClass(regType)); } MachineInstrBuilder diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.cpp b/src/gallium/drivers/radeon/AMDILInstrInfo.cpp index fbc3e45b357..cd2fb48209c 100644 --- a/src/gallium/drivers/radeon/AMDILInstrInfo.cpp +++ b/src/gallium/drivers/radeon/AMDILInstrInfo.cpp @@ -10,13 +10,10 @@ // This file contains the AMDIL implementation of the TargetInstrInfo class. 
// //===----------------------------------------------------------------------===// -#include "AMDILInstrInfo.h" -#include "AMDILUtilityFunctions.h" - -#define GET_INSTRINFO_CTOR -#include "AMDILGenInstrInfo.inc" #include "AMDILInstrInfo.h" +#include "AMDIL.h" +#include "AMDILISelLowering.h" #include "AMDILUtilityFunctions.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -24,6 +21,9 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Instructions.h" +#define GET_INSTRINFO_CTOR +#include "AMDILGenInstrInfo.inc" + using namespace llvm; AMDILInstrInfo::AMDILInstrInfo(AMDILTargetMachine &tm) @@ -36,28 +36,6 @@ const AMDILRegisterInfo &AMDILInstrInfo::getRegisterInfo() const { return RI; } -/// Return true if the instruction is a register to register move and leave the -/// source and dest operands in the passed parameters. -bool AMDILInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg, - unsigned int &DstReg, unsigned int &SrcSubIdx, - unsigned int &DstSubIdx) const { - // FIXME: we should look for: - // add with 0 - //assert(0 && "is Move Instruction has not been implemented yet!"); - //return true; - if (!isMove(MI.getOpcode())) { - return false; - } - if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg()) { - return false; - } - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - DstSubIdx = 0; - SrcSubIdx = 0; - return true; -} - bool AMDILInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SubIdx) const { @@ -99,22 +77,7 @@ bool AMDILInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI, // TODO: Implement this function return false; } -#if 0 -void -AMDILInstrInfo::reMaterialize(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SubIdx, - const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const { -// TODO: Implement this function -} -MachineInst 
AMDILInstrInfo::duplicate(MachineInstr *Orig, - MachineFunction &MF) const { -// TODO: Implement this function - return NULL; -} -#endif MachineInstr * AMDILInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, @@ -122,25 +85,6 @@ AMDILInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // TODO: Implement this function return NULL; } -#if 0 -MachineInst AMDILInstrInfo::commuteInstruction(MachineInstr *MI, - bool NewMI = false) const { -// TODO: Implement this function - return NULL; -} -bool -AMDILInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, - unsigned &SrcOpIdx2) const -{ -// TODO: Implement this function -} -bool -AMDILInstrInfo::produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1) const -{ -// TODO: Implement this function -} -#endif bool AMDILInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter, MachineBasicBlock &MBB) const { while (iter != MBB.end()) { @@ -299,43 +243,6 @@ MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) { return MBB->end(); } -bool -AMDILInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - // If we are adding to the end of a basic block we can safely assume that the - // move is caused by a PHI node since all move instructions that are non-PHI - // have already been inserted into the basic blocks Therefor we call the skip - // flow control instruction to move the iterator before the flow control - // instructions and put the move instruction there. - bool phi = (DestReg < 1025) || (SrcReg < 1025); - int movInst = phi ? getMoveInstFromID(DestRC->getID()) - : getPHIMoveInstFromID(DestRC->getID()); - - MachineBasicBlock::iterator iTemp = (I == MBB.end()) ? 
skipFlowControl(&MBB) - : I; - if (DestRC != SrcRC) { - //int convInst; - size_t dSize = DestRC->getSize(); - size_t sSize = SrcRC->getSize(); - if (dSize > sSize) { - // Elements are going to get duplicated. - BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg); - } else if (dSize == sSize) { - // Direct copy, conversions are not handled. - BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg); - } else if (dSize < sSize) { - // Elements are going to get dropped. - BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg); - } - } else { - BuildMI( MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg); - } - return true; -} void AMDILInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, @@ -427,15 +334,11 @@ AMDILInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, if (MI != MBB.end()) { DL = MI->getDebugLoc(); } - MachineInstr *nMI = BuildMI(MBB, MI, DL, get(Opc)) + BuildMI(MBB, MI, DL, get(Opc)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FrameIndex) .addMemOperand(MMO) .addImm(0); - AMDILAS::InstrResEnc curRes; - curRes.bits.ResourceID - = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID); - setAsmPrinterFlags(nMI, curRes); } void @@ -511,16 +414,11 @@ AMDILInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, if (MI != MBB.end()) { DL = MI->getDebugLoc(); } - MachineInstr* nMI = BuildMI(MBB, MI, DL, get(Opc)) + BuildMI(MBB, MI, DL, get(Opc)) .addReg(DestReg, RegState::Define) .addFrameIndex(FrameIndex) .addMemOperand(MMO) .addImm(0); - AMDILAS::InstrResEnc curRes; - curRes.bits.ResourceID - = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID); - setAsmPrinterFlags(nMI, curRes); - } MachineInstr * AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, @@ -569,65 +467,6 @@ AMDILInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, return 0; } -bool -AMDILInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, - int64_t &Offset1, - 
int64_t &Offset2) const { - return false; - if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) { - return false; - } - const MachineSDNode *mload1 = dyn_cast<MachineSDNode>(Load1); - const MachineSDNode *mload2 = dyn_cast<MachineSDNode>(Load2); - if (!mload1 || !mload2) { - return false; - } - if (mload1->memoperands_empty() || - mload2->memoperands_empty()) { - return false; - } - MachineMemOperand *memOp1 = (*mload1->memoperands_begin()); - MachineMemOperand *memOp2 = (*mload2->memoperands_begin()); - const Value *mv1 = memOp1->getValue(); - const Value *mv2 = memOp2->getValue(); - if (!memOp1->isLoad() || !memOp2->isLoad()) { - return false; - } - if (getBasePointerValue(mv1) == getBasePointerValue(mv2)) { - if (isa<GetElementPtrInst>(mv1) && isa<GetElementPtrInst>(mv2)) { - const GetElementPtrInst *gep1 = dyn_cast<GetElementPtrInst>(mv1); - const GetElementPtrInst *gep2 = dyn_cast<GetElementPtrInst>(mv2); - if (!gep1 || !gep2) { - return false; - } - if (gep1->getNumOperands() != gep2->getNumOperands()) { - return false; - } - for (unsigned i = 0, e = gep1->getNumOperands() - 1; i < e; ++i) { - const Value *op1 = gep1->getOperand(i); - const Value *op2 = gep2->getOperand(i); - if (op1 != op2) { - // If any value except the last one is different, return false. 
- return false; - } - } - unsigned size = gep1->getNumOperands()-1; - if (!isa<ConstantInt>(gep1->getOperand(size)) - || !isa<ConstantInt>(gep2->getOperand(size))) { - return false; - } - Offset1 = dyn_cast<ConstantInt>(gep1->getOperand(size))->getSExtValue(); - Offset2 = dyn_cast<ConstantInt>(gep2->getOperand(size))->getSExtValue(); - return true; - } else if (isa<Argument>(mv1) && isa<Argument>(mv2)) { - return false; - } else if (isa<GlobalValue>(mv1) && isa<GlobalValue>(mv2)) { - return false; - } - } - return false; -} - bool AMDILInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const { @@ -654,16 +493,6 @@ bool AMDILInstrInfo::isPredicated(const MachineInstr *MI) const { // TODO: Implement this function return false; } -#if 0 -bool AMDILInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - // TODO: Implement this function -} - -bool AMDILInstrInfo::PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl<MachineOperand> &Pred) const { - // TODO: Implement this function -} -#endif bool AMDILInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, const SmallVectorImpl<MachineOperand> &Pred2) @@ -689,21 +518,112 @@ AMDILInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { return true; } -unsigned AMDILInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - // TODO: Implement this function - return 0; +bool AMDILInstrInfo::isLoadInst(MachineInstr *MI) const { + if (strstr(getName(MI->getOpcode()), "LOADCONST")) { + return false; + } + return strstr(getName(MI->getOpcode()), "LOAD"); } -#if 0 -unsigned -AMDILInstrInfo::GetFunctionSizeInBytes(const MachineFunction &MF) const { - // TODO: Implement this function - return 0; +bool AMDILInstrInfo::isSWSExtLoadInst(MachineInstr *MI) const +{ +switch (MI->getOpcode()) { + default: + break; + ExpandCaseToByteShortTypes(AMDIL::LOCALLOAD); + 
ExpandCaseToByteShortTypes(AMDIL::GLOBALLOAD); + ExpandCaseToByteShortTypes(AMDIL::REGIONLOAD); + ExpandCaseToByteShortTypes(AMDIL::PRIVATELOAD); + ExpandCaseToByteShortTypes(AMDIL::CPOOLLOAD); + ExpandCaseToByteShortTypes(AMDIL::CONSTANTLOAD); + return true; + }; + return false; } -unsigned AMDILInstrInfo::getInlineAsmLength(const char *Str, - const MCAsmInfo &MAI) const { - // TODO: Implement this function - return 0; +bool AMDILInstrInfo::isExtLoadInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "EXTLOAD"); +} + +bool AMDILInstrInfo::isSExtLoadInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "SEXTLOAD"); +} + +bool AMDILInstrInfo::isAExtLoadInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "AEXTLOAD"); +} + +bool AMDILInstrInfo::isZExtLoadInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "ZEXTLOAD"); +} + +bool AMDILInstrInfo::isStoreInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "STORE"); +} + +bool AMDILInstrInfo::isTruncStoreInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "TRUNCSTORE"); +} + +bool AMDILInstrInfo::isAtomicInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "ATOM"); +} + +bool AMDILInstrInfo::isVolatileInst(MachineInstr *MI) const { + if (!MI->memoperands_empty()) { + for (MachineInstr::mmo_iterator mob = MI->memoperands_begin(), + moe = MI->memoperands_end(); mob != moe; ++mob) { + // If there is a volatile mem operand, this is a volatile instruction. 
+ if ((*mob)->isVolatile()) { + return true; + } + } + } + return false; +} +bool AMDILInstrInfo::isGlobalInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "GLOBAL"); +} +bool AMDILInstrInfo::isPrivateInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "PRIVATE"); +} +bool AMDILInstrInfo::isConstantInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "CONSTANT") + || strstr(getName(MI->getOpcode()), "CPOOL"); +} +bool AMDILInstrInfo::isRegionInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "REGION"); +} +bool AMDILInstrInfo::isLocalInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "LOCAL"); +} +bool AMDILInstrInfo::isImageInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "IMAGE"); +} +bool AMDILInstrInfo::isAppendInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "APPEND"); +} +bool AMDILInstrInfo::isRegionAtomic(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "ATOM_R"); +} +bool AMDILInstrInfo::isLocalAtomic(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "ATOM_L"); +} +bool AMDILInstrInfo::isGlobalAtomic(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "ATOM_G") + || isArenaAtomic(MI); +} +bool AMDILInstrInfo::isArenaAtomic(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "ATOM_A"); } -#endif diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.h b/src/gallium/drivers/radeon/AMDILInstrInfo.h index 88dd4e9441a..4121246e6f9 100644 --- a/src/gallium/drivers/radeon/AMDILInstrInfo.h +++ b/src/gallium/drivers/radeon/AMDILInstrInfo.h @@ -40,12 +40,6 @@ public: // always be able to get register info as well (through this method). 
const AMDILRegisterInfo &getRegisterInfo() const; - // Return true if the instruction is a register to register move and leave the - // source and dest operands in the passed parameters. - bool isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg, - unsigned int &DstReg, unsigned int &SrcSubIdx, - unsigned int &DstSubIdx) const; - bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SubIdx) const; @@ -62,29 +56,10 @@ public: const MachineMemOperand *&MMO, int &FrameIndex) const; - -#if 0 - void reMaterialize(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SubIdx, - const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const; - MachineInstr *duplicate(MachineInstr *Orig, - MachineFunction &MF) const; -#endif MachineInstr * convertToThreeAddress(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const; -#if 0 - MachineInstr *commuteInstruction(MachineInstr *MI, - bool NewMI = false) const; - bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, - unsigned &SrcOpIdx2) const; - bool produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1) const; - -#endif bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, @@ -99,12 +74,6 @@ public: const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; - bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -141,8 +110,6 @@ public: unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex = 0) const; - bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, - int64_t &Offset1, int64_t &Offset2) const; 
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const; @@ -151,24 +118,36 @@ public: void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; bool isPredicated(const MachineInstr *MI) const; -#if 0 - bool isUnpredicatedTerminator(const MachineInstr *MI) const; - bool PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl<MachineOperand> &Pred) const; -#endif bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, const SmallVectorImpl<MachineOperand> &Pred2) const; bool DefinesPredicate(MachineInstr *MI, std::vector<MachineOperand> &Pred) const; bool isPredicable(MachineInstr *MI) const; bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; - unsigned GetInstSizeInBytes(const MachineInstr *MI) const; -#if 0 - unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const; - unsigned getInlineAsmLength(const char *Str, - const MCAsmInfo &MAI) const; -#endif - }; + + // Helper functions that check the opcode for status information + bool isLoadInst(llvm::MachineInstr *MI) const; + bool isExtLoadInst(llvm::MachineInstr *MI) const; + bool isSWSExtLoadInst(llvm::MachineInstr *MI) const; + bool isSExtLoadInst(llvm::MachineInstr *MI) const; + bool isZExtLoadInst(llvm::MachineInstr *MI) const; + bool isAExtLoadInst(llvm::MachineInstr *MI) const; + bool isStoreInst(llvm::MachineInstr *MI) const; + bool isTruncStoreInst(llvm::MachineInstr *MI) const; + bool isAtomicInst(llvm::MachineInstr *MI) const; + bool isVolatileInst(llvm::MachineInstr *MI) const; + bool isGlobalInst(llvm::MachineInstr *MI) const; + bool isPrivateInst(llvm::MachineInstr *MI) const; + bool isConstantInst(llvm::MachineInstr *MI) const; + bool isRegionInst(llvm::MachineInstr *MI) const; + bool isLocalInst(llvm::MachineInstr *MI) const; + bool isImageInst(llvm::MachineInstr *MI) const; + bool isAppendInst(llvm::MachineInstr *MI) const; + bool isRegionAtomic(llvm::MachineInstr *MI) 
const; + bool isLocalAtomic(llvm::MachineInstr *MI) const; + bool isGlobalAtomic(llvm::MachineInstr *MI) const; + bool isArenaAtomic(llvm::MachineInstr *MI) const; +}; } diff --git a/src/gallium/drivers/radeon/AMDILInstructions.td b/src/gallium/drivers/radeon/AMDILInstructions.td index f824a67d7ad..db56e2121b3 100644 --- a/src/gallium/drivers/radeon/AMDILInstructions.td +++ b/src/gallium/drivers/radeon/AMDILInstructions.td @@ -1,4 +1,4 @@ -//===-- AMDILInstructions.td - TODO: Add brief description -------===// +//===-- AMDILInstructions.td - AMDIL Instruction definitions --------------===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILMCCodeEmitter.cpp b/src/gallium/drivers/radeon/AMDILMCCodeEmitter.cpp deleted file mode 100644 index 9366f2e7bcb..00000000000 --- a/src/gallium/drivers/radeon/AMDILMCCodeEmitter.cpp +++ /dev/null @@ -1,158 +0,0 @@ -//===---- AMDILMCCodeEmitter.cpp - Convert AMDIL text to AMDIL binary ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//==-----------------------------------------------------------------------===// -// -//===---------------------------------------------------------------------===// - -#define DEBUG_TYPE "amdil-emitter" -#include "AMDIL.h" -#include "AMDILInstrInfo.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; -#if 0 -namespace { - class AMDILMCCodeEmitter : public MCCodeEmitter { - AMDILMCCodeEmitter(const AMDILMCCodeEmitter &);// DO NOT IMPLEMENT - void operator=(const AMDILMCCodeEmitter &); // DO NOT IMPLEMENT - const TargetMachine &TM; - const TargetInstrInfo &TII; - MCContext &Ctx; - bool Is64BitMode; - public: - AMDILMCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit); - ~AMDILMCCodeEmitter(); - unsigned getNumFixupKinds() const; - const MCFixupKindInfo& getFixupKindInfo(MCFixupKind Kind) const; - static unsigned GetAMDILRegNum(const MCOperand &MO); - void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const; - void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte, - raw_ostream &OS) const; - void EmitImmediate(const MCOperand &Disp, unsigned ImmSize, - MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &os, - SmallVectorImpl<MCFixup> &Fixups, int ImmOffset = 0) const; - - void EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const; - - }; // class AMDILMCCodeEmitter -}; // anonymous namespace - -namespace llvm { - MCCodeEmitter *createAMDILMCCodeEmitter(const Target &, - TargetMachine &TM, MCContext &Ctx) - { - return new AMDILMCCodeEmitter(TM, Ctx, false); - } -} - -AMDILMCCodeEmitter::AMDILMCCodeEmitter(TargetMachine &tm, MCContext &ctx - , bool is64Bit) -: TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) -{ - Is64BitMode = is64Bit; -} - -AMDILMCCodeEmitter::~AMDILMCCodeEmitter() -{ -} - -unsigned -AMDILMCCodeEmitter::getNumFixupKinds() const -{ - 
return 0; -} - -const MCFixupKindInfo & -AMDILMCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const -{ -// const static MCFixupKindInfo Infos[] = {}; - if (Kind < FirstTargetFixupKind) { - return MCCodeEmitter::getFixupKindInfo(Kind); - } - assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && - "Invalid kind!"); - return MCCodeEmitter::getFixupKindInfo(Kind); - // return Infos[Kind - FirstTargetFixupKind]; - -} - -void -AMDILMCCodeEmitter::EmitByte(unsigned char C, unsigned &CurByte, - raw_ostream &OS) const -{ - OS << (char) C; - ++CurByte; -} -void -AMDILMCCodeEmitter::EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte, - raw_ostream &OS) const -{ - // Output the constant in little endian byte order - for (unsigned i = 0; i != Size; ++i) { - EmitByte(Val & 255, CurByte, OS); - Val >>= 8; - } -} -void -AMDILMCCodeEmitter::EmitImmediate(const MCOperand &DispOp, unsigned ImmSize, - MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const -{ - // If this is a simple integer displacement that doesn't require a relocation - // emit it now. - if (DispOp.isImm()) { - EmitConstant(DispOp.getImm() + ImmOffset, ImmSize, CurByte, OS); - } - - // If we have an immoffset, add it to the expression - const MCExpr *Expr = DispOp.getExpr(); - - if (ImmOffset) { - Expr = MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(ImmOffset, Ctx), Ctx); - } - // Emit a symbolic constant as a fixup and 4 zeros. - Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind)); - // TODO: Why the 4 zeros? - EmitConstant(0, ImmSize, CurByte, OS); -} - -void -AMDILMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const -{ -#if 0 - unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = TII.get(Opcode); - unsigned TSFlags = Desc.TSFlags; - - // Keep track of the current byte being emitted. 
- unsigned CurByte = 0; - - unsigned NumOps = Desc.getNumOperands(); - unsigned CurOp = 0; - - unsigned char BaseOpcode = 0; -#ifndef NDEBUG - // FIXME: Verify. - if (// !Desc.isVariadic() && - CurOp != NumOps) { - errs() << "Cannot encode all operands of: "; - MI.dump(); - errs() << '\n'; - abort(); - } -#endif -#endif -} -#endif diff --git a/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp b/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp index b8e536361f0..5cb988785e2 100644 --- a/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp +++ b/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp @@ -8,17 +8,11 @@ //==-----------------------------------------------------------------------===// -#define DEBUG_TYPE "machine_peephole" -#if !defined(NDEBUG) -#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) -#else -#define DEBUGME (false) -#endif - #include "AMDIL.h" +#include "AMDILInstrInfo.h" #include "AMDILSubtarget.h" -#include "AMDILUtilityFunctions.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" @@ -56,7 +50,7 @@ namespace llvm AMDILMachinePeephole::AMDILMachinePeephole(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) : MachineFunctionPass(ID), TM(tm) { - mDebug = DEBUGME; + mDebug = false; } bool @@ -64,6 +58,8 @@ AMDILMachinePeephole::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>(); + const AMDILInstrInfo * AMDILII = + static_cast<const AMDILInstrInfo *>(TM.getInstrInfo()); for (MachineFunction::iterator MBB = MF.begin(), MBE = MF.end(); MBB != MBE; ++MBB) { MachineBasicBlock *mb = MBB; @@ -74,7 +70,7 @@ AMDILMachinePeephole::runOnMachineFunction(MachineFunction &MF) name = TM.getInstrInfo()->getName(mi->getOpcode()); switch (mi->getOpcode()) { default: - if (isAtomicInst(TM.getInstrInfo(), mi)) { + if 
(AMDILII->isAtomicInst(mi)) { // If we don't support the hardware accellerated address spaces, // then the atomic needs to be transformed to the global atomic. if (strstr(name, "_L_") @@ -94,7 +90,8 @@ AMDILMachinePeephole::runOnMachineFunction(MachineFunction &MF) TM.getInstrInfo()->get( (mi->getOpcode() - AMDIL::ATOM_R_ADD) + AMDIL::ATOM_G_ADD)); } - } else if ((isLoadInst(TM.getInstrInfo(), mi) || isStoreInst(TM.getInstrInfo(), mi)) && isVolatileInst(TM.getInstrInfo(), mi)) { + } else if ((AMDILII->isLoadInst(mi) || AMDILII->isStoreInst(mi)) + && AMDILII->isVolatileInst(mi)) { insertFence(MIB); } continue; diff --git a/src/gallium/drivers/radeon/AMDILMultiClass.td b/src/gallium/drivers/radeon/AMDILMultiClass.td index 92691db52fd..d6828178ba7 100644 --- a/src/gallium/drivers/radeon/AMDILMultiClass.td +++ b/src/gallium/drivers/radeon/AMDILMultiClass.td @@ -1,4 +1,4 @@ -//===-- AMDILMultiClass.td - TODO: Add brief description -------===// +//===-- AMDILMultiClass.td - AMDIL Multiclass defs ---*- tablegen -*-------===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILNIDevice.cpp b/src/gallium/drivers/radeon/AMDILNIDevice.cpp index 8fda1c18ae5..d4112cda0b5 100644 --- a/src/gallium/drivers/radeon/AMDILNIDevice.cpp +++ b/src/gallium/drivers/radeon/AMDILNIDevice.cpp @@ -1,4 +1,4 @@ -//===-- AMDILNIDevice.cpp - TODO: Add brief description -------===// +//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp index 5fe9f53c8c8..b62c7ab048b 100644 --- a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp +++ b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp @@ -1,4 +1,4 @@ -//===-- AMDILPeepholeOptimizer.cpp - TODO: Add brief description -------===// +//===-- AMDILPeepholeOptimizer.cpp - AMDIL Peephole optimizations ---------===// // // The 
LLVM Compiler Infrastructure // @@ -7,20 +7,14 @@ // //==-----------------------------------------------------------------------===// -#define DEBUG_TYPE "PeepholeOpt" -#ifdef DEBUG -#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) -#else -#define DEBUGME 0 -#endif - #include "AMDILAlgorithms.tpp" #include "AMDILDevices.h" -#include "AMDILUtilityFunctions.h" +#include "AMDILInstrInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/Constants.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/Function.h" @@ -41,6 +35,9 @@ using namespace llvm; // The Peephole optimization pass is used to do simple last minute optimizations // that are required for correct code or to remove redundant functions namespace { + +class OpaqueType; + class LLVM_LIBRARY_VISIBILITY AMDILPeepholeOpt : public FunctionPass { public: TargetMachine &TM; @@ -114,6 +111,19 @@ private: // samplers at compile time. bool propagateSamplerInst(CallInst *CI); + // Helper functions + + // Group of functions that recursively calculate the size of a structure based + // on it's sub-types. 
+ size_t getTypeSize(Type * const T, bool dereferencePtr = false); + size_t getTypeSize(StructType * const ST, bool dereferencePtr = false); + size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false); + size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false); + size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false); + size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false); + size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false); + size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false); + LLVMContext *mCTX; Function *mF; const AMDILSubtarget *mSTM; @@ -134,7 +144,7 @@ namespace llvm { AMDILPeepholeOpt::AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) : FunctionPass(ID), TM(tm) { - mDebug = DEBUGME; + mDebug = false; optLevel = TM.getOptLevel(); } @@ -1136,3 +1146,106 @@ AMDILPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const FunctionPass::getAnalysisUsage(AU); AU.setPreservesAll(); } + +size_t AMDILPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) { + size_t size = 0; + if (!T) { + return size; + } + switch (T->getTypeID()) { + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + case Type::LabelTyID: + assert(0 && "These types are not supported by this backend"); + default: + case Type::FloatTyID: + case Type::DoubleTyID: + size = T->getPrimitiveSizeInBits() >> 3; + break; + case Type::PointerTyID: + size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr); + break; + case Type::IntegerTyID: + size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr); + break; + case Type::StructTyID: + size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr); + break; + case Type::ArrayTyID: + size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr); + break; + case Type::FunctionTyID: + size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr); + break; + case Type::VectorTyID: + size = 
getTypeSize(dyn_cast<VectorType>(T), dereferencePtr); + break; + }; + return size; +} + +size_t AMDILPeepholeOpt::getTypeSize(StructType * const ST, + bool dereferencePtr) { + size_t size = 0; + if (!ST) { + return size; + } + Type *curType; + StructType::element_iterator eib; + StructType::element_iterator eie; + for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) { + curType = *eib; + size += getTypeSize(curType, dereferencePtr); + } + return size; +} + +size_t AMDILPeepholeOpt::getTypeSize(IntegerType * const IT, + bool dereferencePtr) { + return IT ? (IT->getBitWidth() >> 3) : 0; +} + +size_t AMDILPeepholeOpt::getTypeSize(FunctionType * const FT, + bool dereferencePtr) { + assert(0 && "Should not be able to calculate the size of an function type"); + return 0; +} + +size_t AMDILPeepholeOpt::getTypeSize(ArrayType * const AT, + bool dereferencePtr) { + return (size_t)(AT ? (getTypeSize(AT->getElementType(), + dereferencePtr) * AT->getNumElements()) + : 0); +} + +size_t AMDILPeepholeOpt::getTypeSize(VectorType * const VT, + bool dereferencePtr) { + return VT ? 
(VT->getBitWidth() >> 3) : 0; +} + +size_t AMDILPeepholeOpt::getTypeSize(PointerType * const PT, + bool dereferencePtr) { + if (!PT) { + return 0; + } + Type *CT = PT->getElementType(); + if (CT->getTypeID() == Type::StructTyID && + PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) { + return getTypeSize(dyn_cast<StructType>(CT)); + } else if (dereferencePtr) { + size_t size = 0; + for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) { + size += getTypeSize(PT->getContainedType(x), dereferencePtr); + } + return size; + } else { + return 4; + } +} + +size_t AMDILPeepholeOpt::getTypeSize(OpaqueType * const OT, + bool dereferencePtr) { + //assert(0 && "Should not be able to calculate the size of an opaque type"); + return 4; +} diff --git a/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp b/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp index 5588233378c..d7c1dc74b8b 100644 --- a/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp +++ b/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp @@ -20,7 +20,8 @@ #include "AMDILRegisterInfo.h" #include "AMDIL.h" -#include "AMDILUtilityFunctions.h" +#include "AMDILInstrInfo.h" +#include "AMDILTargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -109,7 +110,9 @@ AMDILRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (!MI.getOperand(x).isFI()) { continue; } - bool def = isStoreInst(TM.getInstrInfo(), &MI); + const AMDILInstrInfo * AMDILII = + static_cast<const AMDILInstrInfo *>(TM.getInstrInfo()); + bool def = AMDILII->isStoreInst(&MI); int FrameIndex = MI.getOperand(x).getIndex(); int64_t Offset = MFI->getObjectOffset(FrameIndex); //int64_t Size = MF.getFrameInfo()->getObjectSize(FrameIndex); diff --git a/src/gallium/drivers/radeon/AMDILSIDevice.cpp b/src/gallium/drivers/radeon/AMDILSIDevice.cpp index ce560984ef9..ae402a5d1f7 100644 --- a/src/gallium/drivers/radeon/AMDILSIDevice.cpp +++ 
b/src/gallium/drivers/radeon/AMDILSIDevice.cpp @@ -1,49 +1,49 @@ -//===-- AMDILSIDevice.cpp - TODO: Add brief description -------===// +//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -//==-----------------------------------------------------------------------===//
-#include "AMDILSIDevice.h"
-#include "AMDILEvergreenDevice.h"
-#include "AMDILNIDevice.h"
-#include "AMDILSubtarget.h"
+//==-----------------------------------------------------------------------===// +#include "AMDILSIDevice.h" +#include "AMDILEvergreenDevice.h" +#include "AMDILNIDevice.h" +#include "AMDILSubtarget.h" -using namespace llvm;
-
-AMDILSIDevice::AMDILSIDevice(AMDILSubtarget *ST)
- : AMDILEvergreenDevice(ST)
-{
-}
-AMDILSIDevice::~AMDILSIDevice()
-{
-}
-
-size_t
-AMDILSIDevice::getMaxLDSSize() const
-{
- if (usesHardware(AMDILDeviceInfo::LocalMem)) {
- return MAX_LDS_SIZE_900;
- } else {
- return 0;
- }
-}
-
-uint32_t
-AMDILSIDevice::getGeneration() const
-{
- return AMDILDeviceInfo::HD7XXX;
-}
-
-std::string
-AMDILSIDevice::getDataLayout() const
-{
- return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
- "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
- "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
- "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
- "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
- "-n8:16:32:64");
-}
+using namespace llvm; + +AMDILSIDevice::AMDILSIDevice(AMDILSubtarget *ST) + : AMDILEvergreenDevice(ST) +{ +} +AMDILSIDevice::~AMDILSIDevice() +{ +} + +size_t +AMDILSIDevice::getMaxLDSSize() const +{ + if (usesHardware(AMDILDeviceInfo::LocalMem)) { + return MAX_LDS_SIZE_900; + } else { + return 0; + } +} + +uint32_t +AMDILSIDevice::getGeneration() const +{ + return AMDILDeviceInfo::HD7XXX; +} + +std::string +AMDILSIDevice::getDataLayout() const +{ + return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16" + "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" + "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" + "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" + "-v512:512:512-v1024:1024:1024-v2048:2048:2048" + "-n8:16:32:64"); +} diff --git a/src/gallium/drivers/radeon/AMDILSIDevice.h b/src/gallium/drivers/radeon/AMDILSIDevice.h index 69f35a0588d..b272af7cfcf 100644 --- a/src/gallium/drivers/radeon/AMDILSIDevice.h +++ b/src/gallium/drivers/radeon/AMDILSIDevice.h @@ -1,45 +1,45 @@ -//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
+//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -//==-----------------------------------------------------------------------===//
-//
-// Interface for the subtarget data classes.
-//
-//===---------------------------------------------------------------------===//
-// This file will define the interface that each generation needs to
-// implement in order to correctly answer queries on the capabilities of the
+//==-----------------------------------------------------------------------===// +// +// Interface for the subtarget data classes. +// +//===---------------------------------------------------------------------===// +// This file will define the interface that each generation needs to +// implement in order to correctly answer queries on the capabilities of the // specific hardware.
-//===---------------------------------------------------------------------===//
-#ifndef _AMDILSIDEVICE_H_
-#define _AMDILSIDEVICE_H_
-#include "AMDILEvergreenDevice.h"
-#include "AMDILSubtarget.h"
+//===---------------------------------------------------------------------===// +#ifndef _AMDILSIDEVICE_H_ +#define _AMDILSIDEVICE_H_ +#include "AMDILEvergreenDevice.h" +#include "AMDILSubtarget.h" + +namespace llvm { + class AMDILSubtarget; +//===---------------------------------------------------------------------===// +// SI generation of devices and their respective sub classes +//===---------------------------------------------------------------------===// + +// The AMDILSIDevice is the base class for all Southern Islands series of +// cards. It is very similar to the AMDILEvergreenDevice, with the major +// exception being differences in wavefront size and hardware capabilities. The +// SI devices are all 64 wide wavefronts and also add support for signed 24 bit +// integer operations + + class AMDILSIDevice : public AMDILEvergreenDevice { + public: + AMDILSIDevice(AMDILSubtarget*); + virtual ~AMDILSIDevice(); + virtual size_t getMaxLDSSize() const; + virtual uint32_t getGeneration() const; + virtual std::string getDataLayout() const; + protected: + }; // AMDILSIDevice -namespace llvm {
- class AMDILSubtarget;
-//===---------------------------------------------------------------------===//
-// SI generation of devices and their respective sub classes
-//===---------------------------------------------------------------------===//
-
-// The AMDILSIDevice is the base class for all Northern Island series of
-// cards. It is very similiar to the AMDILEvergreenDevice, with the major
-// exception being differences in wavefront size and hardware capabilities. The
-// SI devices are all 64 wide wavefronts and also add support for signed 24 bit
-// integer operations
-
- class AMDILSIDevice : public AMDILEvergreenDevice {
- public:
- AMDILSIDevice(AMDILSubtarget*);
- virtual ~AMDILSIDevice();
- virtual size_t getMaxLDSSize() const;
- virtual uint32_t getGeneration() const;
- virtual std::string getDataLayout() const;
- protected:
- }; // AMDILSIDevice
-
-} // namespace llvm
-#endif // _AMDILSIDEVICE_H_
+} // namespace llvm +#endif // _AMDILSIDEVICE_H_ diff --git a/src/gallium/drivers/radeon/AMDILSubtarget.cpp b/src/gallium/drivers/radeon/AMDILSubtarget.cpp index 11b6bbe0c01..249cb03f4a3 100644 --- a/src/gallium/drivers/radeon/AMDILSubtarget.cpp +++ b/src/gallium/drivers/radeon/AMDILSubtarget.cpp @@ -27,7 +27,8 @@ using namespace llvm; #define GET_SUBTARGETINFO_TARGET_DESC #include "AMDILGenSubtargetInfo.inc" -AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS) : AMDILGenSubtargetInfo( TT, CPU, FS ) +AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS) : AMDILGenSubtargetInfo( TT, CPU, FS ), + mDumpCode(false) { memset(CapsOverride, 0, sizeof(*CapsOverride) * AMDILDeviceInfo::MaxNumberCapabilities); @@ -93,7 +94,7 @@ AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::St } #endif mDevName = GPU; - mDevice = getDeviceFromName(mDevName, this, mIs64bit); + mDevice = AMDILDeviceInfo::getDeviceFromName(mDevName, this, mIs64bit); } AMDILSubtarget::~AMDILSubtarget() { diff --git a/src/gallium/drivers/radeon/AMDILSubtarget.h b/src/gallium/drivers/radeon/AMDILSubtarget.h index a4b0e34ada7..38fcb859ac6 100644 --- a/src/gallium/drivers/radeon/AMDILSubtarget.h +++ b/src/gallium/drivers/radeon/AMDILSubtarget.h @@ -42,6 +42,7 @@ namespace llvm { uint32_t mVersion; bool mIs64bit; bool mIs32on64bit; + bool mDumpCode; public: AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS); virtual ~AMDILSubtarget(); @@ -67,6 +68,7 @@ namespace llvm { ParseSubtargetFeatures( llvm::StringRef CPU, llvm::StringRef FS); + bool dumpCode() const { return mDumpCode; } }; diff --git a/src/gallium/drivers/radeon/AMDILTargetMachine.cpp b/src/gallium/drivers/radeon/AMDILTargetMachine.cpp index 77fac1d97bd..0879d43ad72 100644 --- a/src/gallium/drivers/radeon/AMDILTargetMachine.cpp +++ b/src/gallium/drivers/radeon/AMDILTargetMachine.cpp @@ -150,8 +150,8 @@ bool 
AMDILPassConfig::addPreISel() bool AMDILPassConfig::addInstSelector() { - PM.add(createAMDILPeepholeOpt(*TM)); - PM.add(createAMDILISelDag(getAMDILTargetMachine())); + PM->add(createAMDILPeepholeOpt(*TM)); + PM->add(createAMDILISelDag(getAMDILTargetMachine())); return false; } @@ -162,7 +162,7 @@ bool AMDILPassConfig::addPreRegAlloc() llvm::RegisterScheduler::setDefault(&llvm::createSourceListDAGScheduler); } - PM.add(createAMDILMachinePeephole(*TM)); + PM->add(createAMDILMachinePeephole(*TM)); return false; } @@ -175,8 +175,8 @@ bool AMDILPassConfig::addPostRegAlloc() { /// true if -print-machineinstrs should print out the code after the passes. bool AMDILPassConfig::addPreEmitPass() { - PM.add(createAMDILCFGPreparationPass(*TM)); - PM.add(createAMDILCFGStructurizerPass(*TM)); + PM->add(createAMDILCFGPreparationPass(*TM)); + PM->add(createAMDILCFGStructurizerPass(*TM)); return true; } diff --git a/src/gallium/drivers/radeon/AMDILTokenDesc.td b/src/gallium/drivers/radeon/AMDILTokenDesc.td index b81f593506f..2dafb2cd559 100644 --- a/src/gallium/drivers/radeon/AMDILTokenDesc.td +++ b/src/gallium/drivers/radeon/AMDILTokenDesc.td @@ -1,4 +1,4 @@ -//===-- AMDILTokenDesc.td - TODO: Add brief description -------===// +//===-- AMDILTokenDesc.td - AMDIL Token Definitions --*- tablegen -*-----===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILUtilityFunctions.cpp b/src/gallium/drivers/radeon/AMDILUtilityFunctions.cpp deleted file mode 100644 index f2ef4eb7771..00000000000 --- a/src/gallium/drivers/radeon/AMDILUtilityFunctions.cpp +++ /dev/null @@ -1,683 +0,0 @@ -//===-- AMDILUtilityFunctions.cpp - AMDIL Utility Functions ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//==-----------------------------------------------------------------------===// -// -// This file provides the implementations of functions that are declared in the -// AMDILUtilityFUnctions.h file. -// -//===----------------------------------------------------------------------===// -#include "AMDILUtilityFunctions.h" -#include "AMDILISelLowering.h" -#include "llvm/ADT/ValueMap.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instruction.h" -#include "llvm/Instructions.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Type.h" - -#include <cstdio> -#include <list> -#include <queue> - -#define GET_OPCODE_NAME(TII, MI) \ - TII->getName(MI->getOpcode()) - - -using namespace llvm; -int64_t GET_SCALAR_SIZE(llvm::Type *A) { - return A->getScalarSizeInBits(); -} - -const TargetRegisterClass * getRegClassFromID(unsigned int ID) { - switch (ID) { - default: - assert(0 && "Passed in ID does not match any register classes."); - return NULL; - case AMDIL::GPRI8RegClassID: - return &AMDIL::GPRI8RegClass; - case AMDIL::GPRI16RegClassID: - return &AMDIL::GPRI16RegClass; - case AMDIL::GPRI32RegClassID: - return &AMDIL::GPRI32RegClass; - case AMDIL::GPRF32RegClassID: - return &AMDIL::GPRF32RegClass; - case AMDIL::GPRI64RegClassID: - return &AMDIL::GPRI64RegClass; - case AMDIL::GPRF64RegClassID: - return &AMDIL::GPRF64RegClass; - case AMDIL::GPRV4F32RegClassID: - return &AMDIL::GPRV4F32RegClass; - case AMDIL::GPRV4I8RegClassID: - return &AMDIL::GPRV4I8RegClass; - case AMDIL::GPRV4I16RegClassID: - return &AMDIL::GPRV4I16RegClass; - case AMDIL::GPRV4I32RegClassID: - return &AMDIL::GPRV4I32RegClass; - case AMDIL::GPRV2F32RegClassID: - return &AMDIL::GPRV2F32RegClass; - case AMDIL::GPRV2I8RegClassID: - return &AMDIL::GPRV2I8RegClass; - case AMDIL::GPRV2I16RegClassID: - return &AMDIL::GPRV2I16RegClass; - case AMDIL::GPRV2I32RegClassID: - return 
&AMDIL::GPRV2I32RegClass; - case AMDIL::GPRV2F64RegClassID: - return &AMDIL::GPRV2F64RegClass; - case AMDIL::GPRV2I64RegClassID: - return &AMDIL::GPRV2I64RegClass; - }; -} - -unsigned int getMoveInstFromID(unsigned int ID) { - switch (ID) { - default: - assert(0 && "Passed in ID does not match any move instructions."); - case AMDIL::GPRI8RegClassID: - return AMDIL::MOVE_i8; - case AMDIL::GPRI16RegClassID: - return AMDIL::MOVE_i16; - case AMDIL::GPRI32RegClassID: - return AMDIL::MOVE_i32; - case AMDIL::GPRF32RegClassID: - return AMDIL::MOVE_f32; - case AMDIL::GPRI64RegClassID: - return AMDIL::MOVE_i64; - case AMDIL::GPRF64RegClassID: - return AMDIL::MOVE_f64; - case AMDIL::GPRV4F32RegClassID: - return AMDIL::MOVE_v4f32; - case AMDIL::GPRV4I8RegClassID: - return AMDIL::MOVE_v4i8; - case AMDIL::GPRV4I16RegClassID: - return AMDIL::MOVE_v4i16; - case AMDIL::GPRV4I32RegClassID: - return AMDIL::MOVE_v4i32; - case AMDIL::GPRV2F32RegClassID: - return AMDIL::MOVE_v2f32; - case AMDIL::GPRV2I8RegClassID: - return AMDIL::MOVE_v2i8; - case AMDIL::GPRV2I16RegClassID: - return AMDIL::MOVE_v2i16; - case AMDIL::GPRV2I32RegClassID: - return AMDIL::MOVE_v2i32; - case AMDIL::GPRV2F64RegClassID: - return AMDIL::MOVE_v2f64; - case AMDIL::GPRV2I64RegClassID: - return AMDIL::MOVE_v2i64; - }; - return -1; -} - -unsigned int getPHIMoveInstFromID(unsigned int ID) { - switch (ID) { - default: - assert(0 && "Passed in ID does not match any move instructions."); - case AMDIL::GPRI8RegClassID: - return AMDIL::PHIMOVE_i8; - case AMDIL::GPRI16RegClassID: - return AMDIL::PHIMOVE_i16; - case AMDIL::GPRI32RegClassID: - return AMDIL::PHIMOVE_i32; - case AMDIL::GPRF32RegClassID: - return AMDIL::PHIMOVE_f32; - case AMDIL::GPRI64RegClassID: - return AMDIL::PHIMOVE_i64; - case AMDIL::GPRF64RegClassID: - return AMDIL::PHIMOVE_f64; - case AMDIL::GPRV4F32RegClassID: - return AMDIL::PHIMOVE_v4f32; - case AMDIL::GPRV4I8RegClassID: - return AMDIL::PHIMOVE_v4i8; - case AMDIL::GPRV4I16RegClassID: - return 
AMDIL::PHIMOVE_v4i16; - case AMDIL::GPRV4I32RegClassID: - return AMDIL::PHIMOVE_v4i32; - case AMDIL::GPRV2F32RegClassID: - return AMDIL::PHIMOVE_v2f32; - case AMDIL::GPRV2I8RegClassID: - return AMDIL::PHIMOVE_v2i8; - case AMDIL::GPRV2I16RegClassID: - return AMDIL::PHIMOVE_v2i16; - case AMDIL::GPRV2I32RegClassID: - return AMDIL::PHIMOVE_v2i32; - case AMDIL::GPRV2F64RegClassID: - return AMDIL::PHIMOVE_v2f64; - case AMDIL::GPRV2I64RegClassID: - return AMDIL::PHIMOVE_v2i64; - }; - return -1; -} - -const TargetRegisterClass* getRegClassFromType(unsigned int type) { - switch (type) { - default: - assert(0 && "Passed in type does not match any register classes."); - case MVT::i8: - return &AMDIL::GPRI8RegClass; - case MVT::i16: - return &AMDIL::GPRI16RegClass; - case MVT::i32: - return &AMDIL::GPRI32RegClass; - case MVT::f32: - return &AMDIL::GPRF32RegClass; - case MVT::i64: - return &AMDIL::GPRI64RegClass; - case MVT::f64: - return &AMDIL::GPRF64RegClass; - case MVT::v4f32: - return &AMDIL::GPRV4F32RegClass; - case MVT::v4i8: - return &AMDIL::GPRV4I8RegClass; - case MVT::v4i16: - return &AMDIL::GPRV4I16RegClass; - case MVT::v4i32: - return &AMDIL::GPRV4I32RegClass; - case MVT::v2f32: - return &AMDIL::GPRV2F32RegClass; - case MVT::v2i8: - return &AMDIL::GPRV2I8RegClass; - case MVT::v2i16: - return &AMDIL::GPRV2I16RegClass; - case MVT::v2i32: - return &AMDIL::GPRV2I32RegClass; - case MVT::v2f64: - return &AMDIL::GPRV2F64RegClass; - case MVT::v2i64: - return &AMDIL::GPRV2I64RegClass; - } -} - -void printSDNode(const SDNode *N) { - printf("Opcode: %d isTargetOpcode: %d isMachineOpcode: %d\n", - N->getOpcode(), N->isTargetOpcode(), N->isMachineOpcode()); - printf("Empty: %d OneUse: %d Size: %d NodeID: %d\n", - N->use_empty(), N->hasOneUse(), (int)N->use_size(), N->getNodeId()); - for (unsigned int i = 0; i < N->getNumOperands(); ++i) { - printf("OperandNum: %d ValueCount: %d ValueType: %d\n", - i, N->getNumValues(), N->getValueType(0) .getSimpleVT().SimpleTy); - 
printSDValue(N->getOperand(i), 0); - } -} - -void printSDValue(const SDValue &Op, int level) { - printf("\nOp: %p OpCode: %d NumOperands: %d ", (void*)&Op, Op.getOpcode(), - Op.getNumOperands()); - printf("IsTarget: %d IsMachine: %d ", Op.isTargetOpcode(), - Op.isMachineOpcode()); - if (Op.isMachineOpcode()) { - printf("MachineOpcode: %d\n", Op.getMachineOpcode()); - } else { - printf("\n"); - } - EVT vt = Op.getValueType(); - printf("ValueType: %d \n", vt.getSimpleVT().SimpleTy); - printf("UseEmpty: %d OneUse: %d\n", Op.use_empty(), Op.hasOneUse()); - if (level) { - printf("Children for %d:\n", level); - for (unsigned int i = 0; i < Op.getNumOperands(); ++i) { - printf("Child %d->%d:", level, i); - printSDValue(Op.getOperand(i), level - 1); - } - } -} - -bool isPHIMove(unsigned int opcode) { - switch (opcode) { - default: - return false; - ExpandCaseToAllTypes(AMDIL::PHIMOVE); - return true; - } - return false; -} - -bool isMove(unsigned int opcode) { - switch (opcode) { - default: - return false; - ExpandCaseToAllTypes(AMDIL::MOVE); - return true; - } - return false; -} - -bool isMoveOrEquivalent(unsigned int opcode) { - switch (opcode) { - default: - return isMove(opcode) || isPHIMove(opcode); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASCHAR); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASSHORT); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASINT); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASLONG); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASDOUBLE); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASFLOAT); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2CHAR); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2SHORT); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2INT); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2FLOAT); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2LONG); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2DOUBLE); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4CHAR); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4SHORT); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4INT); - 
ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4FLOAT); - case AMDIL::INTTOANY_i8: - case AMDIL::INTTOANY_i16: - case AMDIL::INTTOANY_i32: - case AMDIL::INTTOANY_f32: - case AMDIL::DLO: - case AMDIL::LLO: - case AMDIL::LLO_v2i64: - return true; - }; - return false; -} - -bool check_type(const Value *ptr, unsigned int addrspace) { - if (!ptr) { - return false; - } - Type *ptrType = ptr->getType(); - return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace; -} - -size_t getTypeSize(Type * const T, bool dereferencePtr) { - size_t size = 0; - if (!T) { - return size; - } - switch (T->getTypeID()) { - case Type::X86_FP80TyID: - case Type::FP128TyID: - case Type::PPC_FP128TyID: - case Type::LabelTyID: - assert(0 && "These types are not supported by this backend"); - default: - case Type::FloatTyID: - case Type::DoubleTyID: - size = T->getPrimitiveSizeInBits() >> 3; - break; - case Type::PointerTyID: - size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr); - break; - case Type::IntegerTyID: - size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr); - break; - case Type::StructTyID: - size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr); - break; - case Type::ArrayTyID: - size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr); - break; - case Type::FunctionTyID: - size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr); - break; - case Type::VectorTyID: - size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr); - break; - }; - return size; -} - -size_t getTypeSize(StructType * const ST, bool dereferencePtr) { - size_t size = 0; - if (!ST) { - return size; - } - Type *curType; - StructType::element_iterator eib; - StructType::element_iterator eie; - for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) { - curType = *eib; - size += getTypeSize(curType, dereferencePtr); - } - return size; -} - -size_t getTypeSize(IntegerType * const IT, bool dereferencePtr) { - return IT ? 
(IT->getBitWidth() >> 3) : 0; -} - -size_t getTypeSize(FunctionType * const FT, bool dereferencePtr) { - assert(0 && "Should not be able to calculate the size of an function type"); - return 0; -} - -size_t getTypeSize(ArrayType * const AT, bool dereferencePtr) { - return (size_t)(AT ? (getTypeSize(AT->getElementType(), - dereferencePtr) * AT->getNumElements()) - : 0); -} - -size_t getTypeSize(VectorType * const VT, bool dereferencePtr) { - return VT ? (VT->getBitWidth() >> 3) : 0; -} - -size_t getTypeSize(PointerType * const PT, bool dereferencePtr) { - if (!PT) { - return 0; - } - Type *CT = PT->getElementType(); - if (CT->getTypeID() == Type::StructTyID && - PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) { - return getTypeSize(dyn_cast<StructType>(CT)); - } else if (dereferencePtr) { - size_t size = 0; - for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) { - size += getTypeSize(PT->getContainedType(x), dereferencePtr); - } - return size; - } else { - return 4; - } -} - -size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr) { - //assert(0 && "Should not be able to calculate the size of an opaque type"); - return 4; -} - -size_t getNumElements(Type * const T) { - size_t size = 0; - if (!T) { - return size; - } - switch (T->getTypeID()) { - case Type::X86_FP80TyID: - case Type::FP128TyID: - case Type::PPC_FP128TyID: - case Type::LabelTyID: - assert(0 && "These types are not supported by this backend"); - default: - case Type::FloatTyID: - case Type::DoubleTyID: - size = 1; - break; - case Type::PointerTyID: - size = getNumElements(dyn_cast<PointerType>(T)); - break; - case Type::IntegerTyID: - size = getNumElements(dyn_cast<IntegerType>(T)); - break; - case Type::StructTyID: - size = getNumElements(dyn_cast<StructType>(T)); - break; - case Type::ArrayTyID: - size = getNumElements(dyn_cast<ArrayType>(T)); - break; - case Type::FunctionTyID: - size = getNumElements(dyn_cast<FunctionType>(T)); - break; - case Type::VectorTyID: - size = 
getNumElements(dyn_cast<VectorType>(T)); - break; - }; - return size; -} - -size_t getNumElements(StructType * const ST) { - size_t size = 0; - if (!ST) { - return size; - } - Type *curType; - StructType::element_iterator eib; - StructType::element_iterator eie; - for (eib = ST->element_begin(), eie = ST->element_end(); - eib != eie; ++eib) { - curType = *eib; - size += getNumElements(curType); - } - return size; -} - -size_t getNumElements(IntegerType * const IT) { - return (!IT) ? 0 : 1; -} - -size_t getNumElements(FunctionType * const FT) { - assert(0 && "Should not be able to calculate the number of " - "elements of a function type"); - return 0; -} - -size_t getNumElements(ArrayType * const AT) { - return (!AT) ? 0 - : (size_t)(getNumElements(AT->getElementType()) * - AT->getNumElements()); -} - -size_t getNumElements(VectorType * const VT) { - return (!VT) ? 0 - : VT->getNumElements() * getNumElements(VT->getElementType()); -} - -size_t getNumElements(PointerType * const PT) { - size_t size = 0; - if (!PT) { - return size; - } - for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) { - size += getNumElements(PT->getContainedType(x)); - } - return size; -} - -const llvm::Value *getBasePointerValue(const llvm::Value *V) -{ - if (!V) { - return NULL; - } - const Value *ret = NULL; - ValueMap<const Value *, bool> ValueBitMap; - std::queue<const Value *, std::list<const Value *> > ValueQueue; - ValueQueue.push(V); - while (!ValueQueue.empty()) { - V = ValueQueue.front(); - if (ValueBitMap.find(V) == ValueBitMap.end()) { - ValueBitMap[V] = true; - if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) { - ret = V; - break; - } else if (dyn_cast<GlobalVariable>(V)) { - ret = V; - break; - } else if (dyn_cast<Constant>(V)) { - const ConstantExpr *CE = dyn_cast<ConstantExpr>(V); - if (CE) { - ValueQueue.push(CE->getOperand(0)); - } - } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { - ret = AI; - break; - } else if (const Instruction *I 
= dyn_cast<Instruction>(V)) { - uint32_t numOps = I->getNumOperands(); - for (uint32_t x = 0; x < numOps; ++x) { - ValueQueue.push(I->getOperand(x)); - } - } else { - // assert(0 && "Found a Value that we didn't know how to handle!"); - } - } - ValueQueue.pop(); - } - return ret; -} - -const llvm::Value *getBasePointerValue(const llvm::MachineInstr *MI) { - const Value *moVal = NULL; - if (!MI->memoperands_empty()) { - const MachineMemOperand *memOp = (*MI->memoperands_begin()); - moVal = memOp ? memOp->getValue() : NULL; - moVal = getBasePointerValue(moVal); - } - return moVal; -} - -bool commaPrint(int i, llvm::raw_ostream &O) { - O << ":" << i; - return false; -} - -bool isLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { - if (strstr(GET_OPCODE_NAME(TII, MI), "LOADCONST")) { - return false; - } - return strstr(GET_OPCODE_NAME(TII, MI), "LOAD"); -} - -bool isSWSExtLoadInst(MachineInstr *MI) -{ -switch (MI->getOpcode()) { - default: - break; - ExpandCaseToByteShortTypes(AMDIL::LOCALLOAD); - ExpandCaseToByteShortTypes(AMDIL::GLOBALLOAD); - ExpandCaseToByteShortTypes(AMDIL::REGIONLOAD); - ExpandCaseToByteShortTypes(AMDIL::PRIVATELOAD); - ExpandCaseToByteShortTypes(AMDIL::CPOOLLOAD); - ExpandCaseToByteShortTypes(AMDIL::CONSTANTLOAD); - return true; - }; - return false; -} - -bool isExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { - return strstr(GET_OPCODE_NAME(TII, MI), "EXTLOAD"); -} - -bool isSExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { - return strstr(GET_OPCODE_NAME(TII, MI), "SEXTLOAD"); -} - -bool isAExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { - return strstr(GET_OPCODE_NAME(TII, MI), "AEXTLOAD"); -} - -bool isZExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { - return strstr(GET_OPCODE_NAME(TII, MI), "ZEXTLOAD"); -} - -bool isStoreInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { - return strstr(GET_OPCODE_NAME(TII, MI), "STORE"); -} - -bool 
isTruncStoreInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { - return strstr(GET_OPCODE_NAME(TII, MI), "TRUNCSTORE"); -} - -bool isAtomicInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { - return strstr(GET_OPCODE_NAME(TII, MI), "ATOM"); -} - -bool isVolatileInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { - if (!MI->memoperands_empty()) { - for (MachineInstr::mmo_iterator mob = MI->memoperands_begin(), - moe = MI->memoperands_end(); mob != moe; ++mob) { - // If there is a volatile mem operand, this is a volatile instruction. - if ((*mob)->isVolatile()) { - return true; - } - } - } - return false; -} -bool isGlobalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "GLOBAL"); -} -bool isPrivateInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "PRIVATE"); -} -bool isConstantInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "CONSTANT") - || strstr(GET_OPCODE_NAME(TII, MI), "CPOOL"); -} -bool isRegionInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "REGION"); -} -bool isLocalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "LOCAL"); -} -bool isImageInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "IMAGE"); -} -bool isAppendInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "APPEND"); -} -bool isRegionAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_R"); -} -bool isLocalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_L"); -} -bool isGlobalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr 
*MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_G") - || isArenaAtomic(TII, MI); -} -bool isArenaAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_A"); -} - -const char* getSrcSwizzle(unsigned idx) { - const char *srcSwizzles[] = { - "", ".x000", ".0x00", ".00x0", ".000x", ".y000", ".0y00", ".00y0", ".000y", - ".z000", ".0z00", ".00z0", ".000z", ".w000", ".0w00", ".00w0", ".000w", - ".xy00", ".00xy", ".zw00", ".00zw", ".xyz0", ".0xyz", ".xyzw", ".0000", - ".xxxx", ".yyyy", ".zzzz", ".wwww", ".xyxy", ".zwzw", ".xzxz", ".ywyw", - ".x0y0", ".0x0y", ".xy_neg(y)", "_neg(yw)", "_neg(x)", ".xy_neg(xy)", - "_neg(xyzw)", ".0yzw", ".x0zw", ".xy0w", ".x", ".y", ".z", ".w", ".xy", - ".zw" - }; - assert(idx < sizeof(srcSwizzles)/sizeof(srcSwizzles[0]) - && "Idx passed in is invalid!"); - return srcSwizzles[idx]; -} -const char* getDstSwizzle(unsigned idx) { - const char *dstSwizzles[] = { - "", ".x___", ".xy__", ".xyz_", ".xyzw", "._y__", "._yz_", "._yzw", ".__z_", - ".__zw", ".___w", ".x_zw", ".xy_w", ".x_z_", ".x__w", "._y_w", - }; - assert(idx < sizeof(dstSwizzles)/sizeof(dstSwizzles[0]) - && "Idx passed in is invalid!"); - return dstSwizzles[idx]; -} -/// Helper function to get the currently set flags -void getAsmPrinterFlags(MachineInstr *MI, AMDILAS::InstrResEnc &curRes) -{ - // We need 16 bits of information, but LLVMr127097 cut the field in half. - // So we have to use two different fields to store all of our information. - uint16_t upper = MI->getFlags() << 8; - uint16_t lower = MI->getAsmPrinterFlags(); - curRes.u16all = upper | lower; -} -/// Helper function to clear the currently set flags and add the new flags. -void setAsmPrinterFlags(MachineInstr *MI, AMDILAS::InstrResEnc &curRes) -{ - // We need 16 bits of information, but LLVMr127097 cut the field in half. - // So we have to use two different fields to store all of our information. 
- MI->clearAsmPrinterFlags(); - MI->setFlags(0); - uint8_t lower = curRes.u16all & 0xFF; - uint8_t upper = (curRes.u16all >> 8) & 0xFF; - MI->setFlags(upper); - MI->setAsmPrinterFlag((llvm::MachineInstr::CommentFlag)lower); -} diff --git a/src/gallium/drivers/radeon/AMDILUtilityFunctions.h b/src/gallium/drivers/radeon/AMDILUtilityFunctions.h index 637c868b55c..66af706bbb3 100644 --- a/src/gallium/drivers/radeon/AMDILUtilityFunctions.h +++ b/src/gallium/drivers/radeon/AMDILUtilityFunctions.h @@ -7,191 +7,12 @@ // //==-----------------------------------------------------------------------===// // -// This file provides declarations for functions that are used across different -// classes and provide various conversions or utility to shorten the code +// This file provides helper macros for expanding case statements. // //===----------------------------------------------------------------------===// #ifndef AMDILUTILITYFUNCTIONS_H_ #define AMDILUTILITYFUNCTIONS_H_ -#include "AMDIL.h" -#include "AMDILTargetMachine.h" -#include "llvm/ADT/SmallVector.h" - -// Utility functions from ID -// -namespace llvm { -class TargetRegisterClass; -class SDValue; -class SDNode; -class Value; -class Type; -class StructType; -class IntegerType; -class FunctionType; -class VectorType; -class ArrayType; -class PointerType; -class OpaqueType; -class MachineInstr; - -} -enum SrcSwizzles { - AMDIL_SRC_SWIZZLE_DEFAULT = 0, - AMDIL_SRC_SWIZZLE_X000, - AMDIL_SRC_SWIZZLE_0X00, - AMDIL_SRC_SWIZZLE_00X0, - AMDIL_SRC_SWIZZLE_000X, - AMDIL_SRC_SWIZZLE_Y000, - AMDIL_SRC_SWIZZLE_0Y00, - AMDIL_SRC_SWIZZLE_00Y0, - AMDIL_SRC_SWIZZLE_000Y, - AMDIL_SRC_SWIZZLE_Z000, - AMDIL_SRC_SWIZZLE_0Z00, - AMDIL_SRC_SWIZZLE_00Z0, - AMDIL_SRC_SWIZZLE_000Z, - AMDIL_SRC_SWIZZLE_W000, - AMDIL_SRC_SWIZZLE_0W00, - AMDIL_SRC_SWIZZLE_00W0, - AMDIL_SRC_SWIZZLE_000W, - AMDIL_SRC_SWIZZLE_XY00, - AMDIL_SRC_SWIZZLE_00XY, - AMDIL_SRC_SWIZZLE_ZW00, - AMDIL_SRC_SWIZZLE_00ZW, - AMDIL_SRC_SWIZZLE_XYZ0, - AMDIL_SRC_SWIZZLE_0XYZ, - 
AMDIL_SRC_SWIZZLE_XYZW, - AMDIL_SRC_SWIZZLE_0000, - AMDIL_SRC_SWIZZLE_XXXX, - AMDIL_SRC_SWIZZLE_YYYY, - AMDIL_SRC_SWIZZLE_ZZZZ, - AMDIL_SRC_SWIZZLE_WWWW, - AMDIL_SRC_SWIZZLE_XYXY, - AMDIL_SRC_SWIZZLE_ZWZW, - AMDIL_SRC_SWIZZLE_XZXZ, - AMDIL_SRC_SWIZZLE_YWYW, - AMDIL_SRC_SWIZZLE_X0Y0, - AMDIL_SRC_SWIZZLE_0X0Y, - AMDIL_SRC_SWIZZLE_XY_NEGY, - AMDIL_SRC_SWIZZLE_NEGYW, - AMDIL_SRC_SWIZZLE_NEGX, - AMDIL_SRC_SWIZZLE_XY_NEGXY, - AMDIL_SRC_SWIZZLE_NEG_XYZW, - AMDIL_SRC_SWIZZLE_0YZW, - AMDIL_SRC_SWIZZLE_X0ZW, - AMDIL_SRC_SWIZZLE_XY0W, - AMDIL_SRC_SWIZZLE_X, - AMDIL_SRC_SWIZZLE_Y, - AMDIL_SRC_SWIZZLE_Z, - AMDIL_SRC_SWIZZLE_W, - AMDIL_SRC_SWIZZLE_XY, - AMDIL_SRC_SWIZZLE_ZW, - AMDIL_SRC_SWIZZLE_LAST -}; -enum DstSwizzles { - AMDIL_DST_SWIZZLE_DEFAULT = 0, - AMDIL_DST_SWIZZLE_X___, - AMDIL_DST_SWIZZLE_XY__, - AMDIL_DST_SWIZZLE_XYZ_, - AMDIL_DST_SWIZZLE_XYZW, - AMDIL_DST_SWIZZLE__Y__, - AMDIL_DST_SWIZZLE__YZ_, - AMDIL_DST_SWIZZLE__YZW, - AMDIL_DST_SWIZZLE___Z_, - AMDIL_DST_SWIZZLE___ZW, - AMDIL_DST_SWIZZLE____W, - AMDIL_DST_SWIZZLE_X_ZW, - AMDIL_DST_SWIZZLE_XY_W, - AMDIL_DST_SWIZZLE_X_Z_, - AMDIL_DST_SWIZZLE_X__W, - AMDIL_DST_SWIZZLE__Y_W, - AMDIL_DST_SWIZZLE_LAST -}; -// Function to get the correct src swizzle string from ID -const char *getSrcSwizzle(unsigned); - -// Function to get the correct dst swizzle string from ID -const char *getDstSwizzle(unsigned); - -const llvm::TargetRegisterClass *getRegClassFromID(unsigned int ID); - -unsigned int getMoveInstFromID(unsigned int ID); -unsigned int getPHIMoveInstFromID(unsigned int ID); - -// Utility functions from Type. -const llvm::TargetRegisterClass *getRegClassFromType(unsigned int type); -unsigned int getTargetIndependentMoveFromType(unsigned int type); - -// Debug functions for SDNode and SDValue. -void printSDValue(const llvm::SDValue &Op, int level); -void printSDNode(const llvm::SDNode *N); - -// Functions to check if an opcode is a specific type. 
-bool isMove(unsigned int opcode); -bool isPHIMove(unsigned int opcode); -bool isMoveOrEquivalent(unsigned int opcode); - -// Function to check address space -bool check_type(const llvm::Value *ptr, unsigned int addrspace); - -// Group of functions that recursively calculate the size of a structure based -// on it's sub-types. -size_t getTypeSize(llvm::Type * const T, bool dereferencePtr = false); -size_t -getTypeSize(llvm::StructType * const ST, bool dereferencePtr = false); -size_t -getTypeSize(llvm::IntegerType * const IT, bool dereferencePtr = false); -size_t -getTypeSize(llvm::FunctionType * const FT, bool dereferencePtr = false); -size_t -getTypeSize(llvm::ArrayType * const AT, bool dereferencePtr = false); -size_t -getTypeSize(llvm::VectorType * const VT, bool dereferencePtr = false); -size_t -getTypeSize(llvm::PointerType * const PT, bool dereferencePtr = false); -size_t -getTypeSize(llvm::OpaqueType * const OT, bool dereferencePtr = false); - -// Group of functions that recursively calculate the number of elements of a -// structure based on it's sub-types. 
-size_t getNumElements(llvm::Type * const T); -size_t getNumElements(llvm::StructType * const ST); -size_t getNumElements(llvm::IntegerType * const IT); -size_t getNumElements(llvm::FunctionType * const FT); -size_t getNumElements(llvm::ArrayType * const AT); -size_t getNumElements(llvm::VectorType * const VT); -size_t getNumElements(llvm::PointerType * const PT); -size_t getNumElements(llvm::OpaqueType * const OT); -const llvm::Value *getBasePointerValue(const llvm::Value *V); -const llvm::Value *getBasePointerValue(const llvm::MachineInstr *MI); - - -int64_t GET_SCALAR_SIZE(llvm::Type* A); - -// Helper functions that check the opcode for status information -bool isLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isSWSExtLoadInst(llvm::MachineInstr *MI); -bool isSExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isZExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isAExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isStoreInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isTruncStoreInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isAtomicInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isVolatileInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isGlobalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isPrivateInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isConstantInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isRegionInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isLocalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isImageInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isAppendInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool 
isRegionAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isLocalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isGlobalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isArenaAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); - - // Macros that are used to help with switch statements for various data types // However, these macro's do not return anything unlike the second set below. #define ExpandCaseTo32bitIntTypes(Instr) \ @@ -354,9 +175,4 @@ case Instr##_v4f32: \ case Instr##_v2i64: \ case Instr##_v2f64: -bool commaPrint(int i, llvm::raw_ostream &O); -/// Helper function to get the currently get/set flags. -void getAsmPrinterFlags(llvm::MachineInstr *MI, llvm::AMDILAS::InstrResEnc &curRes); -void setAsmPrinterFlags(llvm::MachineInstr *MI, llvm::AMDILAS::InstrResEnc &curRes); - #endif // AMDILUTILITYFUNCTIONS_H_ diff --git a/src/gallium/drivers/radeon/AMDILVersion.td b/src/gallium/drivers/radeon/AMDILVersion.td index b8b02608d3b..d863b068131 100644 --- a/src/gallium/drivers/radeon/AMDILVersion.td +++ b/src/gallium/drivers/radeon/AMDILVersion.td @@ -1,4 +1,4 @@ -//===-- AMDILVersion.td - TODO: Add brief description -------===// +//===-- AMDILVersion.td - Barrier Instruction/Intrinsic definitions------===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/Makefile b/src/gallium/drivers/radeon/Makefile index 807dc781c7c..cc409645a6e 100644 --- a/src/gallium/drivers/radeon/Makefile +++ b/src/gallium/drivers/radeon/Makefile @@ -18,6 +18,8 @@ CXXFLAGS := $(filter-out -DDEBUG, $(CXXFLAGS)) tablegen = $(TBLGEN) -I $(LLVM_INCLUDEDIR) $1 $2 -o $3 +HAVE_LLVM_INTRINSICS = $(shell grep IntrinsicsR600.td $(LLVM_INCLUDEDIR)/llvm/Intrinsics.td) + gen: $(GENERATED_SOURCES) SIRegisterInfo.td: SIGenRegisterInfo.pl @@ -26,9 +28,13 @@ SIRegisterInfo.td: SIGenRegisterInfo.pl SIRegisterGetHWRegNum.inc: SIGenRegisterInfo.pl $(PERL) $^ $@ > /dev/null 
-R600ShaderPatterns.td: AMDGPUGenShaderPatterns.pl - $(PERL) $^ C > $@ - +R600Intrinsics.td: R600IntrinsicsNoOpenCL.td R600IntrinsicsOpenCL.td +ifeq ($(HAVE_LLVM_INTRINSICS),) + cp R600IntrinsicsNoOpenCL.td R600Intrinsics.td +else + cp R600IntrinsicsOpenCL.td R600Intrinsics.td +endif + R600RegisterInfo.td: R600GenRegisterInfo.pl $(PERL) $^ > $@ diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources index 7d2932b4dbd..6dc62320f40 100644 --- a/src/gallium/drivers/radeon/Makefile.sources +++ b/src/gallium/drivers/radeon/Makefile.sources @@ -1,6 +1,6 @@ GENERATED_SOURCES := \ - R600ShaderPatterns.td \ + R600Intrinsics.td \ R600RegisterInfo.td \ AMDGPUInstrEnums.td \ SIRegisterInfo.td \ @@ -29,20 +29,16 @@ CPP_SOURCES := \ AMDILISelDAGToDAG.cpp \ AMDILISelLowering.cpp \ AMDILMachinePeephole.cpp \ - AMDILMCCodeEmitter.cpp \ AMDILNIDevice.cpp \ AMDILPeepholeOptimizer.cpp \ AMDILRegisterInfo.cpp \ AMDILSIDevice.cpp \ AMDILSubtarget.cpp \ AMDILTargetMachine.cpp \ - AMDILUtilityFunctions.cpp \ AMDGPUTargetMachine.cpp \ AMDGPUISelLowering.cpp \ AMDGPUConvertToISA.cpp \ AMDGPULowerInstructions.cpp \ - AMDGPULowerShaderInstructions.cpp \ - AMDGPUReorderPreloadInstructions.cpp \ AMDGPUInstrInfo.cpp \ AMDGPURegisterInfo.cpp \ AMDGPUUtil.cpp \ @@ -51,13 +47,12 @@ CPP_SOURCES := \ R600InstrInfo.cpp \ R600KernelParameters.cpp \ R600LowerInstructions.cpp \ - R600LowerShaderInstructions.cpp \ + R600MachineFunctionInfo.cpp \ R600RegisterInfo.cpp \ SIAssignInterpRegs.cpp \ SICodeEmitter.cpp \ SIInstrInfo.cpp \ SIISelLowering.cpp \ - SILowerShaderInstructions.cpp \ SIMachineFunctionInfo.cpp \ SIPropagateImmReads.cpp \ SIRegisterInfo.cpp \ diff --git a/src/gallium/drivers/radeon/R600CodeEmitter.cpp b/src/gallium/drivers/radeon/R600CodeEmitter.cpp index 8faf0deb8c5..421562255f6 100644 --- a/src/gallium/drivers/radeon/R600CodeEmitter.cpp +++ b/src/gallium/drivers/radeon/R600CodeEmitter.cpp @@ -1,4 +1,4 @@ -//===-- R600CodeEmitter.cpp - TODO: 
Add brief description -------===// +//===-- R600CodeEmitter.cpp - Code Emitter for R600->Cayman GPU families --===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,12 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This code emitters outputs bytecode that is understood by the r600g driver +// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA, +// except that the size of the instruction fields are rounded up to the +// nearest byte. +// +// [1] http://www.mesa3d.org/ // //===----------------------------------------------------------------------===// @@ -44,8 +49,9 @@ namespace { const R600RegisterInfo * TRI; bool evergreenEncoding; + bool isCube; bool isReduction; - unsigned reductionElement; + unsigned currentElement; bool isLast; unsigned section_start; @@ -53,7 +59,7 @@ namespace { public: R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID), - _OS(OS), TM(NULL), evergreenEncoding(false), isReduction(false), + _OS(OS), TM(NULL), evergreenEncoding(false), isCube(false), isReduction(false), isLast(true) { } const char *getPassName() const { return "AMDGPU Machine Code Emitter"; } @@ -65,7 +71,7 @@ namespace { private: void emitALUInstr(MachineInstr &MI); - void emitSrc(const MachineOperand & MO); + void emitSrc(const MachineOperand & MO, int chan_override = -1); void emitDst(const MachineOperand & MO); void emitALU(MachineInstr &MI, unsigned numSrc); void emitTexInstr(MachineInstr &MI); @@ -155,10 +161,8 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { } else { evergreenEncoding = true; } - const AMDGPUTargetMachine *amdtm = - static_cast<const AMDGPUTargetMachine *>(&MF.getTarget()); - if (amdtm->shouldDumpCode()) { + if (STM.dumpCode()) { MF.dump(); } @@ -171,18 +175,26 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && 
MI.getOperand(0).isDead()) { continue; } - if (isTexOp(MI.getOpcode())) { + if (AMDGPU::isTexOp(MI.getOpcode())) { emitTexInstr(MI); - } else if (isFCOp(MI.getOpcode())){ + } else if (AMDGPU::isFCOp(MI.getOpcode())){ emitFCInstr(MI); - } else if (isReductionOp(MI.getOpcode())) { + } else if (AMDGPU::isReductionOp(MI.getOpcode())) { isReduction = true; isLast = false; - for (reductionElement = 0; reductionElement < 4; reductionElement++) { - isLast = (reductionElement == 3); + for (currentElement = 0; currentElement < 4; currentElement++) { + isLast = (currentElement == 3); emitALUInstr(MI); } isReduction = false; + } else if (AMDGPU::isCubeOp(MI.getOpcode())) { + isCube = true; + isLast = false; + for (currentElement = 0; currentElement < 4; currentElement++) { + isLast = (currentElement == 3); + emitALUInstr(MI); + } + isCube = false; } else if (MI.getOpcode() == AMDIL::RETURN || MI.getOpcode() == AMDIL::BUNDLE || MI.getOpcode() == AMDIL::KILL) { @@ -191,12 +203,7 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { switch(MI.getOpcode()) { case AMDIL::RAT_WRITE_CACHELESS_eg: { - /* XXX: Support for autoencoding 64-bit instructions was added - * in LLVM 3.1. Until we drop support for 3.0, we will use Magic - * numbers for the high bits. */ - uint64_t high = 0x95c0100000000000; uint64_t inst = getBinaryCodeForInstr(MI); - inst |= high; /* Set End Of Program bit */ /* XXX: Need better check of end of program. EOP should be * encoded in one of the operands of the MI, and it should be @@ -286,7 +293,7 @@ void R600CodeEmitter::emitALUInstr(MachineInstr &MI) /* Some instructions are just place holder instructions that represent * operations that the GPU does automatically. They should be ignored. 
*/ - if (isPlaceHolderOpcode(MI.getOpcode())) { + if (AMDGPU::isPlaceHolderOpcode(MI.getOpcode())) { return; } @@ -309,18 +316,25 @@ void R600CodeEmitter::emitALUInstr(MachineInstr &MI) /* Emit instruction type */ emitByte(0); - unsigned int opIndex; - for (opIndex = 1; opIndex < numOperands; opIndex++) { - /* Literal constants are always stored as the last operand. */ - if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) { - break; + if (isCube) { + static const int cube_src_swz[] = {2, 2, 0, 1}; + emitSrc(MI.getOperand(1), cube_src_swz[currentElement]); + emitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]); + emitNullBytes(SRC_BYTE_COUNT); + } else { + unsigned int opIndex; + for (opIndex = 1; opIndex < numOperands; opIndex++) { + /* Literal constants are always stored as the last operand. */ + if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) { + break; + } + emitSrc(MI.getOperand(opIndex)); } - emitSrc(MI.getOperand(opIndex)); - } /* Emit zeros for unused sources */ - for ( ; opIndex < 4; opIndex++) { - emitNullBytes(SRC_BYTE_COUNT); + for ( ; opIndex < 4; opIndex++) { + emitNullBytes(SRC_BYTE_COUNT); + } } emitDst(dstOp); @@ -328,7 +342,7 @@ void R600CodeEmitter::emitALUInstr(MachineInstr &MI) emitALU(MI, numOperands - 1); } -void R600CodeEmitter::emitSrc(const MachineOperand & MO) +void R600CodeEmitter::emitSrc(const MachineOperand & MO, int chan_override /* = -1 */) { uint32_t value = 0; /* Emit the source select (2 bytes). For GPRs, this is the register index. 
@@ -354,8 +368,10 @@ void R600CodeEmitter::emitSrc(const MachineOperand & MO) } /* Emit the source channel (1 byte) */ - if (isReduction) { - emitByte(reductionElement); + if (chan_override != -1) { + emitByte(chan_override); + } else if (isReduction) { + emitByte(currentElement); } else if (MO.isReg()) { emitByte(TRI->getHWRegChan(MO.getReg())); } else { @@ -397,8 +413,8 @@ void R600CodeEmitter::emitDst(const MachineOperand & MO) emitByte(getHWReg(MO.getReg())); /* Emit the element of the destination register (1 byte)*/ - if (isReduction) { - emitByte(reductionElement); + if (isReduction || isCube) { + emitByte(currentElement); } else { emitByte(TRI->getHWRegChan(MO.getReg())); } @@ -411,7 +427,7 @@ void R600CodeEmitter::emitDst(const MachineOperand & MO) } /* Emit writemask (1 byte). */ - if ((isReduction && reductionElement != TRI->getHWRegChan(MO.getReg())) + if ((isReduction && currentElement != TRI->getHWRegChan(MO.getReg())) || MO.getTargetFlags() & MO_FLAG_MASK) { emitByte(0); } else { @@ -570,6 +586,7 @@ void R600CodeEmitter::emitFCInstr(MachineInstr &MI) case AMDIL::BREAK_LOGICALZ_f32: instr = FC_BREAK; break; + case AMDIL::BREAK_LOGICALNZ_f32: case AMDIL::BREAK_LOGICALNZ_i32: instr = FC_BREAK_NZ_INT; break; @@ -577,6 +594,7 @@ void R600CodeEmitter::emitFCInstr(MachineInstr &MI) instr = FC_BREAK_Z_INT; break; case AMDIL::CONTINUE_LOGICALNZ_f32: + case AMDIL::CONTINUE_LOGICALNZ_i32: instr = FC_CONTINUE; break; /* XXX: This assumes that all IFs will be if (x != 0). 
If we add @@ -706,44 +724,5 @@ RegElement maskBitToElement(unsigned int maskBit) } } -unsigned int dstSwizzleToWriteMask(unsigned swizzle) -{ - switch(swizzle) { - default: - case AMDIL_DST_SWIZZLE_DEFAULT: - return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W; - case AMDIL_DST_SWIZZLE_X___: - return WRITE_MASK_X; - case AMDIL_DST_SWIZZLE_XY__: - return WRITE_MASK_X | WRITE_MASK_Y; - case AMDIL_DST_SWIZZLE_XYZ_: - return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z; - case AMDIL_DST_SWIZZLE_XYZW: - return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W; - case AMDIL_DST_SWIZZLE__Y__: - return WRITE_MASK_Y; - case AMDIL_DST_SWIZZLE__YZ_: - return WRITE_MASK_Y | WRITE_MASK_Z; - case AMDIL_DST_SWIZZLE__YZW: - return WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W; - case AMDIL_DST_SWIZZLE___Z_: - return WRITE_MASK_Z; - case AMDIL_DST_SWIZZLE___ZW: - return WRITE_MASK_Z | WRITE_MASK_W; - case AMDIL_DST_SWIZZLE____W: - return WRITE_MASK_W; - case AMDIL_DST_SWIZZLE_X_ZW: - return WRITE_MASK_X | WRITE_MASK_Z | WRITE_MASK_W; - case AMDIL_DST_SWIZZLE_XY_W: - return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_W; - case AMDIL_DST_SWIZZLE_X_Z_: - return WRITE_MASK_X | WRITE_MASK_Z; - case AMDIL_DST_SWIZZLE_X__W: - return WRITE_MASK_X | WRITE_MASK_W; - case AMDIL_DST_SWIZZLE__Y_W: - return WRITE_MASK_Y | WRITE_MASK_W; - } -} - #include "AMDILGenCodeEmitter.inc" diff --git a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl index cbded115766..406f3dfdd39 100644 --- a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl +++ b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl @@ -1,20 +1,23 @@ -#===-- R600GenRegisterInfo.pl - TODO: Add brief description -------===# +#===-- R600GenRegisterInfo.pl - Script for generating register info files --===# # # The LLVM Compiler Infrastructure # # This file is distributed under the University of Illinois Open Source # License. See LICENSE.TXT for details. 
# -#===----------------------------------------------------------------------===# +#===------------------------------------------------------------------------===# # -# TODO: Add full description +# This perl script prints to stdout .td code to be used as R600RegisterInfo.td +# it also generates a file called R600HwRegInfo.include, which contains helper +# functions for determining the hw encoding of registers. # -#===----------------------------------------------------------------------===# +#===------------------------------------------------------------------------===# use strict; use warnings; -use AMDGPUConstants; +use constant CONST_REG_COUNT => 256; +use constant TEMP_REG_COUNT => 128; my $CREG_MAX = CONST_REG_COUNT - 1; my $TREG_MAX = TEMP_REG_COUNT - 1; @@ -81,7 +84,7 @@ def R600_Reg32 : RegisterClass <"AMDIL", [f32, i32], 32, (add R600_CReg32, ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>; -def R600_Reg128 : RegisterClass<"AMDIL", [v4f32], 128, (add +def R600_Reg128 : RegisterClass<"AMDIL", [v4f32, v4i32], 128, (add $t128_string)> { let SubRegClasses = [(R600_TReg32 sel_x, sel_y, sel_z, sel_w)]; @@ -170,3 +173,24 @@ sub print_reg_defs { return @reg_list; } +#Helper functions +sub get_hw_index { + my ($index) = @_; + return int($index / 4); +} + +sub get_chan_str { + my ($index) = @_; + my $chan = $index % 4; + if ($chan == 0 ) { + return 'X'; + } elsif ($chan == 1) { + return 'Y'; + } elsif ($chan == 2) { + return 'Z'; + } elsif ($chan == 3) { + return 'W'; + } else { + die("Unknown chan value: $chan"); + } +} diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp index f92fe2641a5..e85ac31b34c 100644 --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp @@ -1,4 +1,4 @@ -//===-- R600ISelLowering.cpp - TODO: Add brief description -------===// +//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===// // 
// The LLVM Compiler Infrastructure // @@ -7,12 +7,14 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Most of the DAG lowering is handled in AMDILISelLowering.cpp. This file +// is mostly EmitInstrWithCustomInserter(). // //===----------------------------------------------------------------------===// #include "R600ISelLowering.h" #include "R600InstrInfo.h" +#include "R600MachineFunctionInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; @@ -25,9 +27,13 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : // setSchedulingPreference(Sched::VLIW); addRegisterClass(MVT::v4f32, &AMDIL::R600_Reg128RegClass); addRegisterClass(MVT::f32, &AMDIL::R600_Reg32RegClass); + addRegisterClass(MVT::v4i32, &AMDIL::R600_Reg128RegClass); + addRegisterClass(MVT::i32, &AMDIL::R600_Reg32RegClass); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal); } MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( @@ -35,10 +41,10 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( { MachineFunction * MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); + MachineBasicBlock::iterator I = *MI; switch (MI->getOpcode()) { default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); - /* XXX: Use helper function from AMDGPULowerShaderInstructions here */ case AMDIL::TGID_X: addLiveIn(MI, MF, MRI, TII, AMDIL::T1_X); break; @@ -84,7 +90,49 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( case AMDIL::LOCAL_SIZE_Z: lowerImplicitParameter(MI, *BB, MRI, 8); break; + + case AMDIL::R600_LOAD_CONST: + { + int64_t RegIndex = MI->getOperand(1).getImm(); + unsigned ConstantReg = 
AMDIL::R600_CReg32RegClass.getRegister(RegIndex); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::COPY)) + .addOperand(MI->getOperand(0)) + .addReg(ConstantReg); + break; + } + + case AMDIL::LOAD_INPUT: + { + int64_t RegIndex = MI->getOperand(1).getImm(); + addLiveIn(MI, MF, MRI, TII, + AMDIL::R600_TReg32RegClass.getRegister(RegIndex)); + break; + } + case AMDIL::STORE_OUTPUT: + { + int64_t OutputIndex = MI->getOperand(1).getImm(); + unsigned OutputReg = AMDIL::R600_TReg32RegClass.getRegister(OutputIndex); + + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::COPY), OutputReg) + .addOperand(MI->getOperand(0)); + + if (!MRI.isLiveOut(OutputReg)) { + MRI.addLiveOut(OutputReg); + } + break; + } + + case AMDIL::RESERVE_REG: + { + R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>(); + int64_t ReservedIndex = MI->getOperand(0).getImm(); + unsigned ReservedReg = + AMDIL::R600_TReg32RegClass.getRegister(ReservedIndex); + MFI->ReservedRegs.push_back(ReservedReg); + break; + } } + MI->eraseFromParent(); return BB; } diff --git a/src/gallium/drivers/radeon/R600ISelLowering.h b/src/gallium/drivers/radeon/R600ISelLowering.h index fd26bf538c4..fdd552a172d 100644 --- a/src/gallium/drivers/radeon/R600ISelLowering.h +++ b/src/gallium/drivers/radeon/R600ISelLowering.h @@ -1,4 +1,4 @@ -//===-- R600ISelLowering.h - TODO: Add brief description -------===// +//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// R600 DAG Lowering interface definition // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp index 0c7ffc4334d..2bd59fd5e1b 100644 --- a/src/gallium/drivers/radeon/R600InstrInfo.cpp +++ 
b/src/gallium/drivers/radeon/R600InstrInfo.cpp @@ -1,4 +1,4 @@ -//===-- R600InstrInfo.cpp - TODO: Add brief description -------===// +//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// R600 Implementation of TargetInstrInfo. // //===----------------------------------------------------------------------===// @@ -73,10 +73,22 @@ unsigned R600InstrInfo::getISAOpcode(unsigned opcode) const case AMDIL::MOVE_i32: return AMDIL::MOV; case AMDIL::SHR_i32: + return getASHRop(); + case AMDIL::USHR_i32: return getLSHRop(); } } +unsigned R600InstrInfo::getASHRop() const +{ + unsigned gen = TM.getSubtarget<AMDILSubtarget>().device()->getGeneration(); + if (gen < AMDILDeviceInfo::HD5XXX) { + return AMDIL::ASHR_r600; + } else { + return AMDIL::ASHR_eg; + } +} + unsigned R600InstrInfo::getLSHRop() const { unsigned gen = TM.getSubtarget<AMDILSubtarget>().device()->getGeneration(); diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h index aedaa9f47f3..014eeb0b9f7 100644 --- a/src/gallium/drivers/radeon/R600InstrInfo.h +++ b/src/gallium/drivers/radeon/R600InstrInfo.h @@ -1,4 +1,4 @@ -//===-- R600InstrInfo.h - TODO: Add brief description -------===// +//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Interface definition for R600InstrInfo // //===----------------------------------------------------------------------===// @@ -52,6 +52,7 @@ namespace llvm { bool isTrig(const MachineInstr &MI) const; unsigned getLSHRop() const; + unsigned getASHRop() const; unsigned getMULHI_UINT() const; unsigned getMULLO_UINT() const; 
unsigned getRECIP_UINT() const; diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index 02043fdeea5..a18240f09bd 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -1,4 +1,4 @@ -//===-- R600Instructions.td - TODO: Add brief description -------===// +//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// R600 Tablegen instruction definitions // //===----------------------------------------------------------------------===// @@ -84,7 +84,7 @@ class R600_3OP <bits<32> inst, string opName, list<dag> pattern, InstR600 <inst, (outs R600_Reg32:$dst), (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2, variable_ops), - !strconcat(opName, "$dst $src0, $src1, $src2"), + !strconcat(opName, " $dst, $src0, $src1, $src2"), pattern, itin>{ @@ -92,7 +92,7 @@ class R600_3OP <bits<32> inst, string opName, list<dag> pattern, } class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern, - InstrItinClass itin = AnyALU> : + InstrItinClass itin = VecALU> : InstR600 <inst, (outs R600_Reg32:$dst), ins, @@ -152,8 +152,6 @@ class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, dag outs, dag ins, let Inst{31-30} = ELEM_SIZE; /* CF_ALLOC_EXPORT_WORD1_BUF */ -/* XXX: We can't have auto encoding of 64-bit instructions until LLVM 3.1 :( */ -/* let Inst{43-32} = ARRAY_SIZE; let Inst{47-44} = COMP_MASK; let Inst{51-48} = BURST_COUNT; @@ -162,7 +160,6 @@ class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, dag outs, dag ins, let Inst{61-54} = cf_inst; let Inst{62} = MARK; let Inst{63} = BARRIER; -*/ } /* @@ -311,6 +308,18 @@ def TRUNC : R600_1OP < [(set R600_Reg32:$dst, (int_AMDGPU_trunc R600_Reg32:$src))] >; +def CEIL : R600_1OP < + 0x12, "CEIL", + 
[(set R600_Reg32:$dst, (int_AMDIL_round_neginf R600_Reg32:$src))]> { + let AMDILOp = AMDILInst.ROUND_NEGINF_f32; +} + +def RNDNE : R600_1OP < + 0x13, "RNDNE", + [(set R600_Reg32:$dst, (int_AMDIL_round_nearest R600_Reg32:$src))]> { + let AMDILOp = AMDILInst.ROUND_NEAREST_f32; +} + def FLOOR : R600_1OP < 0x14, "FLOOR", [(set R600_Reg32:$dst, (int_AMDGPU_floor R600_Reg32:$src))] @@ -329,64 +338,114 @@ def AND_INT : R600_2OP < let AMDILOp = AMDILInst.AND_i32; } +def OR_INT : R600_2OP < + 0x31, "OR_INT", + []>{ + let AMDILOp = AMDILInst.BINARY_OR_i32; +} + def XOR_INT : R600_2OP < 0x32, "XOR_INT", [] >; +def NOT_INT : R600_1OP < + 0x33, "NOT_INT", + []>{ + let AMDILOp = AMDILInst.BINARY_NOT_i32; +} + def ADD_INT : R600_2OP < - 0x34, "ADD_INT $dst, $src0, $src1", + 0x34, "ADD_INT", []>{ let AMDILOp = AMDILInst.ADD_i32; } def SUB_INT : R600_2OP < - 0x35, "SUB_INT $dst, $src0, $src1", + 0x35, "SUB_INT", [] >; +def MAX_INT : R600_2OP < + 0x36, "MAX_INT", + [(set R600_Reg32:$dst, (int_AMDGPU_imax R600_Reg32:$src0, R600_Reg32:$src1))]>; + +def MIN_INT : R600_2OP < + 0x37, "MIN_INT", + [(set R600_Reg32:$dst, (int_AMDGPU_imin R600_Reg32:$src0, R600_Reg32:$src1))]>; + +def MAX_UINT : R600_2OP < + 0x38, "MAX_UINT", + [(set R600_Reg32:$dst, (int_AMDGPU_umax R600_Reg32:$src0, R600_Reg32:$src1))]>; + +def MIN_UINT : R600_2OP < + 0x39, "MIN_UINT", + [(set R600_Reg32:$dst, (int_AMDGPU_umin R600_Reg32:$src0, R600_Reg32:$src1))]>; + + def SETE_INT : R600_2OP < - 0x3A, "SETE_INT $dst, $src0, $src1", + 0x3A, "SETE_INT", []>{ let AMDILOp = AMDILInst.IEQ; } def SETGT_INT : R600_2OP < - 0x3B, "SGT_INT $dst, $src0, $src1", + 0x3B, "SGT_INT", [] >; def SETGE_INT : R600_2OP < - 0x3C, "SETGE_INT $dst, $src0, $src1", + 0x3C, "SETGE_INT", []>{ let AMDILOp = AMDILInst.IGE; } def SETNE_INT : R600_2OP < - 0x3D, "SETNE_INT $dst, $src0, $src1", + 0x3D, "SETNE_INT", []>{ let AMDILOp = AMDILInst.INE; } def SETGT_UINT : R600_2OP < - 0x3E, "SETGT_UINT $dst, $src0, $src1", + 0x3E, "SETGT_UINT", []>{ let 
AMDILOp = AMDILInst.UGT; } def SETGE_UINT : R600_2OP < - 0x3F, "SETGE_UINT $dst, $src0, $src1", + 0x3F, "SETGE_UINT", []>{ let AMDILOp = AMDILInst.UGE; } def CNDE_INT : R600_3OP < - 0x1C, "CNDE_INT $dst, $src0, $src1, $src2", + 0x1C, "CNDE_INT", [] >; /* Texture instructions */ + +def TEX_LD : R600_TEX < + 0x03, "TEX_LD", + [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2))] +>; + +def TEX_GET_TEXTURE_RESINFO : R600_TEX < + 0x04, "TEX_GET_TEXTURE_RESINFO", + [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$src1, imm:$src2))] +>; + +def TEX_GET_GRADIENTS_H : R600_TEX < + 0x07, "TEX_GET_GRADIENTS_H", + [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$src1, imm:$src2))] +>; + +def TEX_GET_GRADIENTS_V : R600_TEX < + 0x08, "TEX_GET_GRADIENTS_V", + [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$src1, imm:$src2))] +>; + def TEX_SAMPLE : R600_TEX < 0x10, "TEX_SAMPLE", [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, imm:$src2))] @@ -434,6 +493,11 @@ def KILP : Pat < (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO))) >; +def KIL : Pat < + (int_AMDGPU_kill R600_Reg32:$src0), + (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0))) +>; + /* Helper classes for common instructions */ class MUL_LIT_Common <bits<32> inst> : R600_3OP < @@ -470,6 +534,15 @@ class DOT4_Common <bits<32> inst> : R600_REDUCTION < [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))] >; +class CUBE_Common <bits<32> inst> : InstR600 < + inst, + (outs R600_Reg128:$dst), + (ins R600_Reg128:$src), + "CUBE $dst $src", + [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))], + VecALU +>; + class EXP_IEEE_Common <bits<32> inst> : R600_1OP < inst, "EXP_IEEE", []> { @@ -509,6 +582,12 @@ class LSHR_Common <bits<32> inst> : R600_2OP < let AMDILOp = AMDILInst.USHR_i32; } +class ASHR_Common <bits<32> inst> : R600_2OP < + inst, "ASHR $dst, $src0, $src1", + [] >{ + let AMDILOp = 
AMDILInst.SHR_i32; +} + class MULHI_INT_Common <bits<32> inst> : R600_2OP < inst, "MULHI_INT $dst, $src0, $src1", [] >{ @@ -608,6 +687,7 @@ let Gen = AMDGPUGen.R600 in { def CNDGT_r600 : CNDGT_Common<0x19>; def CNDGE_r600 : CNDGE_Common<0x1A>; def DOT4_r600 : DOT4_Common<0x50>; + def CUBE_r600 : CUBE_Common<0x52>; def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>; def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>; def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>; @@ -619,6 +699,7 @@ let Gen = AMDGPUGen.R600 in { def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>; def SIN_r600 : SIN_Common<0x6E>; def COS_r600 : COS_Common<0x6F>; + def ASHR_r600 : ASHR_Common<0x70>; def LSHR_r600 : LSHR_Common<0x71>; def LSHL_r600 : LSHL_Common<0x72>; def MULLO_INT_r600 : MULLO_INT_Common<0x73>; @@ -661,20 +742,12 @@ def RAT_WRITE_CACHELESS_eg : EG_CF_RAT <0x57, 0x2, (outs), (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, i32imm:$rat_id), ""> { -/* - let Inst{3-0} = RAT_ID; - let Inst{21-15} = RW_GPR; - let Inst{29-23} = INDEX_GPR; - /* Propery of the UAV */ - let Inst{31-30} = ELEM_SIZE; -*/ let RIM = 0; /* XXX: Have a separate instruction for non-indexed writes. 
*/ let TYPE = 1; let RW_REL = 0; let ELEM_SIZE = 0; -/* let ARRAY_SIZE = 0; let COMP_MASK = 1; let BURST_COUNT = 0; @@ -682,7 +755,6 @@ def RAT_WRITE_CACHELESS_eg : let EOP = 0; let MARK = 0; let BARRIER = 1; -*/ } def VTX_READ_eg : InstR600ISA < (outs R600_TReg32_X:$dst), @@ -789,6 +861,7 @@ class TRIG_eg <InstR600 trig, Intrinsic intr> : Pat< let Gen = AMDGPUGen.EG_CAYMAN in { def MULADD_eg : MULADD_Common<0x14>; + def ASHR_eg : ASHR_Common<0x15>; def LSHR_eg : LSHR_Common<0x16>; def LSHL_eg : LSHL_Common<0x17>; def CNDE_eg : CNDE_Common<0x19>; @@ -812,6 +885,7 @@ let Gen = AMDGPUGen.EG_CAYMAN in { def RECIP_UINT_eg : RECIP_UINT_Common<0x94>; def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>; def DOT4_eg : DOT4_Common<0xBE>; + def CUBE_eg : CUBE_Common<0xC0>; } // End AMDGPUGen.EG_CAYMAN @@ -905,6 +979,34 @@ def LOCAL_SIZE_Y : R600PreloadInst <"LOCAL_SIZE_Y", def LOCAL_SIZE_Z : R600PreloadInst <"LOCAL_SIZE_Z", int_r600_read_local_size_z>; +def R600_LOAD_CONST : AMDGPUShaderInst < + (outs R600_Reg32:$dst), + (ins i32imm:$src0), + "R600_LOAD_CONST $dst, $src0", + [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))] +>; + +def LOAD_INPUT : AMDGPUShaderInst < + (outs R600_Reg32:$dst), + (ins i32imm:$src), + "LOAD_INPUT $dst, $src", + [(set R600_Reg32:$dst, (int_R600_load_input imm:$src))] +>; + +def RESERVE_REG : AMDGPUShaderInst < + (outs), + (ins i32imm:$src), + "RESERVE_REG $src", + [(int_AMDGPU_reserve_reg imm:$src)] +>; + +def STORE_OUTPUT: AMDGPUShaderInst < + (outs), + (ins R600_Reg32:$src0, i32imm:$src1), + "STORE_OUTPUT $src0, $src1", + [(int_AMDGPU_store_output R600_Reg32:$src0, imm:$src1)] +>; + } // End usesCustomInserter = 1, isPseudo = 1 } // End isCodeGenOnly = 1 @@ -933,15 +1035,14 @@ def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 5, sel_y>; def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 6, sel_z>; def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 7, sel_w>; +def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>; 
+def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>; +def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>; +def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>; -include "R600ShaderPatterns.td" - -// We need this pattern to avoid having real registers in PHI nodes. -// For some reason this pattern only works when it comes after the other -// instruction defs. -def : Pat < - (int_R600_load_input imm:$src), - (LOAD_INPUT imm:$src) ->; +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 4, sel_x>; +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 5, sel_y>; +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 6, sel_z>; +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 7, sel_w>; } // End isR600toCayman Predicate diff --git a/src/gallium/drivers/radeon/R600Intrinsics.td b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td index 8038fee1a3c..73ef4aae234 100644 --- a/src/gallium/drivers/radeon/R600Intrinsics.td +++ b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td @@ -1,4 +1,4 @@ -//===-- R600Intrinsics.td - TODO: Add brief description -------===// +//===-- R600Intrinsics.td - R600 Instrinsic defs -------*- tablegen -*-----===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// R600 Intrinsic Definitions // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/R600InstrFormats.td b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td index 0890eb64509..cd761358475 100644 --- a/src/gallium/drivers/radeon/R600InstrFormats.td +++ b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td @@ -1,4 +1,4 @@ -//===-- R600InstrFormats.td - TODO: Add brief description -------===// +//===-- R600Intrinsics.td - TODO: Add brief description -------===// // // The LLVM Compiler Infrastructure // @@ -11,6 +11,6 @@ // 
//===----------------------------------------------------------------------===// - -class ALUInst <bits<10> op, dag outs, dag ins, string asm, list<dag> pattern> - : InstR600 <, outs, ins , asm, pattern> +let TargetPrefix = "R600", isTarget = 1 in { + def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadWriteArgMem]>; +} diff --git a/src/gallium/drivers/radeon/R600KernelParameters.cpp b/src/gallium/drivers/radeon/R600KernelParameters.cpp index 3fdf48a2bf2..53bfebc7364 100644 --- a/src/gallium/drivers/radeon/R600KernelParameters.cpp +++ b/src/gallium/drivers/radeon/R600KernelParameters.cpp @@ -1,4 +1,4 @@ -//===-- R600KernelParameters.cpp - TODO: Add brief description -------===// +//===-- R600KernelParameters.cpp - Lower kernel function arguments --------===// // // The LLVM Compiler Infrastructure // @@ -7,89 +7,83 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This pass lowers kernel function arguments to loads from the vertex buffer. +// +// Kernel arguemnts are stored in the vertex buffer at an offset of 9 dwords, +// so arg0 needs to be loaded from VTX_BUFFER[9] and arg1 is loaded from +// VTX_BUFFER[10], etc. // //===----------------------------------------------------------------------===// -#include <llvm-c/Core.h> -#include "R600KernelParameters.h" -#include "R600OpenCLUtils.h" +#include "AMDGPU.h" +#include "AMDIL.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Constants.h" +#include "llvm/Function.h" #include "llvm/Intrinsics.h" +#include "llvm/Metadata.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetData.h" #include "llvm/Support/IRBuilder.h" #include "llvm/Support/TypeBuilder.h" -// #include "llvm/CodeGen/Function.h" - -namespace AMDILAS { -enum AddressSpaces { - PRIVATE_ADDRESS = 0, // Address space for private memory. - GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0). 
- CONSTANT_ADDRESS = 2, // Address space for constant memory. - LOCAL_ADDRESS = 3, // Address space for local memory. - REGION_ADDRESS = 4, // Address space for region memory. - ADDRESS_NONE = 5, // Address space for unknown memory. - PARAM_D_ADDRESS = 6, // Address space for direct addressible parameter memory (CONST0) - PARAM_I_ADDRESS = 7, // Address space for indirect addressible parameter memory (VTX1) - LAST_ADDRESS = 8 -}; -} - #include <map> #include <set> using namespace llvm; -using namespace std; + +namespace { #define CONSTANT_CACHE_SIZE_DW 127 -class R600KernelParameters : public llvm::FunctionPass +class R600KernelParameters : public FunctionPass { - const llvm::TargetData * TD; + const TargetData * TD; LLVMContext* Context; Module *mod; - + struct param { - param() : val(NULL), ptr_val(NULL), offset_in_dw(0), size_in_dw(0), indirect(false), specialID(0) {} - - llvm::Value* val; - llvm::Value* ptr_val; + param() : val(NULL), ptr_val(NULL), offset_in_dw(0), size_in_dw(0), + indirect(false), specialID(0) {} + + Value* val; + Value* ptr_val; int offset_in_dw; int size_in_dw; bool indirect; - - string specialType; + + std::string specialType; int specialID; - + int end() { return offset_in_dw + size_in_dw; } - /* The first 9 dwords are reserved for the grid sizes. */ + // The first 9 dwords are reserved for the grid sizes. 
int get_rat_offset() { return 9 + offset_in_dw; } }; std::vector<param> params; - int getLastSpecialID(const string& TypeName); - + bool isOpenCLKernel(const Function* fun); + int getLastSpecialID(const std::string& TypeName); + int getListSize(); - void AddParam(llvm::Argument* arg); - int calculateArgumentSize(llvm::Argument* arg); - void RunAna(llvm::Function* fun); - void Replace(llvm::Function* fun); - bool isIndirect(Value* val, set<Value*>& visited); - void Propagate(llvm::Function* fun); - void Propagate(llvm::Value* v, const llvm::Twine& name, bool indirect = false); + void AddParam(Argument* arg); + int calculateArgumentSize(Argument* arg); + void RunAna(Function* fun); + void Replace(Function* fun); + bool isIndirect(Value* val, std::set<Value*>& visited); + void Propagate(Function* fun); + void Propagate(Value* v, const Twine& name, bool indirect = false); Value* ConstantRead(Function* fun, param& p); Value* handleSpecial(Function* fun, param& p); bool isSpecialType(Type*); - string getSpecialTypeName(Type*); + std::string getSpecialTypeName(Type*); public: static char ID; R600KernelParameters() : FunctionPass(ID) {}; - R600KernelParameters(const llvm::TargetData* TD) : FunctionPass(ID), TD(TD) {} -// bool runOnFunction (llvm::Function &F); - bool runOnFunction (llvm::Function &F); + R600KernelParameters(const TargetData* TD) : FunctionPass(ID), TD(TD) {} + bool runOnFunction (Function &F); void getAnalysisUsage(AnalysisUsage &AU) const; const char *getPassName() const; bool doInitialization(Module &M); @@ -98,13 +92,42 @@ public: char R600KernelParameters::ID = 0; -static RegisterPass<R600KernelParameters> X("kerparam", "OpenCL Kernel Parameter conversion", false, false); +static RegisterPass<R600KernelParameters> X("kerparam", + "OpenCL Kernel Parameter conversion", false, false); -int R600KernelParameters::getLastSpecialID(const string& TypeName) +bool R600KernelParameters::isOpenCLKernel(const Function* fun) +{ + Module *mod = 
const_cast<Function*>(fun)->getParent(); + NamedMDNode * md = mod->getOrInsertNamedMetadata("opencl.kernels"); + + if (!md or !md->getNumOperands()) + { + return false; + } + + for (int i = 0; i < int(md->getNumOperands()); i++) + { + if (!md->getOperand(i) or !md->getOperand(i)->getOperand(0)) + { + continue; + } + + assert(md->getOperand(i)->getNumOperands() == 1); + + if (md->getOperand(i)->getOperand(0)->getName() == fun->getName()) + { + return true; + } + } + + return false; +} + +int R600KernelParameters::getLastSpecialID(const std::string& TypeName) { int lastID = -1; - - for (vector<param>::iterator i = params.begin(); i != params.end(); i++) + + for (std::vector<param>::iterator i = params.begin(); i != params.end(); i++) { if (i->specialType == TypeName) { @@ -125,7 +148,7 @@ int R600KernelParameters::getListSize() return params.back().end(); } -bool R600KernelParameters::isIndirect(Value* val, set<Value*>& visited) +bool R600KernelParameters::isIndirect(Value* val, std::set<Value*>& visited) { if (isa<LoadInst>(val)) { @@ -144,7 +167,7 @@ bool R600KernelParameters::isIndirect(Value* val, set<Value*>& visited) } visited.insert(val); - + if (isa<GetElementPtrInst>(val)) { GetElementPtrInst* GEP = dyn_cast<GetElementPtrInst>(val); @@ -158,7 +181,7 @@ bool R600KernelParameters::isIndirect(Value* val, set<Value*>& visited) } } } - + for (Value::use_iterator i = val->use_begin(); i != val->use_end(); i++) { Value* v2 = dyn_cast<Value>(*i); @@ -175,24 +198,24 @@ bool R600KernelParameters::isIndirect(Value* val, set<Value*>& visited) return false; } -void R600KernelParameters::AddParam(llvm::Argument* arg) +void R600KernelParameters::AddParam(Argument* arg) { param p; - + p.val = dyn_cast<Value>(arg); p.offset_in_dw = getListSize(); p.size_in_dw = calculateArgumentSize(arg); if (isa<PointerType>(arg->getType()) and arg->hasByValAttr()) { - set<Value*> visited; + std::set<Value*> visited; p.indirect = isIndirect(p.val, visited); } - + params.push_back(p); } -int 
R600KernelParameters::calculateArgumentSize(llvm::Argument* arg) +int R600KernelParameters::calculateArgumentSize(Argument* arg) { Type* t = arg->getType(); @@ -200,16 +223,16 @@ int R600KernelParameters::calculateArgumentSize(llvm::Argument* arg) { t = dyn_cast<PointerType>(t)->getElementType(); } - + int store_size_in_dw = (TD->getTypeStoreSize(t) + 3)/4; assert(store_size_in_dw); - + return store_size_in_dw; } -void R600KernelParameters::RunAna(llvm::Function* fun) +void R600KernelParameters::RunAna(Function* fun) { assert(isOpenCLKernel(fun)); @@ -220,7 +243,7 @@ void R600KernelParameters::RunAna(llvm::Function* fun) } -void R600KernelParameters::Replace(llvm::Function* fun) +void R600KernelParameters::Replace(Function* fun) { for (std::vector<param>::iterator i = params.begin(); i != params.end(); i++) { @@ -237,11 +260,11 @@ void R600KernelParameters::Replace(llvm::Function* fun) if (new_val) { i->val->replaceAllUsesWith(new_val); - } + } } } -void R600KernelParameters::Propagate(llvm::Function* fun) +void R600KernelParameters::Propagate(Function* fun) { for (std::vector<param>::iterator i = params.begin(); i != params.end(); i++) { @@ -256,8 +279,8 @@ void R600KernelParameters::Propagate(Value* v, const Twine& name, bool indirect) { LoadInst* load = dyn_cast<LoadInst>(v); GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(v); - - unsigned addrspace; + + unsigned addrspace; if (indirect) { @@ -274,49 +297,54 @@ void R600KernelParameters::Propagate(Value* v, const Twine& name, bool indirect) if (dyn_cast<PointerType>(op->getType())->getAddressSpace() != addrspace) { - op = new BitCastInst(op, PointerType::get(dyn_cast<PointerType>(op->getType())->getElementType(), addrspace), name, dyn_cast<Instruction>(v)); + op = new BitCastInst(op, PointerType::get(dyn_cast<PointerType>( + op->getType())->getElementType(), addrspace), + name, dyn_cast<Instruction>(v)); } - vector<Value*> params(GEP->idx_begin(), GEP->idx_end()); - - GetElementPtrInst* GEP2 = 
GetElementPtrInst::Create(op, params, name, dyn_cast<Instruction>(v)); + std::vector<Value*> params(GEP->idx_begin(), GEP->idx_end()); + + GetElementPtrInst* GEP2 = GetElementPtrInst::Create(op, params, name, + dyn_cast<Instruction>(v)); GEP2->setIsInBounds(GEP->isInBounds()); v = dyn_cast<Value>(GEP2); GEP->replaceAllUsesWith(GEP2); GEP->eraseFromParent(); load = NULL; } - + if (load) { - if (load->getPointerAddressSpace() != addrspace) ///normally at this point we have the right address space + ///normally at this point we have the right address space + if (load->getPointerAddressSpace() != addrspace) { Value *orig_ptr = load->getPointerOperand(); PointerType *orig_ptr_type = dyn_cast<PointerType>(orig_ptr->getType()); - - Type* new_ptr_type = PointerType::get(orig_ptr_type->getElementType(), addrspace); + + Type* new_ptr_type = PointerType::get(orig_ptr_type->getElementType(), + addrspace); Value* new_ptr = orig_ptr; - + if (orig_ptr->getType() != new_ptr_type) { new_ptr = new BitCastInst(orig_ptr, new_ptr_type, "prop_cast", load); } - + Value* new_load = new LoadInst(new_ptr, name, load); load->replaceAllUsesWith(new_load); load->eraseFromParent(); } - + return; } - vector<User*> users(v->use_begin(), v->use_end()); - + std::vector<User*> users(v->use_begin(), v->use_end()); + for (int i = 0; i < int(users.size()); i++) { Value* v2 = dyn_cast<Value>(users[i]); - + if (v2) { Propagate(v2, name, indirect); @@ -327,7 +355,7 @@ void R600KernelParameters::Propagate(Value* v, const Twine& name, bool indirect) Value* R600KernelParameters::ConstantRead(Function* fun, param& p) { assert(fun->front().begin() != fun->front().end()); - + Instruction *first_inst = fun->front().begin(); IRBuilder <> builder (first_inst); /* First 3 dwords are reserved for the dimmension info */ @@ -346,43 +374,54 @@ Value* R600KernelParameters::ConstantRead(Function* fun, param& p) { addrspace = AMDILAS::PARAM_D_ADDRESS; } - + Argument *arg = dyn_cast<Argument>(p.val); Type * argType = 
p.val->getType(); PointerType * argPtrType = dyn_cast<PointerType>(p.val->getType()); - + if (argPtrType and arg->hasByValAttr()) { - Value* param_addr_space_ptr = ConstantPointerNull::get(PointerType::get(Type::getInt32Ty(*Context), addrspace)); - Value* param_ptr = GetElementPtrInst::Create(param_addr_space_ptr, ConstantInt::get(Type::getInt32Ty(*Context), p.get_rat_offset()), arg->getName(), first_inst); - param_ptr = new BitCastInst(param_ptr, PointerType::get(argPtrType->getElementType(), addrspace), arg->getName(), first_inst); + Value* param_addr_space_ptr = ConstantPointerNull::get( + PointerType::get(Type::getInt32Ty(*Context), + addrspace)); + Value* param_ptr = GetElementPtrInst::Create(param_addr_space_ptr, + ConstantInt::get(Type::getInt32Ty(*Context), + p.get_rat_offset()), arg->getName(), + first_inst); + param_ptr = new BitCastInst(param_ptr, + PointerType::get(argPtrType->getElementType(), + addrspace), + arg->getName(), first_inst); p.ptr_val = param_ptr; return param_ptr; } else { - Value* param_addr_space_ptr = ConstantPointerNull::get(PointerType::get(argType, addrspace)); - + Value* param_addr_space_ptr = ConstantPointerNull::get(PointerType::get( + argType, addrspace)); + Value* param_ptr = builder.CreateGEP(param_addr_space_ptr, - ConstantInt::get(Type::getInt32Ty(*Context), p.get_rat_offset()), arg->getName()); - + ConstantInt::get(Type::getInt32Ty(*Context), p.get_rat_offset()), + arg->getName()); + Value* param_value = builder.CreateLoad(param_ptr, arg->getName()); - + return param_value; } } Value* R600KernelParameters::handleSpecial(Function* fun, param& p) { - string name = getSpecialTypeName(p.val->getType()); + std::string name = getSpecialTypeName(p.val->getType()); int ID; assert(!name.empty()); - + if (name == "image2d_t" or name == "image3d_t") { - int lastID = max(getLastSpecialID("image2d_t"), getLastSpecialID("image3d_t")); - + int lastID = std::max(getLastSpecialID("image2d_t"), + getLastSpecialID("image3d_t")); + if (lastID 
== -1) { ID = 2; ///ID0 and ID1 are used internally by the driver @@ -403,20 +442,22 @@ Value* R600KernelParameters::handleSpecial(Function* fun, param& p) else { ID = lastID + 1; - } + } } else { ///TODO: give some error message return NULL; } - + p.specialType = name; p.specialID = ID; Instruction *first_inst = fun->front().begin(); - return new IntToPtrInst(ConstantInt::get(Type::getInt32Ty(*Context), p.specialID), p.val->getType(), "resourceID", first_inst); + return new IntToPtrInst(ConstantInt::get(Type::getInt32Ty(*Context), + p.specialID), p.val->getType(), + "resourceID", first_inst); } @@ -425,7 +466,7 @@ bool R600KernelParameters::isSpecialType(Type* t) return !getSpecialTypeName(t).empty(); } -string R600KernelParameters::getSpecialTypeName(Type* t) +std::string R600KernelParameters::getSpecialTypeName(Type* t) { PointerType *pt = dyn_cast<PointerType>(t); StructType *st = NULL; @@ -437,9 +478,9 @@ string R600KernelParameters::getSpecialTypeName(Type* t) if (st) { - string prefix = "struct.opencl_builtin_type_"; - - string name = st->getName().str(); + std::string prefix = "struct.opencl_builtin_type_"; + + std::string name = st->getName().str(); if (name.substr(0, prefix.length()) == prefix) { @@ -458,19 +499,15 @@ bool R600KernelParameters::runOnFunction (Function &F) return false; } -// F.dump(); - RunAna(&F); Replace(&F); Propagate(&F); - - mod->dump(); + return false; } void R600KernelParameters::getAnalysisUsage(AnalysisUsage &AU) const { -// AU.addRequired<FunctionAnalysis>(); FunctionPass::getAnalysisUsage(AU); AU.setPreservesAll(); } @@ -484,7 +521,7 @@ bool R600KernelParameters::doInitialization(Module &M) { Context = &M.getContext(); mod = &M; - + return false; } @@ -493,10 +530,12 @@ bool R600KernelParameters::doFinalization(Module &M) return false; } -llvm::FunctionPass* createR600KernelParametersPass(const llvm::TargetData* TD) +} // End anonymous namespace + +FunctionPass* llvm::createR600KernelParametersPass(const TargetData* TD) { 
FunctionPass *p = new R600KernelParameters(TD); - + return p; } diff --git a/src/gallium/drivers/radeon/R600KernelParameters.h b/src/gallium/drivers/radeon/R600KernelParameters.h deleted file mode 100644 index 904a469a5f0..00000000000 --- a/src/gallium/drivers/radeon/R600KernelParameters.h +++ /dev/null @@ -1,28 +0,0 @@ -//===-- R600KernelParameters.h - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// - -#ifndef KERNELPARAMETERS_H -#define KERNELPARAMETERS_H - -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Function.h" -#include "llvm/Pass.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Value.h" - -#include <vector> - -llvm::FunctionPass* createR600KernelParametersPass(const llvm::TargetData* TD); - - -#endif diff --git a/src/gallium/drivers/radeon/R600LowerInstructions.cpp b/src/gallium/drivers/radeon/R600LowerInstructions.cpp index fb5431d0eef..dca1fe195cc 100644 --- a/src/gallium/drivers/radeon/R600LowerInstructions.cpp +++ b/src/gallium/drivers/radeon/R600LowerInstructions.cpp @@ -1,4 +1,4 @@ -//===-- R600LowerInstructions.cpp - TODO: Add brief description -------===// +//===-- R600LowerInstructions.cpp - Lower unsupported AMDIL instructions --===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This pass lowers AMDIL MachineInstrs that aren't supported by the R600 +// target to either supported AMDIL MachineInstrs or R600 MachineInstrs. 
// //===----------------------------------------------------------------------===// @@ -93,8 +94,8 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) &AMDIL::R600_TReg32RegClass); BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT), setgt) - .addOperand(MI.getOperand(1)) - .addReg(AMDIL::ZERO); + .addReg(AMDIL::ZERO) + .addOperand(MI.getOperand(1)); unsigned add_int = MRI->createVirtualRegister( &AMDIL::R600_TReg32RegClass); @@ -311,7 +312,8 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) MachineInstr * defInstr = MRI->getVRegDef(maskedRegister); MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister); def->addTargetFlag(MO_FLAG_MASK); - break; + /* Continue so the instruction is not erased */ + continue; } case AMDIL::NEGATE_i32: @@ -342,6 +344,13 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) break; } + case AMDIL::ULT: + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGT_UINT)) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(2)) + .addOperand(MI.getOperand(1)); + break; + default: continue; } diff --git a/src/gallium/drivers/radeon/R600LowerShaderInstructions.cpp b/src/gallium/drivers/radeon/R600LowerShaderInstructions.cpp deleted file mode 100644 index 394ee7006ce..00000000000 --- a/src/gallium/drivers/radeon/R600LowerShaderInstructions.cpp +++ /dev/null @@ -1,143 +0,0 @@ -//===-- R600LowerShaderInstructions.cpp - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "AMDGPULowerShaderInstructions.h" -#include "AMDIL.h" -#include "AMDILInstrInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" - -using namespace llvm; - -namespace { - class R600LowerShaderInstructionsPass : public MachineFunctionPass, - public AMDGPULowerShaderInstructionsPass { - - private: - static char ID; - TargetMachine &TM; - - void lowerEXPORT_REG_FAKE(MachineInstr &MI, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I); - void lowerLOAD_INPUT(MachineInstr & MI); - bool lowerSTORE_OUTPUT(MachineInstr & MI, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I); - - public: - R600LowerShaderInstructionsPass(TargetMachine &tm) : - MachineFunctionPass(ID), TM(tm) { } - - bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { return "R600 Lower Shader Instructions"; } - }; -} /* End anonymous namespace */ - -char R600LowerShaderInstructionsPass::ID = 0; - -FunctionPass *llvm::createR600LowerShaderInstructionsPass(TargetMachine &tm) { - return new R600LowerShaderInstructionsPass(tm); -} - -#define INSTR_CASE_FLOAT_V(inst) \ - case AMDIL:: inst##_v4f32: \ - -#define INSTR_CASE_FLOAT_S(inst) \ - case AMDIL:: inst##_f32: - -#define INSTR_CASE_FLOAT(inst) \ - INSTR_CASE_FLOAT_V(inst) \ - INSTR_CASE_FLOAT_S(inst) -bool R600LowerShaderInstructionsPass::runOnMachineFunction(MachineFunction &MF) -{ - MRI = &MF.getRegInfo(); - - - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - MachineBasicBlock &MBB = *BB; - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) { - MachineInstr &MI = *I; - bool deleteInstr = false; - switch (MI.getOpcode()) { - - 
default: break; - - case AMDIL::RESERVE_REG: - case AMDIL::EXPORT_REG: - deleteInstr = true; - break; - - case AMDIL::LOAD_INPUT: - lowerLOAD_INPUT(MI); - deleteInstr = true; - break; - - case AMDIL::STORE_OUTPUT: - deleteInstr = lowerSTORE_OUTPUT(MI, MBB, I); - break; - - } - - ++I; - - if (deleteInstr) { - MI.eraseFromParent(); - } - } - } - - return false; -} - -/* The goal of this function is to replace the virutal destination register of - * a LOAD_INPUT instruction with the correct physical register that will. - * - * XXX: I don't think this is the right way things assign physical registers, - * but I'm not sure of another way to do this. - */ -void R600LowerShaderInstructionsPass::lowerLOAD_INPUT(MachineInstr &MI) -{ - MachineOperand &dst = MI.getOperand(0); - MachineOperand &arg = MI.getOperand(1); - int64_t inputIndex = arg.getImm(); - const TargetRegisterClass * inputClass = TM.getRegisterInfo()->getRegClass(AMDIL::R600_TReg32RegClassID); - unsigned newRegister = inputClass->getRegister(inputIndex); - unsigned dstReg = dst.getReg(); - - preloadRegister(MI.getParent()->getParent(), TM.getInstrInfo(), newRegister, - dstReg); -} - -bool R600LowerShaderInstructionsPass::lowerSTORE_OUTPUT(MachineInstr &MI, - MachineBasicBlock &MBB, MachineBasicBlock::iterator I) -{ - MachineOperand &valueOp = MI.getOperand(1); - MachineOperand &indexOp = MI.getOperand(2); - unsigned valueReg = valueOp.getReg(); - int64_t outputIndex = indexOp.getImm(); - const TargetRegisterClass * outputClass = TM.getRegisterInfo()->getRegClass(AMDIL::R600_TReg32RegClassID); - unsigned newRegister = outputClass->getRegister(outputIndex); - - BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::COPY), - newRegister) - .addReg(valueReg); - - if (!MRI->isLiveOut(newRegister)) - MRI->addLiveOut(newRegister); - - return true; - -} diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp new file mode 100644 index 
00000000000..48443fb57d8 --- /dev/null +++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp @@ -0,0 +1,16 @@ +//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "R600MachineFunctionInfo.h" + +using namespace llvm; + +R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) + : MachineFunctionInfo() + { } diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h new file mode 100644 index 00000000000..948e1924272 --- /dev/null +++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h @@ -0,0 +1,33 @@ +//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// R600MachineFunctionInfo is used for keeping track of which registers have +// been reserved by the llvm.AMDGPU.reserve.reg intrinsic. 
+// +//===----------------------------------------------------------------------===// + +#ifndef R600MACHINEFUNCTIONINFO_H +#define R600MACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" +#include <vector> + +namespace llvm { + +class R600MachineFunctionInfo : public MachineFunctionInfo { + +public: + R600MachineFunctionInfo(const MachineFunction &MF); + std::vector<unsigned> ReservedRegs; + +}; + +} // End llvm namespace + +#endif //R600MACHINEFUNCTIONINFO_H diff --git a/src/gallium/drivers/radeon/R600OpenCLUtils.h b/src/gallium/drivers/radeon/R600OpenCLUtils.h deleted file mode 100644 index 91e41d63d0d..00000000000 --- a/src/gallium/drivers/radeon/R600OpenCLUtils.h +++ /dev/null @@ -1,49 +0,0 @@ -//===-- OpenCLUtils.h - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// -#ifndef OPENCLUTILS_H -#define OPENCLUTILS_H - -#include "llvm/Function.h" - -#include <llvm/Module.h> - -static bool isOpenCLKernel(const llvm::Function* fun) -{ - llvm::Module *mod = const_cast<llvm::Function*>(fun)->getParent(); - llvm::NamedMDNode * md = mod->getOrInsertNamedMetadata("opencl.kernels"); - - if (!md or !md->getNumOperands()) - { - return false; - } - - for (int i = 0; i < int(md->getNumOperands()); i++) - { - if (!md->getOperand(i) or !md->getOperand(i)->getOperand(0)) - { - continue; - } - - assert(md->getOperand(i)->getNumOperands() == 1); - - if (md->getOperand(i)->getOperand(0)->getName() == fun->getName()) - { - return true; - } - } - - return false; -} - - -#endif diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.cpp b/src/gallium/drivers/radeon/R600RegisterInfo.cpp index 96507b104cf..de559bd2dfa 
100644 --- a/src/gallium/drivers/radeon/R600RegisterInfo.cpp +++ b/src/gallium/drivers/radeon/R600RegisterInfo.cpp @@ -1,4 +1,4 @@ -//===-- R600RegisterInfo.cpp - TODO: Add brief description -------===// +//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,13 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// The file contains the R600 implementation of the TargetRegisterInfo class. // //===----------------------------------------------------------------------===// #include "R600RegisterInfo.h" #include "AMDGPUTargetMachine.h" +#include "R600MachineFunctionInfo.h" using namespace llvm; @@ -26,6 +27,8 @@ R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm, BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); + const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>(); + Reserved.set(AMDIL::ZERO); Reserved.set(AMDIL::HALF); Reserved.set(AMDIL::ONE); @@ -40,19 +43,11 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const Reserved.set(*I); } - for (MachineFunction::const_iterator BB = MF.begin(), - BB_E = MF.end(); BB != BB_E; ++BB) { - const MachineBasicBlock &MBB = *BB; - for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - const MachineInstr &MI = *I; - if (MI.getOpcode() == AMDIL::RESERVE_REG) { - if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) { - Reserved.set(MI.getOperand(0).getReg()); - } - } - } + for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(), + E = MFI->ReservedRegs.end(); I != E; ++I) { + Reserved.set(*I); } + return Reserved; } diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.h b/src/gallium/drivers/radeon/R600RegisterInfo.h index 95a44f971a0..89a11f9333b 100644 --- 
a/src/gallium/drivers/radeon/R600RegisterInfo.h +++ b/src/gallium/drivers/radeon/R600RegisterInfo.h @@ -1,4 +1,4 @@ -//===-- R600RegisterInfo.h - TODO: Add brief description -------===// +//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Interface definition for R600RegisterInfo // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/R600Schedule.td b/src/gallium/drivers/radeon/R600Schedule.td index c6b1ca61bb5..d1957903d87 100644 --- a/src/gallium/drivers/radeon/R600Schedule.td +++ b/src/gallium/drivers/radeon/R600Schedule.td @@ -1,4 +1,4 @@ -//===-- R600Schedule.td - TODO: Add brief description -------===// +//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,9 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// R600 has a VLIW architecture. On pre-cayman cards there are 5 instruction +// slots ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. For cayman cards, the TRANS +// slot has been removed. 
// //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp b/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp index b0bdf701a74..1ef097f7b1e 100644 --- a/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp +++ b/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp @@ -1,4 +1,4 @@ -//===-- SIAssignInterpRegs.cpp - TODO: Add brief description -------===// +//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,12 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This pass maps the pseudo interpolation registers to the correct physical +// registers. Prior to executing a fragment shader, the GPU loads interpolation +// parameters into physical registers. The specific physical register that each +// interpolation parameter ends up in depends on the type of the interpolation +// parameter as well as how many interpolation parameters are used by the +// shader. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SICodeEmitter.cpp b/src/gallium/drivers/radeon/SICodeEmitter.cpp index ad494fae7c6..6970d9f0875 100644 --- a/src/gallium/drivers/radeon/SICodeEmitter.cpp +++ b/src/gallium/drivers/radeon/SICodeEmitter.cpp @@ -1,4 +1,4 @@ -//===-- SICodeEmitter.cpp - TODO: Add brief description -------===// +//===-- SICodeEmitter.cpp - SI Code Emitter -------------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// The SI code emitter produces machine code that can be executed directly on +// the GPU device. 
// //===----------------------------------------------------------------------===// @@ -144,8 +145,6 @@ bool SICodeEmitter::runOnMachineFunction(MachineFunction &MF) { MF.dump(); TM = &MF.getTarget(); - const AMDGPUInstrInfo * TII = - static_cast<const AMDGPUInstrInfo*>(TM->getInstrInfo()); emitState(MF); @@ -155,8 +154,7 @@ bool SICodeEmitter::runOnMachineFunction(MachineFunction &MF) for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { MachineInstr &MI = *I; - if (!TII->isRegPreload(MI) && MI.getOpcode() != AMDIL::KILL - && MI.getOpcode() != AMDIL::RETURN) { + if (MI.getOpcode() != AMDIL::KILL && MI.getOpcode() != AMDIL::RETURN) { emitInstr(MI); } } diff --git a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl index 644daa1bc22..bb5ebbd67e6 100644 --- a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl +++ b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl @@ -1,16 +1,17 @@ -#===-- SIGenRegisterInfo.pl - TODO: Add brief description -------===# +#===-- SIGenRegisterInfo.pl - Script for generating register info files ----===# # # The LLVM Compiler Infrastructure # # This file is distributed under the University of Illinois Open Source # License. See LICENSE.TXT for details. # -#===----------------------------------------------------------------------===# +#===------------------------------------------------------------------------===# # -# TODO: Add full description +# This perl script prints to stdout .td code to be used as SIRegisterInfo.td +# it also generates a file called SIHwRegInfo.include, which contains helper +# functions for determining the hw encoding of registers. 
# -#===----------------------------------------------------------------------===# - +#===------------------------------------------------------------------------===# use strict; use warnings; diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp index 1a4b47ecbf5..441a4a07290 100644 --- a/src/gallium/drivers/radeon/SIISelLowering.cpp +++ b/src/gallium/drivers/radeon/SIISelLowering.cpp @@ -1,4 +1,4 @@ -//===-- SIISelLowering.cpp - TODO: Add brief description -------===// +//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Most of the DAG lowering is handled in AMDILISelLowering.cpp. This file is +// mostly EmitInstrWithCustomInserter(). // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SIISelLowering.h b/src/gallium/drivers/radeon/SIISelLowering.h index e7a79f8e215..229e682ef51 100644 --- a/src/gallium/drivers/radeon/SIISelLowering.h +++ b/src/gallium/drivers/radeon/SIISelLowering.h @@ -1,4 +1,4 @@ -//===-- SIISelLowering.h - TODO: Add brief description -------===// +//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// SI DAG Lowering interface definition // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SIInstrFormats.td b/src/gallium/drivers/radeon/SIInstrFormats.td index caf9b0ef120..de0d4fa39d2 100644 --- a/src/gallium/drivers/radeon/SIInstrFormats.td +++ b/src/gallium/drivers/radeon/SIInstrFormats.td @@ -1,4 +1,4 @@ -//===-- SIInstrFormats.td - TODO: Add 
brief description -------===// +//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,17 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// SI Instruction format definitions. +// +// Instructions with _32 take 32-bit operands. +// Instructions with _64 take 64-bit operands. +// +// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit +// encoding is the standard encoding, but instruction that make use of +// any of the instruction modifiers must use the 64-bit encoding. +// +// Instructions with _e32 use the 32-bit encoding. +// Instructions with _e64 use the 64-bit encoding. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SIInstrInfo.cpp b/src/gallium/drivers/radeon/SIInstrInfo.cpp index 6f92e96c6e7..0cb97643a7f 100644 --- a/src/gallium/drivers/radeon/SIInstrInfo.cpp +++ b/src/gallium/drivers/radeon/SIInstrInfo.cpp @@ -1,4 +1,4 @@ -//===-- SIInstrInfo.cpp - TODO: Add brief description -------===// +//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// SI Implementation of TargetInstrInfo. 
// //===----------------------------------------------------------------------===// @@ -107,6 +107,8 @@ unsigned SIInstrInfo::getISAOpcode(unsigned AMDILopcode) const { switch (AMDILopcode) { case AMDIL::MAD_f32: return AMDIL::V_MAD_LEGACY_F32; + //XXX We need a better way of detecting end of program + case AMDIL::RETURN: return AMDIL::S_ENDPGM; default: return AMDGPUInstrInfo::getISAOpcode(AMDILopcode); } } diff --git a/src/gallium/drivers/radeon/SIInstrInfo.h b/src/gallium/drivers/radeon/SIInstrInfo.h index bd76c3f94aa..68940ea3ca4 100644 --- a/src/gallium/drivers/radeon/SIInstrInfo.h +++ b/src/gallium/drivers/radeon/SIInstrInfo.h @@ -1,4 +1,4 @@ -//===-- SIInstrInfo.h - TODO: Add brief description -------===// +//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Interface definition for SIInstrInfo. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SIInstrInfo.td b/src/gallium/drivers/radeon/SIInstrInfo.td index 65b28ec84ad..435948ff1de 100644 --- a/src/gallium/drivers/radeon/SIInstrInfo.td +++ b/src/gallium/drivers/radeon/SIInstrInfo.td @@ -1,4 +1,4 @@ -//===-- SIInstrInfo.td - TODO: Add brief description -------===// +//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===// // // The LLVM Compiler Infrastructure // @@ -6,10 +6,6 @@ // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// @@ -464,11 +460,4 @@ def IMM12bit : ImmLeaf < include "SIInstrFormats.td" -def LOAD_CONST : AMDGPUShaderInst < - (outs GPRF32:$dst), - (ins i32imm:$src), - "LOAD_CONST $dst, $src", - [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))] ->; - include "SIInstructions.td" diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td index 27a8b31757a..57bbc7a5d5a 100644 --- a/src/gallium/drivers/radeon/SIInstructions.td +++ b/src/gallium/drivers/radeon/SIInstructions.td @@ -1,4 +1,4 @@ -//===-- SIInstructions.td - TODO: Add brief description -------===// +//===-- SIInstructions.td - SI Instruction Defintions ---------------------===// // // The LLVM Compiler Infrastructure // @@ -6,10 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// def isSI : Predicate<"Subtarget.device()" @@ -800,6 +796,13 @@ def CONFIG_WRITE : InstSI < field bits<32> Inst = 0; } +def LOAD_CONST : AMDGPUShaderInst < + (outs GPRF32:$dst), + (ins i32imm:$src), + "LOAD_CONST $dst, $src", + [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))] +>; + let usesCustomInserter = 1 in { def SI_V_CNDLT : InstSI < @@ -833,7 +836,6 @@ def USE_SGPR_32 : InstSI < > { field bits<32> Inst = 0; - let PreloadReg = 1; } def USE_SGPR_64 : InstSI < @@ -844,7 +846,6 @@ def USE_SGPR_64 : InstSI < > { field bits<32> Inst = 0; - let PreloadReg = 1; } def VS_LOAD_BUFFER_INDEX : InstSI < @@ -854,7 +855,6 @@ def VS_LOAD_BUFFER_INDEX : InstSI < [(set VReg_32:$dst, (int_SI_vs_load_buffer_index))]> { field bits<32> Inst = 0; - let PreloadReg = 1; } } // end usesCustomInserter diff 
--git a/src/gallium/drivers/radeon/SIIntrinsics.td b/src/gallium/drivers/radeon/SIIntrinsics.td index e3014e13916..4d23072d4f1 100644 --- a/src/gallium/drivers/radeon/SIIntrinsics.td +++ b/src/gallium/drivers/radeon/SIIntrinsics.td @@ -1,4 +1,4 @@ -//===-- SIIntrinsics.td - TODO: Add brief description -------===// +//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// SI Intrinsic Definitions // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SILowerShaderInstructions.cpp b/src/gallium/drivers/radeon/SILowerShaderInstructions.cpp deleted file mode 100644 index 5d49d88dc7c..00000000000 --- a/src/gallium/drivers/radeon/SILowerShaderInstructions.cpp +++ /dev/null @@ -1,90 +0,0 @@ -//===-- SILowerShaderInstructions.cpp - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// - - -#include "AMDGPU.h" -#include "AMDGPULowerShaderInstructions.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" - -using namespace llvm; - -namespace { - class SILowerShaderInstructionsPass : public MachineFunctionPass, - public AMDGPULowerShaderInstructionsPass { - - private: - static char ID; - TargetMachine &TM; - - public: - SILowerShaderInstructionsPass(TargetMachine &tm) : - MachineFunctionPass(ID), TM(tm) { } - - bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { return "SI Lower Shader Instructions"; } - - void lowerRETURN(MachineBasicBlock &MBB, MachineBasicBlock::iterator I); - void lowerSET_M0(MachineInstr &MI, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I); - }; -} /* End anonymous namespace */ - -char SILowerShaderInstructionsPass::ID = 0; - -FunctionPass *llvm::createSILowerShaderInstructionsPass(TargetMachine &tm) { - return new SILowerShaderInstructionsPass(tm); -} - -bool SILowerShaderInstructionsPass::runOnMachineFunction(MachineFunction &MF) -{ - MRI = &MF.getRegInfo(); - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - MachineBasicBlock &MBB = *BB; - for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); - I != MBB.end(); I = Next, Next = llvm::next(I) ) { - MachineInstr &MI = *I; - switch (MI.getOpcode()) { - case AMDIL::RETURN: - lowerRETURN(MBB, I); - break; - case AMDIL::SET_M0: - lowerSET_M0(MI, MBB, I); - break; - default: continue; - } - MI.removeFromParent(); - } - } - - return false; -} - -void SILowerShaderInstructionsPass::lowerRETURN(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) -{ - const struct TargetInstrInfo * TII = 
TM.getInstrInfo(); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::S_ENDPGM)); -} - -void SILowerShaderInstructionsPass::lowerSET_M0(MachineInstr &MI, - MachineBasicBlock &MBB, MachineBasicBlock::iterator I) -{ - const struct TargetInstrInfo * TII = TM.getInstrInfo(); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::S_MOV_IMM_I32)) - .addReg(AMDIL::M0) - .addOperand(MI.getOperand(1)); -} diff --git a/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp b/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp index eace40c226c..40ba76f1f86 100644 --- a/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp +++ b/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp @@ -1,4 +1,4 @@ -//===-- SIMachineFunctionInfo.cpp - TODO: Add brief description -------===// +//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===// // // The LLVM Compiler Infrastructure // @@ -6,10 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// #include "SIMachineFunctionInfo.h" diff --git a/src/gallium/drivers/radeon/SIMachineFunctionInfo.h b/src/gallium/drivers/radeon/SIMachineFunctionInfo.h index 5647de9d81f..46a021f3613 100644 --- a/src/gallium/drivers/radeon/SIMachineFunctionInfo.h +++ b/src/gallium/drivers/radeon/SIMachineFunctionInfo.h @@ -1,4 +1,4 @@ -//===-- SIMachineFunctionInfo.h - TODO: Add brief description -------===// +//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,9 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// SIMachineFunctionInfo is used to keep track of the spi_sp_input_addr config +// register, which is to tell the hardware which interpolation parameters to +// load. 
// //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SIPropagateImmReads.cpp b/src/gallium/drivers/radeon/SIPropagateImmReads.cpp index 4f925d5de1c..6a165488831 100644 --- a/src/gallium/drivers/radeon/SIPropagateImmReads.cpp +++ b/src/gallium/drivers/radeon/SIPropagateImmReads.cpp @@ -1,4 +1,4 @@ -//===-- SIPropagateImmReads.cpp - TODO: Add brief description -------===// +//===-- SIPropagateImmReads.cpp - Lower Immediate Reads Pass --------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,9 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// We can't do this in the ConvertToISA pass, because later passes might +// create LOADCONST_* instructions that we would miss. This is why we need +// a separate pass for this. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SIRegisterInfo.cpp b/src/gallium/drivers/radeon/SIRegisterInfo.cpp index da2ec36a773..2d530a4f022 100644 --- a/src/gallium/drivers/radeon/SIRegisterInfo.cpp +++ b/src/gallium/drivers/radeon/SIRegisterInfo.cpp @@ -1,4 +1,4 @@ -//===-- SIRegisterInfo.cpp - TODO: Add brief description -------===// +//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This file contains the SI implementation of the TargetRegisterInfo class. 
// //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SIRegisterInfo.h b/src/gallium/drivers/radeon/SIRegisterInfo.h index c797e3c8ace..77f3261efc5 100644 --- a/src/gallium/drivers/radeon/SIRegisterInfo.h +++ b/src/gallium/drivers/radeon/SIRegisterInfo.h @@ -1,4 +1,4 @@ -//===-- SIRegisterInfo.h - TODO: Add brief description -------===// +//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Interface definition for SIRegisterInfo // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SISchedule.td b/src/gallium/drivers/radeon/SISchedule.td index 9e99268e9ca..28b65b82585 100644 --- a/src/gallium/drivers/radeon/SISchedule.td +++ b/src/gallium/drivers/radeon/SISchedule.td @@ -1,4 +1,4 @@ -//===-- SISchedule.td - TODO: Add brief description -------===// +//===-- SISchedule.td - SI Scheduling definitons -------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// TODO: This is just a place holder for now. 
// //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h index 9be7f90c3e6..4a706397fdd 100644 --- a/src/gallium/drivers/radeon/radeon_llvm.h +++ b/src/gallium/drivers/radeon/radeon_llvm.h @@ -36,6 +36,8 @@ #define RADEON_LLVM_MAX_BRANCH_DEPTH 16 #define RADEON_LLVM_MAX_LOOP_DEPTH 16 +#define RADEON_LLVM_MAX_SYSTEM_VALUES 4 + struct radeon_llvm_branch { LLVMBasicBlockRef endif_block; LLVMBasicBlockRef if_block; @@ -78,6 +80,9 @@ struct radeon_llvm_context { unsigned input_index, const struct tgsi_full_declaration *decl); + void (*load_system_value)(struct radeon_llvm_context *, + unsigned index, + const struct tgsi_full_declaration *decl); /** User data to use with the callbacks */ void * userdata; @@ -90,6 +95,8 @@ struct radeon_llvm_context { LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS]; unsigned output_reg_count; + LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES]; + unsigned reserved_reg_count; /*=== Private Members ===*/ @@ -105,6 +112,37 @@ struct radeon_llvm_context { struct gallivm_state gallivm; }; +static inline LLVMValueRef bitcast( + struct lp_build_tgsi_context * bld_base, + enum tgsi_opcode_type type, + LLVMValueRef value +) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMContextRef ctx = bld_base->base.gallivm->context; + LLVMTypeRef dst_type; + + switch (type) { + case TGSI_TYPE_UNSIGNED: + case TGSI_TYPE_SIGNED: + dst_type = LLVMInt32TypeInContext(ctx); + break; + case TGSI_TYPE_UNTYPED: + case TGSI_TYPE_FLOAT: + dst_type = LLVMFloatTypeInContext(ctx); + break; + default: + dst_type = 0; + break; + } + + if (dst_type) + return LLVMBuildBitCast(builder, value, dst_type, ""); + else + return value; +} + + void radeon_llvm_context_init(struct radeon_llvm_context * ctx); void radeon_llvm_dispose(struct radeon_llvm_context * ctx); diff --git 
a/src/gallium/drivers/radeon/radeon_llvm_emit.cpp b/src/gallium/drivers/radeon/radeon_llvm_emit.cpp index b409cb2175e..ebc32106b52 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.cpp +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.cpp @@ -93,19 +93,20 @@ radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes, AMDGPUTriple.setArch(Arch); Module * mod = unwrap(M); - std::string FS = gpu_family; + std::string FS; TargetOptions TO; + if (dump) { + mod->dump(); + FS += "+DumpCode"; + } + std::auto_ptr<TargetMachine> tm(AMDGPUTarget->createTargetMachine( - AMDGPUTriple.getTriple(), gpu_family, "" /* Features */, + AMDGPUTriple.getTriple(), gpu_family, FS, TO, Reloc::Default, CodeModel::Default, CodeGenOpt::Default )); TargetMachine &AMDGPUTargetMachine = *tm.get(); - /* XXX: Use TargetMachine.Options in 3.0 */ - if (dump) { - mod->dump(); - } PassManager PM; PM.add(new TargetData(*AMDGPUTargetMachine.getTargetData())); PM.add(createPromoteMemoryToRegisterPass()); diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index 62de9da28de..6e6fc3d12cd 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -29,6 +29,7 @@ #include "gallivm/lp_bld_gather.h" #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_intr.h" #include "gallivm/lp_bld_swizzle.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" @@ -112,8 +113,25 @@ emit_fetch_immediate( enum tgsi_opcode_type type, unsigned swizzle) { + LLVMTypeRef ctype; + LLVMContextRef ctx = bld_base->base.gallivm->context; + + switch (type) { + case TGSI_TYPE_UNSIGNED: + case TGSI_TYPE_SIGNED: + ctype = LLVMInt32TypeInContext(ctx); + break; + case TGSI_TYPE_UNTYPED: + case TGSI_TYPE_FLOAT: + ctype = LLVMFloatTypeInContext(ctx); + break; + default: + ctype = 0; + break; + } + struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); - 
return bld->immediates[reg->Register.Index][swizzle]; + return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype); } static LLVMValueRef @@ -134,7 +152,7 @@ emit_fetch_input( return lp_build_gather_values(bld_base->base.gallivm, values, TGSI_NUM_CHANNELS); } else { - return ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)]; + return bitcast(bld_base, type, ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)]); } } @@ -155,7 +173,7 @@ emit_fetch_temporary( } else { LLVMValueRef temp_ptr; temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle); - return LLVMBuildLoad(builder, temp_ptr, ""); + return bitcast(bld_base,type,LLVMBuildLoad(builder, temp_ptr, "")); } } @@ -213,6 +231,15 @@ static void emit_declaration( } break; + case TGSI_FILE_SYSTEM_VALUE: + { + unsigned idx; + for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { + ctx->load_system_value(ctx, idx, decl); + } + } + break; + case TGSI_FILE_OUTPUT: { unsigned idx; @@ -304,6 +331,9 @@ emit_store( default: return; } + + value = bitcast(bld_base, TGSI_TYPE_FLOAT, value); + LLVMBuildStore(builder, value, temp_ptr); } } @@ -444,8 +474,10 @@ static void if_emit( struct gallivm_state * gallivm = bld_base->base.gallivm; LLVMValueRef cond; LLVMBasicBlockRef if_block, else_block, endif_block; - cond = LLVMBuildFCmp(gallivm->builder, LLVMRealOEQ, emit_data->args[0], - bld_base->base.one, ""); + + cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, + bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]), + bld_base->int_bld.zero, ""); endif_block = LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, "ENDIF"); @@ -463,6 +495,101 @@ static void if_emit( ctx->branch[ctx->branch_depth - 1].has_else = 0; } +static void kil_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + unsigned i; + for (i = 0; i < emit_data->arg_count; i++) { + 
emit_data->output[i] = lp_build_intrinsic_unary( + bld_base->base.gallivm->builder, + action->intr_name, + emit_data->dst_type, emit_data->args[i]); + } +} + + +static void emit_prepare_cube_coords( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + boolean shadowcube = (emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE); + struct gallivm_state * gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMTypeRef type = bld_base->base.elem_type; + LLVMValueRef coords[4]; + LLVMValueRef mad_args[3]; + unsigned i, cnt; + + LLVMValueRef v = lp_build_intrinsic(builder, "llvm.AMDGPU.cube", + LLVMVectorType(type, 4), + &emit_data->args[0],1); + + /* save src.w for shadow cube */ + cnt = shadowcube ? 3 : 4; + + for (i = 0; i < cnt; ++i) { + LLVMValueRef idx = lp_build_const_int32(gallivm, i); + coords[i] = LLVMBuildExtractElement(builder, v, idx, ""); + } + + coords[2] = lp_build_intrinsic(builder, "llvm.AMDIL.fabs.", + type, &coords[2], 1); + coords[2] = lp_build_intrinsic(builder, "llvm.AMDGPU.rcp", + type, &coords[2], 1); + + mad_args[1] = coords[2]; + mad_args[2] = LLVMConstReal(type, 1.5); + + mad_args[0] = coords[0]; + coords[0] = lp_build_intrinsic(builder, "llvm.AMDIL.mad.", + type, mad_args, 3); + + mad_args[0] = coords[1]; + coords[1] = lp_build_intrinsic(builder, "llvm.AMDIL.mad.", + type, mad_args, 3); + + /* apply yxwy swizzle to cooords */ + coords[2] = coords[3]; + coords[3] = coords[1]; + coords[1] = coords[0]; + coords[0] = coords[3]; + + emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, + coords, 4); +} + +static void txp_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + const struct tgsi_full_instruction * inst = emit_data->inst; + LLVMValueRef src_w; + unsigned chan; + LLVMValueRef coords[4]; + + emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); + src_w = lp_build_emit_fetch(bld_base, 
emit_data->inst, 0, TGSI_CHAN_W); + + for (chan = 0; chan < 3; chan++ ) { + LLVMValueRef arg = lp_build_emit_fetch(bld_base, + emit_data->inst, 0, chan); + coords[chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_DIV, arg, src_w); + } + coords[3] = bld_base->base.one; + emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, + coords, 4); + emit_data->arg_count = 1; + + if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) && + inst->Instruction.Opcode != TGSI_OPCODE_TXQ) { + emit_prepare_cube_coords(bld_base, emit_data); + } +} + static void tex_fetch_args( struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) @@ -475,16 +602,261 @@ static void tex_fetch_args( */ + const struct tgsi_full_instruction * inst = emit_data->inst; + LLVMValueRef coords[4]; unsigned chan; for (chan = 0; chan < 4; chan++) { - coords[chan] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, chan); + coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan); } emit_data->arg_count = 1; emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, coords, 4); emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); + + if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) && + inst->Instruction.Opcode != TGSI_OPCODE_TXQ) { + emit_prepare_cube_coords(bld_base, emit_data); + } +} + +static void emit_icmp( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + unsigned pred; + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMContextRef context = bld_base->base.gallivm->context; + + switch (emit_data->inst->Instruction.Opcode) { + case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break; + case TGSI_OPCODE_USNE: pred = LLVMIntNE; break; + case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break; + case TGSI_OPCODE_USLT: pred = LLVMIntULT; break; + case 
TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break; + case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break; + default: + assert(!"unknown instruction"); + } + + LLVMValueRef v = LLVMBuildICmp(builder, pred, + emit_data->args[0], emit_data->args[1],""); + + v = LLVMBuildSExtOrBitCast(builder, v, + LLVMInt32TypeInContext(context), ""); + + emit_data->output[emit_data->chan] = v; +} + +static void emit_not( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED, + emit_data->args[0]); + emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, ""); +} + +static void emit_and( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildAnd(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_or( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildOr(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_uadd( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildAdd(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_udiv( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = 
LLVMBuildUDiv(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_idiv( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_mod( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildSRem(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_umod( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildURem(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_shl( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildShl(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_ushr( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildLShr(builder, + emit_data->args[0], emit_data->args[1], ""); +} +static void emit_ishr( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = 
LLVMBuildAShr(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_xor( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildXor(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_ssg( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + + LLVMValueRef cmp, val; + + if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) { + cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, ""); + val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], ""); + cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, ""); + val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), ""); + } else { // float SSG + cmp = LLVMBuildFCmp(builder, LLVMRealUGT, emit_data->args[0], bld_base->int_bld.zero, ""); + val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], ""); + cmp = LLVMBuildFCmp(builder, LLVMRealUGE, val, bld_base->base.zero, ""); + val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), ""); + } + + emit_data->output[emit_data->chan] = val; +} + +static void emit_ineg( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildNeg(builder, + emit_data->args[0], ""); +} + +static void emit_f2i( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = 
bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder, + emit_data->args[0], bld_base->int_bld.elem_type, ""); +} + +static void emit_f2u( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder, + emit_data->args[0], bld_base->uint_bld.elem_type, ""); +} + +static void emit_i2f( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder, + emit_data->args[0], bld_base->base.elem_type, ""); +} + +static void emit_u2f( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder, + emit_data->args[0], bld_base->base.elem_type, ""); +} + +static void emit_immediate(struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_immediate *imm) +{ + unsigned i; + struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); + + for (i = 0; i < 4; ++i) { + ctx->soa.immediates[ctx->soa.num_immediates][i] = + LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false ); + } + + ctx->soa.num_immediates++; } void radeon_llvm_context_init(struct radeon_llvm_context * ctx) @@ -526,12 +898,13 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) lp_build_context_init(&bld_base->base, &ctx->gallivm, type); lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type)); + lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type)); bld_base->soa = 1; bld_base->emit_store = emit_store; 
bld_base->emit_swizzle = emit_swizzle; bld_base->emit_declaration = emit_declaration; - bld_base->emit_immediate = lp_emit_immediate_soa; + bld_base->emit_immediate = emit_immediate; bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate; bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input; @@ -545,6 +918,60 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) lp_set_default_actions(bld_base); + bld_base->op_actions[TGSI_OPCODE_IABS].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs."; + bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not; + bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and; + bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor; + bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or; + bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd; + bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv; + bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv; + bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod; + bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod; + bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg; + bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl; + bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr; + bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr; + bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg; + bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg; + bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f; + bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f; + bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i; + bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u; + bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx"; + bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args; + bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy"; + bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args; + 
bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp; + bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp; + bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp; + bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp; + bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp; + bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp; + bld_base->op_actions[TGSI_OPCODE_ROUND].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest."; + bld_base->op_actions[TGSI_OPCODE_MIN].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.AMDIL.min."; + bld_base->op_actions[TGSI_OPCODE_MAX].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.AMDIL.max."; + bld_base->op_actions[TGSI_OPCODE_IMIN].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_IMIN].intr_name = "llvm.AMDGPU.imin"; + bld_base->op_actions[TGSI_OPCODE_IMAX].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_IMAX].intr_name = "llvm.AMDGPU.imax"; + bld_base->op_actions[TGSI_OPCODE_UMIN].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_UMIN].intr_name = "llvm.AMDGPU.umin"; + bld_base->op_actions[TGSI_OPCODE_UMAX].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_UMAX].intr_name = "llvm.AMDGPU.umax"; + bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = tex_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf"; + bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq"; + bld_base->op_actions[TGSI_OPCODE_CEIL].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.AMDIL.round.neginf."; + + + bld_base->op_actions[TGSI_OPCODE_ABS].emit = lp_build_tgsi_intrinsic; bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.AMDIL.fabs."; 
bld_base->op_actions[TGSI_OPCODE_ARL].emit = lp_build_tgsi_intrinsic; @@ -558,10 +985,6 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDGPU.cndlt"; bld_base->op_actions[TGSI_OPCODE_COS].emit = lp_build_tgsi_intrinsic; bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.AMDGPU.cos"; - bld_base->op_actions[TGSI_OPCODE_DDX].emit = lp_build_tgsi_intrinsic; - bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx"; - bld_base->op_actions[TGSI_OPCODE_DDY].emit = lp_build_tgsi_intrinsic; - bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy"; bld_base->op_actions[TGSI_OPCODE_DIV].emit = lp_build_tgsi_intrinsic; bld_base->op_actions[TGSI_OPCODE_DIV].intr_name = "llvm.AMDGPU.div"; bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit; @@ -574,7 +997,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_FRC].emit = lp_build_tgsi_intrinsic; bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction."; bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit; - bld_base->op_actions[TGSI_OPCODE_KIL].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_KIL].emit = kil_emit; bld_base->op_actions[TGSI_OPCODE_KIL].intr_name = "llvm.AMDGPU.kill"; bld_base->op_actions[TGSI_OPCODE_KILP].emit = lp_build_tgsi_intrinsic; bld_base->op_actions[TGSI_OPCODE_KILP].intr_name = "llvm.AMDGPU.kilp"; @@ -597,7 +1020,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_SSG].emit = lp_build_tgsi_intrinsic; bld_base->op_actions[TGSI_OPCODE_SSG].intr_name = "llvm.AMDGPU.ssg"; bld_base->op_actions[TGSI_OPCODE_SGE].emit = lp_build_tgsi_intrinsic; - bld_base->op_actions[TGSI_OPCODE_SGE].intr_name = "llvm.AMDGPU.sge."; + bld_base->op_actions[TGSI_OPCODE_SGE].intr_name = "llvm.AMDGPU.sge"; bld_base->op_actions[TGSI_OPCODE_SEQ].emit = lp_build_tgsi_intrinsic; 
bld_base->op_actions[TGSI_OPCODE_SEQ].intr_name = "llvm.AMDGPU.seq"; bld_base->op_actions[TGSI_OPCODE_SLE].fetch_args = radeon_llvm_fetch_args_2_reverse_soa; @@ -620,6 +1043,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd"; bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args; bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl"; + bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args; bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex"; bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = lp_build_tgsi_intrinsic; bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc"; diff --git a/src/gallium/drivers/radeonsi/evergreen_state.c b/src/gallium/drivers/radeonsi/evergreen_state.c index 75d6cadc6cc..b094248fee1 100644 --- a/src/gallium/drivers/radeonsi/evergreen_state.c +++ b/src/gallium/drivers/radeonsi/evergreen_state.c @@ -1166,24 +1166,6 @@ static void si_delete_sampler_state(struct pipe_context *ctx, free(state); } -static unsigned si_map_swizzle(unsigned swizzle) -{ - switch (swizzle) { - case UTIL_FORMAT_SWIZZLE_Y: - return V_008F1C_SQ_SEL_Y; - case UTIL_FORMAT_SWIZZLE_Z: - return V_008F1C_SQ_SEL_Z; - case UTIL_FORMAT_SWIZZLE_W: - return V_008F1C_SQ_SEL_W; - case UTIL_FORMAT_SWIZZLE_0: - return V_008F1C_SQ_SEL_0; - case UTIL_FORMAT_SWIZZLE_1: - return V_008F1C_SQ_SEL_1; - default: /* UTIL_FORMAT_SWIZZLE_X */ - return V_008F1C_SQ_SEL_X; - } -} - static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_context *ctx, struct pipe_resource *texture, const struct pipe_sampler_view *state) @@ -1259,9 +1241,9 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte va = r600_resource_va(ctx->screen, texture); view->state[0] = (va + tmp->offset[0]) >> 8; - view->state[1] = ((va + tmp->offset[0]) >> 40) & 0xff; - view->state[1] |= (S_008F14_DATA_FORMAT(format) | - 
S_008F14_NUM_FORMAT(num_format)); + view->state[1] = (S_008F14_BASE_ADDRESS_HI((va + tmp->offset[0]) >> 40) | + S_008F14_DATA_FORMAT(format) | + S_008F14_NUM_FORMAT(num_format)); view->state[2] = (S_008F18_WIDTH(texture->width0 - 1) | S_008F18_HEIGHT(height - 1)); view->state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | @@ -2087,9 +2069,9 @@ void si_pipe_shader_vs(struct pipe_context *ctx, struct si_pipe_shader *shader) r600_pipe_state_add_reg(rstate, R_02870C_SPI_SHADER_POS_FORMAT, S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | - S_02870C_POS1_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | - S_02870C_POS2_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | - S_02870C_POS3_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP), + S_02870C_POS1_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE) | + S_02870C_POS2_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE) | + S_02870C_POS3_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE), NULL, 0); va = r600_resource_va(ctx->screen, (void *)shader->bo); diff --git a/src/gallium/drivers/radeonsi/r600_state_common.c b/src/gallium/drivers/radeonsi/r600_state_common.c index 53a34ef519c..06eb96b9ee8 100644 --- a/src/gallium/drivers/radeonsi/r600_state_common.c +++ b/src/gallium/drivers/radeonsi/r600_state_common.c @@ -628,12 +628,15 @@ static void r600_vertex_buffer_update(struct r600_context *rctx) ptr[0] = va & 0xFFFFFFFF; ptr[1] = (S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(vertex_buffer->stride)); - ptr[2] = (vertex_buffer->buffer->width0 - offset) / vertex_buffer->stride; - /* XXX: Hardcoding RGBA */ - ptr[3] = (S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + if (vertex_buffer->stride > 0) + ptr[2] = ((vertex_buffer->buffer->width0 - offset) / + vertex_buffer->stride); + else + ptr[2] = vertex_buffer->buffer->width0 - offset; + ptr[3] = (S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | + 
S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | + S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | + S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format)); diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h index ab30892d51a..bba4cf23691 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h @@ -37,6 +37,7 @@ #include "r600.h" #include "radeonsi_public.h" #include "r600_resource.h" +#include "sid.h" #define R600_MAX_CONST_BUFFERS 1 #define R600_MAX_CONST_BUFFER_SIZE 4096 @@ -467,6 +468,24 @@ static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits) } #define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) +static INLINE unsigned si_map_swizzle(unsigned swizzle) +{ + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_Y: + return V_008F0C_SQ_SEL_Y; + case UTIL_FORMAT_SWIZZLE_Z: + return V_008F0C_SQ_SEL_Z; + case UTIL_FORMAT_SWIZZLE_W: + return V_008F0C_SQ_SEL_W; + case UTIL_FORMAT_SWIZZLE_0: + return V_008F0C_SQ_SEL_0; + case UTIL_FORMAT_SWIZZLE_1: + return V_008F0C_SQ_SEL_1; + default: /* UTIL_FORMAT_SWIZZLE_X */ + return V_008F0C_SQ_SEL_X; + } +} + static inline unsigned r600_tex_aniso_filter(unsigned filter) { if (filter <= 1) return 0; diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c index 6425c352d28..0e1a97bba3e 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c @@ -199,7 +199,7 @@ static void declare_input_fs( LLVMValueRef attr_number = lp_build_const_int32(gallivm, input_index); /* XXX: Handle all possible interpolation modes */ - switch (decl->Declaration.Interpolate) { + switch (decl->Interp.Interpolate) { case TGSI_INTERPOLATE_COLOR: if (si_shader_ctx->rctx->rasterizer->flatshade) intr_name = "llvm.SI.fs.interp.constant"; @@ -331,14 +331,14 @@ static 
void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) i = shader->ninput++; shader->input[i].name = d->Semantic.Name; shader->input[i].sid = d->Semantic.Index; - shader->input[i].interpolate = d->Declaration.Interpolate; - shader->input[i].centroid = d->Declaration.Centroid; + shader->input[i].interpolate = d->Interp.Interpolate; + shader->input[i].centroid = d->Interp.Centroid; break; case TGSI_FILE_OUTPUT: i = shader->noutput++; shader->output[i].name = d->Semantic.Name; shader->output[i].sid = d->Semantic.Index; - shader->output[i].interpolate = d->Declaration.Interpolate; + shader->output[i].interpolate = d->Interp.Interpolate; break; } diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index d54e02e40cd..d4c01759dbe 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -799,7 +799,8 @@ get_texel_2d_array(const struct sp_sampler_variant *samp, const struct pipe_resource *texture = samp->view->texture; unsigned level = addr.bits.level; - assert(layer < texture->array_size); + assert(layer < (int) texture->array_size); + assert(layer >= 0); if (x < 0 || x >= (int) u_minify(texture->width0, level) || y < 0 || y >= (int) u_minify(texture->height0, level)) { @@ -1787,9 +1788,9 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler, float weight_buffer[TGSI_QUAD_SIZE]; unsigned buffer_next; int j; - float den;// = 0.0F; + float den; /* = 0.0F; */ float ddq; - float U;// = u0 - tex_u; + float U; /* = u0 - tex_u; */ int v; /* Scale ellipse formula to directly index the Filter Lookup Table. @@ -1805,8 +1806,8 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler, * also the same. Note that texel/image access can only be performed using * a quad, i.e. it is not possible to get the pixel value for a single * tex coord. In order to have a better performance, the access is buffered - * using the s_buffer/t_buffer and weight_buffer. 
Only when the buffer is full, - * then the pixel values are read from the image. + * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is + * full, then the pixel values are read from the image. */ ddq = 2 * A; @@ -1834,7 +1835,9 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler, int u; for (u = u0; u <= u1; ++u) { - /* Note that the ellipse has been pre-scaled so F = WEIGHT_LUT_SIZE - 1 */ + /* Note that the ellipse has been pre-scaled so F = + * WEIGHT_LUT_SIZE - 1 + */ if (q < WEIGHT_LUT_SIZE) { /* as a LUT is used, q must never be negative; * should not happen, though @@ -1873,10 +1876,11 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler, } } - /* if the tex coord buffer contains unread values, we will read them now. - * Note that in most cases we have to read more pixel values than required, - * however, as the img_filter_2d_nearest function(s) does not have a count - * parameter, we need to read the whole quad and ignore the unused values + /* if the tex coord buffer contains unread values, we will read + * them now. Note that in most cases we have to read more pixel + * values than required, however, as the img_filter_2d_nearest + * function(s) does not have a count parameter, we need to read + * the whole quad and ignore the unused values */ if (buffer_next > 0) { unsigned jj; @@ -1895,11 +1899,9 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler, } if (den <= 0.0F) { - /* Reaching this place would mean - * that no pixels intersected the ellipse. - * This should never happen because - * the filter we use always - * intersects at least one pixel. + /* Reaching this place would mean that no pixels intersected + * the ellipse. This should never happen because the filter + * we use always intersects at least one pixel. 
*/ /*rgba[0]=0; @@ -1907,7 +1909,8 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler, rgba[2]=0; rgba[3]=0;*/ /* not enough pixels in resampling, resort to direct interpolation */ - samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba_temp); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, + tgsi_sampler_lod_bias, rgba_temp); den = 1; num[0] = rgba_temp[0][j]; num[1] = rgba_temp[1][j]; @@ -2020,7 +2023,6 @@ mip_filter_linear_aniso(struct tgsi_sampler *tgsi_sampler, } - /** * Specialized version of mip_filter_linear with hard-wired calls to * 2d lambda calculation and 2d_linear_repeat_POT img filters. @@ -2090,7 +2092,6 @@ mip_filter_linear_2d_linear_repeat_POT( } - /** * Do shadow/depth comparisons. */ @@ -2287,9 +2288,11 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, samp->compare(tgsi_sampler, ssss, tttt, NULL, c0, control, rgba); } -static void do_swizzling(const struct sp_sampler_variant *samp, - float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], - float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) + +static void +do_swizzling(const struct sp_sampler_variant *samp, + float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], + float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { int j; const unsigned swizzle_r = samp->key.bits.swizzle_r; @@ -2358,6 +2361,7 @@ static void do_swizzling(const struct sp_sampler_variant *samp, } } + static void sample_swizzle(struct tgsi_sampler *tgsi_sampler, const float s[TGSI_QUAD_SIZE], @@ -2464,6 +2468,19 @@ get_linear_wrap(unsigned mode) } +/** + * Is swizzling needed for the given state key? 
+ */ +static INLINE bool +any_swizzle(union sp_sampler_key key) +{ + return (key.bits.swizzle_r != PIPE_SWIZZLE_RED || + key.bits.swizzle_g != PIPE_SWIZZLE_GREEN || + key.bits.swizzle_b != PIPE_SWIZZLE_BLUE || + key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA); +} + + static compute_lambda_func get_lambda_func(const union sp_sampler_key key) { @@ -2590,6 +2607,7 @@ sp_sampler_variant_destroy( struct sp_sampler_variant *samp ) FREE(samp); } + static void sample_get_dims(struct tgsi_sampler *tgsi_sampler, int level, int dims[4]) @@ -2630,35 +2648,43 @@ sample_get_dims(struct tgsi_sampler *tgsi_sampler, int level, } } -/* this function is only used for unfiltered texel gets - via the TGSI TXF opcode. */ +/** + * This function is only used for getting unfiltered texels via the + * TXF opcode. The GL spec says that out-of-bounds texel fetches + * produce undefined results. Instead of crashing, lets just clamp + * coords to the texture image size. + */ static void sample_get_texels(struct tgsi_sampler *tgsi_sampler, - const int v_i[TGSI_QUAD_SIZE], - const int v_j[TGSI_QUAD_SIZE], - const int v_k[TGSI_QUAD_SIZE], - const int lod[TGSI_QUAD_SIZE], - const int8_t offset[3], - float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) + const int v_i[TGSI_QUAD_SIZE], + const int v_j[TGSI_QUAD_SIZE], + const int v_k[TGSI_QUAD_SIZE], + const int lod[TGSI_QUAD_SIZE], + const int8_t offset[3], + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); union tex_tile_address addr; const struct pipe_resource *texture = samp->view->texture; int j, c; const float *tx; - bool need_swizzle = (samp->key.bits.swizzle_r != PIPE_SWIZZLE_RED || - samp->key.bits.swizzle_g != PIPE_SWIZZLE_GREEN || - samp->key.bits.swizzle_b != PIPE_SWIZZLE_BLUE || - samp->key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA); + const bool need_swizzle = any_swizzle(samp->key); + int width, height, depth, layers; addr.value = 0; /* TODO write a better test for LOD */ 
addr.bits.level = lod[0]; + width = u_minify(texture->width0, addr.bits.level); + height = u_minify(texture->height0, addr.bits.level); + depth = u_minify(texture->depth0, addr.bits.level); + layers = texture->array_size; + switch(texture->target) { case PIPE_TEXTURE_1D: for (j = 0; j < TGSI_QUAD_SIZE; j++) { - tx = get_texel_2d(samp, addr, v_i[j] + offset[0], 0); + int x = CLAMP(v_i[j] + offset[0], 0, width - 1); + tx = get_texel_2d(samp, addr, x, 0); for (c = 0; c < 4; c++) { rgba[c][j] = tx[c]; } @@ -2666,8 +2692,9 @@ sample_get_texels(struct tgsi_sampler *tgsi_sampler, break; case PIPE_TEXTURE_1D_ARRAY: for (j = 0; j < TGSI_QUAD_SIZE; j++) { - tx = get_texel_1d_array(samp, addr, v_i[j] + offset[0], - v_j[j] + offset[1]); + int x = CLAMP(v_i[j] + offset[0], 0, width - 1); + int y = CLAMP(v_j[j] + offset[1], 0, layers - 1); + tx = get_texel_1d_array(samp, addr, x, y); for (c = 0; c < 4; c++) { rgba[c][j] = tx[c]; } @@ -2676,8 +2703,9 @@ sample_get_texels(struct tgsi_sampler *tgsi_sampler, case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: for (j = 0; j < TGSI_QUAD_SIZE; j++) { - tx = get_texel_2d(samp, addr, v_i[j] + offset[0], - v_j[j] + offset[1]); + int x = CLAMP(v_i[j] + offset[0], 0, width - 1); + int y = CLAMP(v_j[j] + offset[1], 0, height - 1); + tx = get_texel_2d(samp, addr, x, y); for (c = 0; c < 4; c++) { rgba[c][j] = tx[c]; } @@ -2685,9 +2713,10 @@ sample_get_texels(struct tgsi_sampler *tgsi_sampler, break; case PIPE_TEXTURE_2D_ARRAY: for (j = 0; j < TGSI_QUAD_SIZE; j++) { - tx = get_texel_2d_array(samp, addr, v_i[j] + offset[0], - v_j[j] + offset[1], - v_k[j] + offset[2]); + int x = CLAMP(v_i[j] + offset[0], 0, width - 1); + int y = CLAMP(v_j[j] + offset[1], 0, height - 1); + int layer = CLAMP(v_k[j] + offset[2], 0, layers - 1); + tx = get_texel_2d_array(samp, addr, x, y, layer); for (c = 0; c < 4; c++) { rgba[c][j] = tx[c]; } @@ -2695,9 +2724,11 @@ sample_get_texels(struct tgsi_sampler *tgsi_sampler, break; case PIPE_TEXTURE_3D: for (j = 0; j < 
TGSI_QUAD_SIZE; j++) { - tx = get_texel_3d(samp, addr, v_i[j] + offset[0], - v_j[j] + offset[1], - v_k[j] + offset[2]); + int x = CLAMP(v_i[j] + offset[0], 0, width - 1); + int y = CLAMP(v_j[j] + offset[1], 0, height - 1); + int z = CLAMP(v_k[j] + offset[2], 0, depth - 1); + + tx = get_texel_3d(samp, addr, x, y, z); for (c = 0; c < 4; c++) { rgba[c][j] = tx[c]; } @@ -2715,6 +2746,8 @@ sample_get_texels(struct tgsi_sampler *tgsi_sampler, do_swizzling(samp, rgba_temp, rgba); } } + + /** * Create a sampler variant for a given set of non-orthogonal state. */ @@ -2830,10 +2863,7 @@ sp_create_sampler_variant( const struct pipe_sampler_state *sampler, samp->sample_target = samp->compare; } - if (key.bits.swizzle_r != PIPE_SWIZZLE_RED || - key.bits.swizzle_g != PIPE_SWIZZLE_GREEN || - key.bits.swizzle_b != PIPE_SWIZZLE_BLUE || - key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA) { + if (any_swizzle(key)) { samp->base.get_samples = sample_swizzle; } else { diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index ac2d35e5ea4..64ec658b80e 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -241,7 +241,11 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_CAN_COMPACT_VARYINGS: case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: case PIPE_CAP_GLSL_FEATURE_LEVEL: + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: return 0; + case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: + return 1; default: debug_printf("Unexpected PIPE_CAP_ query %u\n", param); diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 5e6d1fbc904..a68912608bc 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -871,6 +871,31 @@ static boolean emit_floor(struct svga_shader_emitter *emit, } +/* Translate the following TGSI CEIL 
instruction. + * CEIL DST, SRC + * To the following SVGA3D instruction sequence. + * FRC TMP, -SRC + * ADD DST, SRC, TMP + */ +static boolean emit_ceil(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0); + const struct src_register src0 = translate_src_register(emit, &insn->Src[0]); + SVGA3dShaderDestToken temp = get_temp(emit); + + /* FRC TMP, -SRC */ + if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), temp, negate(src0))) + return FALSE; + + /* ADD DST, SRC, TMP */ + if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), dst, src0, src(temp))) + return FALSE; + + return TRUE; +} + + /* Translate the following TGSI CMP instruction. * CMP DST, SRC0, SRC1, SRC2 * To the following SVGA3D instruction sequence. @@ -2435,6 +2460,9 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit, case TGSI_OPCODE_TRUNC: /* should be TRUNC, not FLR */ return emit_floor( emit, insn ); + case TGSI_OPCODE_CEIL: + return emit_ceil( emit, insn ); + case TGSI_OPCODE_CMP: return emit_cmp( emit, insn ); diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 7709177444f..f59e3881232 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -64,6 +64,7 @@ struct pipe_vertex_element; struct pipe_video_buffer; struct pipe_video_decoder; struct pipe_viewport_state; +struct pipe_compute_state; union pipe_color_union; union pipe_query_result; @@ -142,6 +143,10 @@ struct pipe_context { void (*bind_geometry_sampler_states)(struct pipe_context *, unsigned num_samplers, void **samplers); + void (*bind_compute_sampler_states)(struct pipe_context *, + unsigned start_slot, + unsigned num_samplers, + void **samplers); void (*delete_sampler_state)(struct pipe_context *, void *); void * (*create_rasterizer_state)(struct pipe_context *, @@ -221,6 +226,26 @@ struct pipe_context { unsigned num_views, struct pipe_sampler_view 
**); + void (*set_compute_sampler_views)(struct pipe_context *, + unsigned start_slot, unsigned num_views, + struct pipe_sampler_view **); + + /** + * Bind an array of shader resources that will be used by the + * graphics pipeline. Any resources that were previously bound to + * the specified range will be unbound after this call. + * + * \param first first resource to bind. + * \param count number of consecutive resources to bind. + * \param resources array of pointers to the resources to bind, it + * should contain at least \a count elements + * unless it's NULL, in which case no new + * resources will be bound. + */ + void (*set_shader_resources)(struct pipe_context *, + unsigned start, unsigned count, + struct pipe_surface **resources); + void (*set_vertex_buffers)( struct pipe_context *, unsigned num_buffers, const struct pipe_vertex_buffer * ); @@ -410,6 +435,86 @@ struct pipe_context { */ struct pipe_video_buffer *(*create_video_buffer)( struct pipe_context *context, const struct pipe_video_buffer *templat ); + + /** + * Compute kernel execution + */ + /*@{*/ + /** + * Define the compute program and parameters to be used by + * pipe_context::launch_grid. + */ + void *(*create_compute_state)(struct pipe_context *context, + const struct pipe_compute_state *); + void (*bind_compute_state)(struct pipe_context *, void *); + void (*delete_compute_state)(struct pipe_context *, void *); + + /** + * Bind an array of shader resources that will be used by the + * compute program. Any resources that were previously bound to + * the specified range will be unbound after this call. + * + * \param first first resource to bind. + * \param count number of consecutive resources to bind. + * \param resources array of pointers to the resources to bind, it + * should contain at least \a count elements + * unless it's NULL, in which case no new + * resources will be bound. 
+ */ + void (*set_compute_resources)(struct pipe_context *, + unsigned start, unsigned count, + struct pipe_surface **resources); + + /** + * Bind an array of buffers to be mapped into the address space of + * the GLOBAL resource. Any buffers that were previously bound + * between [first, first + count - 1] are unbound after this call. + * + * \param first first buffer to map. + * \param count number of consecutive buffers to map. + * \param resources array of pointers to the buffers to map, it + * should contain at least \a count elements + * unless it's NULL, in which case no new + * resources will be bound. + * \param handles array of pointers to the memory locations that + * will be filled with the respective base + * addresses each buffer will be mapped to. It + * should contain at least \a count elements, + * unless \a resources is NULL in which case \a + * handles should be NULL as well. + * + * Note that the driver isn't required to make any guarantees about + * the contents of the \a handles array being valid anytime except + * during the subsequent calls to pipe_context::launch_grid. This + * means that the only sensible location handles[i] may point to is + * somewhere within the INPUT buffer itself. This is so to + * accommodate implementations that lack virtual memory but + * nevertheless migrate buffers on the fly, leading to resource + * base addresses that change on each kernel invocation or are + * unknown to the pipe driver. + */ + void (*set_global_binding)(struct pipe_context *context, + unsigned first, unsigned count, + struct pipe_resource **resources, + uint32_t **handles); + + /** + * Launch the compute kernel starting from instruction \a pc of the + * currently bound compute program. + * + * \a grid_layout and \a block_layout are arrays of size \a + * PIPE_COMPUTE_CAP_GRID_DIMENSION that determine the layout of the + * grid (in block units) and working block (in thread units) to be + * used, respectively. 
+ * + * \a input will be used to initialize the INPUT resource, and it + * should point to a buffer of at least + * pipe_compute_state::req_input_mem bytes. + */ + void (*launch_grid)(struct pipe_context *context, + const uint *block_layout, const uint *grid_layout, + uint32_t pc, const void *input); + /*@}*/ }; diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 398cb98248c..1e05cc4caee 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -304,6 +304,9 @@ enum pipe_transfer_usage { #define PIPE_BIND_STREAM_OUTPUT (1 << 11) /* set_stream_output_buffers */ #define PIPE_BIND_CURSOR (1 << 16) /* mouse cursor */ #define PIPE_BIND_CUSTOM (1 << 17) /* state-tracker/winsys usages */ +#define PIPE_BIND_GLOBAL (1 << 18) /* set_global_binding */ +#define PIPE_BIND_SHADER_RESOURCE (1 << 19) /* set_shader_resources */ +#define PIPE_BIND_COMPUTE_RESOURCE (1 << 20) /* set_compute_resources */ /* The first two flags above were previously part of the amorphous * TEXTURE_USAGE, most of which are now descriptions of the ways a @@ -346,7 +349,8 @@ enum pipe_transfer_usage { #define PIPE_SHADER_VERTEX 0 #define PIPE_SHADER_FRAGMENT 1 #define PIPE_SHADER_GEOMETRY 2 -#define PIPE_SHADER_TYPES 3 +#define PIPE_SHADER_COMPUTE 3 +#define PIPE_SHADER_TYPES 4 /** @@ -477,9 +481,10 @@ enum pipe_cap { PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY = 65, PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY = 66, PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY = 67, - PIPE_CAP_USER_INDEX_BUFFERS = 68, - PIPE_CAP_USER_CONSTANT_BUFFERS = 69, - PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT = 70 + PIPE_CAP_COMPUTE = 68, + PIPE_CAP_USER_INDEX_BUFFERS = 69, + PIPE_CAP_USER_CONSTANT_BUFFERS = 70, + PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT = 71 }; /** @@ -522,9 +527,32 @@ enum pipe_shader_cap PIPE_SHADER_CAP_INDIRECT_CONST_ADDR = 15, PIPE_SHADER_CAP_SUBROUTINES = 16, /* BGNSUB, ENDSUB, CAL, RET */ 
PIPE_SHADER_CAP_INTEGERS = 17, - PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS = 18 + PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS = 18, + PIPE_SHADER_CAP_PREFERRED_IR = 19 }; +/** + * Shader intermediate representation. + */ +enum pipe_shader_ir +{ + PIPE_SHADER_IR_TGSI +}; + +/** + * Compute-specific implementation capabilities. They can be queried + * using pipe_screen::get_compute_param. + */ +enum pipe_compute_cap +{ + PIPE_COMPUTE_CAP_GRID_DIMENSION, + PIPE_COMPUTE_CAP_MAX_GRID_SIZE, + PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, + PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE, + PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE, + PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE, + PIPE_COMPUTE_CAP_MAX_INPUT_SIZE +}; /** * Composite query types diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index 45c441b2fcf..7ae7c9a04e1 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -98,6 +98,18 @@ struct pipe_screen { enum pipe_video_profile profile, enum pipe_video_cap param ); + /** + * Query a compute-specific capability/parameter/limit. + * \param param one of PIPE_COMPUTE_CAP_x + * \param ret pointer to a preallocated buffer that will be + * initialized to the parameter value, or NULL. + * \return size in bytes of the parameter value that would be + * returned.
+ */ + int (*get_compute_param)(struct pipe_screen *, + enum pipe_compute_cap param, + void *ret); + struct pipe_context * (*context_create)( struct pipe_screen *, void *priv ); diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index df2dd5e618e..6b58293f409 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -43,6 +43,7 @@ struct tgsi_header #define TGSI_PROCESSOR_FRAGMENT 0 #define TGSI_PROCESSOR_VERTEX 1 #define TGSI_PROCESSOR_GEOMETRY 2 +#define TGSI_PROCESSOR_COMPUTE 3 struct tgsi_processor { @@ -76,6 +77,7 @@ enum tgsi_file_type { TGSI_FILE_IMMEDIATE_ARRAY =10, TGSI_FILE_TEMPORARY_ARRAY =11, TGSI_FILE_RESOURCE =12, + TGSI_FILE_SAMPLER_VIEW =13, TGSI_FILE_COUNT /**< how many TGSI_FILE_ types */ }; @@ -114,12 +116,12 @@ struct tgsi_declaration unsigned NrTokens : 8; /**< UINT */ unsigned File : 4; /**< one of TGSI_FILE_x */ unsigned UsageMask : 4; /**< bitmask of TGSI_WRITEMASK_x flags */ - unsigned Interpolate : 4; /**< one of TGSI_INTERPOLATE_x */ unsigned Dimension : 1; /**< any extra dimension info? */ unsigned Semantic : 1; /**< BOOL, any semantic info? */ - unsigned Centroid : 1; /**< centroid sampling? */ + unsigned Interpolate : 1; /**< any interpolation info? */ unsigned Invariant : 1; /**< invariant optimization? */ - unsigned CylindricalWrap:4; /**< TGSI_CYLINDRICAL_WRAP_x flags */ + unsigned Local : 1; /**< optimize as subroutine local variable? */ + unsigned Padding : 7; }; struct tgsi_declaration_range @@ -134,6 +136,14 @@ struct tgsi_declaration_dimension unsigned Padding:16; }; +struct tgsi_declaration_interp +{ + unsigned Interpolate : 4; /**< one of TGSI_INTERPOLATE_x */ + unsigned Centroid : 1; /**< centroid sampling? 
*/ + unsigned CylindricalWrap:4; /**< TGSI_CYLINDRICAL_WRAP_x flags */ + unsigned Padding : 23; +}; + #define TGSI_SEMANTIC_POSITION 0 #define TGSI_SEMANTIC_COLOR 1 #define TGSI_SEMANTIC_BCOLOR 2 /**< back-face color */ @@ -149,7 +159,11 @@ struct tgsi_declaration_dimension #define TGSI_SEMANTIC_STENCIL 12 #define TGSI_SEMANTIC_CLIPDIST 13 #define TGSI_SEMANTIC_CLIPVERTEX 14 -#define TGSI_SEMANTIC_COUNT 15 /**< number of semantic values */ +#define TGSI_SEMANTIC_GRID_SIZE 15 /**< grid size in blocks */ +#define TGSI_SEMANTIC_BLOCK_ID 16 /**< id of the current block */ +#define TGSI_SEMANTIC_BLOCK_SIZE 17 /**< block size in threads */ +#define TGSI_SEMANTIC_THREAD_ID 18 /**< block-relative id of the current thread */ +#define TGSI_SEMANTIC_COUNT 19 /**< number of semantic values */ struct tgsi_declaration_semantic { @@ -160,12 +174,28 @@ struct tgsi_declaration_semantic struct tgsi_declaration_resource { unsigned Resource : 8; /**< one of TGSI_TEXTURE_ */ + unsigned Raw : 1; + unsigned Writable : 1; + unsigned Padding : 22; +}; + +struct tgsi_declaration_sampler_view { + unsigned Resource : 8; /**< one of TGSI_TEXTURE_ */ unsigned ReturnTypeX : 6; /**< one of enum pipe_type */ unsigned ReturnTypeY : 6; /**< one of enum pipe_type */ unsigned ReturnTypeZ : 6; /**< one of enum pipe_type */ unsigned ReturnTypeW : 6; /**< one of enum pipe_type */ }; +/* + * Special resources that don't need to be declared. They map to the + * GLOBAL/LOCAL/PRIVATE/INPUT compute memory spaces. 
+ */ +#define TGSI_RESOURCE_GLOBAL 0x7fff +#define TGSI_RESOURCE_LOCAL 0x7ffe +#define TGSI_RESOURCE_PRIVATE 0x7ffd +#define TGSI_RESOURCE_INPUT 0x7ffc + #define TGSI_IMM_FLOAT32 0 #define TGSI_IMM_UINT32 1 #define TGSI_IMM_INT32 2 @@ -363,16 +393,16 @@ struct tgsi_property_data { #define TGSI_OPCODE_ENDSWITCH 144 /* resource related opcodes */ -#define TGSI_OPCODE_LOAD 145 -#define TGSI_OPCODE_LOAD_MS 146 -#define TGSI_OPCODE_SAMPLE 147 +#define TGSI_OPCODE_SAMPLE 145 +#define TGSI_OPCODE_SAMPLE_I 146 +#define TGSI_OPCODE_SAMPLE_I_MS 147 #define TGSI_OPCODE_SAMPLE_B 148 #define TGSI_OPCODE_SAMPLE_C 149 #define TGSI_OPCODE_SAMPLE_C_LZ 150 #define TGSI_OPCODE_SAMPLE_D 151 #define TGSI_OPCODE_SAMPLE_L 152 #define TGSI_OPCODE_GATHER4 153 -#define TGSI_OPCODE_RESINFO 154 +#define TGSI_OPCODE_SVIEWINFO 154 #define TGSI_OPCODE_SAMPLE_POS 155 #define TGSI_OPCODE_SAMPLE_INFO 156 @@ -381,7 +411,26 @@ struct tgsi_property_data { #define TGSI_OPCODE_IABS 159 #define TGSI_OPCODE_ISSG 160 -#define TGSI_OPCODE_LAST 161 +#define TGSI_OPCODE_LOAD 161 +#define TGSI_OPCODE_STORE 162 + +#define TGSI_OPCODE_MFENCE 163 +#define TGSI_OPCODE_LFENCE 164 +#define TGSI_OPCODE_SFENCE 165 +#define TGSI_OPCODE_BARRIER 166 + +#define TGSI_OPCODE_ATOMUADD 167 +#define TGSI_OPCODE_ATOMXCHG 168 +#define TGSI_OPCODE_ATOMCAS 169 +#define TGSI_OPCODE_ATOMAND 170 +#define TGSI_OPCODE_ATOMOR 171 +#define TGSI_OPCODE_ATOMXOR 172 +#define TGSI_OPCODE_ATOMUMIN 173 +#define TGSI_OPCODE_ATOMUMAX 174 +#define TGSI_OPCODE_ATOMIMIN 175 +#define TGSI_OPCODE_ATOMIMAX 176 + +#define TGSI_OPCODE_LAST 177 #define TGSI_SAT_NONE 0 /* do not saturate */ #define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */ @@ -441,7 +490,7 @@ struct tgsi_instruction_label unsigned Padding : 8; }; -#define TGSI_TEXTURE_UNKNOWN 0 +#define TGSI_TEXTURE_BUFFER 0 #define TGSI_TEXTURE_1D 1 #define TGSI_TEXTURE_2D 2 #define TGSI_TEXTURE_3D 3 @@ -455,7 +504,8 @@ struct tgsi_instruction_label #define TGSI_TEXTURE_SHADOW1D_ARRAY 11 #define 
TGSI_TEXTURE_SHADOW2D_ARRAY 12 #define TGSI_TEXTURE_SHADOWCUBE 13 -#define TGSI_TEXTURE_COUNT 14 +#define TGSI_TEXTURE_UNKNOWN 14 +#define TGSI_TEXTURE_COUNT 15 struct tgsi_instruction_texture { diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 3bc35bc77ce..51a956d9532 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -62,6 +62,7 @@ extern "C" { #define PIPE_MAX_GEOMETRY_SAMPLERS 16 #define PIPE_MAX_SHADER_INPUTS 32 #define PIPE_MAX_SHADER_OUTPUTS 32 +#define PIPE_MAX_SHADER_SAMPLER_VIEWS 32 #define PIPE_MAX_SHADER_RESOURCES 32 #define PIPE_MAX_TEXTURE_LEVELS 16 #define PIPE_MAX_SO_BUFFERS 4 @@ -337,6 +338,7 @@ struct pipe_surface unsigned height; /**< logical height in pixels */ unsigned usage; /**< bitmask of PIPE_BIND_x */ + unsigned writable:1; /**< writable shader resource */ union { struct { @@ -591,6 +593,13 @@ struct pipe_resolve_info unsigned mask; /**< PIPE_MASK_RGBA, Z, S or ZS */ }; +struct pipe_compute_state +{ + const void *prog; /**< Compute program to be executed. */ + unsigned req_local_mem; /**< Required size of the LOCAL resource. */ + unsigned req_private_mem; /**< Required size of the PRIVATE resource. */ + unsigned req_input_mem; /**< Required size of the INPUT resource. */ +}; #ifdef __cplusplus } diff --git a/src/gallium/state_trackers/Makefile b/src/gallium/state_trackers/Makefile index 0900efc664f..d5162c17507 100644 --- a/src/gallium/state_trackers/Makefile +++ b/src/gallium/state_trackers/Makefile @@ -17,7 +17,7 @@ subdirs: clean: - rm -f `find . -name \*.[oa]` + rm -f `find . -regex '.*\.l?[oa]'` rm -f `find . 
-name depend` diff --git a/src/gallium/state_trackers/clover/Doxyfile b/src/gallium/state_trackers/clover/Doxyfile new file mode 100644 index 00000000000..50250e75672 --- /dev/null +++ b/src/gallium/state_trackers/clover/Doxyfile @@ -0,0 +1,1716 @@ +# Doxyfile 1.7.4 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" "). + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# http://www.gnu.org/software/libiconv for the list of possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = Clover + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer +# a quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = + +# With the PROJECT_LOGO tag one can specify an logo or icon that is +# included in the documentation. 
The maximum height of the logo should not +# exceed 55 pixels and the maximum width should not exceed 200 pixels. +# Doxygen will copy the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, +# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English +# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, +# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, +# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. 
+ +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. 
The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = YES + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. 
+ +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. 
Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given extension. +# Doxygen has a built-in mapping, but you can override or extend it using this +# tag. The format is ext=language, where ext is a file extension, and language +# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, +# C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make +# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C +# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions +# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = YES + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate getter +# and setter methods for a property. Setting this option to YES (the default) +# will make doxygen replace the get and set methods by a property in the +# documentation. 
This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and +# unions are shown inside the group in which they are included (e.g. using +# @ingroup) instead of on a separate page (for HTML and Man pages) or +# section (for LaTeX and RTF). + +INLINE_GROUPED_CLASSES = NO + +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = NO + +# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to +# determine which symbols to keep in memory and which to flush to disk. 
+# When the cache is full, less often used symbols will be written to disk. +# For small to medium size projects (<1000 input files) the default value is +# probably good enough. For larger projects a too small cache size can cause +# doxygen to be busy swapping symbols to and from disk most of the time +# causing a significant performance penalty. +# If the system has enough physical memory increasing the cache will improve the +# performance by keeping more symbols in memory. Note that the value works on +# a logarithmic scale so increasing the size by one will roughly double the +# memory usage. The cache size is given by this formula: +# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols + +SYMBOL_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. 
When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespaces are hidden. + +EXTRACT_ANON_NSPACES = YES + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. 
+# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen +# will list include files with double quotes in the documentation +# rather than with sharp brackets. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen +# will sort the (brief and detailed) documentation of class members so that +# constructors and destructors are listed first. 
If set to NO (the default) +# the constructors will appear in the respective orders defined by +# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. +# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO +# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to +# do proper type resolution of all parameters of a function it will reject a +# match between the prototype and the implementation of a member function even +# if there is only one candidate or it is obvious which candidate to choose +# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen +# will still accept a match between prototype and implementation in such cases. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. 
This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or macro consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and macros in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. +# This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. 
+ +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. +# You can optionally specify a file name after the option, if omitted +# DoxygenLayout.xml will be used as the name of the layout file. + +LAYOUT_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled.
+ +WARN_IF_UNDOCUMENTED = NO + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# The WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = api/ core/ + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding. 
Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh +# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py +# *.f90 *.f *.for *.vhd *.vhdl + +FILE_PATTERNS = + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. 
Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. +# If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. +# Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. +# The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty or if +# none of the patterns match the file name, INPUT_FILTER is applied. 
+ +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERN (if any) +# and it is also possible to disable source filtering for a specific pattern +# using *.ext= (so without naming a filter). This option only has effect when +# FILTER_SOURCE_FILES is enabled. + +FILTER_SOURCE_PATTERNS = + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. 
+ +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. +# Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = YES + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. 
+ +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. Note that when using a custom header you are responsible +# for the proper inclusion of any scripts and style sheets that doxygen +# needs, which is dependent on the configuration options used. +# It is advised to generate a default header using "doxygen -w html +# header.html footer.html stylesheet.css YourConfigFile" and then modify +# that header. Note that the header is subject to change so you typically +# have to redo this when upgrading to a newer version of doxygen or when changing the value of configuration settings such as GENERATE_TREEVIEW! + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. 
If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that +# the files will be copied as-is; there are no commands or markers available. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. +# Doxygen will adjust the colors in the stylesheet and background images +# according to this color. Hue is specified as an angle on a colorwheel, +# see http://en.wikipedia.org/wiki/Hue for more information. +# For instance the value 0 represents red, 60 is yellow, 120 is green, +# 180 is cyan, 240 is blue, 300 purple, and 360 is red again. +# The allowed range is 0 to 359. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of +# the colors in the HTML output. For a value of 0 the output will use +# grayscales only. A value of 255 will produce the most vivid colors. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to +# the luminance component of the colors in the HTML output. Values below +# 100 gradually make the output lighter, whereas values above 100 make +# the output darker. The value divided by 100 is the actual gamma applied, +# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, +# and 100 does not change the gamma. 
+ +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting +# this to NO can help when comparing the output of multiple runs. + +HTML_TIMESTAMP = YES + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. For this to work a browser that supports +# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox +# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). + +HTML_DYNAMIC_SECTIONS = NO + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. + +GENERATE_DOCSET = NO + +# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. 
This should be a +# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. 
+ +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated +# that can be used as input for Qt's qhelpgenerator to generate a +# Qt Compressed Help (.qch) of the generated HTML documentation. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to +# add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see +# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters"> +# Qt Help Project / Custom Filters</a>. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's +# filter section matches. +# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes"> +# Qt Help Project / Filter Attributes</a>. 
+ +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files +# will be generated, which together with the HTML files, form an Eclipse help +# plugin. To install this plugin and make it available under the help contents +# menu in Eclipse, the contents of the directory containing the HTML and XML +# files needs to be copied into the plugins directory of eclipse. The name of +# the directory within the plugins directory should be the same as +# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before +# the help appears. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have +# this name. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values +# (range [0,1..20]) that doxygen will group on one line in the generated HTML +# documentation. Note that a value of 0 will completely suppress the enum +# values from appearing in the overview section. + +ENUM_VALUES_PER_LINE = 4 + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to YES, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). 
+# Windows users are probably better off using the HTML help feature. + +GENERATE_TREEVIEW = NO + +# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, +# and Class Hierarchy pages using a tree view instead of an ordered list. + +USE_INLINE_TREES = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open +# links to external symbols imported via tag files in a separate window. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are +# not supported properly for IE 6.0, but are supported on all modern browsers. +# Note that when changing this option you need to delete any form_*.png files +# in the HTML output before the changes have effect. + +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax +# (see http://www.mathjax.org) which uses client side Javascript for the +# rendering instead of using prerendered bitmaps. Use this if you do not +# have LaTeX installed or if you want the formulas to look prettier in the HTML +# output. When enabled you also need to install MathJax separately and +# configure the path to it using the MATHJAX_RELPATH option. + +USE_MATHJAX = NO + +# When MathJax is enabled you need to specify the location relative to the +# HTML output directory using the MATHJAX_RELPATH option. 
The destination +# directory should contain the MathJax.js script. For instance, if the mathjax +# directory is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the +# mathjax.org site, so you can quickly see the result without installing +# MathJax, but it is strongly recommended to install a local copy of MathJax +# before deployment. + +MATHJAX_RELPATH = http://www.mathjax.org/mathjax + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box +# for the HTML output. The underlying search engine uses javascript +# and DHTML and should work on any modern browser. Note that when using +# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets +# (GENERATE_DOCSET) there is already a search function so this one should +# typically be disabled. For large projects the javascript based search engine +# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a PHP enabled web server instead of at the web client +# using Javascript. Doxygen will generate the search PHP script and index +# file to put on the web server. The advantage of the server +# based approach is that it scales better to large projects and allows +# full text search. The disadvantages are that it is more difficult to setup +# and does not have live searching capabilities. + +SERVER_BASED_SEARCH = NO + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = YES + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. +# Note that when enabling USE_PDFLATEX this option is only used for +# generating bitmaps for formulas in the HTML output, but not in the +# Makefile that is written to the output directory. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4 + +# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for +# the generated latex document. The footer should contain everything after +# the last chapter. If it is left blank doxygen will generate a +# standard footer. Notice: only use this tag if you know what you are doing! 
+ +LATEX_FOOTER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +# If LATEX_SOURCE_CODE is set to YES then doxygen will include +# source code with syntax highlighting in the LaTeX output. +# Note that which sources are shown also depends on other settings +# such as SOURCE_BROWSER. + +LATEX_SOURCE_CODE = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. 
+ +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). 
These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. 
+ +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. +# This is useful +# if you want to understand what is going on. +# On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. 
Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files +# pointed to by INCLUDE_PATH will be searched when a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition that +# overrules the definition found in the source code. 
+ +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all references to function-like macros +# that are alone on a line, have an all uppercase name, and do not end with a +# semicolon, because these will confuse the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. +# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. 
the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option also works with HAVE_DOT disabled, but it is recommended to +# install and use dot, since it yields more powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. + +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is +# allowed to run in parallel. When set to 0 (the default) doxygen will +# base this on the number of processors available in the system. You can set it +# explicitly to a value larger than 0 to get control over the balance +# between CPU load and processing speed.
+ +DOT_NUM_THREADS = 0 + +# By default doxygen will write a font called Helvetica to the output +# directory and reference it in all dot files that doxygen generates. +# When you want a differently looking font you can specify the font name +# using DOT_FONTNAME. You need to make sure dot is able to find the font, +# which can be done by putting it in a standard location or by setting the +# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory +# containing the font. + +DOT_FONTNAME = Helvetica + +# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. +# The default size is 10pt. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the output directory to look for the +# FreeSans.ttf font (which doxygen will put there itself). If you specify a +# different font using DOT_FONTNAME you can set the path where dot +# can find it using this tag. + +DOT_FONTPATH = + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language.
+ +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. 
+ +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are svg, png, jpg, or gif. +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The MSCFILE_DIRS tag can be used to specify one or more directories that +# contain msc files that are included in the documentation (see the +# \mscfile command). + +MSCFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. Note that if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lie further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background.
This is disabled by default, because dot on Windows does not +# seem to support this out of the box. Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES diff --git a/src/gallium/state_trackers/clover/Makefile.am b/src/gallium/state_trackers/clover/Makefile.am new file mode 100644 index 00000000000..da9f3bb92da --- /dev/null +++ b/src/gallium/state_trackers/clover/Makefile.am @@ -0,0 +1,71 @@ +AUTOMAKE_OPTIONS = subdir-objects + +AM_CPPFLAGS = \ + $(GALLIUM_PIPE_LOADER_DEFINES) \ + -DMESA_VERSION=\"$(MESA_VERSION)\" \ + -DPIPE_SEARCH_DIR=\"$(OPENCL_LIB_INSTALL_DIR)\" \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/winsys \ + -I$(srcdir) + +noinst_LTLIBRARIES = libclover.la libcltgsi.la libclllvm.la + +libcltgsi_la_CXXFLAGS = \ + -std=c++0x + +libcltgsi_la_SOURCES = \ + tgsi/compiler.cpp + +libclllvm_la_CXXFLAGS = \ + -std=c++98 + +libclllvm_la_SOURCES = \ + llvm/invocation.cpp + +libclover_la_CXXFLAGS = \ + -std=c++0x + +libclover_la_LIBADD = \ + libcltgsi.la libclllvm.la + +libclover_la_SOURCES = \ + core/base.hpp \
+ core/compat.hpp \ + core/compiler.hpp \ + core/geometry.hpp \ + core/device.hpp \ + core/device.cpp \ + core/context.hpp \ + core/context.cpp \ + core/queue.hpp \ + core/queue.cpp \ + core/format.hpp \ + core/format.cpp \ + core/memory.hpp \ + core/memory.cpp \ + core/resource.hpp \ + core/resource.cpp \ + core/sampler.hpp \ + core/sampler.cpp \ + core/event.hpp \ + core/event.cpp \ + core/program.hpp \ + core/program.cpp \ + core/kernel.hpp \ + core/kernel.cpp \ + core/module.hpp \ + core/module.cpp \ + api/util.hpp \ + api/platform.cpp \ + api/device.cpp \ + api/context.cpp \ + api/queue.cpp \ + api/memory.cpp \ + api/transfer.cpp \ + api/sampler.cpp \ + api/event.cpp \ + api/program.cpp \ + api/kernel.cpp diff --git a/src/gallium/state_trackers/clover/api/context.cpp b/src/gallium/state_trackers/clover/api/context.cpp new file mode 100644 index 00000000000..c8d668933e5 --- /dev/null +++ b/src/gallium/state_trackers/clover/api/context.cpp @@ -0,0 +1,120 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include "api/util.hpp" +#include "core/context.hpp" + +using namespace clover; + +PUBLIC cl_context +clCreateContext(const cl_context_properties *props, cl_uint num_devs, + const cl_device_id *devs, + void (CL_CALLBACK *pfn_notify)(const char *, const void *, + size_t, void *), + void *user_data, cl_int *errcode_ret) try { + auto mprops = property_map(props); + + if (!devs || !num_devs || + (!pfn_notify && user_data)) + throw error(CL_INVALID_VALUE); + + if (any_of(is_zero<cl_device_id>(), devs, devs + num_devs)) + throw error(CL_INVALID_DEVICE); + + for (auto p : mprops) { + if (!(p.first == CL_CONTEXT_PLATFORM && + (cl_platform_id)p.second == NULL)) + throw error(CL_INVALID_PROPERTY); + } + + ret_error(errcode_ret, CL_SUCCESS); + return new context( + property_vector(mprops), + std::vector<cl_device_id>(devs, devs + num_devs)); + +} catch(error &e) { + ret_error(errcode_ret, e); + return NULL; +} + +PUBLIC cl_context +clCreateContextFromType(const cl_context_properties *props, + cl_device_type type, + void (CL_CALLBACK *pfn_notify)( + const char *, const void *, size_t, void *), + void *user_data, cl_int *errcode_ret) { + cl_device_id dev; + cl_int ret; + + ret = clGetDeviceIDs(0, type, 1, &dev, 0); + if (ret) { + ret_error(errcode_ret, ret); + return NULL; + } + + return clCreateContext(props, 1, &dev, pfn_notify, user_data, errcode_ret); +} + +PUBLIC cl_int +clRetainContext(cl_context ctx) { + if (!ctx) + return CL_INVALID_CONTEXT; + + ctx->retain(); + return CL_SUCCESS; +} + +PUBLIC cl_int +clReleaseContext(cl_context ctx) { + if (!ctx) + return CL_INVALID_CONTEXT; + + if (ctx->release()) + delete ctx; + + return CL_SUCCESS; +} + +PUBLIC cl_int +clGetContextInfo(cl_context ctx, cl_context_info param, + 
size_t size, void *buf, size_t *size_ret) { + if (!ctx) + return CL_INVALID_CONTEXT; + + switch (param) { + case CL_CONTEXT_REFERENCE_COUNT: + return scalar_property<cl_uint>(buf, size, size_ret, ctx->ref_count()); + + case CL_CONTEXT_NUM_DEVICES: + return scalar_property<cl_uint>(buf, size, size_ret, ctx->devs.size()); + + case CL_CONTEXT_DEVICES: + return vector_property<cl_device_id>(buf, size, size_ret, ctx->devs); + + case CL_CONTEXT_PROPERTIES: + return vector_property<cl_context_properties>(buf, size, size_ret, + ctx->props()); + + default: + return CL_INVALID_VALUE; + } +} diff --git a/src/gallium/state_trackers/clover/api/device.cpp b/src/gallium/state_trackers/clover/api/device.cpp new file mode 100644 index 00000000000..03767519aaf --- /dev/null +++ b/src/gallium/state_trackers/clover/api/device.cpp @@ -0,0 +1,262 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// + +#include "api/util.hpp" +#include "core/device.hpp" + +using namespace clover; + +static device_registry registry; + +PUBLIC cl_int +clGetDeviceIDs(cl_platform_id platform, cl_device_type device_type, + cl_uint num_entries, cl_device_id *devices, + cl_uint *num_devices) { + std::vector<cl_device_id> devs; + + if (platform != NULL) + return CL_INVALID_PLATFORM; + + if ((!num_entries && devices) || + (!num_devices && !devices)) + return CL_INVALID_VALUE; + + // Collect matching devices + for (device &dev : registry) { + if (((device_type & CL_DEVICE_TYPE_DEFAULT) && + &dev == &registry.front()) || + (device_type & dev.type())) + devs.push_back(&dev); + } + + if (devs.empty()) + return CL_DEVICE_NOT_FOUND; + + // ...and return the requested data. + if (num_devices) + *num_devices = devs.size(); + if (devices) + std::copy_n(devs.begin(), + std::min((cl_uint)devs.size(), num_entries), + devices); + + return CL_SUCCESS; +} + +PUBLIC cl_int +clGetDeviceInfo(cl_device_id dev, cl_device_info param, + size_t size, void *buf, size_t *size_ret) { + if (!dev) + return CL_INVALID_DEVICE; + + switch (param) { + case CL_DEVICE_TYPE: + return scalar_property<cl_device_type>(buf, size, size_ret, dev->type()); + + case CL_DEVICE_VENDOR_ID: + return scalar_property<cl_uint>(buf, size, size_ret, dev->vendor_id()); + + case CL_DEVICE_MAX_COMPUTE_UNITS: + return scalar_property<cl_uint>(buf, size, size_ret, 1); + + case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: + return scalar_property<cl_uint>(buf, size, size_ret, + dev->max_block_size().size()); + + case CL_DEVICE_MAX_WORK_ITEM_SIZES: + return vector_property<size_t>(buf, size, size_ret, + dev->max_block_size()); + + case CL_DEVICE_MAX_WORK_GROUP_SIZE: + return scalar_property<size_t>(buf, size, size_ret, SIZE_MAX); + + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: + return scalar_property<cl_uint>(buf, size, size_ret, 16); + + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: + return scalar_property<cl_uint>(buf, size, size_ret, 8); + + case
CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: + return scalar_property<cl_uint>(buf, size, size_ret, 4); + + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: + return scalar_property<cl_uint>(buf, size, size_ret, 2); + + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: + return scalar_property<cl_uint>(buf, size, size_ret, 4); + + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: + return scalar_property<cl_uint>(buf, size, size_ret, 2); + + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: + return scalar_property<cl_uint>(buf, size, size_ret, 0); + + case CL_DEVICE_MAX_CLOCK_FREQUENCY: + return scalar_property<cl_uint>(buf, size, size_ret, 0); + + case CL_DEVICE_ADDRESS_BITS: + return scalar_property<cl_uint>(buf, size, size_ret, 32); + + case CL_DEVICE_MAX_READ_IMAGE_ARGS: + return scalar_property<cl_uint>(buf, size, size_ret, + dev->max_images_read()); + + case CL_DEVICE_MAX_WRITE_IMAGE_ARGS: + return scalar_property<cl_uint>(buf, size, size_ret, + dev->max_images_write()); + + case CL_DEVICE_MAX_MEM_ALLOC_SIZE: + return scalar_property<cl_ulong>(buf, size, size_ret, 0); + + case CL_DEVICE_IMAGE2D_MAX_WIDTH: + case CL_DEVICE_IMAGE2D_MAX_HEIGHT: + return scalar_property<size_t>(buf, size, size_ret, + 1 << dev->max_image_levels_2d()); + + case CL_DEVICE_IMAGE3D_MAX_WIDTH: + case CL_DEVICE_IMAGE3D_MAX_HEIGHT: + case CL_DEVICE_IMAGE3D_MAX_DEPTH: + return scalar_property<size_t>(buf, size, size_ret, + 1 << dev->max_image_levels_3d()); + + case CL_DEVICE_IMAGE_SUPPORT: + return scalar_property<cl_bool>(buf, size, size_ret, CL_TRUE); + + case CL_DEVICE_MAX_PARAMETER_SIZE: + return scalar_property<size_t>(buf, size, size_ret, + dev->max_mem_input()); + + case CL_DEVICE_MAX_SAMPLERS: + return scalar_property<cl_uint>(buf, size, size_ret, + dev->max_samplers()); + + case CL_DEVICE_MEM_BASE_ADDR_ALIGN: + case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: + return scalar_property<cl_uint>(buf, size, size_ret, 128); + + case CL_DEVICE_SINGLE_FP_CONFIG: + return scalar_property<cl_device_fp_config>(buf, size, 
size_ret, + CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST); + + case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: + return scalar_property<cl_device_mem_cache_type>(buf, size, size_ret, + CL_NONE); + + case CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: + return scalar_property<cl_uint>(buf, size, size_ret, 0); + + case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE: + return scalar_property<cl_ulong>(buf, size, size_ret, 0); + + case CL_DEVICE_GLOBAL_MEM_SIZE: + return scalar_property<cl_ulong>(buf, size, size_ret, + dev->max_mem_global()); + + case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: + return scalar_property<cl_ulong>(buf, size, size_ret, + dev->max_const_buffer_size()); + + case CL_DEVICE_MAX_CONSTANT_ARGS: + return scalar_property<cl_uint>(buf, size, size_ret, + dev->max_const_buffers()); + + case CL_DEVICE_LOCAL_MEM_TYPE: + return scalar_property<cl_device_local_mem_type>(buf, size, size_ret, + CL_LOCAL); + + case CL_DEVICE_LOCAL_MEM_SIZE: + return scalar_property<cl_ulong>(buf, size, size_ret, + dev->max_mem_local()); + + case CL_DEVICE_ERROR_CORRECTION_SUPPORT: + return scalar_property<cl_bool>(buf, size, size_ret, CL_FALSE); + + case CL_DEVICE_PROFILING_TIMER_RESOLUTION: + return scalar_property<size_t>(buf, size, size_ret, 0); + + case CL_DEVICE_ENDIAN_LITTLE: + return scalar_property<cl_bool>(buf, size, size_ret, CL_TRUE); + + case CL_DEVICE_AVAILABLE: + case CL_DEVICE_COMPILER_AVAILABLE: + return scalar_property<cl_bool>(buf, size, size_ret, CL_TRUE); + + case CL_DEVICE_EXECUTION_CAPABILITIES: + return scalar_property<cl_device_exec_capabilities>(buf, size, size_ret, + CL_EXEC_KERNEL); + + case CL_DEVICE_QUEUE_PROPERTIES: + return scalar_property<cl_command_queue_properties>(buf, size, size_ret, + CL_QUEUE_PROFILING_ENABLE); + + case CL_DEVICE_NAME: + return string_property(buf, size, size_ret, dev->device_name()); + + case CL_DEVICE_VENDOR: + return string_property(buf, size, size_ret, dev->vendor_name()); + + case CL_DRIVER_VERSION: + return string_property(buf, size, size_ret, 
MESA_VERSION); + + case CL_DEVICE_PROFILE: + return string_property(buf, size, size_ret, "FULL_PROFILE"); + + case CL_DEVICE_VERSION: + return string_property(buf, size, size_ret, "OpenCL 1.1 MESA " MESA_VERSION); + + case CL_DEVICE_EXTENSIONS: + return string_property(buf, size, size_ret, ""); + + case CL_DEVICE_PLATFORM: + return scalar_property<cl_platform_id>(buf, size, size_ret, NULL); + + case CL_DEVICE_HOST_UNIFIED_MEMORY: + return scalar_property<cl_bool>(buf, size, size_ret, CL_TRUE); + + case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR: + return scalar_property<cl_uint>(buf, size, size_ret, 16); + + case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT: + return scalar_property<cl_uint>(buf, size, size_ret, 8); + + case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT: + return scalar_property<cl_uint>(buf, size, size_ret, 4); + + case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG: + return scalar_property<cl_uint>(buf, size, size_ret, 2); + + case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT: + return scalar_property<cl_uint>(buf, size, size_ret, 4); + + case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: + return scalar_property<cl_uint>(buf, size, size_ret, 2); + + case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: + return scalar_property<cl_uint>(buf, size, size_ret, 0); + + case CL_DEVICE_OPENCL_C_VERSION: + return string_property(buf, size, size_ret, "OpenCL C 1.1"); + + default: + return CL_INVALID_VALUE; + } +} diff --git a/src/gallium/state_trackers/clover/api/event.cpp b/src/gallium/state_trackers/clover/api/event.cpp new file mode 100644 index 00000000000..d6c37f6aef2 --- /dev/null +++ b/src/gallium/state_trackers/clover/api/event.cpp @@ -0,0 +1,239 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell 
copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "api/util.hpp"
+#include "core/event.hpp"
+
+using namespace clover;
+
+///
+/// Create a user event: a soft_event in \a ctx with an empty
+/// dependency list.  A null \a ctx is reported as CL_INVALID_CONTEXT
+/// through \a errcode_ret and NULL is returned.
+///
+PUBLIC cl_event
+clCreateUserEvent(cl_context ctx, cl_int *errcode_ret) try {
+   if (!ctx)
+      throw error(CL_INVALID_CONTEXT);
+
+   ret_error(errcode_ret, CL_SUCCESS);
+   // NOTE(review): the trailing "false" is presumably the initial
+   // "triggered" state of the event -- confirm against core/event.hpp.
+   return new soft_event(*ctx, {}, false);
+
+} catch(error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+///
+/// Set the status of a user event.  Returns CL_INVALID_EVENT when \a ev
+/// is not a soft_event (this includes NULL), CL_INVALID_VALUE for a
+/// positive \a status and CL_INVALID_OPERATION when the event status is
+/// already zero or negative.
+///
+PUBLIC cl_int
+clSetUserEventStatus(cl_event ev, cl_int status) {
+   if (!dynamic_cast<soft_event *>(ev))
+      return CL_INVALID_EVENT;
+
+   if (status > 0)
+      return CL_INVALID_VALUE;
+
+   if (ev->status() <= 0)
+      return CL_INVALID_OPERATION;
+
+   // Only zero or negative values get here: a negative status aborts
+   // the event with that value, zero triggers it.
+   if (status)
+      ev->abort(status);
+   else
+      ev->trigger();
+
+   return CL_SUCCESS;
+}
+
+///
+/// Block until all \a num_evs events in \a evs have completed.  Every
+/// event must be non-null, share its context with evs[0] and have a
+/// non-negative status, otherwise the matching error code is returned.
+///
+PUBLIC cl_int
+clWaitForEvents(cl_uint num_evs, const cl_event *evs) try {
+   if (!num_evs || !evs)
+      throw error(CL_INVALID_VALUE);
+
+   std::for_each(evs, evs + num_evs, [&](const cl_event ev) {
+         if (!ev)
+            throw error(CL_INVALID_EVENT);
+
+         if (&ev->ctx != &evs[0]->ctx)
+            throw error(CL_INVALID_CONTEXT);
+
+         if (ev->status() < 0)
+            throw error(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
+      });
+
+   // Create a temporary soft event that depends on all the events in
+   // the wait list
+   ref_ptr<soft_event> sev = transfer(
+      new soft_event(evs[0]->ctx, { evs, evs + num_evs }, true));
+
+   // ...and wait on it.
+   sev->wait();
+
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+///
+/// Query a property of \a ev.  Unknown \a param values yield
+/// CL_INVALID_VALUE; the size/buffer handling is delegated to
+/// scalar_property().
+///
+PUBLIC cl_int
+clGetEventInfo(cl_event ev, cl_event_info param,
+               size_t size, void *buf, size_t *size_ret) {
+   if (!ev)
+      return CL_INVALID_EVENT;
+
+   switch (param) {
+   case CL_EVENT_COMMAND_QUEUE:
+      return scalar_property<cl_command_queue>(buf, size, size_ret, ev->queue());
+
+   case CL_EVENT_CONTEXT:
+      return scalar_property<cl_context>(buf, size, size_ret, &ev->ctx);
+
+   case CL_EVENT_COMMAND_TYPE:
+      return scalar_property<cl_command_type>(buf, size, size_ret, ev->command());
+
+   case CL_EVENT_COMMAND_EXECUTION_STATUS:
+      return scalar_property<cl_int>(buf, size, size_ret, ev->status());
+
+   case CL_EVENT_REFERENCE_COUNT:
+      return scalar_property<cl_uint>(buf, size, size_ret, ev->ref_count());
+
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
+
+///
+/// Register \a pfn_event_notify to be called with \a user_data once
+/// \a ev completes.  Only CL_COMPLETE is accepted as \a type.
+///
+PUBLIC cl_int
+clSetEventCallback(cl_event ev, cl_int type,
+                   void (CL_CALLBACK *pfn_event_notify)(cl_event, cl_int,
+                                                        void *),
+                   void *user_data) try {
+   if (!ev)
+      throw error(CL_INVALID_EVENT);
+
+   if (!pfn_event_notify || type != CL_COMPLETE)
+      throw error(CL_INVALID_VALUE);
+
+   // Create a temporary soft event that depends on ev, with
+   // pfn_event_notify as completion action.
+   // NOTE(review): the local reference is dropped when this function
+   // returns and the action captures ev as a raw pointer; presumably
+   // the dependency chain keeps both objects alive -- confirm.
+   ref_ptr<soft_event> sev = transfer(
+      new soft_event(ev->ctx, { ev }, true,
+                     [=](event &) {
+                        ev->wait();
+                        pfn_event_notify(ev, ev->status(), user_data);
+                     }));
+
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+///
+/// Increment the reference count of \a ev.
+///
+PUBLIC cl_int
+clRetainEvent(cl_event ev) {
+   if (!ev)
+      return CL_INVALID_EVENT;
+
+   ev->retain();
+   return CL_SUCCESS;
+}
+
+///
+/// Drop one reference to \a ev and delete it when release() reports
+/// that no references remain.
+///
+PUBLIC cl_int
+clReleaseEvent(cl_event ev) {
+   if (!ev)
+      return CL_INVALID_EVENT;
+
+   if (ev->release())
+      delete ev;
+
+   return CL_SUCCESS;
+}
+
+///
+/// Enqueue a marker on \a q and return its event in \a ev, which must
+/// not be null.
+///
+PUBLIC cl_int
+clEnqueueMarker(cl_command_queue q, cl_event *ev) try {
+   if (!q)
+      throw error(CL_INVALID_COMMAND_QUEUE);
+
+   if (!ev)
+      throw error(CL_INVALID_VALUE);
+
+   *ev = new hard_event(*q, CL_COMMAND_MARKER, {});
+
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+///
+/// Barrier command.  Apart from validating \a q this is a no-op.
+///
+PUBLIC cl_int
+clEnqueueBarrier(cl_command_queue q) {
+   if (!q)
+      return CL_INVALID_COMMAND_QUEUE;
+
+   // No need to do anything, q preserves data ordering strictly.
+   return CL_SUCCESS;
+}
+
+///
+/// Make subsequent commands in \a q wait for the events in \a evs,
+/// all of which must be non-null and belong to the context of \a q.
+///
+PUBLIC cl_int
+clEnqueueWaitForEvents(cl_command_queue q, cl_uint num_evs,
+                       const cl_event *evs) try {
+   if (!q)
+      throw error(CL_INVALID_COMMAND_QUEUE);
+
+   if (!num_evs || !evs)
+      throw error(CL_INVALID_VALUE);
+
+   std::for_each(evs, evs + num_evs, [&](const cl_event ev) {
+         if (!ev)
+            throw error(CL_INVALID_EVENT);
+
+         if (&ev->ctx != &q->ctx)
+            throw error(CL_INVALID_CONTEXT);
+      });
+
+   // Create a hard event that depends on the events in the wait list:
+   // subsequent commands in the same queue will be implicitly
+   // serialized with respect to it -- hard events always are.
+   ref_ptr<hard_event> hev = transfer(
+      new hard_event(*q, 0, { evs, evs + num_evs }));
+
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+///
+/// Profiling queries are not implemented: every call returns
+/// CL_PROFILING_INFO_NOT_AVAILABLE without looking at its arguments.
+///
+PUBLIC cl_int
+clGetEventProfilingInfo(cl_event ev, cl_profiling_info param,
+                        size_t size, void *buf, size_t *size_ret) {
+   return CL_PROFILING_INFO_NOT_AVAILABLE;
+}
+
+///
+/// Block until the commands previously enqueued on \a q have finished.
+///
+PUBLIC cl_int
+clFinish(cl_command_queue q) try {
+   if (!q)
+      throw error(CL_INVALID_COMMAND_QUEUE);
+
+   // Create a temporary hard event -- it implicitly depends on all
+   // the previously queued hard events.
+   ref_ptr<hard_event> hev = transfer(new hard_event(*q, 0, { }));
+
+   // And wait on it.
+   hev->wait();
+
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
diff --git a/src/gallium/state_trackers/clover/api/kernel.cpp b/src/gallium/state_trackers/clover/api/kernel.cpp
new file mode 100644
index 00000000000..44eeb277127
--- /dev/null
+++ b/src/gallium/state_trackers/clover/api/kernel.cpp
@@ -0,0 +1,318 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include "api/util.hpp" +#include "core/kernel.hpp" +#include "core/event.hpp" + +using namespace clover; + +PUBLIC cl_kernel +clCreateKernel(cl_program prog, const char *name, + cl_int *errcode_ret) try { + if (!prog) + throw error(CL_INVALID_PROGRAM); + + if (!name) + throw error(CL_INVALID_VALUE); + + if (prog->binaries().empty()) + throw error(CL_INVALID_PROGRAM_EXECUTABLE); + + auto sym = prog->binaries().begin()->second.sym(name); + + ret_error(errcode_ret, CL_SUCCESS); + return new kernel(*prog, name, { sym.args.begin(), sym.args.end() }); + +} catch (module::noent_error &e) { + ret_error(errcode_ret, CL_INVALID_KERNEL_NAME); + return NULL; + +} catch(error &e) { + ret_error(errcode_ret, e); + return NULL; +} + +PUBLIC cl_int +clCreateKernelsInProgram(cl_program prog, cl_uint count, + cl_kernel *kerns, cl_uint *count_ret) { + if (!prog) + throw error(CL_INVALID_PROGRAM); + + if (prog->binaries().empty()) + throw error(CL_INVALID_PROGRAM_EXECUTABLE); + + auto &syms = prog->binaries().begin()->second.syms; + + if (kerns && count < syms.size()) + throw error(CL_INVALID_VALUE); + + if (kerns) + std::transform(syms.begin(), syms.end(), kerns, + [=](const module::symbol &sym) { + return new kernel(*prog, compat::string(sym.name), + { sym.args.begin(), sym.args.end() }); + }); + + if (count_ret) + *count_ret = syms.size(); + + return CL_SUCCESS; +} + +PUBLIC cl_int +clRetainKernel(cl_kernel kern) { + if (!kern) + return CL_INVALID_KERNEL; + + kern->retain(); + return CL_SUCCESS; +} + +PUBLIC cl_int +clReleaseKernel(cl_kernel kern) { + if (!kern) + return CL_INVALID_KERNEL; + + if (kern->release()) + delete kern; + + return CL_SUCCESS; +} + +PUBLIC cl_int +clSetKernelArg(cl_kernel kern, cl_uint idx, size_t 
size, + const void *value) try { + if (!kern) + throw error(CL_INVALID_KERNEL); + + if (idx >= kern->args.size()) + throw error(CL_INVALID_ARG_INDEX); + + kern->args[idx]->set(size, value); + + return CL_SUCCESS; + +} catch(error &e) { + return e.get(); +} + +PUBLIC cl_int +clGetKernelInfo(cl_kernel kern, cl_kernel_info param, + size_t size, void *buf, size_t *size_ret) { + if (!kern) + return CL_INVALID_KERNEL; + + switch (param) { + case CL_KERNEL_FUNCTION_NAME: + return string_property(buf, size, size_ret, kern->name()); + + case CL_KERNEL_NUM_ARGS: + return scalar_property<cl_uint>(buf, size, size_ret, + kern->args.size()); + + case CL_KERNEL_REFERENCE_COUNT: + return scalar_property<cl_uint>(buf, size, size_ret, + kern->ref_count()); + + case CL_KERNEL_CONTEXT: + return scalar_property<cl_context>(buf, size, size_ret, + &kern->prog.ctx); + + case CL_KERNEL_PROGRAM: + return scalar_property<cl_program>(buf, size, size_ret, + &kern->prog); + + default: + return CL_INVALID_VALUE; + } +} + +PUBLIC cl_int +clGetKernelWorkGroupInfo(cl_kernel kern, cl_device_id dev, + cl_kernel_work_group_info param, + size_t size, void *buf, size_t *size_ret) { + if (!kern) + return CL_INVALID_KERNEL; + + if ((!dev && kern->prog.binaries().size() != 1) || + (dev && !kern->prog.binaries().count(dev))) + return CL_INVALID_DEVICE; + + switch (param) { + case CL_KERNEL_WORK_GROUP_SIZE: + return scalar_property<size_t>(buf, size, size_ret, + kern->max_block_size()); + + case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: + return vector_property<size_t>(buf, size, size_ret, + kern->block_size()); + + case CL_KERNEL_LOCAL_MEM_SIZE: + return scalar_property<cl_ulong>(buf, size, size_ret, + kern->mem_local()); + + case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: + return scalar_property<size_t>(buf, size, size_ret, 1); + + case CL_KERNEL_PRIVATE_MEM_SIZE: + return scalar_property<cl_ulong>(buf, size, size_ret, + kern->mem_private()); + + default: + return CL_INVALID_VALUE; + } +} + +namespace { + /// 
+ /// Common argument checking shared by kernel invocation commands. + /// + void + kernel_validate(cl_command_queue q, cl_kernel kern, + cl_uint dims, const size_t *grid_offset, + const size_t *grid_size, const size_t *block_size, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) { + if (!q) + throw error(CL_INVALID_COMMAND_QUEUE); + + if (!kern) + throw error(CL_INVALID_KERNEL); + + if (&kern->prog.ctx != &q->ctx || + any_of([&](const cl_event ev) { + return &ev->ctx != &q->ctx; + }, deps, deps + num_deps)) + throw error(CL_INVALID_CONTEXT); + + if (bool(num_deps) != bool(deps) || + any_of(is_zero<cl_event>(), deps, deps + num_deps)) + throw error(CL_INVALID_EVENT_WAIT_LIST); + + if (any_of([](std::unique_ptr<kernel::argument> &arg) { + return !arg->set(); + }, kern->args.begin(), kern->args.end())) + throw error(CL_INVALID_KERNEL_ARGS); + + if (!kern->prog.binaries().count(&q->dev)) + throw error(CL_INVALID_PROGRAM_EXECUTABLE); + + if (dims < 1 || dims > q->dev.max_block_size().size()) + throw error(CL_INVALID_WORK_DIMENSION); + + if (!grid_size || any_of(is_zero<size_t>(), grid_size, grid_size + dims)) + throw error(CL_INVALID_GLOBAL_WORK_SIZE); + + if (block_size && any_of([](size_t b, size_t max) { + return b == 0 || b > max; + }, block_size, block_size + dims, + q->dev.max_block_size().begin())) + throw error(CL_INVALID_WORK_ITEM_SIZE); + + if (block_size && any_of([](size_t b, size_t g) { + return g % b; + }, block_size, block_size + dims, grid_size)) + throw error(CL_INVALID_WORK_GROUP_SIZE); + } + + /// + /// Common event action shared by kernel invocation commands. 
+ /// + std::function<void (event &)> + kernel_op(cl_command_queue q, cl_kernel kern, + const std::vector<size_t> &grid_offset, + const std::vector<size_t> &grid_size, + const std::vector<size_t> &block_size) { + const std::vector<size_t> reduced_grid_size = map( + std::divides<size_t>(), grid_size.begin(), grid_size.end(), + block_size.begin()); + + return [=](event &) { + kern->launch(*q, grid_offset, reduced_grid_size, block_size); + }; + } + + template<typename T, typename S> + std::vector<T> + opt_vector(const T *p, S n) { + if (p) + return { p, p + n }; + else + return { n }; + } +} + +PUBLIC cl_int +clEnqueueNDRangeKernel(cl_command_queue q, cl_kernel kern, + cl_uint dims, const size_t *pgrid_offset, + const size_t *pgrid_size, const size_t *pblock_size, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + const std::vector<size_t> grid_offset = opt_vector(pgrid_offset, dims); + const std::vector<size_t> grid_size = opt_vector(pgrid_size, dims); + const std::vector<size_t> block_size = opt_vector(pblock_size, dims); + + kernel_validate(q, kern, dims, pgrid_offset, pgrid_size, pblock_size, + num_deps, deps, ev); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_NDRANGE_KERNEL, { deps, deps + num_deps }, + kernel_op(q, kern, grid_offset, grid_size, block_size)); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch(error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueTask(cl_command_queue q, cl_kernel kern, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + const std::vector<size_t> grid_offset = { 0 }; + const std::vector<size_t> grid_size = { 1 }; + const std::vector<size_t> block_size = { 1 }; + + kernel_validate(q, kern, 1, grid_offset.data(), grid_size.data(), + block_size.data(), num_deps, deps, ev); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_TASK, { deps, deps + num_deps }, + kernel_op(q, kern, grid_offset, grid_size, block_size)); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch(error &e) { 
+ return e.get(); +} + +PUBLIC cl_int +clEnqueueNativeKernel(cl_command_queue q, void (*func)(void *), + void *args, size_t args_size, + cl_uint obj_count, const cl_mem *obj_list, + const void **obj_args, cl_uint num_deps, + const cl_event *deps, cl_event *ev) { + return CL_INVALID_OPERATION; +} diff --git a/src/gallium/state_trackers/clover/api/memory.cpp b/src/gallium/state_trackers/clover/api/memory.cpp new file mode 100644 index 00000000000..1b1ae73796f --- /dev/null +++ b/src/gallium/state_trackers/clover/api/memory.cpp @@ -0,0 +1,305 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// + +#include "api/util.hpp" +#include "core/memory.hpp" +#include "core/format.hpp" + +using namespace clover; + +PUBLIC cl_mem +clCreateBuffer(cl_context ctx, cl_mem_flags flags, size_t size, + void *host_ptr, cl_int *errcode_ret) try { + if (!ctx) + throw error(CL_INVALID_CONTEXT); + + if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR | + CL_MEM_COPY_HOST_PTR))) + throw error(CL_INVALID_HOST_PTR); + + if (!size) + throw error(CL_INVALID_BUFFER_SIZE); + + if (flags & ~(CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | + CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | + CL_MEM_COPY_HOST_PTR)) + throw error(CL_INVALID_VALUE); + + ret_error(errcode_ret, CL_SUCCESS); + return new root_buffer(*ctx, flags, size, host_ptr); + +} catch (error &e) { + ret_error(errcode_ret, e); + return NULL; +} + +PUBLIC cl_mem +clCreateSubBuffer(cl_mem obj, cl_mem_flags flags, cl_buffer_create_type op, + const void *op_info, cl_int *errcode_ret) try { + root_buffer *parent = dynamic_cast<root_buffer *>(obj); + + if (!parent) + throw error(CL_INVALID_MEM_OBJECT); + + if ((flags & (CL_MEM_USE_HOST_PTR | + CL_MEM_ALLOC_HOST_PTR | + CL_MEM_COPY_HOST_PTR)) || + (~flags & parent->flags() & (CL_MEM_READ_ONLY | + CL_MEM_WRITE_ONLY))) + throw error(CL_INVALID_VALUE); + + if (op == CL_BUFFER_CREATE_TYPE_REGION) { + const cl_buffer_region *reg = (const cl_buffer_region *)op_info; + + if (!reg || + reg->origin > parent->size() || + reg->origin + reg->size > parent->size()) + throw error(CL_INVALID_VALUE); + + if (!reg->size) + throw error(CL_INVALID_BUFFER_SIZE); + + ret_error(errcode_ret, CL_SUCCESS); + return new sub_buffer(*parent, flags, reg->origin, reg->size); + + } else { + throw error(CL_INVALID_VALUE); + } + +} catch (error &e) { + ret_error(errcode_ret, e); + return NULL; +} + +PUBLIC cl_mem +clCreateImage2D(cl_context ctx, cl_mem_flags flags, + const cl_image_format *format, + size_t width, size_t height, size_t row_pitch, + void *host_ptr, cl_int *errcode_ret) try { + if (!ctx) + throw 
error(CL_INVALID_CONTEXT); + + if (flags & ~(CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | + CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | + CL_MEM_COPY_HOST_PTR)) + throw error(CL_INVALID_VALUE); + + if (!format) + throw error(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR); + + if (width < 1 || height < 1) + throw error(CL_INVALID_IMAGE_SIZE); + + if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR | + CL_MEM_COPY_HOST_PTR))) + throw error(CL_INVALID_HOST_PTR); + + if (!supported_formats(ctx, CL_MEM_OBJECT_IMAGE2D).count(*format)) + throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED); + + ret_error(errcode_ret, CL_SUCCESS); + return new image2d(*ctx, flags, format, width, height, + row_pitch, host_ptr); + +} catch (error &e) { + ret_error(errcode_ret, e); + return NULL; +} + +PUBLIC cl_mem +clCreateImage3D(cl_context ctx, cl_mem_flags flags, + const cl_image_format *format, + size_t width, size_t height, size_t depth, + size_t row_pitch, size_t slice_pitch, + void *host_ptr, cl_int *errcode_ret) try { + if (!ctx) + throw error(CL_INVALID_CONTEXT); + + if (flags & ~(CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | + CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | + CL_MEM_COPY_HOST_PTR)) + throw error(CL_INVALID_VALUE); + + if (!format) + throw error(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR); + + if (width < 1 || height < 1 || depth < 2) + throw error(CL_INVALID_IMAGE_SIZE); + + if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR | + CL_MEM_COPY_HOST_PTR))) + throw error(CL_INVALID_HOST_PTR); + + if (!supported_formats(ctx, CL_MEM_OBJECT_IMAGE3D).count(*format)) + throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED); + + ret_error(errcode_ret, CL_SUCCESS); + return new image3d(*ctx, flags, format, width, height, depth, + row_pitch, slice_pitch, host_ptr); + +} catch (error &e) { + ret_error(errcode_ret, e); + return NULL; +} + +PUBLIC cl_int +clGetSupportedImageFormats(cl_context ctx, cl_mem_flags flags, + cl_mem_object_type type, cl_uint count, + cl_image_format *buf, cl_uint *count_ret) try { + if (!ctx) + 
throw error(CL_INVALID_CONTEXT); + + if (flags & ~(CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | + CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | + CL_MEM_COPY_HOST_PTR)) + throw error(CL_INVALID_VALUE); + + if (!count && buf) + throw error(CL_INVALID_VALUE); + + auto formats = supported_formats(ctx, type); + + if (buf) + std::copy_n(formats.begin(), std::min((cl_uint)formats.size(), count), + buf); + if (count_ret) + *count_ret = formats.size(); + + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clGetMemObjectInfo(cl_mem obj, cl_mem_info param, + size_t size, void *buf, size_t *size_ret) { + if (!obj) + return CL_INVALID_MEM_OBJECT; + + switch (param) { + case CL_MEM_TYPE: + return scalar_property<cl_mem_object_type>(buf, size, size_ret, + obj->type()); + + case CL_MEM_FLAGS: + return scalar_property<cl_mem_flags>(buf, size, size_ret, obj->flags()); + + case CL_MEM_SIZE: + return scalar_property<size_t>(buf, size, size_ret, obj->size()); + + case CL_MEM_HOST_PTR: + return scalar_property<void *>(buf, size, size_ret, obj->host_ptr()); + + case CL_MEM_MAP_COUNT: + return scalar_property<cl_uint>(buf, size, size_ret, 0); + + case CL_MEM_REFERENCE_COUNT: + return scalar_property<cl_uint>(buf, size, size_ret, obj->ref_count()); + + case CL_MEM_CONTEXT: + return scalar_property<cl_context>(buf, size, size_ret, &obj->ctx); + + case CL_MEM_ASSOCIATED_MEMOBJECT: { + sub_buffer *sub = dynamic_cast<sub_buffer *>(obj); + return scalar_property<cl_mem>(buf, size, size_ret, + (sub ? &sub->parent : NULL)); + } + case CL_MEM_OFFSET: { + sub_buffer *sub = dynamic_cast<sub_buffer *>(obj); + return scalar_property<size_t>(buf, size, size_ret, + (sub ? 
sub->offset() : 0)); + } + default: + return CL_INVALID_VALUE; + } +} + +PUBLIC cl_int +clGetImageInfo(cl_mem obj, cl_image_info param, + size_t size, void *buf, size_t *size_ret) { + image *img = dynamic_cast<image *>(obj); + if (!img) + return CL_INVALID_MEM_OBJECT; + + switch (param) { + case CL_IMAGE_FORMAT: + return scalar_property<cl_image_format>(buf, size, size_ret, + img->format()); + + case CL_IMAGE_ELEMENT_SIZE: + return scalar_property<size_t>(buf, size, size_ret, 0); + + case CL_IMAGE_ROW_PITCH: + return scalar_property<size_t>(buf, size, size_ret, img->row_pitch()); + + case CL_IMAGE_SLICE_PITCH: + return scalar_property<size_t>(buf, size, size_ret, img->slice_pitch()); + + case CL_IMAGE_WIDTH: + return scalar_property<size_t>(buf, size, size_ret, img->width()); + + case CL_IMAGE_HEIGHT: + return scalar_property<size_t>(buf, size, size_ret, img->height()); + + case CL_IMAGE_DEPTH: + return scalar_property<size_t>(buf, size, size_ret, img->depth()); + + default: + return CL_INVALID_VALUE; + } +} + +PUBLIC cl_int +clRetainMemObject(cl_mem obj) { + if (!obj) + return CL_INVALID_MEM_OBJECT; + + obj->retain(); + return CL_SUCCESS; +} + +PUBLIC cl_int +clReleaseMemObject(cl_mem obj) { + if (!obj) + return CL_INVALID_MEM_OBJECT; + + if (obj->release()) + delete obj; + + return CL_SUCCESS; +} + +PUBLIC cl_int +clSetMemObjectDestructorCallback(cl_mem obj, + void (CL_CALLBACK *pfn_notify)(cl_mem, void *), + void *user_data) { + if (!obj) + return CL_INVALID_MEM_OBJECT; + + if (!pfn_notify) + return CL_INVALID_VALUE; + + obj->destroy_notify([=]{ pfn_notify(obj, user_data); }); + + return CL_SUCCESS; +} diff --git a/src/gallium/state_trackers/clover/api/platform.cpp b/src/gallium/state_trackers/clover/api/platform.cpp new file mode 100644 index 00000000000..e5e80b85256 --- /dev/null +++ b/src/gallium/state_trackers/clover/api/platform.cpp @@ -0,0 +1,68 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person 
obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// + +#include "api/util.hpp" + +using namespace clover; + +PUBLIC cl_int +clGetPlatformIDs(cl_uint num_entries, cl_platform_id *platforms, + cl_uint *num_platforms) { + if ((!num_entries && platforms) || + (!num_platforms && !platforms)) + return CL_INVALID_VALUE; + + if (num_platforms) + *num_platforms = 1; + if (platforms) + *platforms = NULL; + + return CL_SUCCESS; +} + +PUBLIC cl_int +clGetPlatformInfo(cl_platform_id platform, cl_platform_info param_name, + size_t size, void *buf, size_t *size_ret) { + if (platform != NULL) + return CL_INVALID_PLATFORM; + + switch (param_name) { + case CL_PLATFORM_PROFILE: + return string_property(buf, size, size_ret, "FULL_PROFILE"); + + case CL_PLATFORM_VERSION: + return string_property(buf, size, size_ret, + "OpenCL 1.1 MESA " MESA_VERSION); + + case CL_PLATFORM_NAME: + return string_property(buf, size, size_ret, "Default"); + + case CL_PLATFORM_VENDOR: + return string_property(buf, size, size_ret, "Mesa"); + + case CL_PLATFORM_EXTENSIONS: + return string_property(buf, size, size_ret, ""); + + default: + return CL_INVALID_VALUE; + } +} diff --git a/src/gallium/state_trackers/clover/api/program.cpp b/src/gallium/state_trackers/clover/api/program.cpp new file mode 100644 index 00000000000..e874c51ad7d --- /dev/null +++ b/src/gallium/state_trackers/clover/api/program.cpp @@ -0,0 +1,241 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "api/util.hpp"
+#include "core/program.hpp"
+
+using namespace clover;
+
+///
+/// Create a program object in \a ctx from \a count source fragments.
+/// A fragment is taken as NUL-terminated when \a lengths is null or
+/// its entry is zero, and as lengths[i] bytes otherwise.  Errors are
+/// reported through \a errcode_ret and NULL is returned.
+///
+PUBLIC cl_program
+clCreateProgramWithSource(cl_context ctx, cl_uint count,
+                          const char **strings, const size_t *lengths,
+                          cl_int *errcode_ret) try {
+   std::string source;
+
+   if (!ctx)
+      throw error(CL_INVALID_CONTEXT);
+
+   if (!count || !strings ||
+       any_of(is_zero<const char *>(), strings, strings + count))
+      throw error(CL_INVALID_VALUE);
+
+   // Concatenate all the provided fragments together
+   for (unsigned i = 0; i < count; ++i)
+         source += (lengths && lengths[i] ?
+                    std::string(strings[i], strings[i] + lengths[i]) :
+                    std::string(strings[i]));
+
+   // ...and create a program object for them.
+   ret_error(errcode_ret, CL_SUCCESS);
+   return new program(*ctx, source);
+
+} catch (error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+///
+/// Create a program object in \a ctx from one serialized module per
+/// device in \a devs.  The per-binary result codes are written to
+/// \a status_ret when it is non-null; any CL_INVALID_VALUE or
+/// CL_INVALID_BINARY among them makes the whole call fail with that
+/// code (CL_INVALID_VALUE is checked first).
+///
+PUBLIC cl_program
+clCreateProgramWithBinary(cl_context ctx, cl_uint count,
+                          const cl_device_id *devs, const size_t *lengths,
+                          const unsigned char **binaries, cl_int *status_ret,
+                          cl_int *errcode_ret) try {
+   if (!ctx)
+      throw error(CL_INVALID_CONTEXT);
+
+   if (!count || !devs || !lengths || !binaries)
+      throw error(CL_INVALID_VALUE);
+
+   if (any_of([&](const cl_device_id dev) {
+            return !ctx->has_device(dev);
+         }, devs, devs + count))
+      throw error(CL_INVALID_DEVICE);
+
+   // Deserialize the provided binaries,
+   // pairing each one with its status code: a null or empty binary is
+   // CL_INVALID_VALUE, a stream error while parsing is
+   // CL_INVALID_BINARY.
+   auto modules = map(
+      [](const unsigned char *p, size_t l) -> std::pair<cl_int, module> {
+         if (!p || !l)
+            return { CL_INVALID_VALUE, {} };
+
+         try {
+            compat::istream::buffer_t bin(p, l);
+            compat::istream s(bin);
+
+            return { CL_SUCCESS, module::deserialize(s) };
+
+         } catch (compat::istream::error &e) {
+            return { CL_INVALID_BINARY, {} };
+         }
+      },
+      binaries, binaries + count, lengths);
+
+   // update the status array,
+   if (status_ret)
+      std::transform(modules.begin(), modules.end(), status_ret,
+                     keys<cl_int, module>);
+
+   if (any_of(key_equals<cl_int, module>(CL_INVALID_VALUE),
+              modules.begin(), modules.end()))
+      throw error(CL_INVALID_VALUE);
+
+   if (any_of(key_equals<cl_int, module>(CL_INVALID_BINARY),
+              modules.begin(), modules.end()))
+      throw error(CL_INVALID_BINARY);
+
+   // initialize a program object with them.
+   ret_error(errcode_ret, CL_SUCCESS);
+   return new program(*ctx, { devs, devs + count },
+                      map(values<cl_int, module>,
+                          modules.begin(), modules.end()));
+
+} catch (error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+///
+/// Increment the reference count of \a prog.
+///
+PUBLIC cl_int
+clRetainProgram(cl_program prog) {
+   if (!prog)
+      return CL_INVALID_PROGRAM;
+
+   prog->retain();
+   return CL_SUCCESS;
+}
+
+///
+/// Drop one reference to \a prog and delete it when release() reports
+/// that no references remain.
+///
+PUBLIC cl_int
+clReleaseProgram(cl_program prog) {
+   if (!prog)
+      return CL_INVALID_PROGRAM;
+
+   if (prog->release())
+      delete prog;
+
+   return CL_SUCCESS;
+}
+
+///
+/// Build \a prog for the devices in \a devs, all of which must belong
+/// to the context of the program.
+///
+/// NOTE(review): \a opts is not used and \a pfn_notify is validated
+/// but never invoked in this function -- confirm whether that is
+/// intended.
+///
+PUBLIC cl_int
+clBuildProgram(cl_program prog, cl_uint count, const cl_device_id *devs,
+               const char *opts, void (*pfn_notify)(cl_program, void *),
+               void *user_data) try {
+   if (!prog)
+      throw error(CL_INVALID_PROGRAM);
+
+   if (bool(count) != bool(devs) ||
+       (!pfn_notify && user_data))
+      throw error(CL_INVALID_VALUE);
+
+   if (any_of([&](const cl_device_id dev) {
+            return !prog->ctx.has_device(dev);
+         }, devs, devs + count))
+      throw error(CL_INVALID_DEVICE);
+
+   prog->build({ devs, devs + count });
+   return CL_SUCCESS;
+
+} catch (error &e) {
+   return e.get();
+}
+
+///
+/// No-op: always returns CL_SUCCESS.
+///
+PUBLIC cl_int
+clUnloadCompiler() {
+   return CL_SUCCESS;
+}
+
+///
+/// Query a property of \a prog.  The device list, binary sizes and
+/// binaries are derived from prog->binaries(); each binary is obtained
+/// by serializing the corresponding module into a fresh buffer.
+///
+PUBLIC cl_int
+clGetProgramInfo(cl_program prog, cl_program_info param,
+                 size_t size, void *buf, size_t *size_ret) {
+   if (!prog)
+      return CL_INVALID_PROGRAM;
+
+   switch (param) {
+   case CL_PROGRAM_REFERENCE_COUNT:
+      return scalar_property<cl_uint>(buf, size, size_ret,
+                                      prog->ref_count());
+
+   case CL_PROGRAM_CONTEXT:
+      return scalar_property<cl_context>(buf, size, size_ret,
+                                         &prog->ctx);
+
+   case CL_PROGRAM_NUM_DEVICES:
+      return scalar_property<cl_uint>(buf, size, size_ret,
+                                      prog->binaries().size());
+
+   case CL_PROGRAM_DEVICES:
+      return vector_property<cl_device_id>(
+         buf, size, size_ret,
+         map(keys<device *, module>,
+             prog->binaries().begin(), prog->binaries().end()));
+
+   case CL_PROGRAM_SOURCE:
+      return string_property(buf, size, size_ret, prog->source());
+
+   case CL_PROGRAM_BINARY_SIZES:
+      // The size of each binary is found by serializing it and
+      // measuring the resulting buffer.
+      return vector_property<size_t>(
+         buf, size, size_ret,
+         map([](const std::pair<device *, module> &ent) {
+               compat::ostream::buffer_t bin;
+               compat::ostream s(bin);
+               ent.second.serialize(s);
+               return bin.size();
+            },
+            prog->binaries().begin(), prog->binaries().end()));
+
+   case CL_PROGRAM_BINARIES:
+      return matrix_property<unsigned char>(
+         buf, size, size_ret,
+         map([](const std::pair<device *, module> &ent) {
+               compat::ostream::buffer_t bin;
+               compat::ostream s(bin);
+               ent.second.serialize(s);
+               return bin;
+            },
+            prog->binaries().begin(), prog->binaries().end()));
+
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
+
+///
+/// Query the build status, options or log of \a prog for \a dev, which
+/// must belong to the context of the program.
+///
+PUBLIC cl_int
+clGetProgramBuildInfo(cl_program prog, cl_device_id dev,
+                      cl_program_build_info param,
+                      size_t size, void *buf, size_t *size_ret) {
+   if (!prog)
+      return CL_INVALID_PROGRAM;
+
+   if (!prog->ctx.has_device(dev))
+      return CL_INVALID_DEVICE;
+
+   switch (param) {
+   case CL_PROGRAM_BUILD_STATUS:
+      return scalar_property<cl_build_status>(buf, size, size_ret,
+                                              prog->build_status(dev));
+
+   case CL_PROGRAM_BUILD_OPTIONS:
+      return string_property(buf, size, size_ret, prog->build_opts(dev));
+
+   case CL_PROGRAM_BUILD_LOG:
+      return string_property(buf, size, size_ret, prog->build_log(dev));
+
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
diff --git a/src/gallium/state_trackers/clover/api/queue.cpp b/src/gallium/state_trackers/clover/api/queue.cpp
new file mode 100644
index 00000000000..a7905bc4396
--- /dev/null
+++ b/src/gallium/state_trackers/clover/api/queue.cpp
@@ -0,0 +1,102 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the
following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include "api/util.hpp" +#include "core/queue.hpp" + +using namespace clover; + +PUBLIC cl_command_queue +clCreateCommandQueue(cl_context ctx, cl_device_id dev, + cl_command_queue_properties props, + cl_int *errcode_ret) try { + if (!ctx) + throw error(CL_INVALID_CONTEXT); + + if (!ctx->has_device(dev)) + throw error(CL_INVALID_DEVICE); + + if (props & ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | + CL_QUEUE_PROFILING_ENABLE)) + throw error(CL_INVALID_VALUE); + + ret_error(errcode_ret, CL_SUCCESS); + return new command_queue(*ctx, *dev, props); + +} catch (error &e) { + ret_error(errcode_ret, e); + return NULL; +} + +PUBLIC cl_int +clRetainCommandQueue(cl_command_queue q) { + if (!q) + return CL_INVALID_COMMAND_QUEUE; + + q->retain(); + return CL_SUCCESS; +} + +PUBLIC cl_int +clReleaseCommandQueue(cl_command_queue q) { + if (!q) + return CL_INVALID_COMMAND_QUEUE; + + if (q->release()) + delete q; + + return CL_SUCCESS; +} + +PUBLIC cl_int +clGetCommandQueueInfo(cl_command_queue q, cl_command_queue_info param, + size_t size, void *buf, size_t *size_ret) { + if (!q) + return CL_INVALID_COMMAND_QUEUE; + + switch (param) { + case CL_QUEUE_CONTEXT: + return scalar_property<cl_context>(buf, size, size_ret, &q->ctx); + + case CL_QUEUE_DEVICE: + return scalar_property<cl_device_id>(buf, size, size_ret, &q->dev); + + case CL_QUEUE_REFERENCE_COUNT: + 
return scalar_property<cl_uint>(buf, size, size_ret, q->ref_count()); + + case CL_QUEUE_PROPERTIES: + return scalar_property<cl_command_queue_properties>(buf, size, size_ret, + q->props()); + + default: + return CL_INVALID_VALUE; + } +} + +PUBLIC cl_int +clFlush(cl_command_queue q) { + if (!q) + return CL_INVALID_COMMAND_QUEUE; + + q->flush(); + return CL_SUCCESS; +} diff --git a/src/gallium/state_trackers/clover/api/sampler.cpp b/src/gallium/state_trackers/clover/api/sampler.cpp new file mode 100644 index 00000000000..32ce22ef90f --- /dev/null +++ b/src/gallium/state_trackers/clover/api/sampler.cpp @@ -0,0 +1,90 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// + +#include "api/util.hpp" +#include "core/sampler.hpp" + +using namespace clover; + +PUBLIC cl_sampler +clCreateSampler(cl_context ctx, cl_bool norm_mode, + cl_addressing_mode addr_mode, cl_filter_mode filter_mode, + cl_int *errcode_ret) try { + if (!ctx) + throw error(CL_INVALID_CONTEXT); + + ret_error(errcode_ret, CL_SUCCESS); + return new sampler(*ctx, norm_mode, addr_mode, filter_mode); + +} catch (error &e) { + ret_error(errcode_ret, e); + return NULL; +} + +PUBLIC cl_int +clRetainSampler(cl_sampler s) { + if (!s) + throw error(CL_INVALID_SAMPLER); + + s->retain(); + return CL_SUCCESS; +} + +PUBLIC cl_int +clReleaseSampler(cl_sampler s) { + if (!s) + throw error(CL_INVALID_SAMPLER); + + if (s->release()) + delete s; + + return CL_SUCCESS; +} + +PUBLIC cl_int +clGetSamplerInfo(cl_sampler s, cl_sampler_info param, + size_t size, void *buf, size_t *size_ret) { + if (!s) + throw error(CL_INVALID_SAMPLER); + + switch (param) { + case CL_SAMPLER_REFERENCE_COUNT: + return scalar_property<cl_uint>(buf, size, size_ret, s->ref_count()); + + case CL_SAMPLER_CONTEXT: + return scalar_property<cl_context>(buf, size, size_ret, &s->ctx); + + case CL_SAMPLER_NORMALIZED_COORDS: + return scalar_property<cl_bool>(buf, size, size_ret, s->norm_mode()); + + case CL_SAMPLER_ADDRESSING_MODE: + return scalar_property<cl_addressing_mode>(buf, size, size_ret, + s->addr_mode()); + + case CL_SAMPLER_FILTER_MODE: + return scalar_property<cl_filter_mode>(buf, size, size_ret, + s->filter_mode()); + + default: + return CL_INVALID_VALUE; + } +} diff --git a/src/gallium/state_trackers/clover/api/transfer.cpp b/src/gallium/state_trackers/clover/api/transfer.cpp new file mode 100644 index 00000000000..c67b75e8034 --- /dev/null +++ b/src/gallium/state_trackers/clover/api/transfer.cpp @@ -0,0 +1,506 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the 
"Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include <cstring> + +#include "api/util.hpp" +#include "core/event.hpp" +#include "core/resource.hpp" + +using namespace clover; + +namespace { + typedef resource::point point; + + /// + /// Common argument checking shared by memory transfer commands. + /// + void + validate_base(cl_command_queue q, cl_uint num_deps, const cl_event *deps) { + if (!q) + throw error(CL_INVALID_COMMAND_QUEUE); + + if (bool(num_deps) != bool(deps) || + any_of(is_zero<cl_event>(), deps, deps + num_deps)) + throw error(CL_INVALID_EVENT_WAIT_LIST); + + if (any_of([&](const cl_event ev) { + return &ev->ctx != &q->ctx; + }, deps, deps + num_deps)) + throw error(CL_INVALID_CONTEXT); + } + + /// + /// Memory object-specific argument checking shared by most memory + /// transfer commands. + /// + void + validate_obj(cl_command_queue q, cl_mem obj) { + if (!obj) + throw error(CL_INVALID_MEM_OBJECT); + + if (&obj->ctx != &q->ctx) + throw error(CL_INVALID_CONTEXT); + } + + /// + /// Class that encapsulates the task of mapping an object of type + /// \a T. 
The return value of get() should be implicitly + /// convertible to \a void *. + /// + template<typename T> struct __map; + + template<> struct __map<void *> { + static void * + get(cl_command_queue q, void *obj, cl_map_flags flags, + size_t offset, size_t size) { + return (char *)obj + offset; + } + }; + + template<> struct __map<const void *> { + static const void * + get(cl_command_queue q, const void *obj, cl_map_flags flags, + size_t offset, size_t size) { + return (const char *)obj + offset; + } + }; + + template<> struct __map<memory_obj *> { + static mapping + get(cl_command_queue q, memory_obj *obj, cl_map_flags flags, + size_t offset, size_t size) { + return { *q, obj->resource(q), flags, true, { offset }, { size }}; + } + }; + + /// + /// Software copy from \a src_obj to \a dst_obj. They can be + /// either pointers or memory objects. + /// + template<typename T, typename S> + std::function<void (event &)> + soft_copy_op(cl_command_queue q, + T dst_obj, const point &dst_orig, const point &dst_pitch, + S src_obj, const point &src_orig, const point &src_pitch, + const point ®ion) { + return [=](event &) { + auto dst = __map<T>::get(q, dst_obj, CL_MAP_WRITE, + dst_pitch(dst_orig), dst_pitch(region)); + auto src = __map<S>::get(q, src_obj, CL_MAP_READ, + src_pitch(src_orig), src_pitch(region)); + point p; + + for (p[2] = 0; p[2] < region[2]; ++p[2]) { + for (p[1] = 0; p[1] < region[1]; ++p[1]) { + std::memcpy(static_cast<char *>(dst) + dst_pitch(p), + static_cast<const char *>(src) + src_pitch(p), + src_pitch[0] * region[0]); + } + } + }; + } + + /// + /// Hardware copy from \a src_obj to \a dst_obj. 
+ /// + template<typename T, typename S> + std::function<void (event &)> + hard_copy_op(cl_command_queue q, T dst_obj, const point &dst_orig, + S src_obj, const point &src_orig, const point ®ion) { + return [=](event &) { + dst_obj->resource(q).copy(*q, dst_orig, region, + src_obj->resource(q), src_orig); + }; + } +} + +PUBLIC cl_int +clEnqueueReadBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking, + size_t offset, size_t size, void *ptr, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + validate_base(q, num_deps, deps); + validate_obj(q, obj); + + if (!ptr || offset > obj->size() || offset + size > obj->size()) + throw error(CL_INVALID_VALUE); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_READ_BUFFER, { deps, deps + num_deps }, + soft_copy_op(q, + ptr, { 0 }, { 1 }, + obj, { offset }, { 1 }, + { size, 1, 1 })); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueWriteBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking, + size_t offset, size_t size, const void *ptr, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + validate_base(q, num_deps, deps); + validate_obj(q, obj); + + if (!ptr || offset > obj->size() || offset + size > obj->size()) + throw error(CL_INVALID_VALUE); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_WRITE_BUFFER, { deps, deps + num_deps }, + soft_copy_op(q, + obj, { offset }, { 1 }, + ptr, { 0 }, { 1 }, + { size, 1, 1 })); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueReadBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking, + const size_t *obj_origin, const size_t *host_origin, + const size_t *region, + size_t obj_row_pitch, size_t obj_slice_pitch, + size_t host_row_pitch, size_t host_slice_pitch, + void *ptr, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + validate_base(q, num_deps, deps); + validate_obj(q, obj); + + if (!ptr) 
+ throw error(CL_INVALID_VALUE); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_READ_BUFFER_RECT, { deps, deps + num_deps }, + soft_copy_op(q, + ptr, host_origin, + { 1, host_row_pitch, host_slice_pitch }, + obj, obj_origin, + { 1, obj_row_pitch, obj_slice_pitch }, + region)); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueWriteBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking, + const size_t *obj_origin, const size_t *host_origin, + const size_t *region, + size_t obj_row_pitch, size_t obj_slice_pitch, + size_t host_row_pitch, size_t host_slice_pitch, + const void *ptr, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + validate_base(q, num_deps, deps); + validate_obj(q, obj); + + if (!ptr) + throw error(CL_INVALID_VALUE); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_WRITE_BUFFER_RECT, { deps, deps + num_deps }, + soft_copy_op(q, + obj, obj_origin, + { 1, obj_row_pitch, obj_slice_pitch }, + ptr, host_origin, + { 1, host_row_pitch, host_slice_pitch }, + region)); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueCopyBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, + size_t src_offset, size_t dst_offset, size_t size, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + validate_base(q, num_deps, deps); + validate_obj(q, src_obj); + validate_obj(q, dst_obj); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_COPY_BUFFER, { deps, deps + num_deps }, + hard_copy_op(q, dst_obj, { dst_offset }, + src_obj, { src_offset }, + { size, 1, 1 })); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueCopyBufferRect(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, + const size_t *src_origin, const size_t *dst_origin, + const size_t *region, + size_t src_row_pitch, size_t src_slice_pitch, + size_t 
dst_row_pitch, size_t dst_slice_pitch, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + validate_base(q, num_deps, deps); + validate_obj(q, src_obj); + validate_obj(q, dst_obj); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_COPY_BUFFER_RECT, { deps, deps + num_deps }, + soft_copy_op(q, + dst_obj, dst_origin, + { 1, dst_row_pitch, dst_slice_pitch }, + src_obj, src_origin, + { 1, src_row_pitch, src_slice_pitch }, + region)); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueReadImage(cl_command_queue q, cl_mem obj, cl_bool blocking, + const size_t *origin, const size_t *region, + size_t row_pitch, size_t slice_pitch, void *ptr, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + image *img = dynamic_cast<image *>(obj); + + validate_base(q, num_deps, deps); + validate_obj(q, img); + + if (!ptr) + throw error(CL_INVALID_VALUE); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_READ_IMAGE, { deps, deps + num_deps }, + soft_copy_op(q, + ptr, {}, + { 1, row_pitch, slice_pitch }, + obj, origin, + { 1, img->row_pitch(), img->slice_pitch() }, + region)); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueWriteImage(cl_command_queue q, cl_mem obj, cl_bool blocking, + const size_t *origin, const size_t *region, + size_t row_pitch, size_t slice_pitch, const void *ptr, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + image *img = dynamic_cast<image *>(obj); + + validate_base(q, num_deps, deps); + validate_obj(q, img); + + if (!ptr) + throw error(CL_INVALID_VALUE); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_WRITE_IMAGE, { deps, deps + num_deps }, + soft_copy_op(q, + obj, origin, + { 1, img->row_pitch(), img->slice_pitch() }, + ptr, {}, + { 1, row_pitch, slice_pitch }, + region)); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + 
+PUBLIC cl_int +clEnqueueCopyImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, + const size_t *src_origin, const size_t *dst_origin, + const size_t *region, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + image *src_img = dynamic_cast<image *>(src_obj); + image *dst_img = dynamic_cast<image *>(dst_obj); + + validate_base(q, num_deps, deps); + validate_obj(q, src_img); + validate_obj(q, dst_img); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_COPY_IMAGE, { deps, deps + num_deps }, + hard_copy_op(q, dst_obj, dst_origin, src_obj, src_origin, region)); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueCopyImageToBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, + const size_t *src_origin, const size_t *region, + size_t dst_offset, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + image *src_img = dynamic_cast<image *>(src_obj); + + validate_base(q, num_deps, deps); + validate_obj(q, src_img); + validate_obj(q, dst_obj); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, { deps, deps + num_deps }, + soft_copy_op(q, + dst_obj, { dst_offset }, + { 0, 0, 0 }, + src_obj, src_origin, + { 1, src_img->row_pitch(), src_img->slice_pitch() }, + region)); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueCopyBufferToImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, + size_t src_offset, + const size_t *dst_origin, const size_t *region, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + image *dst_img = dynamic_cast<image *>(src_obj); + + validate_base(q, num_deps, deps); + validate_obj(q, src_obj); + validate_obj(q, dst_img); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, { deps, deps + num_deps }, + soft_copy_op(q, + dst_obj, dst_origin, + { 1, dst_img->row_pitch(), dst_img->slice_pitch() }, + src_obj, { 
src_offset }, + { 0, 0, 0 }, + region)); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC void * +clEnqueueMapBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking, + cl_map_flags flags, size_t offset, size_t size, + cl_uint num_deps, const cl_event *deps, + cl_event *ev, cl_int *errcode_ret) try { + validate_base(q, num_deps, deps); + validate_obj(q, obj); + + if (offset > obj->size() || offset + size > obj->size()) + throw error(CL_INVALID_VALUE); + + void *map = obj->resource(q).add_map( + *q, flags, blocking, { offset }, { size }); + + ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_BUFFER, + { deps, deps + num_deps })); + ret_error(errcode_ret, CL_SUCCESS); + return map; + +} catch (error &e) { + ret_error(errcode_ret, e); + return NULL; +} + +PUBLIC void * +clEnqueueMapImage(cl_command_queue q, cl_mem obj, cl_bool blocking, + cl_map_flags flags, + const size_t *origin, const size_t *region, + size_t *row_pitch, size_t *slice_pitch, + cl_uint num_deps, const cl_event *deps, + cl_event *ev, cl_int *errcode_ret) try { + image *img = dynamic_cast<image *>(obj); + + validate_base(q, num_deps, deps); + validate_obj(q, img); + + void *map = obj->resource(q).add_map( + *q, flags, blocking, origin, region); + + ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_IMAGE, + { deps, deps + num_deps })); + ret_error(errcode_ret, CL_SUCCESS); + return map; + +} catch (error &e) { + ret_error(errcode_ret, e); + return NULL; +} + +PUBLIC cl_int +clEnqueueUnmapMemObject(cl_command_queue q, cl_mem obj, void *ptr, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + validate_base(q, num_deps, deps); + validate_obj(q, obj); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_UNMAP_MEM_OBJECT, { deps, deps + num_deps }, + [=](event &) { + obj->resource(q).del_map(ptr); + }); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} diff --git 
a/src/gallium/state_trackers/clover/api/util.hpp b/src/gallium/state_trackers/clover/api/util.hpp new file mode 100644 index 00000000000..2f9ec1f6a10 --- /dev/null +++ b/src/gallium/state_trackers/clover/api/util.hpp @@ -0,0 +1,166 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#ifndef __CL_UTIL_HPP__ +#define __CL_UTIL_HPP__ + +#include <cstdint> +#include <cstring> +#include <algorithm> +#include <map> + +#include "core/base.hpp" +#include "pipe/p_compiler.h" + +namespace clover { + /// + /// Return a matrix (a container of containers) in \a buf with + /// argument and bounds checking. Intended to be used by + /// implementations of \a clGetXXXInfo(). 
+ /// + template<typename T, typename V> + cl_int + matrix_property(void *buf, size_t size, size_t *size_ret, const V& v) { + if (buf && size < sizeof(T *) * v.size()) + return CL_INVALID_VALUE; + + if (size_ret) + *size_ret = sizeof(T *) * v.size(); + + if (buf) + for_each([](typename V::value_type src, T *dst) { + if (dst) + std::copy(src.begin(), src.end(), dst); + }, + v.begin(), v.end(), (T **)buf); + + return CL_SUCCESS; + } + + /// + /// Return a vector in \a buf with argument and bounds checking. + /// Intended to be used by implementations of \a clGetXXXInfo(). + /// + template<typename T, typename V> + cl_int + vector_property(void *buf, size_t size, size_t *size_ret, const V& v) { + if (buf && size < sizeof(T) * v.size()) + return CL_INVALID_VALUE; + + if (size_ret) + *size_ret = sizeof(T) * v.size(); + if (buf) + std::copy(v.begin(), v.end(), (T *)buf); + + return CL_SUCCESS; + } + + /// + /// Return a scalar in \a buf with argument and bounds checking. + /// Intended to be used by implementations of \a clGetXXXInfo(). + /// + template<typename T> + cl_int + scalar_property(void *buf, size_t size, size_t *size_ret, T v) { + return vector_property<T>(buf, size, size_ret, std::vector<T>(1, v)); + } + + /// + /// Return a string in \a buf with argument and bounds checking. + /// Intended to be used by implementations of \a clGetXXXInfo(). + /// + inline cl_int + string_property(void *buf, size_t size, size_t *size_ret, + const std::string &v) { + if (buf && size < v.size() + 1) + return CL_INVALID_VALUE; + + if (size_ret) + *size_ret = v.size() + 1; + if (buf) + std::strcpy((char *)buf, v.c_str()); + + return CL_SUCCESS; + } + + /// + /// Convert a NULL-terminated property list into an std::map. 
+ /// + template<typename T> + std::map<T, T> + property_map(const T *props) { + std::map<T, T> m; + + while (props && *props) { + T key = *props++; + T value = *props++; + + if (m.count(key)) + throw clover::error(CL_INVALID_PROPERTY); + + m.insert({ key, value }); + } + + return m; + } + + /// + /// Convert an std::map into a NULL-terminated property list. + /// + template<typename T> + std::vector<T> + property_vector(const std::map<T, T> &m) { + std::vector<T> v; + + for (auto &p : m) { + v.push_back(p.first); + v.push_back(p.second); + } + + v.push_back(0); + return v; + } + + /// + /// Return an error code in \a p if non-zero. + /// + inline void + ret_error(cl_int *p, const clover::error &e) { + if (p) + *p = e.get(); + } + + /// + /// Return a reference-counted object in \a p if non-zero. + /// Otherwise release object ownership. + /// + template<typename T, typename S> + void + ret_object(T p, S v) { + if (p) + *p = v; + else + v->release(); + } +} + +#endif diff --git a/src/gallium/state_trackers/clover/core/base.hpp b/src/gallium/state_trackers/clover/core/base.hpp new file mode 100644 index 00000000000..19053f39235 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/base.hpp @@ -0,0 +1,285 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#ifndef __CORE_BASE_HPP__ +#define __CORE_BASE_HPP__ + +#include <stdexcept> +#include <atomic> +#include <cassert> +#include <tuple> +#include <vector> +#include <functional> + +#include "CL/cl.h" + +/// +/// Main namespace of the CL state tracker. +/// +namespace clover { + /// + /// Class that represents an error that can be converted to an + /// OpenCL status code. + /// + class error : public std::runtime_error { + public: + error(cl_int code, std::string what = "") : + std::runtime_error(what), code(code) { + } + + cl_int get() const { + return code; + } + + protected: + cl_int code; + }; + + /// + /// Base class for objects that support reference counting. + /// + class ref_counter { + public: + ref_counter() : __ref_count(1) {} + + unsigned ref_count() { + return __ref_count; + } + + void retain() { + __ref_count++; + } + + bool release() { + return (--__ref_count) == 0; + } + + private: + std::atomic<unsigned> __ref_count; + }; + + /// + /// Intrusive smart pointer for objects that implement the + /// clover::ref_counter interface. 
+ /// + template<typename T> + class ref_ptr { + public: + ref_ptr(T *q = NULL) : p(NULL) { + reset(q); + } + + template<typename S> + ref_ptr(const ref_ptr<S> &ref) : p(NULL) { + reset(ref.p); + } + + ~ref_ptr() { + reset(NULL); + } + + void reset(T *q = NULL) { + if (q) + q->retain(); + if (p && p->release()) + delete p; + p = q; + } + + ref_ptr &operator=(const ref_ptr &ref) { + reset(ref.p); + return *this; + } + + T *operator*() const { + return p; + } + + T *operator->() const { + return p; + } + + operator bool() const { + return p; + } + + private: + T *p; + }; + + /// + /// Transfer the caller's ownership of a reference-counted object + /// to a clover::ref_ptr smart pointer. + /// + template<typename T> + inline ref_ptr<T> + transfer(T *p) { + ref_ptr<T> ref { p }; + p->release(); + return ref; + } + + template<typename T, typename S, int N> + struct __iter_helper { + template<typename F, typename Its, typename... Args> + static T + step(F op, S state, Its its, Args... args) { + return __iter_helper<T, S, N - 1>::step( + op, state, its, *(std::get<N>(its)++), args...); + } + }; + + template<typename T, typename S> + struct __iter_helper<T, S, 0> { + template<typename F, typename Its, typename... Args> + static T + step(F op, S state, Its its, Args... args) { + return op(state, *(std::get<0>(its)++), args...); + } + }; + + struct __empty {}; + + template<typename T> + struct __iter_helper<T, __empty, 0> { + template<typename F, typename Its, typename... Args> + static T + step(F op, __empty state, Its its, Args... args) { + return op(*(std::get<0>(its)++), args...); + } + }; + + template<typename F, typename... Its> + struct __result_helper { + typedef typename std::remove_const< + typename std::result_of< + F (typename std::iterator_traits<Its>::value_type...) + >::type + >::type type; + }; + + /// + /// Iterate \a op on the result of zipping all the specified + /// iterators together. 
+ /// + /// Similar to std::for_each, but it accepts functions of an + /// arbitrary number of arguments. + /// + template<typename F, typename It0, typename... Its> + F + for_each(F op, It0 it0, It0 end0, Its... its) { + while (it0 != end0) + __iter_helper<void, __empty, sizeof...(Its)>::step( + op, {}, std::tie(it0, its...)); + + return op; + } + + /// + /// Iterate \a op on the result of zipping all the specified + /// iterators together, storing return values in a new container. + /// + /// Similar to std::transform, but it accepts functions of an + /// arbitrary number of arguments and it doesn't have to be + /// provided with an output iterator. + /// + template<typename F, typename It0, typename... Its, + typename C = std::vector< + typename __result_helper<F, It0, Its...>::type>> + C + map(F op, It0 it0, It0 end0, Its... its) { + C c; + + while (it0 != end0) + c.push_back( + __iter_helper<typename C::value_type, __empty, sizeof...(Its)> + ::step(op, {}, std::tie(it0, its...))); + + return c; + } + + /// + /// Reduce the result of zipping all the specified iterators + /// together, using iterative application of \a op from left to + /// right. + /// + /// Similar to std::accumulate, but it accepts functions of an + /// arbitrary number of arguments. + /// + template<typename F, typename T, typename It0, typename... Its> + T + fold(F op, T a, It0 it0, It0 end0, Its... its) { + while (it0 != end0) + a = __iter_helper<T, T, sizeof...(Its)>::step( + op, a, std::tie(it0, its...)); + + return a; + } + + /// + /// Iterate \a op on the result of zipping the specified iterators + /// together, checking if any of the evaluations returns \a true. + /// + /// Similar to std::any_of, but it accepts functions of an + /// arbitrary number of arguments. + /// + template<typename F, typename It0, typename... Its> + bool + any_of(F op, It0 it0, It0 end0, Its... 
its) { + while (it0 != end0) + if (__iter_helper<bool, __empty, sizeof...(Its)>::step( + op, {}, std::tie(it0, its...))) + return true; + + return false; + } + + template<typename T, typename S> + T + keys(const std::pair<T, S> &ent) { + return ent.first; + } + + template<typename T, typename S> + std::function<bool (const std::pair<T, S> &)> + key_equals(const T &x) { + return [=](const std::pair<T, S> &ent) { + return ent.first == x; + }; + } + + template<typename T, typename S> + S + values(const std::pair<T, S> &ent) { + return ent.second; + } + + template<typename T> + std::function<bool (const T &)> + is_zero() { + return [](const T &x) { + return x == 0; + }; + } +} + +#endif diff --git a/src/gallium/state_trackers/clover/core/compat.hpp b/src/gallium/state_trackers/clover/core/compat.hpp new file mode 100644 index 00000000000..c0057af3258 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/compat.hpp @@ -0,0 +1,290 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
// IN NO EVENT SHALL
// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//

#ifndef __CORE_COMPAT_HPP__
#define __CORE_COMPAT_HPP__

#include <new>
#include <cstring>
#include <cstdlib>
#include <string>
#include <stdint.h>


namespace clover {
   namespace compat {
      // XXX - For cases where we can't rely on STL...  I.e. the
      //       interface between code compiled as C++98 and C++11
      //       source.  Get rid of this as soon as everything can be
      //       compiled as C++11.

      ///
      /// Minimal owning array.  The allocation always holds exactly
      /// size() elements; there is no separate capacity.
      ///
      template<typename T>
      class vector {
      protected:
         /// Allocate room for \a n elements and copy-construct the
         /// first \a m of them from \a q.
         static T *
         alloc(int n, const T *q, int m) {
            T *p = reinterpret_cast<T *>(std::malloc(n * sizeof(T)));

            // malloc(0) may legitimately return NULL.
            if (!p && n > 0)
               throw std::bad_alloc();

            for (int i = 0; i < m; ++i)
               new(&p[i]) T(q[i]);

            return p;
         }

         /// Destroy \a n elements and release the storage.
         static void
         free(int n, T *p) {
            for (int i = 0; i < n; ++i)
               p[i].~T();

            std::free(p);
         }

      public:
         vector() : p(NULL), n(0) {
         }

         vector(const vector &v) : p(alloc(v.n, v.p, v.n)), n(v.n) {
         }

         vector(T *p, size_t n) : p(alloc(n, p, n)), n(n) {
         }

         template<typename C>
         vector(const C &v) :
            p(alloc(v.size(), &*v.begin(), v.size())), n(v.size()) {
         }

         ~vector() {
            free(n, p);
         }

         vector &
         operator=(const vector &v) {
            // Copy before releasing the old storage: self-assignment
            // would otherwise read freed memory, and a failed
            // allocation would leave *this dangling.
            if (this != &v) {
               T *q = alloc(v.n, v.p, v.n);
               free(n, p);

               p = q;
               n = v.n;
            }

            return *this;
         }

         /// Grow the array to \a m elements.  The new elements are
         /// counted in size() but left unconstructed; the caller has
         /// to construct them (as resize() and push_back() do).
         void
         reserve(size_t m) {
            if (n < m) {
               T *q = alloc(m, p, n);
               free(n, p);

               p = q;
               n = m;
            }
         }

         /// Grow the array to \a m elements, filling the new ones with
         /// copies of \a x.  A smaller \a m leaves the array unchanged.
         void
         resize(size_t m, T x = T()) {
            size_t old_n = size();

            reserve(m);

            for (size_t i = old_n; i < m; ++i)
               new(&p[i]) T(x);
         }

         void
         push_back(const T &x) {
            size_t old_n = size();
            reserve(old_n + 1);
            new(&p[old_n]) T(x);
         }

         size_t
         size() const {
            return n;
         }

         T *
         begin() {
            return p;
         }

         const T *
         begin() const {
            return p;
         }

         T *
         end() {
            return p + n;
         }

         const T *
         end() const {
            return p + n;
         }

         T &
         operator[](int i) {
            return p[i];
         }

         const T &
         operator[](int i) const {
            return p[i];
         }

      private:
         T *p;
         size_t n;
      };

      ///
      /// Non-owning view of \a n contiguous elements starting at \a p.
      ///
      template<typename T>
      class vector_ref {
      public:
         vector_ref(T *p, size_t n) : p(p), n(n) {
         }

         template<typename C>
         vector_ref(C &v) : p(&*v.begin()), n(v.size()) {
         }

         size_t
         size() const {
            return n;
         }

         T *
         begin() {
            return p;
         }

         const T *
         begin() const {
            return p;
         }

         T *
         end() {
            return p + n;
         }

         const T *
         end() const {
            return p + n;
         }

         T &
         operator[](int i) {
            return p[i];
         }

         const T &
         operator[](int i) const {
            return p[i];
         }

      private:
         T *p;
         size_t n;
      };

      ///
      /// Sequential reader over a byte buffer.
      ///
      class istream {
      public:
         typedef vector_ref<const unsigned char> buffer_t;

         /// Thrown by read() when fewer than the requested bytes are
         /// left.
         class error {
         public:
            virtual ~error() {}
         };

         // Only a reference to \a buf is kept: the buffer object has
         // to outlive the stream.
         istream(const buffer_t &buf) : buf(buf), offset(0) {}

         void
         read(char *p, size_t n) {
            if (offset + n > buf.size())
               throw error();

            std::memcpy(p, buf.begin() + offset, n);
            offset += n;
         }

      private:
         const buffer_t &buf;
         size_t offset;
      };

      ///
      /// Sequential writer appending to a byte vector.
      ///
      class ostream {
      public:
         typedef vector<unsigned char> buffer_t;

         // Writing starts at the current end of \a buf.
         ostream(buffer_t &buf) : buf(buf), offset(buf.size()) {}

         void
         write(const char *p, size_t n) {
            buf.resize(offset + n);
            std::memcpy(buf.begin() + offset, p, n);
            offset += n;
         }

      private:
         buffer_t &buf;
         size_t offset;
      };

      ///
      /// Non-owning, not necessarily NUL-terminated character string.
      ///
      class string : public vector_ref<const char> {
      public:
         string(const char *p) : vector_ref(p, std::strlen(p)) {
         }

         template<typename C>
         string(const C &v) : vector_ref(v) {
         }

         operator std::string() const {
            return std::string(begin(), end());
         }

         /// Pointer to the first occurrence of \a s, or end() if there
         /// is none.
         const char *
         find(const string &s) const {
            // "<=" so that a match ending at the last character is
            // found as well.
            for (size_t i = 0; i + s.size() <= size(); ++i) {
               if (!std::memcmp(begin() + i, s.begin(), s.size()))
                  return begin() + i;
            }

            return end();
         }
      };

      /// Element-wise comparison of two views.
      template<typename T>
      bool
      operator==(const vector_ref<T> &a, const vector_ref<T> &b) {
         if (a.size() != b.size())
            return false;

         for (size_t i = 0; i < a.size(); ++i)
            if (a[i] != b[i])
               return false;

         return true;
      }
   }
}

#endif
// diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp b/src/gallium/state_trackers/clover/core/compiler.hpp
// new file mode 100644
// index 00000000000..a3998d5e2fb
// --- /dev/null
// +++ b/src/gallium/state_trackers/clover/core/compiler.hpp
// @@ -0,0 +1,53 @@
//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
+// + +#ifndef __CORE_COMPILER_HPP__ +#define __CORE_COMPILER_HPP__ + +#include "core/compat.hpp" +#include "core/module.hpp" + +namespace clover { + class build_error { + public: + build_error(const compat::string &log) : log(log) { + } + + virtual ~build_error() { + } + + compat::string what() { + return log; + } + + private: + compat::vector<char> log; + }; + + module compile_program_llvm(const compat::string &source, + const compat::string &target); + + module compile_program_tgsi(const compat::string &source, + const compat::string &target); +} + +#endif diff --git a/src/gallium/state_trackers/clover/core/context.cpp b/src/gallium/state_trackers/clover/core/context.cpp new file mode 100644 index 00000000000..6e09a1acae0 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/context.cpp @@ -0,0 +1,37 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// + +#include <algorithm> + +#include "core/context.hpp" + +using namespace clover; + +_cl_context::_cl_context(const std::vector<cl_context_properties> &props, + const std::vector<device *> &devs) : + devs(devs), __props(props) { +} + +bool +_cl_context::has_device(clover::device *dev) const { + return std::count(devs.begin(), devs.end(), dev); +} diff --git a/src/gallium/state_trackers/clover/core/context.hpp b/src/gallium/state_trackers/clover/core/context.hpp new file mode 100644 index 00000000000..d783fb6b14b --- /dev/null +++ b/src/gallium/state_trackers/clover/core/context.hpp @@ -0,0 +1,51 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
//

#ifndef __CORE_CONTEXT_HPP__
#define __CORE_CONTEXT_HPP__

#include "core/base.hpp"
#include "core/device.hpp"

namespace clover {
   // Short internal alias for the context object type.
   typedef struct _cl_context context;
}

///
/// Context object: a fixed set of devices plus the property list it
/// was created with.  Reference counted and non-copyable.
///
struct _cl_context : public clover::ref_counter {
public:
   /// Both vectors are copied into the context.
   _cl_context(const std::vector<cl_context_properties> &props,
               const std::vector<clover::device *> &devs);
   _cl_context(const _cl_context &ctx) = delete;

   /// True if \a dev is one of the devices in \a devs.
   bool has_device(clover::device *dev) const;

   /// Property list passed at construction.
   const std::vector<cl_context_properties> &props() const {
      return __props;
   }

   /// Devices of this context; immutable after construction.
   const std::vector<clover::device *> devs;

private:
   std::vector<cl_context_properties> __props;
};

#endif
// diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp
// new file mode 100644
// index 00000000000..8390f3f4abb
// --- /dev/null
// +++ b/src/gallium/state_trackers/clover/core/device.cpp
// @@ -0,0 +1,179 @@
//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include "core/device.hpp" +#include "pipe/p_screen.h" +#include "pipe/p_state.h" + +using namespace clover; + +namespace { + template<typename T> + std::vector<T> + get_compute_param(pipe_screen *pipe, pipe_compute_cap cap) { + int sz = pipe->get_compute_param(pipe, cap, NULL); + std::vector<T> v(sz / sizeof(T)); + + pipe->get_compute_param(pipe, cap, &v.front()); + return v; + } +} + +_cl_device_id::_cl_device_id(pipe_loader_device *ldev) : ldev(ldev) { + pipe = pipe_loader_create_screen(ldev, PIPE_SEARCH_DIR); + if (!pipe || !pipe->get_param(pipe, PIPE_CAP_COMPUTE)) + throw error(CL_INVALID_DEVICE); +} + +_cl_device_id::_cl_device_id(_cl_device_id &&dev) : pipe(dev.pipe), ldev(dev.ldev) { + dev.ldev = NULL; + dev.pipe = NULL; +} + +_cl_device_id::~_cl_device_id() { + if (pipe) + pipe->destroy(pipe); + if (ldev) + pipe_loader_release(&ldev, 1); +} + +cl_device_type +_cl_device_id::type() const { + switch (ldev->type) { + case PIPE_LOADER_DEVICE_SOFTWARE: + return CL_DEVICE_TYPE_CPU; + case PIPE_LOADER_DEVICE_PCI: + return CL_DEVICE_TYPE_GPU; + default: + assert(0); + return 0; + } +} + +cl_uint +_cl_device_id::vendor_id() const { + switch (ldev->type) { + case PIPE_LOADER_DEVICE_SOFTWARE: + return 0; + case PIPE_LOADER_DEVICE_PCI: + return ldev->pci.vendor_id; + default: + assert(0); + return 0; + } +} + +size_t +_cl_device_id::max_images_read() const { + return PIPE_MAX_SHADER_RESOURCES; +} + +size_t +_cl_device_id::max_images_write() const { + return PIPE_MAX_SHADER_RESOURCES; +} + +cl_uint +_cl_device_id::max_image_levels_2d() const { + return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); +} + +cl_uint +_cl_device_id::max_image_levels_3d() const { + return pipe->get_param(pipe, 
PIPE_CAP_MAX_TEXTURE_3D_LEVELS); +} + +cl_uint +_cl_device_id::max_samplers() const { + return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS); +} + +cl_ulong +_cl_device_id::max_mem_global() const { + return get_compute_param<uint64_t>(pipe, + PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0]; +} + +cl_ulong +_cl_device_id::max_mem_local() const { + return get_compute_param<uint64_t>(pipe, + PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0]; +} + +cl_ulong +_cl_device_id::max_mem_input() const { + return get_compute_param<uint64_t>(pipe, + PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0]; +} + +cl_ulong +_cl_device_id::max_const_buffer_size() const { + return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_MAX_CONSTS) * 16; +} + +cl_uint +_cl_device_id::max_const_buffers() const { + return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_MAX_CONST_BUFFERS); +} + +std::vector<size_t> +_cl_device_id::max_block_size() const { + return get_compute_param<uint64_t>(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); +} + +std::string +_cl_device_id::device_name() const { + return pipe->get_name(pipe); +} + +std::string +_cl_device_id::vendor_name() const { + return pipe->get_vendor(pipe); +} + +std::string +_cl_device_id::ir_target() const { + switch (pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_PREFERRED_IR)) { + case PIPE_SHADER_IR_TGSI: + return "tgsi"; + default: + assert(0); + return ""; + } +} + +device_registry::device_registry() { + int n = pipe_loader_probe(NULL, 0); + std::vector<pipe_loader_device *> ldevs(n); + + pipe_loader_probe(&ldevs.front(), n); + + for (pipe_loader_device *ldev : ldevs) { + try { + devs.emplace_back(ldev); + } catch (error &) {} + } +} diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp new file mode 100644 index 00000000000..8f284ba5e42 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/device.hpp @@ -0,0 +1,107 
@@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
//

#ifndef __CORE_DEVICE_HPP__
#define __CORE_DEVICE_HPP__

#include <set>
#include <vector>

#include "core/base.hpp"
#include "core/format.hpp"
#include "pipe-loader/pipe_loader.h"

namespace clover {
   // Short internal alias for the device object type.
   typedef struct _cl_device_id device;
   class root_resource;
   class hard_event;
}

///
/// Compute device: owns a gallium screen together with the pipe
/// loader device it was created from.  Movable but not copyable.
///
struct _cl_device_id {
public:
   /// Throws error(CL_INVALID_DEVICE) if \a ldev cannot provide a
   /// screen with PIPE_CAP_COMPUTE.
   _cl_device_id(pipe_loader_device *ldev);
   /// Takes over the handles of \a dev and resets them to NULL there.
   _cl_device_id(_cl_device_id &&dev);
   _cl_device_id(const _cl_device_id &dev) = delete;
   /// Destroys the screen and releases the loader device, if held.
   ~_cl_device_id();

   // Device properties, derived from the loader device type and from
   // screen/shader/compute capability queries.
   cl_device_type type() const;
   cl_uint vendor_id() const;
   size_t max_images_read() const;
   size_t max_images_write() const;
   cl_uint max_image_levels_2d() const;
   cl_uint max_image_levels_3d() const;
   cl_uint max_samplers() const;
   cl_ulong max_mem_global() const;
   cl_ulong max_mem_local() const;
   cl_ulong max_mem_input() const;
   cl_ulong max_const_buffer_size() const;
   cl_uint max_const_buffers() const;

   std::vector<size_t> max_block_size() const;
   std::string device_name() const;
   std::string vendor_name() const;
   /// Name of the IR the driver prefers for compute shaders.
   std::string ir_target() const;

   // These need direct access to the private screen pointer.
   friend struct _cl_command_queue;
   friend class clover::root_resource;
   friend class clover::hard_event;
   friend std::set<cl_image_format>
   clover::supported_formats(cl_context, cl_mem_object_type);

private:
   pipe_screen *pipe;
   pipe_loader_device *ldev;
};

namespace clover {
   ///
   /// Container of all the compute devices that are available in the
   /// system.
   ///
   class device_registry {
   public:
      typedef std::vector<device>::iterator iterator;

      /// Probes the pipe loader; unusable devices are skipped.
      device_registry();

      iterator begin() {
         return devs.begin();
      }

      iterator end() {
         return devs.end();
      }

      // NOTE(review): front()/back() forward to std::vector and so
      // require a non-empty registry.
      device &front() {
         return devs.front();
      }

      device &back() {
         return devs.back();
      }

   protected:
      std::vector<device> devs;
   };
}

#endif
// diff --git a/src/gallium/state_trackers/clover/core/event.cpp b/src/gallium/state_trackers/clover/core/event.cpp
// new file mode 100644
// index 00000000000..aa287e9a0c9
// --- /dev/null
// +++ b/src/gallium/state_trackers/clover/core/event.cpp
// @@ -0,0 +1,175 @@
//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
+// + +#include "core/event.hpp" +#include "pipe/p_screen.h" + +using namespace clover; + +_cl_event::_cl_event(clover::context &ctx, + std::vector<clover::event *> deps, + action action_ok, action action_fail) : + ctx(ctx), __status(0), wait_count(1), + action_ok(action_ok), action_fail(action_fail) { + for (auto ev : deps) + ev->chain(this); +} + +_cl_event::~_cl_event() { +} + +void +_cl_event::trigger() { + if (!--wait_count) { + action_ok(*this); + + while (!__chain.empty()) { + __chain.back()->trigger(); + __chain.pop_back(); + } + } +} + +void +_cl_event::abort(cl_int status) { + __status = status; + action_fail(*this); + + while (!__chain.empty()) { + __chain.back()->abort(status); + __chain.pop_back(); + } +} + +bool +_cl_event::signalled() const { + return !wait_count; +} + +void +_cl_event::chain(clover::event *ev) { + if (wait_count) { + ev->wait_count++; + __chain.push_back(ev); + ev->deps.push_back(this); + } +} + +hard_event::hard_event(clover::command_queue &q, cl_command_type command, + std::vector<clover::event *> deps, action action) : + _cl_event(q.ctx, deps, action, [](event &ev){}), + __queue(q), __command(command), __fence(NULL) { + q.sequence(this); + trigger(); +} + +hard_event::~hard_event() { + pipe_screen *screen = queue()->dev.pipe; + screen->fence_reference(screen, &__fence, NULL); +} + +cl_int +hard_event::status() const { + pipe_screen *screen = queue()->dev.pipe; + + if (__status < 0) + return __status; + + else if (!__fence) + return CL_QUEUED; + + else if (!screen->fence_signalled(screen, __fence)) + return CL_SUBMITTED; + + else + return CL_COMPLETE; +} + +cl_command_queue +hard_event::queue() const { + return &__queue; +} + +cl_command_type +hard_event::command() const { + return __command; +} + +void +hard_event::wait() const { + pipe_screen *screen = queue()->dev.pipe; + + if (status() == CL_QUEUED) + queue()->flush(); + + if (!__fence || + !screen->fence_finish(screen, __fence, PIPE_TIMEOUT_INFINITE)) + throw 
error(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST); +} + +void +hard_event::fence(pipe_fence_handle *fence) { + pipe_screen *screen = queue()->dev.pipe; + screen->fence_reference(screen, &__fence, fence); +} + +soft_event::soft_event(clover::context &ctx, + std::vector<clover::event *> deps, + bool __trigger, action action) : + _cl_event(ctx, deps, action, action) { + if (__trigger) + trigger(); +} + +cl_int +soft_event::status() const { + if (__status < 0) + return __status; + + else if (!signalled() || + any_of([](const ref_ptr<event> &ev) { + return ev->status() != CL_COMPLETE; + }, deps.begin(), deps.end())) + return CL_SUBMITTED; + + else + return CL_COMPLETE; +} + +cl_command_queue +soft_event::queue() const { + return NULL; +} + +cl_command_type +soft_event::command() const { + return CL_COMMAND_USER; +} + +void +soft_event::wait() const { + for (auto ev : deps) + ev->wait(); + + if (status() != CL_COMPLETE) + throw error(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST); +} diff --git a/src/gallium/state_trackers/clover/core/event.hpp b/src/gallium/state_trackers/clover/core/event.hpp new file mode 100644 index 00000000000..ea4ac4ae43c --- /dev/null +++ b/src/gallium/state_trackers/clover/core/event.hpp @@ -0,0 +1,138 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#ifndef __CORE_EVENT_HPP__ +#define __CORE_EVENT_HPP__ + +#include <functional> + +#include "core/base.hpp" +#include "core/queue.hpp" + +namespace clover { + typedef struct _cl_event event; +} + +/// +/// Class that represents a task that might be executed asynchronously +/// at some point in the future. +/// +/// An event consists of a list of dependencies, a boolean signalled() +/// flag, and an associated task. An event is considered signalled as +/// soon as all its dependencies (if any) are signalled as well, and +/// the trigger() method is called; at that point the associated task +/// will be started through the specified \a action_ok. If the +/// abort() method is called instead, the specified \a action_fail is +/// executed and the associated task will never be started. Dependent +/// events will be aborted recursively. +/// +/// The execution status of the associated task can be queried using +/// the status() method, and it can be waited for completion using the +/// wait() method. 
///
struct _cl_event : public clover::ref_counter {
public:
   /// Callback invoked with the event it was registered on.
   typedef std::function<void (clover::event &)> action;

   /// Registers the new event as a dependent of every event in
   /// \a deps that has not been signalled yet.
   _cl_event(clover::context &ctx, std::vector<clover::event *> deps,
             action action_ok, action action_fail);
   virtual ~_cl_event();

   /// Drop one pending wait; runs \a action_ok once none remain.
   void trigger();
   /// Record \a status, run \a action_fail and abort dependents.
   void abort(cl_int status);
   /// True once there are no pending waits left.
   bool signalled() const;

   virtual cl_int status() const = 0;
   virtual cl_command_queue queue() const = 0;
   virtual cl_command_type command() const = 0;
   virtual void wait() const = 0;

   clover::context &ctx;

protected:
   /// Make \a ev depend on this event (no-op if already signalled).
   void chain(clover::event *ev);

   // Negative values are reported as-is by status() in subclasses.
   cl_int __status;
   // Events this event depends on.
   std::vector<clover::ref_ptr<clover::event>> deps;

private:
   // Number of trigger() calls still needed before signalling.
   unsigned wait_count;
   action action_ok;
   action action_fail;
   // Events that depend on this one.
   std::vector<clover::ref_ptr<clover::event>> __chain;
};

namespace clover {
   ///
   /// Class that represents a task executed by a command queue.
   ///
   /// Similar to a normal clover::event.  In addition it's associated
   /// with a given command queue \a q and a given OpenCL \a command.
   /// hard_event instances created for the same queue are implicitly
   /// ordered with respect to each other, and they are implicitly
   /// triggered on construction.
   ///
   /// A hard_event is considered complete when the associated
   /// hardware task finishes execution.
   ///
   class hard_event : public event {
   public:
      hard_event(clover::command_queue &q, cl_command_type command,
                 std::vector<clover::event *> deps,
                 action action = [](event &){});
      ~hard_event();

      virtual cl_int status() const;
      virtual cl_command_queue queue() const;
      virtual cl_command_type command() const;
      virtual void wait() const;

      // The command queue needs access to the private fence() setter.
      friend class ::_cl_command_queue;

   private:
      /// Replace the fence tracked by this event with \a fence.
      virtual void fence(pipe_fence_handle *fence);

      clover::command_queue &__queue;
      cl_command_type __command;
      pipe_fence_handle *__fence;
   };

   ///
   /// Class that represents a software event.
   ///
   /// A soft_event is not associated with any specific hardware task
   /// or command queue.  It's considered complete as soon as all its
   /// dependencies finish execution.
   ///
   class soft_event : public event {
   public:
      /// If \a trigger is true the event is triggered on construction.
      soft_event(clover::context &ctx, std::vector<clover::event *> deps,
                 bool trigger, action action = [](event &){});

      virtual cl_int status() const;
      virtual cl_command_queue queue() const;
      virtual cl_command_type command() const;
      virtual void wait() const;
   };
}

#endif
// diff --git a/src/gallium/state_trackers/clover/core/format.cpp b/src/gallium/state_trackers/clover/core/format.cpp
// new file mode 100644
// index 00000000000..8f6e14d6567
// --- /dev/null
// +++ b/src/gallium/state_trackers/clover/core/format.cpp
// @@ -0,0 +1,167 @@
//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
+// + +#include <algorithm> + +#include "core/format.hpp" +#include "core/memory.hpp" +#include "pipe/p_screen.h" +#include "pipe/p_context.h" + +namespace clover { + static const std::map<cl_image_format, pipe_format> formats { + { { CL_BGRA, CL_UNORM_INT8 }, PIPE_FORMAT_B8G8R8A8_UNORM }, + { { CL_ARGB, CL_UNORM_INT8 }, PIPE_FORMAT_A8R8G8B8_UNORM }, + { { CL_RGB, CL_UNORM_SHORT_565 }, PIPE_FORMAT_B5G6R5_UNORM }, + { { CL_LUMINANCE, CL_UNORM_INT8 }, PIPE_FORMAT_L8_UNORM }, + { { CL_A, CL_UNORM_INT8 }, PIPE_FORMAT_A8_UNORM }, + { { CL_INTENSITY, CL_UNORM_INT8 }, PIPE_FORMAT_I8_UNORM }, + { { CL_LUMINANCE, CL_UNORM_INT16 }, PIPE_FORMAT_L16_UNORM }, + { { CL_R, CL_FLOAT }, PIPE_FORMAT_R32_FLOAT }, + { { CL_RG, CL_FLOAT }, PIPE_FORMAT_R32G32_FLOAT }, + { { CL_RGB, CL_FLOAT }, PIPE_FORMAT_R32G32B32_FLOAT }, + { { CL_RGBA, CL_FLOAT }, PIPE_FORMAT_R32G32B32A32_FLOAT }, + { { CL_R, CL_UNORM_INT16 }, PIPE_FORMAT_R16_UNORM }, + { { CL_RG, CL_UNORM_INT16 }, PIPE_FORMAT_R16G16_UNORM }, + { { CL_RGB, CL_UNORM_INT16 }, PIPE_FORMAT_R16G16B16_UNORM }, + { { CL_RGBA, CL_UNORM_INT16 }, PIPE_FORMAT_R16G16B16A16_UNORM }, + { { CL_R, CL_SNORM_INT16 }, PIPE_FORMAT_R16_SNORM }, + { { CL_RG, CL_SNORM_INT16 }, PIPE_FORMAT_R16G16_SNORM }, + { { CL_RGB, CL_SNORM_INT16 }, PIPE_FORMAT_R16G16B16_SNORM }, + { { CL_RGBA, CL_SNORM_INT16 }, PIPE_FORMAT_R16G16B16A16_SNORM }, + { { CL_R, CL_UNORM_INT8 }, PIPE_FORMAT_R8_UNORM }, + { { CL_RG, CL_UNORM_INT8 }, PIPE_FORMAT_R8G8_UNORM }, + { { CL_RGB, CL_UNORM_INT8 }, PIPE_FORMAT_R8G8B8_UNORM }, + { { CL_RGBA, CL_UNORM_INT8 }, PIPE_FORMAT_R8G8B8A8_UNORM }, + { { CL_R, CL_SNORM_INT8 }, PIPE_FORMAT_R8_SNORM }, + { { CL_RG, CL_SNORM_INT8 }, PIPE_FORMAT_R8G8_SNORM }, + { { CL_RGB, CL_SNORM_INT8 }, PIPE_FORMAT_R8G8B8_SNORM }, + { { CL_RGBA, CL_SNORM_INT8 }, PIPE_FORMAT_R8G8B8A8_SNORM }, + { { CL_R, CL_HALF_FLOAT }, PIPE_FORMAT_R16_FLOAT }, + { { CL_RG, CL_HALF_FLOAT }, PIPE_FORMAT_R16G16_FLOAT }, + { { CL_RGB, CL_HALF_FLOAT }, PIPE_FORMAT_R16G16B16_FLOAT }, + 
{ { CL_RGBA, CL_HALF_FLOAT }, PIPE_FORMAT_R16G16B16A16_FLOAT }, + { { CL_RGBx, CL_UNORM_SHORT_555 }, PIPE_FORMAT_B5G5R5X1_UNORM }, + { { CL_RGBx, CL_UNORM_INT8 }, PIPE_FORMAT_R8G8B8X8_UNORM }, + { { CL_A, CL_UNORM_INT16 }, PIPE_FORMAT_A16_UNORM }, + { { CL_INTENSITY, CL_UNORM_INT16 }, PIPE_FORMAT_I16_UNORM }, + { { CL_LUMINANCE, CL_SNORM_INT8 }, PIPE_FORMAT_L8_SNORM }, + { { CL_INTENSITY, CL_SNORM_INT8 }, PIPE_FORMAT_I8_SNORM }, + { { CL_A, CL_SNORM_INT16 }, PIPE_FORMAT_A16_SNORM }, + { { CL_LUMINANCE, CL_SNORM_INT16 }, PIPE_FORMAT_L16_SNORM }, + { { CL_INTENSITY, CL_SNORM_INT16 }, PIPE_FORMAT_I16_SNORM }, + { { CL_A, CL_HALF_FLOAT }, PIPE_FORMAT_A16_FLOAT }, + { { CL_LUMINANCE, CL_HALF_FLOAT }, PIPE_FORMAT_L16_FLOAT }, + { { CL_INTENSITY, CL_HALF_FLOAT }, PIPE_FORMAT_I16_FLOAT }, + { { CL_A, CL_FLOAT }, PIPE_FORMAT_A32_FLOAT }, + { { CL_LUMINANCE, CL_FLOAT }, PIPE_FORMAT_L32_FLOAT }, + { { CL_INTENSITY, CL_FLOAT }, PIPE_FORMAT_I32_FLOAT }, + { { CL_RA, CL_UNORM_INT8 }, PIPE_FORMAT_R8A8_UNORM }, + { { CL_R, CL_UNSIGNED_INT8 }, PIPE_FORMAT_R8_UINT }, + { { CL_RG, CL_UNSIGNED_INT8 }, PIPE_FORMAT_R8G8_UINT }, + { { CL_RGB, CL_UNSIGNED_INT8 }, PIPE_FORMAT_R8G8B8_UINT }, + { { CL_RGBA, CL_UNSIGNED_INT8 }, PIPE_FORMAT_R8G8B8A8_UINT }, + { { CL_R, CL_SIGNED_INT8 }, PIPE_FORMAT_R8_SINT }, + { { CL_RG, CL_SIGNED_INT8 }, PIPE_FORMAT_R8G8_SINT }, + { { CL_RGB, CL_SIGNED_INT8 }, PIPE_FORMAT_R8G8B8_SINT }, + { { CL_RGBA, CL_SIGNED_INT8 }, PIPE_FORMAT_R8G8B8A8_SINT }, + { { CL_R, CL_UNSIGNED_INT16 }, PIPE_FORMAT_R16_UINT }, + { { CL_RG, CL_UNSIGNED_INT16 }, PIPE_FORMAT_R16G16_UINT }, + { { CL_RGB, CL_UNSIGNED_INT16 }, PIPE_FORMAT_R16G16B16_UINT }, + { { CL_RGBA, CL_UNSIGNED_INT16 }, PIPE_FORMAT_R16G16B16A16_UINT }, + { { CL_R, CL_SIGNED_INT16 }, PIPE_FORMAT_R16_SINT }, + { { CL_RG, CL_SIGNED_INT16 }, PIPE_FORMAT_R16G16_SINT }, + { { CL_RGB, CL_SIGNED_INT16 }, PIPE_FORMAT_R16G16B16_SINT }, + { { CL_RGBA, CL_SIGNED_INT16 }, PIPE_FORMAT_R16G16B16A16_SINT }, + { { CL_R, 
CL_UNSIGNED_INT32 }, PIPE_FORMAT_R32_UINT }, + { { CL_RG, CL_UNSIGNED_INT32 }, PIPE_FORMAT_R32G32_UINT }, + { { CL_RGB, CL_UNSIGNED_INT32 }, PIPE_FORMAT_R32G32B32_UINT }, + { { CL_RGBA, CL_UNSIGNED_INT32 }, PIPE_FORMAT_R32G32B32A32_UINT }, + { { CL_R, CL_SIGNED_INT32 }, PIPE_FORMAT_R32_SINT }, + { { CL_RG, CL_SIGNED_INT32 }, PIPE_FORMAT_R32G32_SINT }, + { { CL_RGB, CL_SIGNED_INT32 }, PIPE_FORMAT_R32G32B32_SINT }, + { { CL_RGBA, CL_SIGNED_INT32 }, PIPE_FORMAT_R32G32B32A32_SINT }, + { { CL_A, CL_UNSIGNED_INT8 }, PIPE_FORMAT_A8_UINT }, + { { CL_INTENSITY, CL_UNSIGNED_INT8 }, PIPE_FORMAT_I8_UINT }, + { { CL_LUMINANCE, CL_UNSIGNED_INT8 }, PIPE_FORMAT_L8_UINT }, + { { CL_A, CL_SIGNED_INT8 }, PIPE_FORMAT_A8_SINT }, + { { CL_INTENSITY, CL_SIGNED_INT8 }, PIPE_FORMAT_I8_SINT }, + { { CL_LUMINANCE, CL_SIGNED_INT8 }, PIPE_FORMAT_L8_SINT }, + { { CL_A, CL_UNSIGNED_INT16 }, PIPE_FORMAT_A16_UINT }, + { { CL_INTENSITY, CL_UNSIGNED_INT16 }, PIPE_FORMAT_I16_UINT }, + { { CL_LUMINANCE, CL_UNSIGNED_INT16 }, PIPE_FORMAT_L16_UINT }, + { { CL_A, CL_SIGNED_INT16 }, PIPE_FORMAT_A16_SINT }, + { { CL_INTENSITY, CL_SIGNED_INT16 }, PIPE_FORMAT_I16_SINT }, + { { CL_LUMINANCE, CL_SIGNED_INT16 }, PIPE_FORMAT_L16_SINT }, + { { CL_A, CL_UNSIGNED_INT32 }, PIPE_FORMAT_A32_UINT }, + { { CL_INTENSITY, CL_UNSIGNED_INT32 }, PIPE_FORMAT_I32_UINT }, + { { CL_LUMINANCE, CL_UNSIGNED_INT32 }, PIPE_FORMAT_L32_UINT }, + { { CL_A, CL_SIGNED_INT32 }, PIPE_FORMAT_A32_SINT }, + { { CL_INTENSITY, CL_SIGNED_INT32 }, PIPE_FORMAT_I32_SINT }, + { { CL_LUMINANCE, CL_SIGNED_INT32 }, PIPE_FORMAT_L32_SINT } + }; + + pipe_texture_target + translate_target(cl_mem_object_type type) { + switch (type) { + case CL_MEM_OBJECT_BUFFER: + return PIPE_BUFFER; + case CL_MEM_OBJECT_IMAGE2D: + return PIPE_TEXTURE_2D; + case CL_MEM_OBJECT_IMAGE3D: + return PIPE_TEXTURE_3D; + default: + throw error(CL_INVALID_VALUE); + } + } + + pipe_format + translate_format(const cl_image_format &format) { + auto it = formats.find(format); + + if (it == 
formats.end()) + throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED); + + return it->second; + } + + std::set<cl_image_format> + supported_formats(cl_context ctx, cl_mem_object_type type) { + std::set<cl_image_format> s; + pipe_texture_target target = translate_target(type); + unsigned bindings = (PIPE_BIND_SAMPLER_VIEW | + PIPE_BIND_COMPUTE_RESOURCE | + PIPE_BIND_TRANSFER_READ | + PIPE_BIND_TRANSFER_WRITE); + + for (auto f : formats) { + if (std::all_of(ctx->devs.begin(), ctx->devs.end(), + [=](const device *dev) { + return dev->pipe->is_format_supported( + dev->pipe, f.second, target, 1, bindings); + })) + s.insert(f.first); + } + + return s; + } +} diff --git a/src/gallium/state_trackers/clover/core/format.hpp b/src/gallium/state_trackers/clover/core/format.hpp new file mode 100644 index 00000000000..a24cbf37621 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/format.hpp @@ -0,0 +1,51 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#ifndef __CORE_FORMAT_HPP__ +#define __CORE_FORMAT_HPP__ + +#include <set> + +#include "core/base.hpp" +#include "pipe/p_defines.h" +#include "pipe/p_format.h" + +namespace clover { + pipe_texture_target translate_target(cl_mem_object_type type); + pipe_format translate_format(const cl_image_format &format); + + /// + /// Return all the image formats supported by a given context for + /// the given memory object type. + /// + std::set<cl_image_format> supported_formats(cl_context ctx, + cl_mem_object_type type); +} + +static inline bool +operator<(const cl_image_format &a, const cl_image_format &b) { + return (a.image_channel_order != b.image_channel_order ? + a.image_channel_order < b.image_channel_order : + a.image_channel_data_type < b.image_channel_data_type); +} + +#endif diff --git a/src/gallium/state_trackers/clover/core/geometry.hpp b/src/gallium/state_trackers/clover/core/geometry.hpp new file mode 100644 index 00000000000..027264e72f0 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/geometry.hpp @@ -0,0 +1,72 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#ifndef __CORE_GEOMETRY_HPP__ +#define __CORE_GEOMETRY_HPP__ + +#include <array> +#include <algorithm> + +namespace clover { + /// + /// N-dimensional coordinate array. + /// + template<typename T, int N> + class point { + public: + point() : a() { + } + + point(std::initializer_list<T> v) { + auto it = std::copy(v.begin(), v.end(), a.begin()); + std::fill(it, a.end(), 0); + } + + point(const T *v) { + std::copy(v, v + N, a.begin()); + } + + T &operator[](int i) { + return a[i]; + } + + const T &operator[](int i) const { + return a[i]; + } + + point operator+(const point &p) const { + point q; + std::transform(a.begin(), a.end(), p.a.begin(), + q.a.begin(), std::plus<T>()); + return q; + } + + T operator()(const point &p) const { + return std::inner_product(p.a.begin(), p.a.end(), a.begin(), 0); + } + + protected: + std::array<T, N> a; + }; +} + +#endif diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp b/src/gallium/state_trackers/clover/core/kernel.cpp new file mode 100644 index 00000000000..6fa8bd63453 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/kernel.cpp @@ -0,0 +1,393 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to 
permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include "core/kernel.hpp" +#include "core/resource.hpp" +#include "pipe/p_context.h" + +using namespace clover; + +_cl_kernel::_cl_kernel(clover::program &prog, + const std::string &name, + const std::vector<clover::module::argument> &args) : + prog(prog), __name(name), exec(*this) { + for (auto arg : args) { + if (arg.type == module::argument::scalar) + this->args.emplace_back(new scalar_argument(arg.size)); + else if (arg.type == module::argument::global) + this->args.emplace_back(new global_argument(arg.size)); + else if (arg.type == module::argument::local) + this->args.emplace_back(new local_argument()); + else if (arg.type == module::argument::constant) + this->args.emplace_back(new constant_argument()); + else if (arg.type == module::argument::image2d_rd || + arg.type == module::argument::image3d_rd) + this->args.emplace_back(new image_rd_argument()); + else if (arg.type == module::argument::image2d_wr || + arg.type == module::argument::image3d_wr) + this->args.emplace_back(new image_wr_argument()); + else if (arg.type == module::argument::sampler) + this->args.emplace_back(new sampler_argument()); + else + throw error(CL_INVALID_KERNEL_DEFINITION); + } +} + +template<typename T, typename V> +static inline std::vector<T> 
+pad_vector(clover::command_queue &q, const V &v, T x) { + std::vector<T> w { v.begin(), v.end() }; + w.resize(q.dev.max_block_size().size(), x); + return w; +} + +void +_cl_kernel::launch(clover::command_queue &q, + const std::vector<size_t> &grid_offset, + const std::vector<size_t> &grid_size, + const std::vector<size_t> &block_size) { + void *st = exec.bind(&q); + auto g_handles = map([&](size_t h) { return (uint32_t *)&exec.input[h]; }, + exec.g_handles.begin(), exec.g_handles.end()); + + q.pipe->bind_compute_state(q.pipe, st); + q.pipe->bind_compute_sampler_states(q.pipe, 0, exec.samplers.size(), + exec.samplers.data()); + q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), + exec.sviews.data()); + q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), + exec.resources.data()); + q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), + exec.g_buffers.data(), g_handles.data()); + + q.pipe->launch_grid(q.pipe, + pad_vector<uint>(q, block_size, 1).data(), + pad_vector<uint>(q, grid_size, 1).data(), + module(q).sym(__name).offset, + exec.input.data()); + + q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL); + q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL); + q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), NULL); + q.pipe->bind_compute_sampler_states(q.pipe, 0, exec.samplers.size(), NULL); + exec.unbind(); +} + +size_t +_cl_kernel::mem_local() const { + size_t sz = 0; + + for (auto &arg : args) { + if (dynamic_cast<local_argument *>(arg.get())) + sz += arg->storage(); + } + + return sz; +} + +size_t +_cl_kernel::mem_private() const { + return 0; +} + +size_t +_cl_kernel::max_block_size() const { + return SIZE_MAX; +} + +const std::string & +_cl_kernel::name() const { + return __name; +} + +std::vector<size_t> +_cl_kernel::block_size() const { + return { 0, 0, 0 }; +} + +const clover::module & +_cl_kernel::module(const clover::command_queue &q) const { + return 
prog.binaries().find(&q.dev)->second; +} + + +_cl_kernel::exec_context::exec_context(clover::kernel &kern) : + kern(kern), q(NULL), mem_local(0), st(NULL) { +} + +_cl_kernel::exec_context::~exec_context() { + if (st) + q->pipe->delete_compute_state(q->pipe, st); +} + +void * +_cl_kernel::exec_context::bind(clover::command_queue *__q) { + std::swap(q, __q); + + for (auto &arg : kern.args) + arg->bind(*this); + + // Create a new compute state if anything changed. + if (!st || q != __q || + cs.req_local_mem != mem_local || + cs.req_input_mem != input.size()) { + if (st) + __q->pipe->delete_compute_state(__q->pipe, st); + + cs.prog = kern.module(*q).sec(module::section::text).data.begin(); + cs.req_local_mem = mem_local; + cs.req_input_mem = input.size(); + st = q->pipe->create_compute_state(q->pipe, &cs); + } + + return st; +} + +void +_cl_kernel::exec_context::unbind() { + for (auto &arg : kern.args) + arg->unbind(*this); + + input.clear(); + samplers.clear(); + sviews.clear(); + resources.clear(); + g_buffers.clear(); + g_handles.clear(); + mem_local = 0; +} + +_cl_kernel::argument::argument(size_t size) : + __size(size), __set(false) { +} + +bool +_cl_kernel::argument::set() const { + return __set; +} + +size_t +_cl_kernel::argument::storage() const { + return 0; +} + +_cl_kernel::scalar_argument::scalar_argument(size_t size) : + argument(size) { +} + +void +_cl_kernel::scalar_argument::set(size_t size, const void *value) { + if (size != __size) + throw error(CL_INVALID_ARG_SIZE); + + v = { (uint8_t *)value, (uint8_t *)value + size }; + __set = true; +} + +void +_cl_kernel::scalar_argument::bind(exec_context &ctx) { + ctx.input.insert(ctx.input.end(), v.begin(), v.end()); +} + +void +_cl_kernel::scalar_argument::unbind(exec_context &ctx) { +} + +_cl_kernel::global_argument::global_argument(size_t size) : + argument(size) { +} + +void +_cl_kernel::global_argument::set(size_t size, const void *value) { + if (size != sizeof(cl_mem)) + throw error(CL_INVALID_ARG_SIZE); 
+ + obj = dynamic_cast<clover::buffer *>(*(cl_mem *)value); + __set = true; +} + +void +_cl_kernel::global_argument::bind(exec_context &ctx) { + size_t offset = ctx.input.size(); + size_t idx = ctx.g_buffers.size(); + + ctx.input.resize(offset + __size); + + ctx.g_buffers.resize(idx + 1); + ctx.g_buffers[idx] = obj->resource(ctx.q).pipe; + + ctx.g_handles.resize(idx + 1); + ctx.g_handles[idx] = offset; +} + +void +_cl_kernel::global_argument::unbind(exec_context &ctx) { +} + +_cl_kernel::local_argument::local_argument() : + argument(sizeof(uint32_t)) { +} + +size_t +_cl_kernel::local_argument::storage() const { + return __storage; +} + +void +_cl_kernel::local_argument::set(size_t size, const void *value) { + if (value) + throw error(CL_INVALID_ARG_VALUE); + + __storage = size; + __set = true; +} + +void +_cl_kernel::local_argument::bind(exec_context &ctx) { + size_t offset = ctx.input.size(); + size_t ptr = ctx.mem_local; + + ctx.input.resize(offset + sizeof(uint32_t)); + *(uint32_t *)&ctx.input[offset] = ptr; + + ctx.mem_local += __storage; +} + +void +_cl_kernel::local_argument::unbind(exec_context &ctx) { +} + +_cl_kernel::constant_argument::constant_argument() : + argument(sizeof(uint32_t)) { +} + +void +_cl_kernel::constant_argument::set(size_t size, const void *value) { + if (size != sizeof(cl_mem)) + throw error(CL_INVALID_ARG_SIZE); + + obj = dynamic_cast<clover::buffer *>(*(cl_mem *)value); + __set = true; +} + +void +_cl_kernel::constant_argument::bind(exec_context &ctx) { + size_t offset = ctx.input.size(); + size_t idx = ctx.resources.size(); + + ctx.input.resize(offset + sizeof(uint32_t)); + *(uint32_t *)&ctx.input[offset] = idx << 24; + + ctx.resources.resize(idx + 1); + ctx.resources[idx] = st = obj->resource(ctx.q).bind_surface(*ctx.q, false); +} + +void +_cl_kernel::constant_argument::unbind(exec_context &ctx) { + obj->resource(ctx.q).unbind_surface(*ctx.q, st); +} + +_cl_kernel::image_rd_argument::image_rd_argument() : + 
argument(sizeof(uint32_t)) { +} + +void +_cl_kernel::image_rd_argument::set(size_t size, const void *value) { + if (size != sizeof(cl_mem)) + throw error(CL_INVALID_ARG_SIZE); + + obj = dynamic_cast<clover::image *>(*(cl_mem *)value); + __set = true; +} + +void +_cl_kernel::image_rd_argument::bind(exec_context &ctx) { + size_t offset = ctx.input.size(); + size_t idx = ctx.sviews.size(); + + ctx.input.resize(offset + sizeof(uint32_t)); + *(uint32_t *)&ctx.input[offset] = idx; + + ctx.sviews.resize(idx + 1); + ctx.sviews[idx] = st = obj->resource(ctx.q).bind_sampler_view(*ctx.q); +} + +void +_cl_kernel::image_rd_argument::unbind(exec_context &ctx) { + obj->resource(ctx.q).unbind_sampler_view(*ctx.q, st); +} + +_cl_kernel::image_wr_argument::image_wr_argument() : + argument(sizeof(uint32_t)) { +} + +void +_cl_kernel::image_wr_argument::set(size_t size, const void *value) { + if (size != sizeof(cl_mem)) + throw error(CL_INVALID_ARG_SIZE); + + obj = dynamic_cast<clover::image *>(*(cl_mem *)value); + __set = true; +} + +void +_cl_kernel::image_wr_argument::bind(exec_context &ctx) { + size_t offset = ctx.input.size(); + size_t idx = ctx.resources.size(); + + ctx.input.resize(offset + sizeof(uint32_t)); + *(uint32_t *)&ctx.input[offset] = idx; + + ctx.resources.resize(idx + 1); + ctx.resources[idx] = st = obj->resource(ctx.q).bind_surface(*ctx.q, true); +} + +void +_cl_kernel::image_wr_argument::unbind(exec_context &ctx) { + obj->resource(ctx.q).unbind_surface(*ctx.q, st); +} + +_cl_kernel::sampler_argument::sampler_argument() : + argument(0) { +} + +void +_cl_kernel::sampler_argument::set(size_t size, const void *value) { + if (size != sizeof(cl_sampler)) + throw error(CL_INVALID_ARG_SIZE); + + obj = *(cl_sampler *)value; + __set = true; +} + +void +_cl_kernel::sampler_argument::bind(exec_context &ctx) { + size_t idx = ctx.samplers.size(); + + ctx.samplers.resize(idx + 1); + ctx.samplers[idx] = st = obj->bind(*ctx.q); +} + +void 
+_cl_kernel::sampler_argument::unbind(exec_context &ctx) { + obj->unbind(*ctx.q, st); +} diff --git a/src/gallium/state_trackers/clover/core/kernel.hpp b/src/gallium/state_trackers/clover/core/kernel.hpp new file mode 100644 index 00000000000..bc21de8094f --- /dev/null +++ b/src/gallium/state_trackers/clover/core/kernel.hpp @@ -0,0 +1,214 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#ifndef __CORE_KERNEL_HPP__ +#define __CORE_KERNEL_HPP__ + +#include <memory> + +#include "core/base.hpp" +#include "core/program.hpp" +#include "core/memory.hpp" +#include "core/sampler.hpp" +#include "pipe/p_state.h" + +namespace clover { + typedef struct _cl_kernel kernel; + class argument; +} + +struct _cl_kernel : public clover::ref_counter { +private: + /// + /// Class containing all the state required to execute a compute + /// kernel. 
+ /// + struct exec_context { + exec_context(clover::kernel &kern); + ~exec_context(); + + void *bind(clover::command_queue *q); + void unbind(); + + clover::kernel &kern; + clover::command_queue *q; + + std::vector<uint8_t> input; + std::vector<void *> samplers; + std::vector<pipe_sampler_view *> sviews; + std::vector<pipe_surface *> resources; + std::vector<pipe_resource *> g_buffers; + std::vector<size_t> g_handles; + size_t mem_local; + + private: + void *st; + pipe_compute_state cs; + }; + +public: + class argument { + public: + argument(size_t size); + + /// \a true if the argument has been set. + bool set() const; + + /// Argument size in the input buffer. + size_t size() const; + + /// Storage space required for the referenced object. + virtual size_t storage() const; + + /// Set this argument to some object. + virtual void set(size_t size, const void *value) = 0; + + /// Allocate the necessary resources to bind the specified + /// object to this argument, and update \a ctx accordingly. + virtual void bind(exec_context &ctx) = 0; + + /// Free any resources that were allocated in bind(). 
+ virtual void unbind(exec_context &ctx) = 0; + + protected: + size_t __size; + bool __set; + }; + + _cl_kernel(clover::program &prog, + const std::string &name, + const std::vector<clover::module::argument> &args); + + void launch(clover::command_queue &q, + const std::vector<size_t> &grid_offset, + const std::vector<size_t> &grid_size, + const std::vector<size_t> &block_size); + + size_t mem_local() const; + size_t mem_private() const; + size_t max_block_size() const; + + const std::string &name() const; + std::vector<size_t> block_size() const; + + clover::program &prog; + std::vector<std::unique_ptr<argument>> args; + +private: + const clover::module & + module(const clover::command_queue &q) const; + + class scalar_argument : public argument { + public: + scalar_argument(size_t size); + + virtual void set(size_t size, const void *value); + virtual void bind(exec_context &ctx); + virtual void unbind(exec_context &ctx); + + private: + std::vector<uint8_t> v; + }; + + class global_argument : public argument { + public: + global_argument(size_t size); + + virtual void set(size_t size, const void *value); + virtual void bind(exec_context &ctx); + virtual void unbind(exec_context &ctx); + + private: + clover::buffer *obj; + }; + + class local_argument : public argument { + public: + local_argument(); + + virtual size_t storage() const; + + virtual void set(size_t size, const void *value); + virtual void bind(exec_context &ctx); + virtual void unbind(exec_context &ctx); + + private: + size_t __storage; + }; + + class constant_argument : public argument { + public: + constant_argument(); + + virtual void set(size_t size, const void *value); + virtual void bind(exec_context &ctx); + virtual void unbind(exec_context &ctx); + + private: + clover::buffer *obj; + pipe_surface *st; + }; + + class image_rd_argument : public argument { + public: + image_rd_argument(); + + virtual void set(size_t size, const void *value); + virtual void bind(exec_context &ctx); + virtual void 
unbind(exec_context &ctx); + + private: + clover::image *obj; + pipe_sampler_view *st; + }; + + class image_wr_argument : public argument { + public: + image_wr_argument(); + + virtual void set(size_t size, const void *value); + virtual void bind(exec_context &ctx); + virtual void unbind(exec_context &ctx); + + private: + clover::image *obj; + pipe_surface *st; + }; + + class sampler_argument : public argument { + public: + sampler_argument(); + + virtual void set(size_t size, const void *value); + virtual void bind(exec_context &ctx); + virtual void unbind(exec_context &ctx); + + private: + clover::sampler *obj; + void *st; + }; + + std::string __name; + exec_context exec; +}; + +#endif diff --git a/src/gallium/state_trackers/clover/core/memory.cpp b/src/gallium/state_trackers/clover/core/memory.cpp new file mode 100644 index 00000000000..8d8be2e547d --- /dev/null +++ b/src/gallium/state_trackers/clover/core/memory.cpp @@ -0,0 +1,199 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include "core/memory.hpp" +#include "core/resource.hpp" + +using namespace clover; + +_cl_mem::_cl_mem(clover::context &ctx, cl_mem_flags flags, + size_t size, void *host_ptr) : + ctx(ctx), __flags(flags), + __size(size), __host_ptr(host_ptr), + __destroy_notify([]{}) { + if (flags & CL_MEM_COPY_HOST_PTR) + data.append((char *)host_ptr, size); +} + +_cl_mem::~_cl_mem() { + __destroy_notify(); +} + +void +_cl_mem::destroy_notify(std::function<void ()> f) { + __destroy_notify = f; +} + +cl_mem_flags +_cl_mem::flags() const { + return __flags; +} + +size_t +_cl_mem::size() const { + return __size; +} + +void * +_cl_mem::host_ptr() const { + return __host_ptr; +} + +buffer::buffer(clover::context &ctx, cl_mem_flags flags, + size_t size, void *host_ptr) : + memory_obj(ctx, flags, size, host_ptr) { +} + +cl_mem_object_type +buffer::type() const { + return CL_MEM_OBJECT_BUFFER; +} + +root_buffer::root_buffer(clover::context &ctx, cl_mem_flags flags, + size_t size, void *host_ptr) : + buffer(ctx, flags, size, host_ptr) { +} + +clover::resource & +root_buffer::resource(cl_command_queue q) { + // Create a new resource if there's none for this device yet. + if (!resources.count(&q->dev)) { + auto r = (!resources.empty() ? 
+ new root_resource(q->dev, *this, *resources.begin()->second) : + new root_resource(q->dev, *this, *q, data)); + + resources.insert(std::make_pair(&q->dev, + std::unique_ptr<root_resource>(r))); + data.clear(); + } + + return *resources.find(&q->dev)->second; +} + +sub_buffer::sub_buffer(clover::root_buffer &parent, cl_mem_flags flags, + size_t offset, size_t size) : + buffer(parent.ctx, flags, size, + (char *)parent.host_ptr() + offset), + parent(parent), __offset(offset) { +} + +clover::resource & +sub_buffer::resource(cl_command_queue q) { + // Create a new resource if there's none for this device yet. + if (!resources.count(&q->dev)) { + auto r = new sub_resource(parent.resource(q), { offset() }); + + resources.insert(std::make_pair(&q->dev, + std::unique_ptr<sub_resource>(r))); + } + + return *resources.find(&q->dev)->second; +} + +size_t +sub_buffer::offset() const { + return __offset; +} + +image::image(clover::context &ctx, cl_mem_flags flags, + const cl_image_format *format, + size_t width, size_t height, size_t depth, + size_t row_pitch, size_t slice_pitch, size_t size, + void *host_ptr) : + memory_obj(ctx, flags, size, host_ptr), + __format(*format), __width(width), __height(height), __depth(depth), + __row_pitch(row_pitch), __slice_pitch(slice_pitch) { +} + +clover::resource & +image::resource(cl_command_queue q) { + // Create a new resource if there's none for this device yet. + if (!resources.count(&q->dev)) { + auto r = (!resources.empty() ? 
+ new root_resource(q->dev, *this, *resources.begin()->second) : + new root_resource(q->dev, *this, *q, data)); + + resources.insert(std::make_pair(&q->dev, + std::unique_ptr<root_resource>(r))); + data.clear(); + } + + return *resources.find(&q->dev)->second; +} + +cl_image_format +image::format() const { + return __format; +} + +size_t +image::width() const { + return __width; +} + +size_t +image::height() const { + return __height; +} + +size_t +image::depth() const { + return __depth; +} + +size_t +image::row_pitch() const { + return __row_pitch; +} + +size_t +image::slice_pitch() const { + return __slice_pitch; +} + +image2d::image2d(clover::context &ctx, cl_mem_flags flags, + const cl_image_format *format, size_t width, + size_t height, size_t row_pitch, + void *host_ptr) : + image(ctx, flags, format, width, height, 0, + row_pitch, 0, height * row_pitch, host_ptr) { +} + +cl_mem_object_type +image2d::type() const { + return CL_MEM_OBJECT_IMAGE2D; +} + +image3d::image3d(clover::context &ctx, cl_mem_flags flags, + const cl_image_format *format, + size_t width, size_t height, size_t depth, + size_t row_pitch, size_t slice_pitch, + void *host_ptr) : + image(ctx, flags, format, width, height, depth, + row_pitch, slice_pitch, depth * slice_pitch, + host_ptr) { +} + +cl_mem_object_type +image3d::type() const { + return CL_MEM_OBJECT_IMAGE3D; +} diff --git a/src/gallium/state_trackers/clover/core/memory.hpp b/src/gallium/state_trackers/clover/core/memory.hpp new file mode 100644 index 00000000000..96f70e931bc --- /dev/null +++ b/src/gallium/state_trackers/clover/core/memory.hpp @@ -0,0 +1,157 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of 
the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#ifndef __CORE_MEMORY_HPP__ +#define __CORE_MEMORY_HPP__ + +#include <functional> +#include <map> +#include <memory> + +#include "core/base.hpp" +#include "core/queue.hpp" + +namespace clover { + typedef struct _cl_mem memory_obj; + + class resource; + class sub_resource; +} + +struct _cl_mem : public clover::ref_counter { +protected: + _cl_mem(clover::context &ctx, cl_mem_flags flags, + size_t size, void *host_ptr); + _cl_mem(const _cl_mem &obj) = delete; + +public: + virtual ~_cl_mem(); + + virtual cl_mem_object_type type() const = 0; + virtual clover::resource &resource(cl_command_queue q) = 0; + + void destroy_notify(std::function<void ()> f); + cl_mem_flags flags() const; + size_t size() const; + void *host_ptr() const; + + clover::context &ctx; + +private: + cl_mem_flags __flags; + size_t __size; + void *__host_ptr; + std::function<void ()> __destroy_notify; + +protected: + std::string data; +}; + +namespace clover { + struct buffer : public memory_obj { + protected: + buffer(clover::context &ctx, cl_mem_flags flags, + size_t size, void *host_ptr); + + public: + virtual cl_mem_object_type type() const; + }; + + struct root_buffer : public buffer { + public: + root_buffer(clover::context &ctx, cl_mem_flags flags, + size_t size, void 
*host_ptr); + + virtual clover::resource &resource(cl_command_queue q); + + private: + std::map<clover::device *, + std::unique_ptr<clover::root_resource>> resources; + }; + + struct sub_buffer : public buffer { + public: + sub_buffer(clover::root_buffer &parent, cl_mem_flags flags, + size_t offset, size_t size); + + virtual clover::resource &resource(cl_command_queue q); + size_t offset() const; + + clover::root_buffer &parent; + + private: + size_t __offset; + std::map<clover::device *, + std::unique_ptr<clover::sub_resource>> resources; + }; + + struct image : public memory_obj { + protected: + image(clover::context &ctx, cl_mem_flags flags, + const cl_image_format *format, + size_t width, size_t height, size_t depth, + size_t row_pitch, size_t slice_pitch, size_t size, + void *host_ptr); + + public: + virtual clover::resource &resource(cl_command_queue q); + cl_image_format format() const; + size_t width() const; + size_t height() const; + size_t depth() const; + size_t row_pitch() const; + size_t slice_pitch() const; + + private: + cl_image_format __format; + size_t __width; + size_t __height; + size_t __depth; + size_t __row_pitch; + size_t __slice_pitch; + std::map<clover::device *, + std::unique_ptr<clover::root_resource>> resources; + }; + + struct image2d : public image { + public: + image2d(clover::context &ctx, cl_mem_flags flags, + const cl_image_format *format, size_t width, + size_t height, size_t row_pitch, + void *host_ptr); + + virtual cl_mem_object_type type() const; + }; + + struct image3d : public image { + public: + image3d(clover::context &ctx, cl_mem_flags flags, + const cl_image_format *format, + size_t width, size_t height, size_t depth, + size_t row_pitch, size_t slice_pitch, + void *host_ptr); + + virtual cl_mem_object_type type() const; + }; +} + +#endif diff --git a/src/gallium/state_trackers/clover/core/module.cpp b/src/gallium/state_trackers/clover/core/module.cpp new file mode 100644 index 00000000000..1865771443b --- /dev/null +++ 
b/src/gallium/state_trackers/clover/core/module.cpp @@ -0,0 +1,172 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include <type_traits> +#include <algorithm> + +#include "core/module.hpp" + +using namespace clover; + +namespace { + template<typename T, typename = void> + struct __serializer; + + /// Serialize the specified object. + template<typename T> + void + __proc(compat::ostream &os, const T &x) { + __serializer<T>::proc(os, x); + } + + /// Deserialize the specified object. + template<typename T> + void + __proc(compat::istream &is, T &x) { + __serializer<T>::proc(is, x); + } + + template<typename T> + T + __proc(compat::istream &is) { + T x; + __serializer<T>::proc(is, x); + return x; + } + + /// (De)serialize a scalar value. 
+ template<typename T> + struct __serializer<T, typename std::enable_if< + std::is_scalar<T>::value>::type> { + static void + proc(compat::ostream &os, const T &x) { + os.write(reinterpret_cast<const char *>(&x), sizeof(x)); + } + + static void + proc(compat::istream &is, T &x) { + is.read(reinterpret_cast<char *>(&x), sizeof(x)); + } + }; + + /// (De)serialize a vector. + template<typename T> + struct __serializer<compat::vector<T>> { + static void + proc(compat::ostream &os, const compat::vector<T> &v) { + __proc<uint32_t>(os, v.size()); + + for (size_t i = 0; i < v.size(); i++) + __proc<T>(os, v[i]); + } + + static void + proc(compat::istream &is, compat::vector<T> &v) { + v.reserve(__proc<uint32_t>(is)); + + for (size_t i = 0; i < v.size(); i++) + new(&v[i]) T(__proc<T>(is)); + } + }; + + /// (De)serialize a module::section. + template<> + struct __serializer<module::section> { + template<typename S, typename QT> + static void + proc(S &s, QT &x) { + __proc(s, x.type); + __proc(s, x.size); + __proc(s, x.data); + } + }; + + /// (De)serialize a module::argument. + template<> + struct __serializer<module::argument> { + template<typename S, typename QT> + static void + proc(S &s, QT &x) { + __proc(s, x.type); + __proc(s, x.size); + } + }; + + /// (De)serialize a module::symbol. + template<> + struct __serializer<module::symbol> { + template<typename S, typename QT> + static void + proc(S &s, QT &x) { + __proc(s, x.section); + __proc(s, x.offset); + __proc(s, x.args); + } + }; + + /// (De)serialize a module. 
+ template<> + struct __serializer<module> { + template<typename S, typename QT> + static void + proc(S &s, QT &x) { + __proc(s, x.syms); + __proc(s, x.secs); + } + }; +}; + +namespace clover { + void + module::serialize(compat::ostream &os) const { + __proc(os, *this); + } + + module + module::deserialize(compat::istream &is) { + return __proc<module>(is); + } + + const module::symbol & + module::sym(compat::string name) const { + auto it = std::find_if(syms.begin(), syms.end(), [&](const symbol &x) { + return compat::string(x.name) == name; + }); + + if (it == syms.end()) + throw noent_error(); + + return *it; + } + + const module::section & + module::sec(typename section::type type) const { + auto it = std::find_if(secs.begin(), secs.end(), [&](const section &x) { + return x.type == type; + }); + + if (it == secs.end()) + throw noent_error(); + + return *it; + } +} diff --git a/src/gallium/state_trackers/clover/core/module.hpp b/src/gallium/state_trackers/clover/core/module.hpp new file mode 100644 index 00000000000..bc4b203af8e --- /dev/null +++ b/src/gallium/state_trackers/clover/core/module.hpp @@ -0,0 +1,93 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
#ifndef __CORE_MODULE_HPP__
#define __CORE_MODULE_HPP__

#include "core/compat.hpp"

namespace clover {
   ///
   /// Container for a compiled program: a list of sections holding raw
   /// data and a list of symbols that refer to them.
   ///
   struct module {
      /// Thrown by sym() and sec() when the lookup finds nothing.
      class noent_error {
      public:
         virtual ~noent_error() {}
      };

      // Fixed 32-bit types for ids and sizes.  Note that module::size_t
      // hides the global size_t inside this struct.
      typedef uint32_t resource_id;
      typedef uint32_t size_t;

      /// A block of data belonging to the module.
      struct section {
         enum type {
            text,
            data_constant,
            data_global,
            data_local,
            data_private
         };

         resource_id id;
         // The member is named like the enum above, so code that needs
         // the enum type must spell it with an elaborated specifier
         // (see the sec() declaration below).
         type type;
         size_t size;
         clover::compat::vector<char> data;
      };

      /// Kind and size of one kernel argument.
      struct argument {
         enum type {
            scalar,
            constant,
            global,
            local,
            image2d_rd,
            image2d_wr,
            image3d_rd,
            image3d_wr,
            sampler
         };

         type type;
         size_t size;
      };

      /// A named entry point.
      struct symbol {
         clover::compat::vector<char> name;
         // NOTE(review): presumably matches section::id of the section
         // holding the code -- confirm against the users of this field.
         resource_id section;
         size_t offset;
         clover::compat::vector<argument> args;
      };

      /// Write this module to \a os.
      void serialize(compat::ostream &os) const;
      /// Read a module back from \a is.
      static module deserialize(compat::istream &is);

      /// Look up a symbol by name.  Throws module::noent_error if not
      /// found.
      const symbol &sym(compat::string name) const;

      /// Look up a section by type.  Throws module::noent_error if not
      /// found.
      const section &sec(typename section::type type) const;

      clover::compat::vector<symbol> syms;
      clover::compat::vector<section> secs;
   };
}

#endif
+// + +#include "core/program.hpp" +#include "core/compiler.hpp" + +using namespace clover; + +_cl_program::_cl_program(clover::context &ctx, + const std::string &source) : + ctx(ctx), __source(source) { +} + +_cl_program::_cl_program(clover::context &ctx, + const std::vector<clover::device *> &devs, + const std::vector<clover::module> &binaries) : + ctx(ctx) { + for_each([&](clover::device *dev, const clover::module &bin) { + __binaries.insert({ dev, bin }); + }, + devs.begin(), devs.end(), binaries.begin()); +} + +void +_cl_program::build(const std::vector<clover::device *> &devs) { + __binaries.clear(); + __logs.clear(); + + for (auto dev : devs) { + try { + auto module = (dev->ir_target() == "tgsi" ? + compile_program_tgsi(__source, dev->ir_target()) : + compile_program_llvm(__source, dev->ir_target())); + __binaries.insert({ dev, module }); + + } catch (build_error &e) { + __logs.insert({ dev, e.what() }); + throw error(CL_BUILD_PROGRAM_FAILURE); + } + } +} + +const std::string & +_cl_program::source() const { + return __source; +} + +const std::map<clover::device *, clover::module> & +_cl_program::binaries() const { + return __binaries; +} + +cl_build_status +_cl_program::build_status(clover::device *dev) const { + return __binaries.count(dev) ? CL_BUILD_SUCCESS : CL_BUILD_NONE; +} + +std::string +_cl_program::build_opts(clover::device *dev) const { + return {}; +} + +std::string +_cl_program::build_log(clover::device *dev) const { + return __logs.count(dev) ? 
__logs.find(dev)->second : ""; +} diff --git a/src/gallium/state_trackers/clover/core/program.hpp b/src/gallium/state_trackers/clover/core/program.hpp new file mode 100644 index 00000000000..f3858f6ce98 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/program.hpp @@ -0,0 +1,61 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
#ifndef __CORE_PROGRAM_HPP__
#define __CORE_PROGRAM_HPP__

#include <map>

#include "core/base.hpp"
#include "core/context.hpp"
#include "core/module.hpp"

namespace clover {
   typedef struct _cl_program program;
}

///
/// A program object: source text and/or per-device binaries, together
/// with the build logs of the most recent build.
///
struct _cl_program : public clover::ref_counter {
public:
   /// Create a program from source text.
   _cl_program(clover::context &ctx,
               const std::string &source);
   /// Create a program from prebuilt binaries, one per device in \a devs.
   _cl_program(clover::context &ctx,
               const std::vector<clover::device *> &devs,
               const std::vector<clover::module> &binaries);

   /// Compile the source for each device in \a devs, replacing any
   /// previous binaries and logs.
   void build(const std::vector<clover::device *> &devs);

   const std::string &source() const;
   const std::map<clover::device *, clover::module> &binaries() const;

   // Per-device build information.
   cl_build_status build_status(clover::device *dev) const;
   std::string build_opts(clover::device *dev) const;
   std::string build_log(clover::device *dev) const;

   clover::context &ctx;

private:
   // Binaries and build logs, keyed by the device they belong to.
   std::map<clover::device *, clover::module> __binaries;
   std::map<clover::device *, std::string> __logs;
   std::string __source;
};

#endif
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include <algorithm> + +#include "core/queue.hpp" +#include "core/event.hpp" +#include "pipe/p_screen.h" +#include "pipe/p_context.h" + +using namespace clover; + +_cl_command_queue::_cl_command_queue(context &ctx, device &dev, + cl_command_queue_properties props) : + ctx(ctx), dev(dev), __props(props) { + pipe = dev.pipe->context_create(dev.pipe, NULL); + if (!pipe) + throw error(CL_INVALID_DEVICE); +} + +_cl_command_queue::~_cl_command_queue() { + pipe->destroy(pipe); +} + +void +_cl_command_queue::flush() { + pipe_screen *screen = dev.pipe; + pipe_fence_handle *fence = NULL; + + if (!queued_events.empty()) { + // Find out which events have already been signalled. + auto first = queued_events.begin(); + auto last = std::find_if(queued_events.begin(), queued_events.end(), + [](event_ptr &ev) { return !ev->signalled(); }); + + // Flush and fence them. 
+ pipe->flush(pipe, &fence); + std::for_each(first, last, [&](event_ptr &ev) { ev->fence(fence); }); + screen->fence_reference(screen, &fence, NULL); + queued_events.erase(first, last); + } +} + +void +_cl_command_queue::sequence(clover::hard_event *ev) { + if (!queued_events.empty()) + queued_events.back()->chain(ev); + + queued_events.push_back(ev); +} diff --git a/src/gallium/state_trackers/clover/core/queue.hpp b/src/gallium/state_trackers/clover/core/queue.hpp new file mode 100644 index 00000000000..6c124eae83f --- /dev/null +++ b/src/gallium/state_trackers/clover/core/queue.hpp @@ -0,0 +1,72 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// + +#ifndef __CORE_QUEUE_HPP__ +#define __CORE_QUEUE_HPP__ + +#include "core/base.hpp" +#include "core/context.hpp" +#include "pipe/p_context.h" + +namespace clover { + typedef struct _cl_command_queue command_queue; + class resource; + class mapping; + class hard_event; +} + +struct _cl_command_queue : public clover::ref_counter { +public: + _cl_command_queue(clover::context &ctx, clover::device &dev, + cl_command_queue_properties props); + _cl_command_queue(const _cl_command_queue &q) = delete; + ~_cl_command_queue(); + + void flush(); + + cl_command_queue_properties props() const { + return __props; + } + + clover::context &ctx; + clover::device &dev; + + friend class clover::resource; + friend class clover::root_resource; + friend class clover::mapping; + friend class clover::hard_event; + friend struct _cl_sampler; + friend struct _cl_kernel; + +private: + /// Serialize a hardware event with respect to the previous ones, + /// and push it to the pending list. + void sequence(clover::hard_event *ev); + + cl_command_queue_properties __props; + pipe_context *pipe; + + typedef clover::ref_ptr<clover::hard_event> event_ptr; + std::vector<event_ptr> queued_events; +}; + +#endif diff --git a/src/gallium/state_trackers/clover/core/resource.cpp b/src/gallium/state_trackers/clover/core/resource.cpp new file mode 100644 index 00000000000..61085b2cb29 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/resource.cpp @@ -0,0 +1,203 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and 
this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include "core/resource.hpp" +#include "pipe/p_screen.h" +#include "util/u_sampler.h" +#include "util/u_format.h" + +using namespace clover; + +namespace { + class box { + public: + box(const resource::point &origin, const resource::point &size) : + pipe({ (unsigned)origin[0], (unsigned)origin[1], + (unsigned)origin[2], (unsigned)size[0], + (unsigned)size[1], (unsigned)size[2] }) { + } + + operator const pipe_box *() { + return &pipe; + } + + protected: + pipe_box pipe; + }; +} + +resource::resource(clover::device &dev, clover::memory_obj &obj) : + dev(dev), obj(obj), pipe(NULL), offset{0} { +} + +resource::~resource() { +} + +void +resource::copy(command_queue &q, const point &origin, const point ®ion, + resource &src_res, const point &src_origin) { + point p = offset + origin; + + q.pipe->resource_copy_region(q.pipe, pipe, 0, p[0], p[1], p[2], + src_res.pipe, 0, + box(src_res.offset + src_origin, region)); +} + +void * +resource::add_map(command_queue &q, cl_map_flags flags, bool blocking, + const point &origin, const point ®ion) { + maps.emplace_back(q, *this, flags, blocking, origin, region); + return maps.back(); +} + +void +resource::del_map(void *p) { + auto it = std::find(maps.begin(), maps.end(), p); + if (it != maps.end()) + maps.erase(it); +} + +unsigned +resource::map_count() const { + return maps.size(); +} + +pipe_sampler_view * +resource::bind_sampler_view(clover::command_queue &q) 
{ + pipe_sampler_view info; + + u_sampler_view_default_template(&info, pipe, pipe->format); + return q.pipe->create_sampler_view(q.pipe, pipe, &info); +} + +void +resource::unbind_sampler_view(clover::command_queue &q, + pipe_sampler_view *st) { + q.pipe->sampler_view_destroy(q.pipe, st); +} + +pipe_surface * +resource::bind_surface(clover::command_queue &q, bool rw) { + pipe_surface info {}; + + info.format = pipe->format; + info.usage = pipe->bind; + info.writable = rw; + + if (pipe->target == PIPE_BUFFER) + info.u.buf.last_element = pipe->width0 - 1; + + return q.pipe->create_surface(q.pipe, pipe, &info); +} + +void +resource::unbind_surface(clover::command_queue &q, pipe_surface *st) { + q.pipe->surface_destroy(q.pipe, st); +} + +root_resource::root_resource(clover::device &dev, clover::memory_obj &obj, + clover::command_queue &q, + const std::string &data) : + resource(dev, obj) { + pipe_resource info {}; + + if (image *img = dynamic_cast<image *>(&obj)) { + info.format = translate_format(img->format()); + info.width0 = img->width(); + info.height0 = img->height(); + info.depth0 = img->depth(); + } else { + info.width0 = obj.size(); + info.height0 = 1; + info.depth0 = 1; + } + + info.target = translate_target(obj.type()); + info.bind = (PIPE_BIND_SAMPLER_VIEW | + PIPE_BIND_COMPUTE_RESOURCE | + PIPE_BIND_GLOBAL | + PIPE_BIND_TRANSFER_READ | + PIPE_BIND_TRANSFER_WRITE); + + pipe = dev.pipe->resource_create(dev.pipe, &info); + if (!pipe) + throw error(CL_OUT_OF_RESOURCES); + + if (!data.empty()) { + box rect { { 0, 0, 0 }, { info.width0, info.height0, info.depth0 } }; + unsigned cpp = util_format_get_blocksize(info.format); + + q.pipe->transfer_inline_write(q.pipe, pipe, 0, PIPE_TRANSFER_WRITE, + rect, data.data(), cpp * info.width0, + cpp * info.width0 * info.height0); + } +} + +root_resource::root_resource(clover::device &dev, clover::memory_obj &obj, + clover::root_resource &r) : + resource(dev, obj) { + assert(0); // XXX -- resource shared among dev and r.dev 
+} + +root_resource::~root_resource() { + dev.pipe->resource_destroy(dev.pipe, pipe); +} + +sub_resource::sub_resource(clover::resource &r, point offset) : + resource(r.dev, r.obj) { + pipe = r.pipe; + offset = r.offset + offset; +} + +mapping::mapping(command_queue &q, resource &r, + cl_map_flags flags, bool blocking, + const resource::point &origin, + const resource::point ®ion) : + pctx(q.pipe) { + unsigned usage = ((flags & CL_MAP_WRITE ? PIPE_TRANSFER_WRITE : 0 ) | + (flags & CL_MAP_READ ? PIPE_TRANSFER_READ : 0 ) | + (blocking ? PIPE_TRANSFER_UNSYNCHRONIZED : 0)); + + pxfer = pctx->get_transfer(pctx, r.pipe, 0, usage, + box(origin + r.offset, region)); + if (!pxfer) + throw error(CL_OUT_OF_RESOURCES); + + p = pctx->transfer_map(pctx, pxfer); + if (!p) { + pctx->transfer_destroy(pctx, pxfer); + throw error(CL_OUT_OF_RESOURCES); + } +} + +mapping::mapping(mapping &&m) : + pctx(m.pctx), pxfer(m.pxfer), p(m.p) { + m.p = NULL; + m.pxfer = NULL; +} + +mapping::~mapping() { + if (pxfer) { + pctx->transfer_unmap(pctx, pxfer); + pctx->transfer_destroy(pctx, pxfer); + } +} diff --git a/src/gallium/state_trackers/clover/core/resource.hpp b/src/gallium/state_trackers/clover/core/resource.hpp new file mode 100644 index 00000000000..947060139ec --- /dev/null +++ b/src/gallium/state_trackers/clover/core/resource.hpp @@ -0,0 +1,129 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
#ifndef __CORE_RESOURCE_HPP__
#define __CORE_RESOURCE_HPP__

#include <list>

#include "core/base.hpp"
#include "core/memory.hpp"
#include "core/geometry.hpp"
#include "pipe/p_state.h"

namespace clover {
   class mapping;

   ///
   /// Class that represents a device-specific instance of some memory
   /// object.
   ///
   class resource {
   public:
      /// Three-component coordinate used for origins, sizes and offsets.
      typedef clover::point<size_t, 3> point;

      resource(const resource &r) = delete;
      virtual ~resource();

      /// Copy \a region from \a src_resource at \a src_origin into this
      /// resource at \a origin.
      void copy(command_queue &q, const point &origin, const point &region,
                resource &src_resource, const point &src_origin);

      /// Map part of the resource and return the CPU pointer; the
      /// mapping lives until del_map() is called with that pointer.
      void *add_map(command_queue &q, cl_map_flags flags, bool blocking,
                    const point &origin, const point &region);
      void del_map(void *p);
      unsigned map_count() const;

      clover::device &dev;
      clover::memory_obj &obj;

      friend class sub_resource;
      friend class mapping;
      friend struct ::_cl_kernel;

   protected:
      resource(clover::device &dev, clover::memory_obj &obj);

      pipe_sampler_view *bind_sampler_view(clover::command_queue &q);
      void unbind_sampler_view(clover::command_queue &q,
                               pipe_sampler_view *st);

      pipe_surface *bind_surface(clover::command_queue &q, bool rw);
      void unbind_surface(clover::command_queue &q, pipe_surface *st);

      // Underlying pipe resource and the offset of this resource's data
      // within it.
      pipe_resource *pipe;
      point offset;

   private:
      // std::list keeps existing mappings in place while others are
      // added or erased.
      std::list<mapping> maps;
   };

   ///
   /// Resource associated with its own top-level data storage
   /// allocated in some device.
   ///
   class root_resource : public resource {
   public:
      /// Allocate storage on \a dev and optionally upload \a data.
      root_resource(clover::device &dev, clover::memory_obj &obj,
                    clover::command_queue &q, const std::string &data);
      root_resource(clover::device &dev, clover::memory_obj &obj,
                    root_resource &r);
      virtual ~root_resource();
   };

   ///
   /// Resource that reuses a portion of some other resource as data
   /// storage.
   ///
   class sub_resource : public resource {
   public:
      sub_resource(clover::resource &r, point offset);
   };

   ///
   /// Class that represents a mapping of some resource into the CPU
   /// memory space.
   ///
   class mapping {
   public:
      mapping(command_queue &q, resource &r, cl_map_flags flags,
              bool blocking, const resource::point &origin,
              const resource::point &region);
      // Movable but not copyable: a mapping owns its transfer.
      mapping(const mapping &m) = delete;
      mapping(mapping &&m);
      ~mapping();

      /// CPU address of the mapped region.
      operator void *() {
         return p;
      }

      operator char *() {
         return (char *)p;
      }

   private:
      pipe_context *pctx;
      pipe_transfer *pxfer;
      void *p;
   };
}

#endif
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include "core/sampler.hpp" +#include "pipe/p_state.h" + +using namespace clover; + +_cl_sampler::_cl_sampler(clover::context &ctx, bool norm_mode, + cl_addressing_mode addr_mode, + cl_filter_mode filter_mode) : + ctx(ctx), __norm_mode(norm_mode), + __addr_mode(addr_mode), __filter_mode(filter_mode) { +} + +bool +_cl_sampler::norm_mode() { + return __norm_mode; +} + +cl_addressing_mode +_cl_sampler::addr_mode() { + return __addr_mode; +} + +cl_filter_mode +_cl_sampler::filter_mode() { + return __filter_mode; +} + +void * +_cl_sampler::bind(clover::command_queue &q) { + struct pipe_sampler_state info {}; + + info.normalized_coords = norm_mode(); + + info.wrap_s = info.wrap_t = info.wrap_r = + (addr_mode() == CL_ADDRESS_CLAMP_TO_EDGE ? PIPE_TEX_WRAP_CLAMP_TO_EDGE : + addr_mode() == CL_ADDRESS_CLAMP ? PIPE_TEX_WRAP_CLAMP_TO_BORDER : + addr_mode() == CL_ADDRESS_REPEAT ? PIPE_TEX_WRAP_REPEAT : + addr_mode() == CL_ADDRESS_MIRRORED_REPEAT ? PIPE_TEX_WRAP_MIRROR_REPEAT : + PIPE_TEX_WRAP_CLAMP_TO_EDGE); + + info.min_img_filter = info.mag_img_filter = + (filter_mode() == CL_FILTER_LINEAR ? 
PIPE_TEX_FILTER_LINEAR : + PIPE_TEX_FILTER_NEAREST); + + return q.pipe->create_sampler_state(q.pipe, &info); +} + +void +_cl_sampler::unbind(clover::command_queue &q, void *st) { + q.pipe->delete_sampler_state(q.pipe, st); +} diff --git a/src/gallium/state_trackers/clover/core/sampler.hpp b/src/gallium/state_trackers/clover/core/sampler.hpp new file mode 100644 index 00000000000..5bb5bccb1a1 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/sampler.hpp @@ -0,0 +1,55 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// + +#ifndef __CORE_SAMPLER_HPP__ +#define __CORE_SAMPLER_HPP__ + +#include "core/base.hpp" +#include "core/queue.hpp" + +namespace clover { + typedef struct _cl_sampler sampler; +} + +struct _cl_sampler : public clover::ref_counter { +public: + _cl_sampler(clover::context &ctx, bool norm_mode, + cl_addressing_mode addr_mode, cl_filter_mode filter_mode); + + bool norm_mode(); + cl_addressing_mode addr_mode(); + cl_filter_mode filter_mode(); + + clover::context &ctx; + + friend class _cl_kernel; + +private: + void *bind(clover::command_queue &q); + void unbind(clover::command_queue &q, void *st); + + bool __norm_mode; + cl_addressing_mode __addr_mode; + cl_filter_mode __filter_mode; +}; + +#endif diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp new file mode 100644 index 00000000000..89e21bf9289 --- /dev/null +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -0,0 +1,94 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
#include "core/compiler.hpp"

// The clang/LLVM based front end is compiled out for now; only the stub
// at the bottom of this file is built.
#if 0
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Frontend/TextDiagnosticPrinter.h>
#include <clang/CodeGen/CodeGenAction.h>
#include <llvm/LLVMContext.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/MemoryBuffer.h>

#include <iostream>
#include <iomanip>
#include <fstream>
#include <cstdio>
#endif

using namespace clover;

#if 0
namespace {
   /// Compile \a source as OpenCL C for the target triple \a target,
   /// presenting it to clang as an in-memory file called \a name.
   /// Diagnostics are collected into a string that becomes the
   /// build_error message on failure.
   void
   build_binary(const std::string &source, const std::string &target,
                const std::string &name) {
      clang::CompilerInstance c;
      clang::EmitObjAction act(&llvm::getGlobalContext());
      std::string log;
      llvm::raw_string_ostream s_log(log);

      LLVMInitializeTGSITarget();
      LLVMInitializeTGSITargetInfo();
      LLVMInitializeTGSITargetMC();
      LLVMInitializeTGSIAsmPrinter();

      c.getFrontendOpts().Inputs.push_back(
         std::make_pair(clang::IK_OpenCL, name));
      c.getHeaderSearchOpts().UseBuiltinIncludes = false;
      c.getHeaderSearchOpts().UseStandardIncludes = false;
      c.getLangOpts().NoBuiltin = true;
      c.getTargetOpts().Triple = target;
      c.getInvocation().setLangDefaults(clang::IK_OpenCL);
      c.createDiagnostics(0, NULL, new clang::TextDiagnosticPrinter(
                             s_log, c.getDiagnosticOpts()));

      // Feed the source from memory instead of reading `name' from disk.
      c.getPreprocessorOpts().addRemappedFile(
         name, llvm::MemoryBuffer::getMemBuffer(source));

      if (!c.ExecuteAction(act))
         throw build_error(log);
   }

   /// Read the file \a name and deserialize it as a clover module.
   module
   load_binary(const char *name) {
      std::ifstream fs((name));
      std::vector<unsigned char> str((std::istreambuf_iterator<char>(fs)),
                                     (std::istreambuf_iterator<char>()));
      compat::istream cs(str);
      return module::deserialize(cs);
   }
}
#endif

/// LLVM front end entry point.  With the implementation compiled out
/// this ignores both arguments and returns an empty module.
module
clover::compile_program_llvm(const compat::string &source,
                             const compat::string &target) {
#if 0
   build_binary(source, target, "cl_input");
   module m = load_binary("cl_input.o");
   std::remove("cl_input.o");
   return m;
#endif
   return module();
}
+// + +#include <sstream> + +#include "core/compiler.hpp" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_text.h" +#include "util/u_memory.h" + +using namespace clover; + +namespace { + void + read_header(const std::string &header, module &m) { + std::istringstream ls(header); + std::string line; + + while (getline(ls, line)) { + std::istringstream ts(line); + std::string name, tok; + module::size_t offset; + compat::vector<module::argument> args; + + if (!(ts >> name)) + continue; + + if (!(ts >> offset)) + throw build_error("invalid kernel start address"); + + while (ts >> tok) { + if (tok == "scalar") + args.push_back({ module::argument::scalar, 4 }); + else if (tok == "global") + args.push_back({ module::argument::global, 4 }); + else if (tok == "local") + args.push_back({ module::argument::local, 4 }); + else if (tok == "constant") + args.push_back({ module::argument::constant, 4 }); + else if (tok == "image2d_rd") + args.push_back({ module::argument::image2d_rd, 4 }); + else if (tok == "image2d_wr") + args.push_back({ module::argument::image2d_wr, 4 }); + else if (tok == "image3d_rd") + args.push_back({ module::argument::image3d_rd, 4 }); + else if (tok == "image3d_wr") + args.push_back({ module::argument::image3d_wr, 4 }); + else if (tok == "sampler") + args.push_back({ module::argument::sampler, 0 }); + else + throw build_error("invalid kernel argument"); + } + + m.syms.push_back({ name, 0, offset, args }); + } + } + + void + read_body(const char *source, module &m) { + tgsi_token prog[1024]; + + if (!tgsi_text_translate(source, prog, Elements(prog))) + throw build_error("translate failed"); + + unsigned sz = tgsi_num_tokens(prog) * sizeof(tgsi_token); + m.secs.push_back({ 0, module::section::text, sz, { (char *)prog, sz } }); + } +} + +module +clover::compile_program_tgsi(const compat::string &source, + const compat::string &target) { + const char *body = source.find("COMP\n"); + module m; + + read_header({ source.begin(), body }, m); + 
read_body(body, m); + + return m; +} diff --git a/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp b/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp index 392fd3e0921..aaa46f19e8c 100644 --- a/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp +++ b/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp @@ -446,7 +446,7 @@ struct sm4_to_tgsi_converter break; case SM4_OPCODE_RESINFO: // TODO: return type - ureg_RESINFO(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]); + ureg_SVIEWINFO(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]); break; // TODO: sample index, texture offset case SM4_OPCODE_LD: // dst, coord_int, res; mipmap level in last coord_int arg @@ -750,11 +750,12 @@ next:; } if(resources.size() <= (unsigned)idx) resources.resize(idx + 1); - resources[idx] = ureg_DECL_resource(ureg, idx, targets[idx].first, - res_return_type(dcl.rrt.x), - res_return_type(dcl.rrt.y), - res_return_type(dcl.rrt.z), - res_return_type(dcl.rrt.w)); + resources[idx] = ureg_DECL_sampler_view( + ureg, idx, targets[idx].first, + res_return_type(dcl.rrt.x), + res_return_type(dcl.rrt.y), + res_return_type(dcl.rrt.z), + res_return_type(dcl.rrt.w)); break; case SM4_OPCODE_DCL_SAMPLER: check(idx >= 0); diff --git a/src/gallium/state_trackers/egl/fbdev/native_fbdev.c b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c index b45ab5c4f2e..b17a8ce78c9 100644 --- a/src/gallium/state_trackers/egl/fbdev/native_fbdev.c +++ b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c @@ -41,6 +41,7 @@ * - no pixmap support */ +#include <errno.h> #include <sys/ioctl.h> #include <sys/types.h> #include <sys/stat.h> diff --git a/src/gallium/state_trackers/gbm/gbm_drm.c b/src/gallium/state_trackers/gbm/gbm_drm.c index d4baf87096a..a327fdaae64 100644 --- a/src/gallium/state_trackers/gbm/gbm_drm.c +++ b/src/gallium/state_trackers/gbm/gbm_drm.c @@ -190,10 +190,7 @@ gbm_gallium_drm_destroy(struct gbm_device *gbm) { struct gbm_gallium_drm_device *gdrm = 
gbm_gallium_drm_device(gbm); - gdrm->screen->destroy(gdrm->screen); - - FREE(gdrm->base.driver_name); - + gallium_screen_destroy(gdrm); FREE(gdrm); } diff --git a/src/gallium/state_trackers/gbm/gbm_gallium_drmint.h b/src/gallium/state_trackers/gbm/gbm_gallium_drmint.h index 6277b8dba2e..a5d6d834737 100644 --- a/src/gallium/state_trackers/gbm/gbm_gallium_drmint.h +++ b/src/gallium/state_trackers/gbm/gbm_gallium_drmint.h @@ -71,4 +71,7 @@ gbm_gallium_drm_device_create(int fd); int gallium_screen_create(struct gbm_gallium_drm_device *gdrm); +void +gallium_screen_destroy(struct gbm_gallium_drm_device *gdrm); + #endif diff --git a/src/gallium/targets/gbm/Makefile b/src/gallium/targets/gbm/Makefile index cd0c61080d8..423debf176d 100644 --- a/src/gallium/targets/gbm/Makefile +++ b/src/gallium/targets/gbm/Makefile @@ -3,192 +3,39 @@ TOP = ../../../.. include $(TOP)/configs/current -PIPE_PREFIX := pipe_ - GBM_BACKEND = gbm_gallium_drm -GBM_SOURCES = gbm.c pipe_loader.c +GBM_SOURCES = gbm.c GBM_INCLUDES = \ -I$(TOP)/include \ -I$(TOP)/src/gallium/state_trackers/gbm \ -I$(TOP)/src/gbm/main \ -I$(TOP)/src/gallium/auxiliary \ - -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/winsys \ + -I$(TOP)/src/gallium/include GBM_LIBS = $(LIBUDEV_LIBS) $(LIBDRM_LIB) -lm \ $(TOP)/src/gallium/state_trackers/gbm/libgbm.a \ - $(TOP)/src/gallium/drivers/identity/libidentity.a \ - $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ - $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ - $(GALLIUM_AUXILIARIES) - + $(GALLIUM_PIPE_LOADER_LIBS) $(GALLIUM_AUXILIARIES) GBM_CFLAGS = \ - -DGBM_BACKEND_SEARCH_DIR=\"$(INSTALL_LIB_DIR)/gbm\" \ - -DPIPE_PREFIX=\"$(PIPE_PREFIX)\" \ + -DPIPE_SEARCH_DIR=\"$(PIPE_INSTALL_DIR)\" \ + $(GALLIUM_PIPE_LOADER_DEFINES) \ $(LIBUDEV_CFLAGS) \ - $(LIBDRM_CFLAGS) - - -pipe_INCLUDES = \ - -I$(TOP)/include \ - -I$(TOP)/src/gallium/auxiliary \ - -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/src/gallium/include \ - 
-I$(TOP)/src/gallium/winsys - -pipe_LIBS = \ - $(TOP)/src/gallium/drivers/identity/libidentity.a \ - $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ - $(GALLIUM_AUXILIARIES) - -# as if we are DRI modules -pipe_SYS = $(DRI_LIB_DEPS) - -pipe_CLFLAGS = \ - -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD \ - $(LIBDRM_CFLAGS) - -pipe_LDFLAGS = -Wl,--no-undefined - -# i915 pipe driver -i915_LIBS = \ - $(TOP)/src/gallium/winsys/i915/drm/libi915drm.a \ - $(TOP)/src/gallium/drivers/i915/libi915.a -i915_SYS = -ldrm_intel - -# nouveau pipe driver -nouveau_LIBS = \ - $(TOP)/src/gallium/winsys/nouveau/drm/libnouveaudrm.a \ - $(TOP)/src/gallium/drivers/nv30/libnv30.a \ - $(TOP)/src/gallium/drivers/nv50/libnv50.a \ - $(TOP)/src/gallium/drivers/nvc0/libnvc0.a \ - $(TOP)/src/gallium/drivers/nouveau/libnouveau.a -nouveau_SYS = -ldrm_nouveau - -# r300 pipe driver -r300_LIBS = \ - $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ - $(TOP)/src/gallium/drivers/r300/libr300.a -r300_SYS += -ldrm_radeon - -# r600 pipe driver -r600_LIBS = \ - $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ - $(TOP)/src/gallium/drivers/r600/libr600.a -r600_SYS += -ldrm_radeon - -# radeonsi pipe driver -radeonsi_LIBS = \ - $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ - $(TOP)/src/gallium/drivers/radeonsi/libradeonsi.a -radeonsi_SYS += -ldrm_radeon - -# vmwgfx pipe driver -vmwgfx_LIBS = \ - $(TOP)/src/gallium/winsys/svga/drm/libsvgadrm.a \ - $(TOP)/src/gallium/drivers/svga/libsvga.a - - + $(LIBDRM_CFLAGS) -# LLVM -ifeq ($(MESA_LLVM),1) -pipe_SYS += $(LLVM_LIBS) -pipe_LDFLAGS += $(LLVM_LDFLAGS) -endif - -ifneq ($(findstring llvmpipe,$(GALLIUM_DRIVERS_DIRS)),) -pipe_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a -endif - -# determine the targets/sources -_pipe_TARGETS_CC = -_pipe_TARGETS_CXX = -pipe_SOURCES = - -ifneq ($(findstring i915/drm,$(GALLIUM_WINSYS_DIRS)),) -_pipe_TARGETS_CC += $(PIPE_PREFIX)i915.so -pipe_SOURCES += 
pipe_i915.c -endif - -ifneq ($(findstring nouveau/drm,$(GALLIUM_WINSYS_DIRS)),) -_pipe_TARGETS_CXX += $(PIPE_PREFIX)nouveau.so -pipe_SOURCES += pipe_nouveau.c -endif - -ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),) -ifneq ($(findstring r300,$(GALLIUM_DRIVERS_DIRS)),) -_pipe_TARGETS_CC += $(PIPE_PREFIX)r300.so -pipe_SOURCES += pipe_r300.c -endif -endif - -ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),) -ifneq ($(findstring r600,$(GALLIUM_DRIVERS_DIRS)),) -_pipe_TARGETS_CC += $(PIPE_PREFIX)r600.so -pipe_SOURCES += pipe_r600.c -endif -endif - -ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),) -ifneq ($(findstring radeonsi,$(GALLIUM_DRIVERS_DIRS)),) -_pipe_TARGETS_CC += $(PIPE_PREFIX)radeonsi.so -pipe_SOURCES += pipe_radeonsi.c -endif -endif - -ifneq ($(findstring svga/drm,$(GALLIUM_WINSYS_DIRS)),) -_pipe_TARGETS_CC += $(PIPE_PREFIX)vmwgfx.so -pipe_SOURCES += pipe_vmwgfx.c -endif - -pipe_OBJECTS = $(pipe_SOURCES:.c=.o) - -ifeq ($(MESA_LLVM),1) -pipe_TARGETS_CXX = $(_pipe_TARGETS_CXX) $(_pipe_TARGETS_CC) -pipe_TARGETS_CC = -else -pipe_TARGETS_CXX = $(_pipe_TARGETS_CXX) -pipe_TARGETS_CC = $(_pipe_TARGETS_CC) -endif - -GBM_EXTRA_TARGETS = $(addprefix $(TOP)/$(LIB_DIR)/gbm/, $(pipe_TARGETS_CC)) $(addprefix $(TOP)/$(LIB_DIR)/gbm/, $(pipe_TARGETS_CXX)) +GBM_EXTRA_TARGETS = pipes GBM_EXTRA_INSTALL = install-pipes GBM_EXTRA_CLEAN = clean-pipes -GBM_EXTRA_SOURCES = $(pipe_SOURCES) include $(TOP)/src/gbm/backends/Makefile.template +PIPE_SRC_DIR = $(TOP)/src/gallium/targets/pipe-loader +PIPE_INSTALL_DIR = $(INSTALL_LIB_DIR)/gbm -$(GBM_EXTRA_TARGETS): $(TOP)/$(LIB_DIR)/gbm/%: % - @$(INSTALL) -d $(dir $@) - $(INSTALL) $< $(dir $@) - -$(pipe_TARGETS_CC): $(PIPE_PREFIX)%.so: pipe_%.o $(pipe_LIBS) $($*_LIBS) - $(MKLIB) -o $@ -noprefix -linker '$(CC)' \ - -ldflags '-L$(TOP)/$(LIB_DIR) $(pipe_LDFLAGS) $(LDFLAGS)' \ - $(MKLIB_OPTIONS) $< \ - -Wl,--start-group $(pipe_LIBS) $($*_LIBS) -Wl,--end-group \ - $(pipe_SYS) $($*_SYS) - -$(pipe_TARGETS_CXX): 
$(PIPE_PREFIX)%.so: pipe_%.o $(pipe_LIBS) $($*_LIBS) - $(MKLIB) -o $@ -noprefix -linker '$(CXX)' \ - -ldflags '-L$(TOP)/$(LIB_DIR) $(pipe_LDFLAGS) $(LDFLAGS)' \ - $(MKLIB_OPTIONS) $< \ - -Wl,--start-group $(pipe_LIBS) $($*_LIBS) -Wl,--end-group \ - $(pipe_SYS) $($*_SYS) - -$(pipe_OBJECTS): %.o: %.c - $(CC) -c -o $@ $< $(pipe_INCLUDES) $(pipe_CFLAGS) $(CFLAGS) - -install-pipes: $(GBM_EXTRA_TARGETS) - $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/gbm - for tgt in $(GBM_EXTRA_TARGETS); do \ - $(MINSTALL) "$$tgt" $(DESTDIR)$(INSTALL_LIB_DIR)/gbm; \ - done - +pipes: + @$(MAKE) -C $(PIPE_SRC_DIR) +install-pipes: + @$(MAKE) -C $(PIPE_SRC_DIR) PIPE_INSTALL_DIR=$(PIPE_INSTALL_DIR) install clean-pipes: - rm -f $(pipe_TARGETS) - rm -f $(pipe_OBJECTS) + @$(MAKE) -C $(PIPE_SRC_DIR) clean diff --git a/src/gallium/targets/gbm/gbm.c b/src/gallium/targets/gbm/gbm.c index e840fc5fa1a..7d2af513db8 100644 --- a/src/gallium/targets/gbm/gbm.c +++ b/src/gallium/targets/gbm/gbm.c @@ -25,36 +25,56 @@ * Benjamin Franzke <[email protected]> */ -#include "util/u_inlines.h" - #include "gbm_gallium_drmint.h" -#include "pipe_loader.h" -static struct pipe_screen * -create_drm_screen(const char *name, int fd) +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "pipe-loader/pipe_loader.h" + +static const char * +get_library_search_path(void) { - struct pipe_module *pmod = get_pipe_module(name); - - return (pmod && pmod->drmdd && pmod->drmdd->create_screen) ? 
- pmod->drmdd->create_screen(fd) : NULL; + const char *search_path = NULL; + + /* don't allow setuid apps to use GBM_BACKENDS_PATH */ + if (geteuid() == getuid()) + search_path = getenv("GBM_BACKENDS_PATH"); + if (search_path == NULL) + search_path = PIPE_SEARCH_DIR; + + return search_path; } int gallium_screen_create(struct gbm_gallium_drm_device *gdrm) { - gdrm->base.driver_name = drm_fd_get_screen_name(gdrm->base.base.fd); - if (gdrm->base.driver_name == NULL) + struct pipe_loader_device *dev; + int ret; + + ret = pipe_loader_drm_probe_fd(&dev, gdrm->base.base.fd); + if (!ret) return -1; - gdrm->screen = create_drm_screen(gdrm->base.driver_name, gdrm->base.base.fd); + gdrm->screen = pipe_loader_create_screen(dev, get_library_search_path()); if (gdrm->screen == NULL) { debug_printf("failed to load driver: %s\n", gdrm->base.driver_name); + pipe_loader_release(&dev, 1); return -1; }; + gdrm->driver = dev; + gdrm->base.driver_name = strdup(dev->driver_name); return 0; } +void +gallium_screen_destroy(struct gbm_gallium_drm_device *gdrm) +{ + FREE(gdrm->base.driver_name); + gdrm->screen->destroy(gdrm->screen); + pipe_loader_release((struct pipe_loader_device **)&gdrm->driver, 1); +} + GBM_EXPORT struct gbm_backend gbm_backend = { .backend_name = "gallium_drm", .create_device = gbm_gallium_drm_device_create, diff --git a/src/gallium/targets/gbm/pipe_loader.c b/src/gallium/targets/gbm/pipe_loader.c deleted file mode 100644 index 6200541dbf0..00000000000 --- a/src/gallium/targets/gbm/pipe_loader.c +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is 
furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Kristian Høgsberg <[email protected]> - * Benjamin Franzke <[email protected]> - */ - -#include <stdio.h> -#include "util/u_string.h" -#include "util/u_memory.h" - -#include <libudev.h> - -#include "gbm_gallium_drmint.h" -#include "pipe_loader.h" -#define DRIVER_MAP_GALLIUM_ONLY -#include "pci_ids/pci_id_driver_map.h" - -static struct pipe_module pipe_modules[16]; - -static INLINE char * -loader_strdup(const char *str) -{ - return mem_dup(str, strlen(str) + 1); -} - -char * -drm_fd_get_screen_name(int fd) -{ - struct udev *udev; - struct udev_device *device, *parent; - const char *pci_id; - char *driver = NULL; - int vendor_id, chip_id, i, j; - - udev = udev_new(); - device = _gbm_udev_device_new_from_fd(udev, fd); - if (device == NULL) - return NULL; - - parent = udev_device_get_parent(device); - if (parent == NULL) { - fprintf(stderr, "gbm: could not get parent device"); - goto out; - } - - pci_id = udev_device_get_property_value(parent, "PCI_ID"); - if (pci_id == NULL || - sscanf(pci_id, "%x:%x", &vendor_id, &chip_id) != 2) { - fprintf(stderr, "gbm: malformed or no PCI ID"); - goto out; - } - - for (i = 0; driver_map[i].driver; i++) { - if (vendor_id != driver_map[i].vendor_id) - continue; - if (driver_map[i].num_chips_ids == 
-1) { - driver = loader_strdup(driver_map[i].driver); - _gbm_log("pci id for %d: %04x:%04x, driver %s", - fd, vendor_id, chip_id, driver); - goto out; - } - - for (j = 0; j < driver_map[i].num_chips_ids; j++) - if (driver_map[i].chip_ids[j] == chip_id) { - driver = loader_strdup(driver_map[i].driver); - _gbm_log("pci id for %d: %04x:%04x, driver %s", - fd, vendor_id, chip_id, driver); - goto out; - } - } - -out: - udev_device_unref(device); - udev_unref(udev); - - return driver; -} - -static void -find_pipe_module(struct pipe_module *pmod, const char *name) -{ - char *search_paths, *end, *next, *p; - char path[PATH_MAX]; - int ret; - - search_paths = NULL; - if (geteuid() == getuid()) { - /* don't allow setuid apps to use GBM_BACKENDS_PATH */ - search_paths = getenv("GBM_BACKENDS_PATH"); - } - if (search_paths == NULL) - search_paths = GBM_BACKEND_SEARCH_DIR; - - end = search_paths + strlen(search_paths); - for (p = search_paths; p < end && pmod->lib == NULL; p = next + 1) { - int len; - next = strchr(p, ':'); - if (next == NULL) - next = end; - - len = next - p; - - if (len) { - ret = util_snprintf(path, sizeof(path), - "%.*s/" PIPE_PREFIX "%s" UTIL_DL_EXT, len, p, pmod->name); - } - else { - ret = util_snprintf(path, sizeof(path), - PIPE_PREFIX "%s" UTIL_DL_EXT, pmod->name); - } - if (ret > 0 && ret < sizeof(path)) { - pmod->lib = util_dl_open(path); - debug_printf("loaded %s\n", path); - } - - } -} - -static boolean -load_pipe_module(struct pipe_module *pmod, const char *name) -{ - pmod->name = loader_strdup(name); - if (!pmod->name) - return FALSE; - - find_pipe_module(pmod, name); - - if (pmod->lib) { - pmod->drmdd = (const struct drm_driver_descriptor *) - util_dl_get_proc_address(pmod->lib, "driver_descriptor"); - - /* sanity check on the name */ - if (pmod->drmdd && strcmp(pmod->drmdd->name, pmod->name) != 0) - pmod->drmdd = NULL; - - if (!pmod->drmdd) { - util_dl_close(pmod->lib); - pmod->lib = NULL; - } - } - - return (pmod->drmdd != NULL); -} - -struct 
pipe_module * -get_pipe_module(const char *name) -{ - struct pipe_module *pmod = NULL; - int i; - - if (!name) - return NULL; - - for (i = 0; i < Elements(pipe_modules); i++) { - if (!pipe_modules[i].initialized || - strcmp(pipe_modules[i].name, name) == 0) { - pmod = &pipe_modules[i]; - break; - } - } - if (!pmod) - return NULL; - - if (!pmod->initialized) { - load_pipe_module(pmod, name); - pmod->initialized = TRUE; - } - - return pmod; -} diff --git a/src/gallium/targets/gbm/pipe_loader.h b/src/gallium/targets/gbm/pipe_loader.h deleted file mode 100644 index 2e4cd9906b7..00000000000 --- a/src/gallium/targets/gbm/pipe_loader.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Benjamin Franzke <[email protected]> - */ - -#ifndef _PIPE_LOADER_H_ -#define _PIPE_LOADER_H_ - -#include "pipe/p_compiler.h" -#include "util/u_dl.h" -#include "state_tracker/drm_driver.h" - -struct pipe_module { - boolean initialized; - char *name; - struct util_dl_library *lib; - const struct drm_driver_descriptor *drmdd; -}; - -struct pipe_module * -get_pipe_module(const char *name); - -char * -drm_fd_get_screen_name(int fd); - -#endif diff --git a/src/gallium/targets/opencl/Makefile.am b/src/gallium/targets/opencl/Makefile.am new file mode 100644 index 00000000000..0d233c11b8d --- /dev/null +++ b/src/gallium/targets/opencl/Makefile.am @@ -0,0 +1,36 @@ +AUTOMAKE_OPTIONS = subdir-objects + +lib_LTLIBRARIES = libOpenCL.la + +libOpenCL_la_LDFLAGS = \ + -version-number 1:0 + +libOpenCL_la_LIBADD = \ + $(top_builddir)/src/gallium/state_trackers/clover/libclover.la \ + $(top_builddir)/src/gallium/auxiliary/libgallium.a \ + $(GALLIUM_PIPE_LOADER_LIBS) $(LIBUDEV_LIBS) \ + -ldl + +libOpenCL_la_SOURCES = + +# Force usage of a C++ linker +nodist_EXTRA_libOpenCL_la_SOURCES = dummy.cpp + +PIPE_SRC_DIR = $(top_srcdir)/src/gallium/targets/pipe-loader + +# Provide compatibility with scripts for the old Mesa build system for +# a while by putting a link to the driver into /lib of the build tree. +all-local: libOpenCL.la + @$(MAKE) -C $(PIPE_SRC_DIR) + $(MKDIR_P) $(top_builddir)/$(LIB_DIR) + ln -f .libs/libOpenCL.so* $(top_builddir)/$(LIB_DIR)/ + +install-exec-local: + @$(MAKE) -C $(PIPE_SRC_DIR) PIPE_INSTALL_DIR=$(OPENCL_LIB_INSTALL_DIR) install + +clean-local: + @$(MAKE) -C $(PIPE_SRC_DIR) clean + +# FIXME: Remove when the rest of Gallium is converted to automake. 
+TOP=$(top_builddir) +default: all diff --git a/src/gallium/targets/pipe-loader/Makefile b/src/gallium/targets/pipe-loader/Makefile new file mode 100644 index 00000000000..eb3b4fc8aa3 --- /dev/null +++ b/src/gallium/targets/pipe-loader/Makefile @@ -0,0 +1,165 @@ +# Makefile for building pipe driver shared libraries. +# +# Input variables: PIPE_INSTALL_DIR, PIPE_PREFIX (optional) +# +TOP = ../../../.. +include $(TOP)/configs/current + +PIPE_PREFIX ?= pipe_ + +PIPE_CPPFLAGS = \ + -DGALLIUM_RBUG \ + -DGALLIUM_TRACE \ + -DGALLIUM_GALAHAD \ + -I$(TOP)/include \ + -I$(TOP)/src/gallium/auxiliary \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/winsys + +PIPE_LIBS = \ + $(TOP)/src/gallium/drivers/identity/libidentity.a \ + $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/rbug/librbug.a \ + $(GALLIUM_AUXILIARIES) + +PIPE_SYS = $(LIBDRM_LIB) -lm -lpthread $(DLOPEN_LIBS) + +PIPE_CFLAGS = $(LIBDRM_CFLAGS) + +PIPE_LDFLAGS = -Wl,--no-undefined + +# i915 pipe driver +i915_LIBS = \ + $(TOP)/src/gallium/winsys/i915/drm/libi915drm.a \ + $(TOP)/src/gallium/drivers/i915/libi915.a +i915_SYS = -ldrm_intel + +# nouveau pipe driver +nouveau_LIBS = \ + $(TOP)/src/gallium/winsys/nouveau/drm/libnouveaudrm.a \ + $(TOP)/src/gallium/drivers/nv30/libnv30.a \ + $(TOP)/src/gallium/drivers/nv50/libnv50.a \ + $(TOP)/src/gallium/drivers/nvc0/libnvc0.a \ + $(TOP)/src/gallium/drivers/nouveau/libnouveau.a +nouveau_SYS = -ldrm_nouveau + +# r300 pipe driver +r300_LIBS = \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ + $(TOP)/src/gallium/drivers/r300/libr300.a +r300_SYS += -ldrm_radeon + +# r600 pipe driver +r600_LIBS = \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ + $(TOP)/src/gallium/drivers/r600/libr600.a +r600_SYS += -ldrm_radeon + +# vmwgfx pipe driver +vmwgfx_LIBS = \ + $(TOP)/src/gallium/winsys/svga/drm/libsvgadrm.a \ + 
$(TOP)/src/gallium/drivers/svga/libsvga.a + +ifneq ($(findstring llvmpipe,$(GALLIUM_DRIVERS_DIRS)),) + swrast_LIBS = $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a + PIPE_CFLAGS += -DGALLIUM_LLVMPIPE +else ifneq ($(findstring softpipe,$(GALLIUM_DRIVERS_DIRS)),) + swrast_LIBS = $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a + PIPE_CFLAGS += -DGALLIUM_SOFTPIPE +endif + +# LLVM +ifeq ($(MESA_LLVM),1) + PIPE_SYS += $(LLVM_LIBS) + PIPE_LDFLAGS += $(LLVM_LDFLAGS) +endif + +# determine the targets/sources +_PIPE_TARGETS_CC = +_PIPE_TARGETS_CXX = +PIPE_SOURCES = + +ifneq ($(findstring i915/drm,$(GALLIUM_WINSYS_DIRS)),) + _PIPE_TARGETS_CC += $(PIPE_PREFIX)i915.so + PIPE_SOURCES += pipe_i915.c +endif + +ifneq ($(findstring nouveau/drm,$(GALLIUM_WINSYS_DIRS)),) + _PIPE_TARGETS_CXX += $(PIPE_PREFIX)nouveau.so + PIPE_SOURCES += pipe_nouveau.c +endif + +ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),) +ifneq ($(findstring r300,$(GALLIUM_DRIVERS_DIRS)),) + _PIPE_TARGETS_CC += $(PIPE_PREFIX)r300.so + PIPE_SOURCES += pipe_r300.c +endif +endif + +ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),) +ifneq ($(findstring r600,$(GALLIUM_DRIVERS_DIRS)),) + _PIPE_TARGETS_CC += $(PIPE_PREFIX)r600.so + PIPE_SOURCES += pipe_r600.c +endif +endif + +ifneq ($(findstring svga/drm,$(GALLIUM_WINSYS_DIRS)),) + _PIPE_TARGETS_CC += $(PIPE_PREFIX)vmwgfx.so + PIPE_SOURCES += pipe_vmwgfx.c +endif + +ifneq ($(filter llvmpipe softpipe,$(GALLIUM_DRIVERS_DIRS)),) + _PIPE_TARGETS_CC += $(PIPE_PREFIX)swrast.so + PIPE_SOURCES += pipe_swrast.c +endif + +PIPE_OBJECTS := $(PIPE_SOURCES:.c=.o) + +ifeq ($(MESA_LLVM),1) + PIPE_TARGETS_CXX = $(_PIPE_TARGETS_CXX) $(_PIPE_TARGETS_CC) + PIPE_TARGETS_CC = +else + PIPE_TARGETS_CXX = $(_PIPE_TARGETS_CXX) + PIPE_TARGETS_CC = $(_PIPE_TARGETS_CC) +endif + +PIPE_TARGETS = $(PIPE_TARGETS_CC) $(PIPE_TARGETS_CXX) + +default: depend $(PIPE_TARGETS) + +.SECONDEXPANSION: + +$(PIPE_TARGETS_CC): $(PIPE_PREFIX)%.so: pipe_%.o $(PIPE_LIBS) $$(%_LIBS) + $(MKLIB) -o $@ 
-noprefix -linker '$(CC)' \ + -ldflags '-L$(TOP)/$(LIB_DIR) $(PIPE_LDFLAGS) $(LDFLAGS)' \ + $(MKLIB_OPTIONS) $< \ + -Wl,--start-group $(PIPE_LIBS) $($*_LIBS) -Wl,--end-group \ + $(PIPE_SYS) $($*_SYS) + +$(PIPE_TARGETS_CXX): $(PIPE_PREFIX)%.so: pipe_%.o $(PIPE_LIBS) $$(%_LIBS) + $(MKLIB) -o $@ -noprefix -linker '$(CXX)' \ + -ldflags '-L$(TOP)/$(LIB_DIR) $(PIPE_LDFLAGS) $(LDFLAGS)' \ + $(MKLIB_OPTIONS) $< \ + -Wl,--start-group $(PIPE_LIBS) $($*_LIBS) -Wl,--end-group \ + $(PIPE_SYS) $($*_SYS) + +$(PIPE_OBJECTS): %.o: %.c + $(CC) -c -o $@ $< $(PIPE_CPPFLAGS) $(PIPE_CFLAGS) $(CFLAGS) + +install: $(PIPE_TARGETS) + $(INSTALL) -d $(DESTDIR)/$(PIPE_INSTALL_DIR) + for tgt in $(PIPE_TARGETS); do \ + $(MINSTALL) "$$tgt" $(DESTDIR)/$(PIPE_INSTALL_DIR); \ + done + +clean: + rm -f $(PIPE_TARGETS) $(PIPE_OBJECTS) depend depend.bak + +depend: $(PIPE_SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(PIPE_CPPFLAGS) $(PIPE_SOURCES) 2>/dev/null + +sinclude depend diff --git a/src/gallium/targets/gbm/pipe_i915.c b/src/gallium/targets/pipe-loader/pipe_i915.c index 85662cb85b5..85662cb85b5 100644 --- a/src/gallium/targets/gbm/pipe_i915.c +++ b/src/gallium/targets/pipe-loader/pipe_i915.c diff --git a/src/gallium/targets/gbm/pipe_nouveau.c b/src/gallium/targets/pipe-loader/pipe_nouveau.c index 65425e8d456..65425e8d456 100644 --- a/src/gallium/targets/gbm/pipe_nouveau.c +++ b/src/gallium/targets/pipe-loader/pipe_nouveau.c diff --git a/src/gallium/targets/gbm/pipe_r300.c b/src/gallium/targets/pipe-loader/pipe_r300.c index 055685996e6..055685996e6 100644 --- a/src/gallium/targets/gbm/pipe_r300.c +++ b/src/gallium/targets/pipe-loader/pipe_r300.c diff --git a/src/gallium/targets/gbm/pipe_r600.c b/src/gallium/targets/pipe-loader/pipe_r600.c index 5d89aca6ec3..5d89aca6ec3 100644 --- a/src/gallium/targets/gbm/pipe_r600.c +++ b/src/gallium/targets/pipe-loader/pipe_r600.c diff --git a/src/gallium/targets/gbm/pipe_swrast.c b/src/gallium/targets/pipe-loader/pipe_swrast.c index 
092abf07a52..092abf07a52 100644 --- a/src/gallium/targets/gbm/pipe_swrast.c +++ b/src/gallium/targets/pipe-loader/pipe_swrast.c diff --git a/src/gallium/targets/gbm/pipe_vmwgfx.c b/src/gallium/targets/pipe-loader/pipe_vmwgfx.c index bfe665be6eb..bfe665be6eb 100644 --- a/src/gallium/targets/gbm/pipe_vmwgfx.c +++ b/src/gallium/targets/pipe-loader/pipe_vmwgfx.c diff --git a/src/gallium/tests/trivial/Makefile b/src/gallium/tests/trivial/Makefile index 4ddbb0b73dc..8c032016538 100644 --- a/src/gallium/tests/trivial/Makefile +++ b/src/gallium/tests/trivial/Makefile @@ -11,39 +11,39 @@ INCLUDES = \ -I$(TOP)/src/gallium/winsys \ $(PROG_INCLUDES) -ifeq ($(MESA_LLVM),1) -LINKS = $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a -LDFLAGS += $(LLVM_LDFLAGS) -endif - LINKS += \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ - $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ - $(TOP)/src/gallium/winsys/sw/null/libws_null.a \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(GALLIUM_PIPE_LOADER_LIBS) \ $(GALLIUM_AUXILIARIES) \ - $(PROG_LINKS) + $(PROG_LINKS) $(LIBUDEV_LIBS) SOURCES = \ tri.c \ - quad-tex.c + quad-tex.c \ + compute.c OBJECTS = $(SOURCES:.c=.o) PROGS = $(OBJECTS:.o=) -PROG_DEFINES = \ - -DGALLIUM_SOFTPIPE -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD +PROG_DEFINES = -DPIPE_SEARCH_DIR=\"$(PIPE_SRC_DIR)\" \ + $(GALLIUM_PIPE_LOADER_DEFINES) + +PIPE_SRC_DIR = $(TOP)/src/gallium/targets/pipe-loader ##### TARGETS ##### -default: $(PROGS) +default: $(PROGS) pipes + +install: clean: -rm -f $(PROGS) -rm -f *.o -rm -f result.bmp + @$(MAKE) -C $(PIPE_SRC_DIR) clean + +pipes: + @$(MAKE) -C $(PIPE_SRC_DIR) ##### RULES ##### diff --git a/src/gallium/tests/trivial/compute.c b/src/gallium/tests/trivial/compute.c new file mode 100644 index 00000000000..1812090d3a0 --- /dev/null +++ b/src/gallium/tests/trivial/compute.c @@ -0,0 +1,1592 @@ +/* + * Copyright (C) 2011 Francisco Jerez. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include <fcntl.h> +#include <stdio.h> +#include <sys/stat.h> +#include <inttypes.h> +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_sampler.h" +#include "util/u_format.h" +#include "tgsi/tgsi_text.h" +#include "pipe-loader/pipe_loader.h" + +#define MAX_RESOURCES 4 + +struct context { + struct pipe_loader_device *dev; + struct pipe_screen *screen; + struct pipe_context *pipe; + void *hwcs; + void *hwsmp[MAX_RESOURCES]; + struct pipe_resource *tex[MAX_RESOURCES]; + bool tex_rw[MAX_RESOURCES]; + struct pipe_sampler_view *view[MAX_RESOURCES]; + struct pipe_surface *surf[MAX_RESOURCES]; +}; + +#define DUMP_COMPUTE_PARAM(p, c) do { \ + uint64_t __v[4]; \ + int __i, __n; \ + \ + __n = ctx->screen->get_compute_param(ctx->screen, c, __v); \ + printf("%s: {", #c); \ + \ + for (__i = 0; __i < __n / sizeof(*__v); ++__i) \ + printf(" %"PRIu64, __v[__i]); \ + \ + printf(" }\n"); \ + } while (0) + +static void init_ctx(struct context *ctx) +{ + int ret; + + ret = pipe_loader_probe(&ctx->dev, 1); + assert(ret); + + ctx->screen = pipe_loader_create_screen(ctx->dev, PIPE_SEARCH_DIR); + assert(ctx->screen); + + ctx->pipe = ctx->screen->context_create(ctx->screen, NULL); + assert(ctx->pipe); + + DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_GRID_DIMENSION); + DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_GRID_SIZE); + DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); +} + +static void destroy_ctx(struct context *ctx) +{ + ctx->pipe->destroy(ctx->pipe); + ctx->screen->destroy(ctx->screen); + pipe_loader_release(&ctx->dev, 1); + FREE(ctx); +} + +static char * +preprocess_prog(struct context *ctx, const char *src, const char *defs) +{ + const char header[] = + "#define RGLOBAL RES[32767]\n" + "#define RLOCAL RES[32766]\n" + "#define RPRIVATE RES[32765]\n" + "#define RINPUT RES[32764]\n"; + char cmd[512]; + 
char tmp[] = "/tmp/test-compute.tgsi-XXXXXX"; + char *buf; + int fd, ret; + struct stat st; + FILE *p; + + /* Open a temporary file */ + fd = mkstemp(tmp); + assert(fd >= 0); + snprintf(cmd, sizeof(cmd), "cpp -P -nostdinc -undef %s > %s", + defs ? defs : "", tmp); + + /* Preprocess */ + p = popen(cmd, "w"); + fwrite(header, strlen(header), 1, p); + fwrite(src, strlen(src), 1, p); + ret = pclose(p); + assert(!ret); + + /* Read back */ + ret = fstat(fd, &st); + assert(!ret); + + buf = malloc(st.st_size + 1); + ret = read(fd, buf, st.st_size); + assert(ret == st.st_size); + buf[ret] = 0; + + /* Clean up */ + close(fd); + unlink(tmp); + + return buf; +} + +static void init_prog(struct context *ctx, unsigned local_sz, + unsigned private_sz, unsigned input_sz, + const char *src, const char *defs) +{ + struct pipe_context *pipe = ctx->pipe; + struct tgsi_token prog[1024]; + struct pipe_compute_state cs = { + .prog = prog, + .req_local_mem = local_sz, + .req_private_mem = private_sz, + .req_input_mem = input_sz + }; + char *psrc = preprocess_prog(ctx, src, defs); + int ret; + + ret = tgsi_text_translate(psrc, prog, Elements(prog)); + assert(ret); + free(psrc); + + ctx->hwcs = pipe->create_compute_state(pipe, &cs); + assert(ctx->hwcs); + + pipe->bind_compute_state(pipe, ctx->hwcs); +} + +static void destroy_prog(struct context *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + + pipe->delete_compute_state(pipe, ctx->hwcs); + ctx->hwcs = NULL; +} + +static void init_tex(struct context *ctx, int slot, + enum pipe_texture_target target, bool rw, + enum pipe_format format, int w, int h, + void (*init)(void *, int, int, int)) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_resource **tex = &ctx->tex[slot]; + struct pipe_resource ttex = { + .target = target, + .format = format, + .width0 = w, + .height0 = h, + .depth0 = 1, + .array_size = 1, + .bind = (PIPE_BIND_SAMPLER_VIEW | + PIPE_BIND_COMPUTE_RESOURCE | + PIPE_BIND_GLOBAL) + }; + int dx = 
util_format_get_blocksize(format); + int dy = util_format_get_stride(format, w); + int nx = (target == PIPE_BUFFER ? (w / dx) : + util_format_get_nblocksx(format, w)); + int ny = (target == PIPE_BUFFER ? 1 : + util_format_get_nblocksy(format, h)); + struct pipe_transfer *xfer; + char *map; + int x, y; + + *tex = ctx->screen->resource_create(ctx->screen, &ttex); + assert(*tex); + + xfer = pipe->get_transfer(pipe, *tex, 0, PIPE_TRANSFER_WRITE, + &(struct pipe_box) { .width = w, + .height = h, + .depth = 1 }); + assert(xfer); + + map = pipe->transfer_map(pipe, xfer); + assert(map); + + for (y = 0; y < ny; ++y) { + for (x = 0; x < nx; ++x) { + init(map + y * dy + x * dx, slot, x, y); + } + } + + pipe->transfer_unmap(pipe, xfer); + pipe->transfer_destroy(pipe, xfer); + + ctx->tex_rw[slot] = rw; +} + +static bool default_check(void *x, void *y, int sz) { + return !memcmp(x, y, sz); +} + +static void check_tex(struct context *ctx, int slot, + void (*expect)(void *, int, int, int), + bool (*check)(void *, void *, int)) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_resource *tex = ctx->tex[slot]; + int dx = util_format_get_blocksize(tex->format); + int dy = util_format_get_stride(tex->format, tex->width0); + int nx = (tex->target == PIPE_BUFFER ? (tex->width0 / dx) : + util_format_get_nblocksx(tex->format, tex->width0)); + int ny = (tex->target == PIPE_BUFFER ? 
1 : + util_format_get_nblocksy(tex->format, tex->height0)); + struct pipe_transfer *xfer; + char *map; + int x, y, i; + int err = 0; + + if (!check) + check = default_check; + + xfer = pipe->get_transfer(pipe, tex, 0, PIPE_TRANSFER_READ, + &(struct pipe_box) { .width = tex->width0, + .height = tex->height0, + .depth = 1 }); + assert(xfer); + + map = pipe->transfer_map(pipe, xfer); + assert(map); + + for (y = 0; y < ny; ++y) { + for (x = 0; x < nx; ++x) { + uint32_t exp[4]; + uint32_t *res = (uint32_t *)(map + y * dy + x * dx); + + expect(exp, slot, x, y); + if (check(res, exp, dx) || (++err) > 20) + continue; + + if (dx < 4) { + uint32_t u = 0, v = 0; + + for (i = 0; i < dx; i++) { + u |= ((uint8_t *)exp)[i] << (8 * i); + v |= ((uint8_t *)res)[i] << (8 * i); + } + printf("(%d, %d): got 0x%x, expected 0x%x\n", + x, y, v, u); + } else { + for (i = 0; i < dx / 4; i++) { + printf("(%d, %d)[%d]: got 0x%x/%f," + " expected 0x%x/%f\n", x, y, i, + res[i], ((float *)res)[i], + exp[i], ((float *)exp)[i]); + } + } + } + } + + pipe->transfer_unmap(pipe, xfer); + pipe->transfer_destroy(pipe, xfer); + + if (err) + printf("(%d, %d): \x1b[31mFAIL\x1b[0m (%d)\n", x, y, err); + else + printf("(%d, %d): \x1b[32mOK\x1b[0m\n", x, y); +} + +static void destroy_tex(struct context *ctx) +{ + int i; + + for (i = 0; i < MAX_RESOURCES; ++i) { + if (ctx->tex[i]) + pipe_resource_reference(&ctx->tex[i], NULL); + } +} + +static void init_sampler_views(struct context *ctx, const int *slots) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_sampler_view tview; + int i; + + for (i = 0; *slots >= 0; ++i, ++slots) { + u_sampler_view_default_template(&tview, ctx->tex[*slots], + ctx->tex[*slots]->format); + + ctx->view[i] = pipe->create_sampler_view(pipe, ctx->tex[*slots], + &tview); + assert(ctx->view[i]); + } + + pipe->set_compute_sampler_views(pipe, 0, i, ctx->view); +} + +static void destroy_sampler_views(struct context *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + int i; + + 
pipe->set_compute_sampler_views(pipe, 0, MAX_RESOURCES, NULL); + + for (i = 0; i < MAX_RESOURCES; ++i) { + if (ctx->view[i]) { + pipe->sampler_view_destroy(pipe, ctx->view[i]); + ctx->view[i] = NULL; + } + } +} + +static void init_compute_resources(struct context *ctx, const int *slots) +{ + struct pipe_context *pipe = ctx->pipe; + int i; + + for (i = 0; *slots >= 0; ++i, ++slots) { + struct pipe_surface tsurf = { + .format = ctx->tex[*slots]->format, + .usage = ctx->tex[*slots]->bind, + .writable = ctx->tex_rw[*slots] + }; + + if (ctx->tex[*slots]->target == PIPE_BUFFER) + tsurf.u.buf.last_element = ctx->tex[*slots]->width0 - 1; + + ctx->surf[i] = pipe->create_surface(pipe, ctx->tex[*slots], + &tsurf); + assert(ctx->surf[i]); + } + + pipe->set_compute_resources(pipe, 0, i, ctx->surf); +} + +static void destroy_compute_resources(struct context *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + int i; + + pipe->set_compute_resources(pipe, 0, MAX_RESOURCES, NULL); + + for (i = 0; i < MAX_RESOURCES; ++i) { + if (ctx->surf[i]) { + pipe->surface_destroy(pipe, ctx->surf[i]); + ctx->surf[i] = NULL; + } + } +} + +static void init_sampler_states(struct context *ctx, int n) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_sampler_state smp = { + .normalized_coords = 1, + }; + int i; + + for (i = 0; i < n; ++i) { + ctx->hwsmp[i] = pipe->create_sampler_state(pipe, &smp); + assert(ctx->hwsmp[i]); + } + + pipe->bind_compute_sampler_states(pipe, 0, i, ctx->hwsmp); +} + +static void destroy_sampler_states(struct context *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + int i; + + pipe->bind_compute_sampler_states(pipe, 0, MAX_RESOURCES, NULL); + + for (i = 0; i < MAX_RESOURCES; ++i) { + if (ctx->hwsmp[i]) { + pipe->delete_sampler_state(pipe, ctx->hwsmp[i]); + ctx->hwsmp[i] = NULL; + } + } +} + +static void init_globals(struct context *ctx, const int *slots, + uint32_t **handles) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_resource *res[MAX_RESOURCES]; + 
int i; + + for (i = 0; *slots >= 0; ++i, ++slots) + res[i] = ctx->tex[*slots]; + + pipe->set_global_binding(pipe, 0, i, res, handles); +} + +static void destroy_globals(struct context *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + + pipe->set_global_binding(pipe, 0, MAX_RESOURCES, NULL, NULL); +} + +static void launch_grid(struct context *ctx, const uint *block_layout, + const uint *grid_layout, uint32_t pc, + const void *input) +{ + struct pipe_context *pipe = ctx->pipe; + + pipe->launch_grid(pipe, block_layout, grid_layout, pc, input); +} + +static void test_system_values(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], BUFFER, RAW, WR\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL SV[1], BLOCK_SIZE[0]\n" + "DCL SV[2], GRID_SIZE[0]\n" + "DCL SV[3], THREAD_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "IMM UINT32 { 64, 0, 0, 0 }\n" + "IMM UINT32 { 16, 0, 0, 0 }\n" + "IMM UINT32 { 0, 0, 0, 0 }\n" + "\n" + "BGNSUB" + " UMUL TEMP[0], SV[0], SV[1]\n" + " UADD TEMP[0], TEMP[0], SV[3]\n" + " UMUL TEMP[1], SV[1], SV[2]\n" + " UMUL TEMP[0].w, TEMP[0], TEMP[1].zzzz\n" + " UMUL TEMP[0].zw, TEMP[0], TEMP[1].yyyy\n" + " UMUL TEMP[0].yzw, TEMP[0], TEMP[1].xxxx\n" + " UADD TEMP[0].xy, TEMP[0].xyxy, TEMP[0].zwzw\n" + " UADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy\n" + " UMUL TEMP[0].x, TEMP[0], IMM[0]\n" + " STORE RES[0].xyzw, TEMP[0], SV[0]\n" + " UADD TEMP[0].x, TEMP[0], IMM[1]\n" + " STORE RES[0].xyzw, TEMP[0], SV[1]\n" + " UADD TEMP[0].x, TEMP[0], IMM[1]\n" + " STORE RES[0].xyzw, TEMP[0], SV[2]\n" + " UADD TEMP[0].x, TEMP[0], IMM[1]\n" + " STORE RES[0].xyzw, TEMP[0], SV[3]\n" + " RET\n" + "ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef; + } + void expect(void *p, int s, int x, int y) { + int id = x / 16, sv = (x % 16) / 4, c = x % 4; + int tid[] = { id % 20, (id % 240) / 20, id / 240, 0 }; + int bsz[] = { 4, 3, 5, 1}; + int gsz[] = { 5, 4, 1, 1}; + + switch (sv) { + case 0: + *(uint32_t *)p = tid[c] / bsz[c]; 
+ break; + case 1: + *(uint32_t *)p = bsz[c]; + break; + case 2: + *(uint32_t *)p = gsz[c]; + break; + case 3: + *(uint32_t *)p = tid[c] % bsz[c]; + break; + } + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 0, src, NULL); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 76800, 0, init); + init_compute_resources(ctx, (int []) { 0, -1 }); + launch_grid(ctx, (uint []){4, 3, 5}, (uint []){5, 4, 1}, 0, NULL); + check_tex(ctx, 0, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_resource_access(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], BUFFER, RAW, WR\n" + "DCL RES[1], 2D, RAW, WR\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "IMM UINT32 { 15, 0, 0, 0 }\n" + "IMM UINT32 { 16, 1, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UADD TEMP[0].x, SV[0].xxxx, SV[0].yyyy\n" + " AND TEMP[0].x, TEMP[0], IMM[0]\n" + " UMUL TEMP[0].x, TEMP[0], IMM[1]\n" + " LOAD TEMP[0].xyzw, RES[0], TEMP[0]\n" + " UMUL TEMP[1], SV[0], IMM[1]\n" + " STORE RES[1].xyzw, TEMP[1], TEMP[0]\n" + " RET\n" + " ENDSUB\n"; + void init0(void *p, int s, int x, int y) { + *(float *)p = 8.0 - (float)x; + } + void init1(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef; + } + void expect(void *p, int s, int x, int y) { + *(float *)p = 8.0 - (float)((x + 4*y) & 0x3f); + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 0, src, NULL); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 256, 0, init0); + init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, + 60, 12, init1); + init_compute_resources(ctx, (int []) { 0, 1, -1 }); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){15, 12, 1}, 0, NULL); + check_tex(ctx, 1, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_function_calls(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], 2D, RAW, WR\n" + "DCL 
SV[0], BLOCK_ID[0]\n" + "DCL SV[1], BLOCK_SIZE[0]\n" + "DCL SV[2], GRID_SIZE[0]\n" + "DCL SV[3], THREAD_ID[0]\n" + "DCL TEMP[0]\n" + "DCL TEMP[1]\n" + "DCL TEMP[2], LOCAL\n" + "IMM UINT32 { 0, 11, 22, 33 }\n" + "IMM FLT32 { 11, 33, 55, 99 }\n" + "IMM UINT32 { 4, 1, 0, 0 }\n" + "IMM UINT32 { 12, 0, 0, 0 }\n" + "\n" + "00: BGNSUB\n" + "01: UMUL TEMP[0].x, TEMP[0], TEMP[0]\n" + "02: UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n" + "03: USLT TEMP[0].x, TEMP[0], IMM[0]\n" + "04: RET\n" + "05: ENDSUB\n" + "06: BGNSUB\n" + "07: UMUL TEMP[0].x, TEMP[0], TEMP[0]\n" + "08: UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n" + "09: USLT TEMP[0].x, TEMP[0], IMM[0].yyyy\n" + "10: IF TEMP[0].xxxx\n" + "11: CAL :0\n" + "12: ENDIF\n" + "13: RET\n" + "14: ENDSUB\n" + "15: BGNSUB\n" + "16: UMUL TEMP[2], SV[0], SV[1]\n" + "17: UADD TEMP[2], TEMP[2], SV[3]\n" + "18: UMUL TEMP[2], TEMP[2], IMM[2]\n" + "00: MOV TEMP[1].x, IMM[2].wwww\n" + "19: LOAD TEMP[0].x, RES[0].xxxx, TEMP[2]\n" + "20: CAL :6\n" + "21: STORE RES[0].x, TEMP[2], TEMP[1].xxxx\n" + "22: RET\n" + "23: ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = 15 * y + x; + } + void expect(void *p, int s, int x, int y) { + *(uint32_t *)p = (15 * y + x) < 4 ? 
2 : 1 ; + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 0, src, NULL); + init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, + 15, 12, init); + init_compute_resources(ctx, (int []) { 0, -1 }); + launch_grid(ctx, (uint []){3, 3, 3}, (uint []){5, 4, 1}, 15, NULL); + check_tex(ctx, 0, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_input_global(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL SV[0], THREAD_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "IMM UINT32 { 8, 0, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UMUL TEMP[0], SV[0], IMM[0]\n" + " LOAD TEMP[1].xy, RINPUT, TEMP[0]\n" + " LOAD TEMP[0].x, RGLOBAL, TEMP[1].yyyy\n" + " UADD TEMP[1].x, TEMP[0], -TEMP[1]\n" + " STORE RGLOBAL.x, TEMP[1].yyyy, TEMP[1]\n" + " RET\n" + " ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef; + } + void expect(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef - (x == 0 ? 
0x10001 + 2 * s : 0); + } + uint32_t input[8] = { 0x10001, 0x10002, 0x10003, 0x10004, + 0x10005, 0x10006, 0x10007, 0x10008 }; + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 32, src, NULL); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init); + init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init); + init_tex(ctx, 2, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init); + init_tex(ctx, 3, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init); + init_globals(ctx, (int []){ 0, 1, 2, 3, -1 }, + (uint32_t *[]){ &input[1], &input[3], + &input[5], &input[7] }); + launch_grid(ctx, (uint []){4, 1, 1}, (uint []){1, 1, 1}, 0, input); + check_tex(ctx, 0, expect, NULL); + check_tex(ctx, 1, expect, NULL); + check_tex(ctx, 2, expect, NULL); + check_tex(ctx, 3, expect, NULL); + destroy_globals(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_private(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], BUFFER, RAW, WR\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL SV[1], BLOCK_SIZE[0]\n" + "DCL SV[2], THREAD_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "DCL TEMP[2], LOCAL\n" + "IMM UINT32 { 128, 0, 0, 0 }\n" + "IMM UINT32 { 4, 0, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UMUL TEMP[0].x, SV[0], SV[1]\n" + " UADD TEMP[0].x, TEMP[0], SV[2]\n" + " MOV TEMP[1].x, IMM[0].wwww\n" + " BGNLOOP\n" + " USEQ TEMP[2].x, TEMP[1], IMM[0]\n" + " IF TEMP[2]\n" + " BRK\n" + " ENDIF\n" + " UDIV TEMP[2].x, TEMP[1], IMM[1]\n" + " UADD TEMP[2].x, TEMP[2], TEMP[0]\n" + " STORE RPRIVATE.x, TEMP[1], TEMP[2]\n" + " UADD TEMP[1].x, TEMP[1], IMM[1]\n" + " ENDLOOP\n" + " MOV TEMP[1].x, IMM[0].wwww\n" + " UMUL TEMP[0].x, TEMP[0], IMM[0]\n" + " BGNLOOP\n" + " USEQ TEMP[2].x, TEMP[1], IMM[0]\n" + " IF TEMP[2]\n" + " BRK\n" + " ENDIF\n" + " LOAD TEMP[2].x, RPRIVATE, TEMP[1]\n" + " STORE RES[0].x, TEMP[0], TEMP[2]\n" + " UADD TEMP[0].x, TEMP[0], IMM[1]\n" + " UADD TEMP[1].x, TEMP[1], IMM[1]\n" + " ENDLOOP\n" + " RET\n" + " 
ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef; + } + void expect(void *p, int s, int x, int y) { + *(uint32_t *)p = (x / 32) + x % 32; + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 128, 0, src, NULL); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 32768, 0, init); + init_compute_resources(ctx, (int []) { 0, -1 }); + launch_grid(ctx, (uint []){16, 1, 1}, (uint []){16, 1, 1}, 0, NULL); + check_tex(ctx, 0, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_local(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], BUFFER, RAW, WR\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL SV[1], BLOCK_SIZE[0]\n" + "DCL SV[2], THREAD_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "DCL TEMP[2], LOCAL\n" + "IMM UINT32 { 1, 0, 0, 0 }\n" + "IMM UINT32 { 2, 0, 0, 0 }\n" + "IMM UINT32 { 4, 0, 0, 0 }\n" + "IMM UINT32 { 32, 0, 0, 0 }\n" + "IMM UINT32 { 128, 0, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UMUL TEMP[0].x, SV[2], IMM[2]\n" + " STORE RLOCAL.x, TEMP[0], IMM[0].wwww\n" + " MFENCE RLOCAL\n" + " USLT TEMP[1].x, SV[2], IMM[3]\n" + " IF TEMP[1]\n" + " UADD TEMP[1].x, TEMP[0], IMM[4]\n" + " BGNLOOP\n" + " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" + " USEQ TEMP[2].x, TEMP[2], IMM[0]\n" + " IF TEMP[2]\n" + " BRK\n" + " ENDIF\n" + " ENDLOOP\n" + " STORE RLOCAL.x, TEMP[0], IMM[0]\n" + " MFENCE RLOCAL\n" + " BGNLOOP\n" + " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" + " USEQ TEMP[2].x, TEMP[2], IMM[1]\n" + " IF TEMP[2]\n" + " BRK\n" + " ENDIF\n" + " ENDLOOP\n" + " ELSE\n" + " UADD TEMP[1].x, TEMP[0], -IMM[4]\n" + " BGNLOOP\n" + " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" + " USEQ TEMP[2].x, TEMP[2], IMM[0].wwww\n" + " IF TEMP[2]\n" + " BRK\n" + " ENDIF\n" + " ENDLOOP\n" + " STORE RLOCAL.x, TEMP[0], IMM[0]\n" + " MFENCE RLOCAL\n" + " BGNLOOP\n" + " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" + " USEQ TEMP[2].x, TEMP[2], IMM[0]\n" + " IF TEMP[2]\n" + " BRK\n" + " ENDIF\n" + " 
ENDLOOP\n" + " STORE RLOCAL.x, TEMP[0], IMM[1]\n" + " MFENCE RLOCAL\n" + " ENDIF\n" + " UMUL TEMP[1].x, SV[0], SV[1]\n" + " UMUL TEMP[1].x, TEMP[1], IMM[2]\n" + " UADD TEMP[1].x, TEMP[1], TEMP[0]\n" + " LOAD TEMP[0].x, RLOCAL, TEMP[0]\n" + " STORE RES[0].x, TEMP[1], TEMP[0]\n" + " RET\n" + " ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef; + } + void expect(void *p, int s, int x, int y) { + *(uint32_t *)p = x & 0x20 ? 2 : 1; + } + + printf("- %s\n", __func__); + + init_prog(ctx, 256, 0, 0, src, NULL); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 4096, 0, init); + init_compute_resources(ctx, (int []) { 0, -1 }); + launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL); + check_tex(ctx, 0, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_sample(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL SVIEW[0], 2D, FLOAT\n" + "DCL RES[0], 2D, RAW, WR\n" + "DCL SAMP[0]\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "IMM UINT32 { 16, 1, 0, 0 }\n" + "IMM FLT32 { 128, 32, 0, 0 }\n" + "\n" + " BGNSUB\n" + " I2F TEMP[1], SV[0]\n" + " DIV TEMP[1], TEMP[1], IMM[1]\n" + " SAMPLE TEMP[1], TEMP[1], SVIEW[0], SAMP[0]\n" + " UMUL TEMP[0], SV[0], IMM[0]\n" + " STORE RES[0].xyzw, TEMP[0], TEMP[1]\n" + " RET\n" + " ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(float *)p = s ? 
1 : x * y; + } + void expect(void *p, int s, int x, int y) { + switch (x % 4) { + case 0: + *(float *)p = x / 4 * y; + break; + case 1: + case 2: + *(float *)p = 0; + break; + case 3: + *(float *)p = 1; + break; + } + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 0, src, NULL); + init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, + 128, 32, init); + init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, + 512, 32, init); + init_compute_resources(ctx, (int []) { 1, -1 }); + init_sampler_views(ctx, (int []) { 0, -1 }); + init_sampler_states(ctx, 2); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, NULL); + check_tex(ctx, 1, expect, NULL); + destroy_sampler_states(ctx); + destroy_sampler_views(ctx); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_many_kern(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], BUFFER, RAW, WR\n" + "DCL TEMP[0], LOCAL\n" + "IMM UINT32 { 0, 1, 2, 3 }\n" + "IMM UINT32 { 4, 0, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UMUL TEMP[0].x, IMM[0].xxxx, IMM[1].xxxx\n" + " STORE RES[0].x, TEMP[0], IMM[0].xxxx\n" + " RET\n" + " ENDSUB\n" + " BGNSUB\n" + " UMUL TEMP[0].x, IMM[0].yyyy, IMM[1].xxxx\n" + " STORE RES[0].x, TEMP[0], IMM[0].yyyy\n" + " RET\n" + " ENDSUB\n" + " BGNSUB\n" + " UMUL TEMP[0].x, IMM[0].zzzz, IMM[1].xxxx\n" + " STORE RES[0].x, TEMP[0], IMM[0].zzzz\n" + " RET\n" + " ENDSUB\n" + " BGNSUB\n" + " UMUL TEMP[0].x, IMM[0].wwww, IMM[1].xxxx\n" + " STORE RES[0].x, TEMP[0], IMM[0].wwww\n" + " RET\n" + " ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef; + } + void expect(void *p, int s, int x, int y) { + *(uint32_t *)p = x; + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 0, src, NULL); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 16, 0, init); + init_compute_resources(ctx, (int []) { 0, -1 }); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 0, NULL); + 
launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 5, NULL); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 10, NULL); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 15, NULL); + check_tex(ctx, 0, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_constant(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], BUFFER, RAW\n" + "DCL RES[1], BUFFER, RAW, WR\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "IMM UINT32 { 4, 0, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UMUL TEMP[0].x, SV[0], IMM[0]\n" + " LOAD TEMP[1].x, RES[0], TEMP[0]\n" + " STORE RES[1].x, TEMP[0], TEMP[1]\n" + " RET\n" + " ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(float *)p = s ? 0xdeadbeef : 8.0 - (float)x; + } + void expect(void *p, int s, int x, int y) { + *(float *)p = 8.0 - (float)x; + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 0, src, NULL); + init_tex(ctx, 0, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, + 256, 0, init); + init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 256, 0, init); + init_compute_resources(ctx, (int []) { 0, 1, -1 }); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL); + check_tex(ctx, 1, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_resource_indirect(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], BUFFER, RAW, WR\n" + "DCL RES[1..3], BUFFER, RAW\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "IMM UINT32 { 4, 0, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UMUL TEMP[0].x, SV[0], IMM[0]\n" + " LOAD TEMP[1].x, RES[1], TEMP[0]\n" + " LOAD TEMP[1].x, RES[TEMP[1].x+2], TEMP[0]\n" + " STORE RES[0].x, TEMP[0], TEMP[1]\n" + " RET\n" + " ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = s == 0 ? 0xdeadbeef : + s == 1 ? x % 2 : + s == 2 ? 
2 * x : + 2 * x + 1; + } + void expect(void *p, int s, int x, int y) { + *(uint32_t *)p = 2 * x + (x % 2 ? 1 : 0); + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 0, src, NULL); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 256, 0, init); + init_tex(ctx, 1, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, + 256, 0, init); + init_tex(ctx, 2, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, + 256, 0, init); + init_tex(ctx, 3, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, + 256, 0, init); + init_compute_resources(ctx, (int []) { 0, 1, 2, 3, -1 }); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL); + check_tex(ctx, 0, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +enum pipe_format surface_fmts[] = { + PIPE_FORMAT_B8G8R8A8_UNORM, + PIPE_FORMAT_B8G8R8X8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_X8R8G8B8_UNORM, + PIPE_FORMAT_X8R8G8B8_UNORM, + PIPE_FORMAT_L8_UNORM, + PIPE_FORMAT_A8_UNORM, + PIPE_FORMAT_I8_UNORM, + PIPE_FORMAT_L8A8_UNORM, + PIPE_FORMAT_R32_FLOAT, + PIPE_FORMAT_R32G32_FLOAT, + PIPE_FORMAT_R32G32B32A32_FLOAT, + PIPE_FORMAT_R32_UNORM, + PIPE_FORMAT_R32G32_UNORM, + PIPE_FORMAT_R32G32B32A32_UNORM, + PIPE_FORMAT_R32_SNORM, + PIPE_FORMAT_R32G32_SNORM, + PIPE_FORMAT_R32G32B32A32_SNORM, + PIPE_FORMAT_R8_UINT, + PIPE_FORMAT_R8G8_UINT, + PIPE_FORMAT_R8G8B8A8_UINT, + PIPE_FORMAT_R8_SINT, + PIPE_FORMAT_R8G8_SINT, + PIPE_FORMAT_R8G8B8A8_SINT, + PIPE_FORMAT_R32_UINT, + PIPE_FORMAT_R32G32_UINT, + PIPE_FORMAT_R32G32B32A32_UINT, + PIPE_FORMAT_R32_SINT, + PIPE_FORMAT_R32G32_SINT, + PIPE_FORMAT_R32G32B32A32_SINT +}; + +static void test_surface_ld(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], 2D\n" + "DCL RES[1], 2D, RAW, WR\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "IMM UINT32 { 16, 1, 0, 0 }\n" + "\n" + " BGNSUB\n" + " LOAD TEMP[1], RES[0], SV[0]\n" + " UMUL TEMP[0], SV[0], IMM[0]\n" + " STORE RES[1].xyzw, TEMP[0], 
TEMP[1]\n" + " RET\n" + " ENDSUB\n"; + int i = 0; + void init0f(void *p, int s, int x, int y) { + float v[] = { 1.0, -.75, .50, -.25 }; + util_format_write_4f(surface_fmts[i], v, 0, + p, 0, 0, 0, 1, 1); + } + void init0i(void *p, int s, int x, int y) { + int v[] = { 0xffffffff, 0xffff, 0xff, 0xf }; + util_format_write_4i(surface_fmts[i], v, 0, + p, 0, 0, 0, 1, 1); + } + void init1(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef; + } + void expectf(void *p, int s, int x, int y) { + float v[4], w[4]; + init0f(v, s, x / 4, y); + util_format_read_4f(surface_fmts[i], w, 0, + v, 0, 0, 0, 1, 1); + *(float *)p = w[x % 4]; + } + void expecti(void *p, int s, int x, int y) { + int32_t v[4], w[4]; + init0i(v, s, x / 4, y); + util_format_read_4i(surface_fmts[i], w, 0, + v, 0, 0, 0, 1, 1); + *(uint32_t *)p = w[x % 4]; + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 0, src, NULL); + + for (i = 0; i < Elements(surface_fmts); i++) { + bool is_int = util_format_is_pure_integer(surface_fmts[i]); + + printf(" - %s\n", util_format_name(surface_fmts[i])); + + init_tex(ctx, 0, PIPE_TEXTURE_2D, true, surface_fmts[i], + 128, 32, (is_int ? init0i : init0f)); + init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, + 512, 32, init1); + init_compute_resources(ctx, (int []) { 0, 1, -1 }); + init_sampler_states(ctx, 2); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, + NULL); + check_tex(ctx, 1, (is_int ? 
expecti : expectf), NULL); + destroy_sampler_states(ctx); + destroy_compute_resources(ctx); + destroy_tex(ctx); + } + + destroy_prog(ctx); +} + +static void test_surface_st(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], 2D, RAW\n" + "DCL RES[1], 2D, WR\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "IMM UINT32 { 16, 1, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UMUL TEMP[0], SV[0], IMM[0]\n" + " LOAD TEMP[1], RES[0], TEMP[0]\n" + " STORE RES[1], SV[0], TEMP[1]\n" + " RET\n" + " ENDSUB\n"; + int i = 0; + void init0f(void *p, int s, int x, int y) { + float v[] = { 1.0, -.75, 0.5, -.25 }; + *(float *)p = v[x % 4]; + } + void init0i(void *p, int s, int x, int y) { + int v[] = { 0xffffffff, 0xffff, 0xff, 0xf }; + *(int32_t *)p = v[x % 4]; + } + void init1(void *p, int s, int x, int y) { + memset(p, 1, util_format_get_blocksize(surface_fmts[i])); + } + void expectf(void *p, int s, int x, int y) { + float vf[4]; + int j; + + for (j = 0; j < 4; j++) + init0f(&vf[j], s, 4 * x + j, y); + util_format_write_4f(surface_fmts[i], vf, 0, + p, 0, 0, 0, 1, 1); + } + void expects(void *p, int s, int x, int y) { + int32_t v[4]; + int j; + + for (j = 0; j < 4; j++) + init0i(&v[j], s, 4 * x + j, y); + util_format_write_4i(surface_fmts[i], v, 0, + p, 0, 0, 0, 1, 1); + } + void expectu(void *p, int s, int x, int y) { + uint32_t v[4]; + int j; + + for (j = 0; j < 4; j++) + init0i(&v[j], s, 4 * x + j, y); + util_format_write_4ui(surface_fmts[i], v, 0, + p, 0, 0, 0, 1, 1); + } + bool check(void *x, void *y, int sz) { + int j; + + if (util_format_is_float(surface_fmts[i])) { + return fabs(*(float *)x - *(float *)y) < 3.92156863e-3; + + } else if ((sz % 4) == 0) { + for (j = 0; j < sz / 4; j++) + if (abs(((uint32_t *)x)[j] - + ((uint32_t *)y)[j]) > 1) + return false; + return true; + } else { + return !memcmp(x, y, sz); + } + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 0, src, NULL); + + for (i = 0; i < Elements(surface_fmts); 
i++) { + bool is_signed = (util_format_description(surface_fmts[i]) + ->channel[0].type == UTIL_FORMAT_TYPE_SIGNED); + bool is_int = util_format_is_pure_integer(surface_fmts[i]); + + printf(" - %s\n", util_format_name(surface_fmts[i])); + + init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, + 512, 32, (is_int ? init0i : init0f)); + init_tex(ctx, 1, PIPE_TEXTURE_2D, true, surface_fmts[i], + 128, 32, init1); + init_compute_resources(ctx, (int []) { 0, 1, -1 }); + init_sampler_states(ctx, 2); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, + NULL); + check_tex(ctx, 1, (is_int && is_signed ? expects : + is_int && !is_signed ? expectu : + expectf), check); + destroy_sampler_states(ctx); + destroy_compute_resources(ctx); + destroy_tex(ctx); + } + + destroy_prog(ctx); +} + +static void test_barrier(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], BUFFER, RAW, WR\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL SV[1], BLOCK_SIZE[0]\n" + "DCL SV[2], THREAD_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "DCL TEMP[2], LOCAL\n" + "DCL TEMP[3], LOCAL\n" + "IMM UINT32 { 1, 0, 0, 0 }\n" + "IMM UINT32 { 4, 0, 0, 0 }\n" + "IMM UINT32 { 32, 0, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UMUL TEMP[0].x, SV[2], IMM[1]\n" + " MOV TEMP[1].x, IMM[0].wwww\n" + " BGNLOOP\n" + " BARRIER\n" + " STORE RLOCAL.x, TEMP[0], TEMP[1]\n" + " BARRIER\n" + " MOV TEMP[2].x, IMM[0].wwww\n" + " BGNLOOP\n" + " UMUL TEMP[3].x, TEMP[2], IMM[1]\n" + " LOAD TEMP[3].x, RLOCAL, TEMP[3]\n" + " USNE TEMP[3].x, TEMP[3], TEMP[1]\n" + " IF TEMP[3]\n" + " END\n" + " ENDIF\n" + " UADD TEMP[2].x, TEMP[2], IMM[0]\n" + " USEQ TEMP[3].x, TEMP[2], SV[1]\n" + " IF TEMP[3]\n" + " BRK\n" + " ENDIF\n" + " ENDLOOP\n" + " UADD TEMP[1].x, TEMP[1], IMM[0]\n" + " USEQ TEMP[2].x, TEMP[1], IMM[2]\n" + " IF TEMP[2]\n" + " BRK\n" + " ENDIF\n" + " ENDLOOP\n" + " UMUL TEMP[1].x, SV[0], SV[1]\n" + " UMUL TEMP[1].x, TEMP[1], IMM[1]\n" + " UADD TEMP[1].x, TEMP[1], TEMP[0]\n" + " LOAD TEMP[0].x, 
RLOCAL, TEMP[0]\n" + " STORE RES[0].x, TEMP[1], TEMP[0]\n" + " RET\n" + " ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef; + } + void expect(void *p, int s, int x, int y) { + *(uint32_t *)p = 31; + } + + printf("- %s\n", __func__); + + init_prog(ctx, 256, 0, 0, src, NULL); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 4096, 0, init); + init_compute_resources(ctx, (int []) { 0, -1 }); + launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL); + check_tex(ctx, 0, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_atom_ops(struct context *ctx, bool global) +{ + const char *src = "COMP\n" + "#ifdef TARGET_GLOBAL\n" + "#define target RES[0]\n" + "#else\n" + "#define target RLOCAL\n" + "#endif\n" + "" + "DCL RES[0], BUFFER, RAW, WR\n" + "#define threadid SV[0]\n" + "DCL threadid, THREAD_ID[0]\n" + "" + "#define offset TEMP[0]\n" + "DCL offset, LOCAL\n" + "#define tmp TEMP[1]\n" + "DCL tmp, LOCAL\n" + "" + "#define k0 IMM[0]\n" + "IMM UINT32 { 0, 0, 0, 0 }\n" + "#define k1 IMM[1]\n" + "IMM UINT32 { 1, 0, 0, 0 }\n" + "#define k2 IMM[2]\n" + "IMM UINT32 { 2, 0, 0, 0 }\n" + "#define k3 IMM[3]\n" + "IMM UINT32 { 3, 0, 0, 0 }\n" + "#define k4 IMM[4]\n" + "IMM UINT32 { 4, 0, 0, 0 }\n" + "#define k5 IMM[5]\n" + "IMM UINT32 { 5, 0, 0, 0 }\n" + "#define k6 IMM[6]\n" + "IMM UINT32 { 6, 0, 0, 0 }\n" + "#define k7 IMM[7]\n" + "IMM UINT32 { 7, 0, 0, 0 }\n" + "#define k8 IMM[8]\n" + "IMM UINT32 { 8, 0, 0, 0 }\n" + "#define k9 IMM[9]\n" + "IMM UINT32 { 9, 0, 0, 0 }\n" + "#define korig IMM[10].xxxx\n" + "#define karg IMM[10].yyyy\n" + "IMM UINT32 { 3735928559, 286331153, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UMUL offset.x, threadid, k4\n" + " STORE target.x, offset, korig\n" + " USEQ tmp.x, threadid, k0\n" + " IF tmp\n" + " ATOMUADD tmp.x, target, offset, karg\n" + " ATOMUADD tmp.x, target, offset, tmp\n" + " ENDIF\n" + " USEQ tmp.x, threadid, k1\n" + " IF tmp\n" + 
" ATOMXCHG tmp.x, target, offset, karg\n" + " ATOMXCHG tmp.x, target, offset, tmp\n" + " ENDIF\n" + " USEQ tmp.x, threadid, k2\n" + " IF tmp\n" + " ATOMCAS tmp.x, target, offset, korig, karg\n" + " ATOMCAS tmp.x, target, offset, tmp, k0\n" + " ENDIF\n" + " USEQ tmp.x, threadid, k3\n" + " IF tmp\n" + " ATOMAND tmp.x, target, offset, karg\n" + " ATOMAND tmp.x, target, offset, tmp\n" + " ENDIF\n" + " USEQ tmp.x, threadid, k4\n" + " IF tmp\n" + " ATOMOR tmp.x, target, offset, karg\n" + " ATOMOR tmp.x, target, offset, tmp\n" + " ENDIF\n" + " USEQ tmp.x, threadid, k5\n" + " IF tmp\n" + " ATOMXOR tmp.x, target, offset, karg\n" + " ATOMXOR tmp.x, target, offset, tmp\n" + " ENDIF\n" + " USEQ tmp.x, threadid, k6\n" + " IF tmp\n" + " ATOMUMIN tmp.x, target, offset, karg\n" + " ATOMUMIN tmp.x, target, offset, tmp\n" + " ENDIF\n" + " USEQ tmp.x, threadid, k7\n" + " IF tmp\n" + " ATOMUMAX tmp.x, target, offset, karg\n" + " ATOMUMAX tmp.x, target, offset, tmp\n" + " ENDIF\n" + " USEQ tmp.x, threadid, k8\n" + " IF tmp\n" + " ATOMIMIN tmp.x, target, offset, karg\n" + " ATOMIMIN tmp.x, target, offset, tmp\n" + " ENDIF\n" + " USEQ tmp.x, threadid, k9\n" + " IF tmp\n" + " ATOMIMAX tmp.x, target, offset, karg\n" + " ATOMIMAX tmp.x, target, offset, tmp\n" + " ENDIF\n" + "#ifdef TARGET_LOCAL\n" + " LOAD tmp.x, RLOCAL, offset\n" + " STORE RES[0].x, offset, tmp\n" + "#endif\n" + " RET\n" + " ENDSUB\n"; + + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xbad; + } + void expect(void *p, int s, int x, int y) { + switch (x) { + case 0: + *(uint32_t *)p = 0xce6c8eef; + break; + case 1: + *(uint32_t *)p = 0xdeadbeef; + break; + case 2: + *(uint32_t *)p = 0x11111111; + break; + case 3: + *(uint32_t *)p = 0x10011001; + break; + case 4: + *(uint32_t *)p = 0xdfbdbfff; + break; + case 5: + *(uint32_t *)p = 0x11111111; + break; + case 6: + *(uint32_t *)p = 0x11111111; + break; + case 7: + *(uint32_t *)p = 0xdeadbeef; + break; + case 8: + *(uint32_t *)p = 0xdeadbeef; + break; + case 9: + 
*(uint32_t *)p = 0x11111111; + break; + } + } + + printf("- %s (%s)\n", __func__, global ? "global" : "local"); + + init_prog(ctx, 40, 0, 0, src, + (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL")); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 40, 0, init); + init_compute_resources(ctx, (int []) { 0, -1 }); + launch_grid(ctx, (uint []){10, 1, 1}, (uint []){1, 1, 1}, 0, NULL); + check_tex(ctx, 0, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_atom_race(struct context *ctx, bool global) +{ + const char *src = "COMP\n" + "#ifdef TARGET_GLOBAL\n" + "#define target RES[0]\n" + "#else\n" + "#define target RLOCAL\n" + "#endif\n" + "" + "DCL RES[0], BUFFER, RAW, WR\n" + "" + "#define blockid SV[0]\n" + "DCL blockid, BLOCK_ID[0]\n" + "#define blocksz SV[1]\n" + "DCL blocksz, BLOCK_SIZE[0]\n" + "#define threadid SV[2]\n" + "DCL threadid, THREAD_ID[0]\n" + "" + "#define offset TEMP[0]\n" + "DCL offset, LOCAL\n" + "#define arg TEMP[1]\n" + "DCL arg, LOCAL\n" + "#define count TEMP[2]\n" + "DCL count, LOCAL\n" + "#define vlocal TEMP[3]\n" + "DCL vlocal, LOCAL\n" + "#define vshared TEMP[4]\n" + "DCL vshared, LOCAL\n" + "#define last TEMP[5]\n" + "DCL last, LOCAL\n" + "#define tmp0 TEMP[6]\n" + "DCL tmp0, LOCAL\n" + "#define tmp1 TEMP[7]\n" + "DCL tmp1, LOCAL\n" + "" + "#define k0 IMM[0]\n" + "IMM UINT32 { 0, 0, 0, 0 }\n" + "#define k1 IMM[1]\n" + "IMM UINT32 { 1, 0, 0, 0 }\n" + "#define k4 IMM[2]\n" + "IMM UINT32 { 4, 0, 0, 0 }\n" + "#define k32 IMM[3]\n" + "IMM UINT32 { 32, 0, 0, 0 }\n" + "#define k128 IMM[4]\n" + "IMM UINT32 { 128, 0, 0, 0 }\n" + "#define kdeadcafe IMM[5]\n" + "IMM UINT32 { 3735931646, 0, 0, 0 }\n" + "#define kallowed_set IMM[6]\n" + "IMM UINT32 { 559035650, 0, 0, 0 }\n" + "#define k11111111 IMM[7]\n" + "IMM UINT32 { 286331153, 0, 0, 0 }\n" + "\n" + " BGNSUB\n" + " MOV offset.x, threadid\n" + "#ifdef TARGET_GLOBAL\n" + " UMUL tmp0.x, blockid, blocksz\n" + " UADD offset.x, offset, 
tmp0\n" + "#endif\n" + " UMUL offset.x, offset, k4\n" + " USLT tmp0.x, threadid, k32\n" + " STORE target.x, offset, k0\n" + " BARRIER\n" + " IF tmp0\n" + " MOV vlocal.x, k0\n" + " MOV arg.x, kdeadcafe\n" + " BGNLOOP\n" + " INEG arg.x, arg\n" + " ATOMUADD vshared.x, target, offset, arg\n" + " SFENCE target\n" + " USNE tmp0.x, vshared, vlocal\n" + " IF tmp0\n" + " BRK\n" + " ENDIF\n" + " UADD vlocal.x, vlocal, arg\n" + " ENDLOOP\n" + " UADD vlocal.x, vshared, arg\n" + " LOAD vshared.x, target, offset\n" + " USEQ tmp0.x, vshared, vlocal\n" + " STORE target.x, offset, tmp0\n" + " ELSE\n" + " UADD offset.x, offset, -k128\n" + " MOV count.x, k0\n" + " MOV last.x, k0\n" + " BGNLOOP\n" + " LOAD vshared.x, target, offset\n" + " USEQ tmp0.x, vshared, kallowed_set.xxxx\n" + " USEQ tmp1.x, vshared, kallowed_set.yyyy\n" + " OR tmp0.x, tmp0, tmp1\n" + " IF tmp0\n" + " USEQ tmp0.x, vshared, last\n" + " IF tmp0\n" + " CONT\n" + " ENDIF\n" + " MOV last.x, vshared\n" + " ELSE\n" + " END\n" + " ENDIF\n" + " UADD count.x, count, k1\n" + " USEQ tmp0.x, count, k128\n" + " IF tmp0\n" + " BRK\n" + " ENDIF\n" + " ENDLOOP\n" + " ATOMXCHG tmp0.x, target, offset, k11111111\n" + " UADD offset.x, offset, k128\n" + " ATOMXCHG tmp0.x, target, offset, k11111111\n" + " SFENCE target\n" + " ENDIF\n" + "#ifdef TARGET_LOCAL\n" + " LOAD tmp0.x, RLOCAL, offset\n" + " UMUL tmp1.x, blockid, blocksz\n" + " UMUL tmp1.x, tmp1, k4\n" + " UADD offset.x, offset, tmp1\n" + " STORE RES[0].x, offset, tmp0\n" + "#endif\n" + " RET\n" + " ENDSUB\n"; + + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef; + } + void expect(void *p, int s, int x, int y) { + *(uint32_t *)p = x & 0x20 ? 0x11111111 : 0xffffffff; + } + + printf("- %s (%s)\n", __func__, global ? "global" : "local"); + + init_prog(ctx, 256, 0, 0, src, + (global ? 
"-DTARGET_GLOBAL" : "-DTARGET_LOCAL")); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 4096, 0, init); + init_compute_resources(ctx, (int []) { 0, -1 }); + launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL); + check_tex(ctx, 0, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +int main(int argc, char *argv[]) +{ + struct context *ctx = CALLOC_STRUCT(context); + + init_ctx(ctx); + test_system_values(ctx); + test_resource_access(ctx); + test_function_calls(ctx); + test_input_global(ctx); + test_private(ctx); + test_local(ctx); + test_sample(ctx); + test_many_kern(ctx); + test_constant(ctx); + test_resource_indirect(ctx); + test_surface_ld(ctx); + test_surface_st(ctx); + test_barrier(ctx); + test_atom_ops(ctx, true); + test_atom_race(ctx, true); + test_atom_ops(ctx, false); + test_atom_race(ctx, false); + destroy_ctx(ctx); + + return 0; +} diff --git a/src/gallium/tests/trivial/quad-tex.c b/src/gallium/tests/trivial/quad-tex.c index cc19e8d5eec..7caac29299f 100644 --- a/src/gallium/tests/trivial/quad-tex.c +++ b/src/gallium/tests/trivial/quad-tex.c @@ -57,16 +57,12 @@ #include "util/u_memory.h" /* util_make_[fragment|vertex]_passthrough_shader */ #include "util/u_simple_shaders.h" - -/* sw_screen_create: to get a software pipe driver */ -#include "target-helpers/inline_sw_helper.h" -/* debug_screen_wrap: to wrap with debug pipe drivers */ -#include "target-helpers/inline_debug_helper.h" -/* null software winsys */ -#include "sw/null/null_sw_winsys.h" +/* to get a hardware pipe driver */ +#include "pipe-loader/pipe_loader.h" struct program { + struct pipe_loader_device *dev; struct pipe_screen *screen; struct pipe_context *pipe; struct cso_context *cso; @@ -93,10 +89,15 @@ struct program static void init_prog(struct program *p) { struct pipe_surface surf_tmpl; - /* create the software rasterizer */ - p->screen = sw_screen_create(null_sw_create()); - /* wrap the screen with any debugger */ - 
p->screen = debug_screen_wrap(p->screen); + int ret; + + /* find a hardware device */ + ret = pipe_loader_probe(&p->dev, 1); + assert(ret); + + /* init a pipe screen */ + p->screen = pipe_loader_create_screen(p->dev, PIPE_SEARCH_DIR); + assert(p->screen); /* create the pipe driver context and cso context */ p->pipe = p->screen->context_create(p->screen, NULL); @@ -298,6 +299,7 @@ static void close_prog(struct program *p) cso_destroy_context(p->cso); p->pipe->destroy(p->pipe); p->screen->destroy(p->screen); + pipe_loader_release(&p->dev, 1); FREE(p); } diff --git a/src/gallium/tests/trivial/tri.c b/src/gallium/tests/trivial/tri.c index 9190f7824e9..f3e1e944154 100644 --- a/src/gallium/tests/trivial/tri.c +++ b/src/gallium/tests/trivial/tri.c @@ -55,16 +55,12 @@ #include "util/u_memory.h" /* util_make_[fragment|vertex]_passthrough_shader */ #include "util/u_simple_shaders.h" - -/* sw_screen_create: to get a software pipe driver */ -#include "target-helpers/inline_sw_helper.h" -/* debug_screen_wrap: to wrap with debug pipe drivers */ -#include "target-helpers/inline_debug_helper.h" -/* null software winsys */ -#include "sw/null/null_sw_winsys.h" +/* to get a hardware pipe driver */ +#include "pipe-loader/pipe_loader.h" struct program { + struct pipe_loader_device *dev; struct pipe_screen *screen; struct pipe_context *pipe; struct cso_context *cso; @@ -88,10 +84,15 @@ struct program static void init_prog(struct program *p) { struct pipe_surface surf_tmpl; - /* create the software rasterizer */ - p->screen = sw_screen_create(null_sw_create()); - /* wrap the screen with any debugger */ - p->screen = debug_screen_wrap(p->screen); + int ret; + + /* find a hardware device */ + ret = pipe_loader_probe(&p->dev, 1); + assert(ret); + + /* init a pipe screen */ + p->screen = pipe_loader_create_screen(p->dev, PIPE_SEARCH_DIR); + assert(p->screen); /* create the pipe driver context and cso context */ p->pipe = p->screen->context_create(p->screen, NULL); @@ -234,6 +235,7 @@ static 
void close_prog(struct program *p) cso_destroy_context(p->cso); p->pipe->destroy(p->pipe); p->screen->destroy(p->screen); + pipe_loader_release(&p->dev, 1); FREE(p); } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 4d343b8489b..fc57d676876 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -81,6 +81,10 @@ #define RADEON_INFO_IB_VM_MAX_SIZE 0x0f #endif +#ifndef RADEON_INFO_MAX_PIPES +#define RADEON_INFO_MAX_PIPES 0x10 +#endif + /* Enable/disable feature access for one command stream. * If enable == TRUE, return TRUE on success. @@ -299,6 +303,12 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) ws->info.r600_has_streamout = ws->info.drm_minor >= 13; } + /* Get max pipes, this is only needed for compute shaders. All evergreen+ + * chips have at least 2 pipes, so we use 2 as a default. */ + ws->info.r600_max_pipes = 2; + radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_PIPES, NULL, + &ws->info.r600_max_pipes); + return TRUE; } diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 99768248644..6f85b3e11d2 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -98,6 +98,7 @@ struct radeon_info { uint32_t r600_va_start; uint32_t r600_ib_vm_max_size; boolean r600_has_streamout; + uint32_t r600_max_pipes; }; enum radeon_feature_id { |