diff options
author | George Kyriazis <[email protected]> | 2018-03-19 19:05:38 -0500 |
---|---|---|
committer | George Kyriazis <[email protected]> | 2018-04-18 10:51:38 -0500 |
commit | 8f848ada8a42d9aaa8136afa1bafe32281a0fb48 (patch) | |
tree | 6f2fc66cf63a9f53ba6e3e58e678702f1211c680 /src/gallium/drivers/swr/rasterizer/codegen | |
parent | ffc0aeb4ec90464ac124a4209e0027b34148833c (diff) |
swr/rast: Start refactoring of builder/packetizer.
Move x86 intrinsic lowering to a separate pass. Builder now instantiates
generic intrinsics for features not supported by LLVM. The separate x86
lowering pass is responsible for lowering to valid x86 for the target
SIMD architecture. Currently it's a port of existing code to get it
up and running quickly. It will eventually support optimized x86 for AVX,
AVX2 and AVX512.
Reviewed-by: Bruce Cherniak <[email protected]>
Diffstat (limited to 'src/gallium/drivers/swr/rasterizer/codegen')
3 files changed, 41 insertions, 30 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py index 324f24a3557..bdd785a155d 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py @@ -42,28 +42,28 @@ inst_aliases = { } intrinsics = [ - ['VGATHERPD', 'x86_avx2_gather_d_pd_256', ['src', 'pBase', 'indices', 'mask', 'scale']], - ['VGATHERPS', 'x86_avx2_gather_d_ps_256', ['src', 'pBase', 'indices', 'mask', 'scale']], - ['VGATHERPS_16', 'x86_avx512_gather_dps_512', ['src', 'pBase', 'indices', 'mask', 'scale']], - ['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 'mask', 'scale']], - ['VGATHERDD_16', 'x86_avx512_gather_dpi_512', ['src', 'pBase', 'indices', 'mask', 'scale']], - ['VRCPPS', 'x86_avx_rcp_ps_256', ['a']], - ['VROUND', 'x86_avx_round_ps_256', ['a', 'rounding']], - ['BEXTR_32', 'x86_bmi_bextr_32', ['src', 'control']], - ['VPSHUFB', 'x86_avx2_pshuf_b', ['a', 'b']], - ['VPERMD', 'x86_avx2_permd', ['a', 'idx']], - ['VPERMPS', 'x86_avx2_permps', ['idx', 'a']], - ['VCVTPD2PS', 'x86_avx_cvt_pd2_ps_256', ['a']], - ['VCVTPH2PS', 'x86_vcvtph2ps_256', ['a']], - ['VCVTPS2PH', 'x86_vcvtps2ph_256', ['a', 'round']], - ['VHSUBPS', 'x86_avx_hsub_ps_256', ['a', 'b']], - ['VPTESTC', 'x86_avx_ptestc_256', ['a', 'b']], - ['VPTESTZ', 'x86_avx_ptestz_256', ['a', 'b']], - ['VFMADDPS', 'x86_fma_vfmadd_ps_256', ['a', 'b', 'c']], - ['VMOVMSKPS', 'x86_avx_movmsk_ps_256', ['a']], - ['VPHADDD', 'x86_avx2_phadd_d', ['a', 'b']], - ['PDEP32', 'x86_bmi_pdep_32', ['a', 'b']], - ['RDTSC', 'x86_rdtsc', []], + ['VGATHERPD', 'x86_avx2_gather_d_pd_256', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimd4FP64Ty'], + ['VGATHERPS', 'x86_avx2_gather_d_ps_256', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimdFP32Ty'], + ['VGATHERPS_16', 'x86_avx512_gather_dps_512', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimd16FP32Ty'], + 
['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimdInt32Ty'], + ['VGATHERDD_16', 'x86_avx512_gather_dpi_512', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimd16Int32Ty'], + ['VRCPPS', 'x86_avx_rcp_ps_256', ['a'], 'mSimdFP32Ty'], + ['VROUND', 'x86_avx_round_ps_256', ['a', 'rounding'], 'mSimdFP32Ty'], + ['BEXTR_32', 'x86_bmi_bextr_32', ['src', 'control'], 'mInt32Ty'], + ['VPSHUFB', 'x86_avx2_pshuf_b', ['a', 'b'], 'mSimd32Int8Ty'], + ['VPERMD', 'x86_avx2_permd', ['a', 'idx'], 'mSimdInt32Ty'], + ['VPERMPS', 'x86_avx2_permps', ['idx', 'a'], 'mSimdFP32Ty'], + ['VCVTPD2PS', 'x86_avx_cvt_pd2_ps_256', ['a'], 'mSimdFP32Ty'], + ['VCVTPH2PS', 'x86_vcvtph2ps_256', ['a'], 'mSimdFP32Ty'], + ['VCVTPS2PH', 'x86_vcvtps2ph_256', ['a', 'round'], 'mSimdFP16Ty'], + ['VHSUBPS', 'x86_avx_hsub_ps_256', ['a', 'b'], 'mSimdFP32Ty'], + ['VPTESTC', 'x86_avx_ptestc_256', ['a', 'b'], 'mInt32Ty'], + ['VPTESTZ', 'x86_avx_ptestz_256', ['a', 'b'], 'mInt32Ty'], + ['VFMADDPS', 'x86_fma_vfmadd_ps_256', ['a', 'b', 'c'], 'mSimdFP32Ty'], + ['VMOVMSKPS', 'x86_avx_movmsk_ps_256', ['a'], 'mInt32Ty'], + ['VPHADDD', 'x86_avx2_phadd_d', ['a', 'b'], 'mSimdInt32Ty'], + ['PDEP32', 'x86_bmi_pdep_32', ['a', 'b'], 'mInt32Ty'], + ['RDTSC', 'x86_rdtsc', [], 'mInt64Ty'], ] llvm_intrinsics = [ @@ -223,8 +223,8 @@ def generate_gen_h(functions, output_dir): ''' Auto-generates macros for LLVM IR ''' -def generate_x86_h(output_dir): - filename = 'gen_builder_x86.hpp' +def generate_meta_h(output_dir): + filename = 'gen_builder_meta.hpp' output_filename = os.path.join(output_dir, filename) functions = [] @@ -238,15 +238,17 @@ def generate_x86_h(output_dir): functions.append({ 'decl' : decl, + 'name' : inst[0], 'intrin' : inst[1], 'args' : inst[2], + 'returnType': inst[3] }) MakoTemplateWriter.to_file( template, output_filename, cmdline=sys.argv, - comment='x86 intrinsics', + comment='meta intrinsics', filename=filename, functions=functions, isX86=True, isIntrin=False) @@ -291,7 
+293,7 @@ def main(): parser.add_argument('--input', '-i', type=FileType('r'), help='Path to IRBuilder.h', required=False) parser.add_argument('--output-dir', '-o', action='store', dest='output', help='Path to output directory', required=True) parser.add_argument('--gen_h', help='Generate builder_gen.h', action='store_true', default=False) - parser.add_argument('--gen_x86_h', help='Generate x86 intrinsics. No input is needed.', action='store_true', default=False) + parser.add_argument('--gen_meta_h', help='Generate meta intrinsics. No input is needed.', action='store_true', default=False) parser.add_argument('--gen_intrin_h', help='Generate llvm intrinsics. No input is needed.', action='store_true', default=False) args = parser.parse_args() @@ -307,8 +309,8 @@ def main(): elif args.gen_h: print('Need to specify --input for --gen_h!') - if args.gen_x86_h: - generate_x86_h(args.output) + if args.gen_meta_h: + generate_meta_h(args.output) if args.gen_intrin_h: generate_intrin_h(args.output) diff --git a/src/gallium/drivers/swr/rasterizer/codegen/meson.build b/src/gallium/drivers/swr/rasterizer/codegen/meson.build index bbe6efff01a..841540e0f30 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/meson.build +++ b/src/gallium/drivers/swr/rasterizer/codegen/meson.build @@ -44,7 +44,7 @@ gen_knobs_h = custom_target( # The generators above this are needed individually, while the below generators # are all inputs to the same lib, so they don't need unique names. 
files_swr_common += [ - gen_builder_hpp, gen_builder_x86_hpp, gen_knobs_h, gen_knobs_cpp + gen_builder_hpp, gen_builder_meta_hpp, gen_knobs_h, gen_knobs_cpp ] foreach x : [[swr_context_files, 'gen_swr_context_llvm.h'], diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_builder.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_builder.hpp index 5a47c9aa105..bcbcb30cc14 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_builder.hpp +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_builder.hpp @@ -40,7 +40,16 @@ ${func['decl']} { %if isX86: - Function * pFunc = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::${func['intrin']}); + %if len(func['args']) != 0: + SmallVector<Type*, ${len(func['args'])}> argTypes; + %for arg in func['args']: + argTypes.push_back(${arg}->getType()); + %endfor + FunctionType* pFuncTy = FunctionType::get(${ func['returnType'] }, argTypes, false); + %else: + FunctionType* pFuncTy = FunctionType::get(${ func['returnType'] }, {}, false); + %endif: + Function* pFunc = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("meta.intrinsic.${func['name']}", pFuncTy)); return CALL(pFunc, std::initializer_list<Value*>{${argList}}, name); %elif isIntrin: %if len(func['types']) != 0: |