summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/swr/rasterizer/codegen
diff options
context:
space:
mode:
author: George Kyriazis <[email protected]> 2018-03-19 19:05:38 -0500
committer: George Kyriazis <[email protected]> 2018-04-18 10:51:38 -0500
commit: 8f848ada8a42d9aaa8136afa1bafe32281a0fb48 (patch)
tree: 6f2fc66cf63a9f53ba6e3e58e678702f1211c680 /src/gallium/drivers/swr/rasterizer/codegen
parent: ffc0aeb4ec90464ac124a4209e0027b34148833c (diff)
swr/rast: Start refactoring of builder/packetizer.
Move x86 intrinsic lowering to a separate pass. The builder now instantiates generic intrinsics for features not supported by LLVM. The separate x86 lowering pass is responsible for lowering these to valid x86 for the target SIMD architecture. It is currently a port of the existing code, to get it up and running quickly; it will eventually support optimized x86 for AVX, AVX2 and AVX512.

Reviewed-by: Bruce Cherniak <[email protected]>
Diffstat (limited to 'src/gallium/drivers/swr/rasterizer/codegen')
-rw-r--r-- src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py | 58
-rw-r--r-- src/gallium/drivers/swr/rasterizer/codegen/meson.build | 2
-rw-r--r-- src/gallium/drivers/swr/rasterizer/codegen/templates/gen_builder.hpp | 11
3 files changed, 41 insertions, 30 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index 324f24a3557..bdd785a155d 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -42,28 +42,28 @@ inst_aliases = {
}
intrinsics = [
- ['VGATHERPD', 'x86_avx2_gather_d_pd_256', ['src', 'pBase', 'indices', 'mask', 'scale']],
- ['VGATHERPS', 'x86_avx2_gather_d_ps_256', ['src', 'pBase', 'indices', 'mask', 'scale']],
- ['VGATHERPS_16', 'x86_avx512_gather_dps_512', ['src', 'pBase', 'indices', 'mask', 'scale']],
- ['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 'mask', 'scale']],
- ['VGATHERDD_16', 'x86_avx512_gather_dpi_512', ['src', 'pBase', 'indices', 'mask', 'scale']],
- ['VRCPPS', 'x86_avx_rcp_ps_256', ['a']],
- ['VROUND', 'x86_avx_round_ps_256', ['a', 'rounding']],
- ['BEXTR_32', 'x86_bmi_bextr_32', ['src', 'control']],
- ['VPSHUFB', 'x86_avx2_pshuf_b', ['a', 'b']],
- ['VPERMD', 'x86_avx2_permd', ['a', 'idx']],
- ['VPERMPS', 'x86_avx2_permps', ['idx', 'a']],
- ['VCVTPD2PS', 'x86_avx_cvt_pd2_ps_256', ['a']],
- ['VCVTPH2PS', 'x86_vcvtph2ps_256', ['a']],
- ['VCVTPS2PH', 'x86_vcvtps2ph_256', ['a', 'round']],
- ['VHSUBPS', 'x86_avx_hsub_ps_256', ['a', 'b']],
- ['VPTESTC', 'x86_avx_ptestc_256', ['a', 'b']],
- ['VPTESTZ', 'x86_avx_ptestz_256', ['a', 'b']],
- ['VFMADDPS', 'x86_fma_vfmadd_ps_256', ['a', 'b', 'c']],
- ['VMOVMSKPS', 'x86_avx_movmsk_ps_256', ['a']],
- ['VPHADDD', 'x86_avx2_phadd_d', ['a', 'b']],
- ['PDEP32', 'x86_bmi_pdep_32', ['a', 'b']],
- ['RDTSC', 'x86_rdtsc', []],
+ ['VGATHERPD', 'x86_avx2_gather_d_pd_256', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimd4FP64Ty'],
+ ['VGATHERPS', 'x86_avx2_gather_d_ps_256', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimdFP32Ty'],
+ ['VGATHERPS_16', 'x86_avx512_gather_dps_512', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimd16FP32Ty'],
+ ['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimdInt32Ty'],
+ ['VGATHERDD_16', 'x86_avx512_gather_dpi_512', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimd16Int32Ty'],
+ ['VRCPPS', 'x86_avx_rcp_ps_256', ['a'], 'mSimdFP32Ty'],
+ ['VROUND', 'x86_avx_round_ps_256', ['a', 'rounding'], 'mSimdFP32Ty'],
+ ['BEXTR_32', 'x86_bmi_bextr_32', ['src', 'control'], 'mInt32Ty'],
+ ['VPSHUFB', 'x86_avx2_pshuf_b', ['a', 'b'], 'mSimd32Int8Ty'],
+ ['VPERMD', 'x86_avx2_permd', ['a', 'idx'], 'mSimdInt32Ty'],
+ ['VPERMPS', 'x86_avx2_permps', ['idx', 'a'], 'mSimdFP32Ty'],
+ ['VCVTPD2PS', 'x86_avx_cvt_pd2_ps_256', ['a'], 'mSimdFP32Ty'],
+ ['VCVTPH2PS', 'x86_vcvtph2ps_256', ['a'], 'mSimdFP32Ty'],
+ ['VCVTPS2PH', 'x86_vcvtps2ph_256', ['a', 'round'], 'mSimdFP16Ty'],
+ ['VHSUBPS', 'x86_avx_hsub_ps_256', ['a', 'b'], 'mSimdFP32Ty'],
+ ['VPTESTC', 'x86_avx_ptestc_256', ['a', 'b'], 'mInt32Ty'],
+ ['VPTESTZ', 'x86_avx_ptestz_256', ['a', 'b'], 'mInt32Ty'],
+ ['VFMADDPS', 'x86_fma_vfmadd_ps_256', ['a', 'b', 'c'], 'mSimdFP32Ty'],
+ ['VMOVMSKPS', 'x86_avx_movmsk_ps_256', ['a'], 'mInt32Ty'],
+ ['VPHADDD', 'x86_avx2_phadd_d', ['a', 'b'], 'mSimdInt32Ty'],
+ ['PDEP32', 'x86_bmi_pdep_32', ['a', 'b'], 'mInt32Ty'],
+ ['RDTSC', 'x86_rdtsc', [], 'mInt64Ty'],
]
llvm_intrinsics = [
@@ -223,8 +223,8 @@ def generate_gen_h(functions, output_dir):
'''
Auto-generates macros for LLVM IR
'''
-def generate_x86_h(output_dir):
- filename = 'gen_builder_x86.hpp'
+def generate_meta_h(output_dir):
+ filename = 'gen_builder_meta.hpp'
output_filename = os.path.join(output_dir, filename)
functions = []
@@ -238,15 +238,17 @@ def generate_x86_h(output_dir):
functions.append({
'decl' : decl,
+ 'name' : inst[0],
'intrin' : inst[1],
'args' : inst[2],
+ 'returnType': inst[3]
})
MakoTemplateWriter.to_file(
template,
output_filename,
cmdline=sys.argv,
- comment='x86 intrinsics',
+ comment='meta intrinsics',
filename=filename,
functions=functions,
isX86=True, isIntrin=False)
@@ -291,7 +293,7 @@ def main():
parser.add_argument('--input', '-i', type=FileType('r'), help='Path to IRBuilder.h', required=False)
parser.add_argument('--output-dir', '-o', action='store', dest='output', help='Path to output directory', required=True)
parser.add_argument('--gen_h', help='Generate builder_gen.h', action='store_true', default=False)
- parser.add_argument('--gen_x86_h', help='Generate x86 intrinsics. No input is needed.', action='store_true', default=False)
+ parser.add_argument('--gen_meta_h', help='Generate meta intrinsics. No input is needed.', action='store_true', default=False)
parser.add_argument('--gen_intrin_h', help='Generate llvm intrinsics. No input is needed.', action='store_true', default=False)
args = parser.parse_args()
@@ -307,8 +309,8 @@ def main():
elif args.gen_h:
print('Need to specify --input for --gen_h!')
- if args.gen_x86_h:
- generate_x86_h(args.output)
+ if args.gen_meta_h:
+ generate_meta_h(args.output)
if args.gen_intrin_h:
generate_intrin_h(args.output)
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/meson.build b/src/gallium/drivers/swr/rasterizer/codegen/meson.build
index bbe6efff01a..841540e0f30 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/meson.build
+++ b/src/gallium/drivers/swr/rasterizer/codegen/meson.build
@@ -44,7 +44,7 @@ gen_knobs_h = custom_target(
# The generators above this are needed individually, while the below generators
# are all inputs to the same lib, so they don't need unique names.
files_swr_common += [
- gen_builder_hpp, gen_builder_x86_hpp, gen_knobs_h, gen_knobs_cpp
+ gen_builder_hpp, gen_builder_meta_hpp, gen_knobs_h, gen_knobs_cpp
]
foreach x : [[swr_context_files, 'gen_swr_context_llvm.h'],
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_builder.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_builder.hpp
index 5a47c9aa105..bcbcb30cc14 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_builder.hpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_builder.hpp
@@ -40,7 +40,16 @@
${func['decl']}
{
%if isX86:
- Function * pFunc = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::${func['intrin']});
+ %if len(func['args']) != 0:
+ SmallVector<Type*, ${len(func['args'])}> argTypes;
+ %for arg in func['args']:
+ argTypes.push_back(${arg}->getType());
+ %endfor
+ FunctionType* pFuncTy = FunctionType::get(${ func['returnType'] }, argTypes, false);
+ %else:
+ FunctionType* pFuncTy = FunctionType::get(${ func['returnType'] }, {}, false);
+ %endif:
+ Function* pFunc = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("meta.intrinsic.${func['name']}", pFuncTy));
return CALL(pFunc, std::initializer_list<Value*>{${argList}}, name);
%elif isIntrin:
%if len(func['types']) != 0: