aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/radeon
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/radeon')
-rw-r--r--src/gallium/drivers/radeon/AMDGPU.h41
-rw-r--r--src/gallium/drivers/radeon/AMDGPUConstants.pm44
-rw-r--r--src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp2
-rw-r--r--src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl27
-rw-r--r--src/gallium/drivers/radeon/AMDGPUGenShaderPatterns.pl30
-rw-r--r--src/gallium/drivers/radeon/AMDGPUISelLowering.cpp4
-rw-r--r--src/gallium/drivers/radeon/AMDGPUISelLowering.h5
-rw-r--r--src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp5
-rw-r--r--src/gallium/drivers/radeon/AMDGPUInstrInfo.h31
-rw-r--r--src/gallium/drivers/radeon/AMDGPUInstructions.td37
-rw-r--r--src/gallium/drivers/radeon/AMDGPUIntrinsics.td22
-rw-r--r--src/gallium/drivers/radeon/AMDGPULowerInstructions.cpp14
-rw-r--r--src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.cpp38
-rw-r--r--src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.h40
-rw-r--r--src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp4
-rw-r--r--src/gallium/drivers/radeon/AMDGPURegisterInfo.h5
-rw-r--r--src/gallium/drivers/radeon/AMDGPURegisterInfo.td4
-rw-r--r--src/gallium/drivers/radeon/AMDGPUReorderPreloadInstructions.cpp66
-rw-r--r--src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp29
-rw-r--r--src/gallium/drivers/radeon/AMDGPUTargetMachine.h7
-rw-r--r--src/gallium/drivers/radeon/AMDGPUUtil.cpp47
-rw-r--r--src/gallium/drivers/radeon/AMDGPUUtil.h25
-rw-r--r--src/gallium/drivers/radeon/AMDIL.h25
-rw-r--r--src/gallium/drivers/radeon/AMDIL.td2
-rw-r--r--src/gallium/drivers/radeon/AMDIL7XXDevice.cpp2
-rw-r--r--src/gallium/drivers/radeon/AMDILBase.td5
-rw-r--r--src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp33
-rw-r--r--src/gallium/drivers/radeon/AMDILCodeEmitter.h10
-rw-r--r--src/gallium/drivers/radeon/AMDILConversions.td2
-rw-r--r--src/gallium/drivers/radeon/AMDILDevice.cpp2
-rw-r--r--src/gallium/drivers/radeon/AMDILDeviceInfo.cpp10
-rw-r--r--src/gallium/drivers/radeon/AMDILDeviceInfo.h6
-rw-r--r--src/gallium/drivers/radeon/AMDILDevices.h2
-rw-r--r--src/gallium/drivers/radeon/AMDILEnumeratedTypes.td2
-rw-r--r--src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp2
-rw-r--r--src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp102
-rw-r--r--src/gallium/drivers/radeon/AMDILISelLowering.cpp57
-rw-r--r--src/gallium/drivers/radeon/AMDILInstrInfo.cpp302
-rw-r--r--src/gallium/drivers/radeon/AMDILInstrInfo.h69
-rw-r--r--src/gallium/drivers/radeon/AMDILInstructions.td2
-rw-r--r--src/gallium/drivers/radeon/AMDILMCCodeEmitter.cpp158
-rw-r--r--src/gallium/drivers/radeon/AMDILMachinePeephole.cpp19
-rw-r--r--src/gallium/drivers/radeon/AMDILMultiClass.td2
-rw-r--r--src/gallium/drivers/radeon/AMDILNIDevice.cpp2
-rw-r--r--src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp133
-rw-r--r--src/gallium/drivers/radeon/AMDILRegisterInfo.cpp7
-rw-r--r--src/gallium/drivers/radeon/AMDILSIDevice.cpp84
-rw-r--r--src/gallium/drivers/radeon/AMDILSIDevice.h74
-rw-r--r--src/gallium/drivers/radeon/AMDILSubtarget.cpp5
-rw-r--r--src/gallium/drivers/radeon/AMDILSubtarget.h2
-rw-r--r--src/gallium/drivers/radeon/AMDILTargetMachine.cpp10
-rw-r--r--src/gallium/drivers/radeon/AMDILTokenDesc.td2
-rw-r--r--src/gallium/drivers/radeon/AMDILUtilityFunctions.cpp683
-rw-r--r--src/gallium/drivers/radeon/AMDILUtilityFunctions.h186
-rw-r--r--src/gallium/drivers/radeon/AMDILVersion.td2
-rw-r--r--src/gallium/drivers/radeon/Makefile12
-rw-r--r--src/gallium/drivers/radeon/Makefile.sources9
-rw-r--r--src/gallium/drivers/radeon/R600CodeEmitter.cpp125
-rw-r--r--src/gallium/drivers/radeon/R600GenRegisterInfo.pl36
-rw-r--r--src/gallium/drivers/radeon/R600ISelLowering.cpp54
-rw-r--r--src/gallium/drivers/radeon/R600ISelLowering.h4
-rw-r--r--src/gallium/drivers/radeon/R600InstrInfo.cpp16
-rw-r--r--src/gallium/drivers/radeon/R600InstrInfo.h5
-rw-r--r--src/gallium/drivers/radeon/R600Instructions.td169
-rw-r--r--src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td (renamed from src/gallium/drivers/radeon/R600Intrinsics.td)4
-rw-r--r--src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td (renamed from src/gallium/drivers/radeon/R600InstrFormats.td)8
-rw-r--r--src/gallium/drivers/radeon/R600KernelParameters.cpp259
-rw-r--r--src/gallium/drivers/radeon/R600KernelParameters.h28
-rw-r--r--src/gallium/drivers/radeon/R600LowerInstructions.cpp19
-rw-r--r--src/gallium/drivers/radeon/R600LowerShaderInstructions.cpp143
-rw-r--r--src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp16
-rw-r--r--src/gallium/drivers/radeon/R600MachineFunctionInfo.h33
-rw-r--r--src/gallium/drivers/radeon/R600OpenCLUtils.h49
-rw-r--r--src/gallium/drivers/radeon/R600RegisterInfo.cpp23
-rw-r--r--src/gallium/drivers/radeon/R600RegisterInfo.h4
-rw-r--r--src/gallium/drivers/radeon/R600Schedule.td6
-rw-r--r--src/gallium/drivers/radeon/SIAssignInterpRegs.cpp9
-rw-r--r--src/gallium/drivers/radeon/SICodeEmitter.cpp10
-rw-r--r--src/gallium/drivers/radeon/SIGenRegisterInfo.pl11
-rw-r--r--src/gallium/drivers/radeon/SIISelLowering.cpp5
-rw-r--r--src/gallium/drivers/radeon/SIISelLowering.h4
-rw-r--r--src/gallium/drivers/radeon/SIInstrFormats.td14
-rw-r--r--src/gallium/drivers/radeon/SIInstrInfo.cpp6
-rw-r--r--src/gallium/drivers/radeon/SIInstrInfo.h4
-rw-r--r--src/gallium/drivers/radeon/SIInstrInfo.td13
-rw-r--r--src/gallium/drivers/radeon/SIInstructions.td16
-rw-r--r--src/gallium/drivers/radeon/SIIntrinsics.td4
-rw-r--r--src/gallium/drivers/radeon/SILowerShaderInstructions.cpp90
-rw-r--r--src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp6
-rw-r--r--src/gallium/drivers/radeon/SIMachineFunctionInfo.h6
-rw-r--r--src/gallium/drivers/radeon/SIPropagateImmReads.cpp6
-rw-r--r--src/gallium/drivers/radeon/SIRegisterInfo.cpp4
-rw-r--r--src/gallium/drivers/radeon/SIRegisterInfo.h4
-rw-r--r--src/gallium/drivers/radeon/SISchedule.td4
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm.h38
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm_emit.cpp13
-rw-r--r--src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c450
97 files changed, 1753 insertions, 2517 deletions
diff --git a/src/gallium/drivers/radeon/AMDGPU.h b/src/gallium/drivers/radeon/AMDGPU.h
index eff002a5eae..0f42cb744d3 100644
--- a/src/gallium/drivers/radeon/AMDGPU.h
+++ b/src/gallium/drivers/radeon/AMDGPU.h
@@ -1,4 +1,4 @@
-//===-- AMDGPU.h - TODO: Add brief description -------===//
+//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -6,10 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
#ifndef AMDGPU_H
#define AMDGPU_H
@@ -19,29 +15,24 @@
#include "llvm/Target/TargetMachine.h"
namespace llvm {
- class FunctionPass;
- class AMDGPUTargetMachine;
-
- FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS);
- FunctionPass *createR600LowerShaderInstructionsPass(TargetMachine &tm);
- FunctionPass *createR600LowerInstructionsPass(TargetMachine &tm);
-
- FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
- FunctionPass *createSIInitMachineFunctionInfoPass(TargetMachine &tm);
- FunctionPass *createSILowerShaderInstructionsPass(TargetMachine &tm);
- FunctionPass *createSIPropagateImmReadsPass(TargetMachine &tm);
- FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
- FunctionPass *createAMDGPUReorderPreloadInstructionsPass(TargetMachine &tm);
+class FunctionPass;
+class AMDGPUTargetMachine;
- FunctionPass *createAMDGPULowerInstructionsPass(TargetMachine &tm);
- FunctionPass *createAMDGPULowerShaderInstructionsPass(TargetMachine &tm);
+// R600 Passes
+FunctionPass* createR600KernelParametersPass(const TargetData* TD);
+FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS);
+FunctionPass *createR600LowerInstructionsPass(TargetMachine &tm);
- FunctionPass *createAMDGPUDelimitInstGroupsPass(TargetMachine &tm);
+// SI Passes
+FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
+FunctionPass *createSIPropagateImmReadsPass(TargetMachine &tm);
+FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
- FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
+// Passes common to R600 and SI
+FunctionPass *createAMDGPULowerInstructionsPass(TargetMachine &tm);
+FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
- FunctionPass *createAMDGPUFixRegClassesPass(TargetMachine &tm);
+} // End namespace llvm
-} /* End namespace llvm */
-#endif /* AMDGPU_H */
+#endif // AMDGPU_H
diff --git a/src/gallium/drivers/radeon/AMDGPUConstants.pm b/src/gallium/drivers/radeon/AMDGPUConstants.pm
deleted file mode 100644
index b64ff49c187..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUConstants.pm
+++ /dev/null
@@ -1,44 +0,0 @@
-#===-- AMDGPUConstants.pm - TODO: Add brief description -------===#
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===----------------------------------------------------------------------===#
-#
-# TODO: Add full description
-#
-#===----------------------------------------------------------------------===#
-
-package AMDGPUConstants;
-
-use base 'Exporter';
-
-use constant CONST_REG_COUNT => 256;
-use constant TEMP_REG_COUNT => 128;
-
-our @EXPORT = ('TEMP_REG_COUNT', 'CONST_REG_COUNT', 'get_hw_index', 'get_chan_str');
-
-sub get_hw_index {
- my ($index) = @_;
- return int($index / 4);
-}
-
-sub get_chan_str {
- my ($index) = @_;
- my $chan = $index % 4;
- if ($chan == 0 ) {
- return 'X';
- } elsif ($chan == 1) {
- return 'Y';
- } elsif ($chan == 2) {
- return 'Z';
- } elsif ($chan == 3) {
- return 'W';
- } else {
- die("Unknown chan value: $chan");
- }
-}
-
-1;
diff --git a/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp b/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp
index ce947f8ff78..8e82b8438bb 100644
--- a/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp
@@ -34,7 +34,7 @@ namespace {
virtual bool runOnMachineFunction(MachineFunction &MF);
};
-} /* End anonymous namespace */
+} // End anonymous namespace
char AMDGPUConvertToISAPass::ID = 0;
diff --git a/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl b/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl
index 1fd4fb04b3e..130eaac72bc 100644
--- a/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl
+++ b/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl
@@ -1,15 +1,32 @@
-#===-- AMDGPUGenInstrEnums.pl - TODO: Add brief description -------===#
+#===-- AMDGPUGenInstrEnums.pl - Script for generating instruction enums ----===#
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
-#===----------------------------------------------------------------------===#
+#===-----------------------------------------------------------------------===#
#
-# TODO: Add full description
+# This perl script is used to generate the following files:
#
-#===----------------------------------------------------------------------===#
+# 1. perl AMDGPUGenInstrEnums.pl td > AMDGPUInstrEnums.td
+#
+# This file contains Tablegen constants used for matching hw instructions
+# from R600 and SI with functionally similar AMDIL instructions. It also
+# contains definitions of floating point constants like pi (in hex notation)
+# that are used in some of the shader patterns.
+#
+# 2. perl AMDGPUGenInstrEnums.pl h > AMDGPUInstrEnums.h
+#
+# This file contains cpp enums that match the constant values in
+# AMDGPUInstrEnums.td
+#
+# 3. perl AMDGPUGenInstrEnums.pl inc > AMDGPUInstrEnums.include
+#
+# This file contains a function called GetRealAMDILOpcode which maps the
+# constant values defined in AMDGPUInstrEnums.h to the corresponding AMDIL
+# instructions.
+#===-----------------------------------------------------------------------===#
use warnings;
use strict;
@@ -41,7 +58,7 @@ my $FILE_TYPE = $ARGV[0];
open AMDIL, '<', 'AMDILInstructions.td';
-my @INST_ENUMS = ('NONE', 'FEQ', 'FGE', 'FLT', 'FNE', 'MOVE_f32', 'MOVE_i32', 'FTOI', 'ITOF', 'CMOVLOG_f32', 'UGT', 'IGE', 'INE', 'UGE', 'IEQ');
+my @INST_ENUMS = ('NONE', 'FEQ', 'FGE', 'FLT', 'FNE', 'MOVE_f32', 'MOVE_i32', 'FTOI', 'ITOF', 'CMOVLOG_f32', 'UGT', 'IGE', 'INE', 'UGE', 'IEQ', 'BINARY_OR_i32', 'BINARY_NOT_i32');
while (<AMDIL>) {
if ($_ =~ /defm\s+([A-Z_]+)\s+:\s+([A-Za-z0-9]+)</) {
diff --git a/src/gallium/drivers/radeon/AMDGPUGenShaderPatterns.pl b/src/gallium/drivers/radeon/AMDGPUGenShaderPatterns.pl
deleted file mode 100644
index 60523a7b48f..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUGenShaderPatterns.pl
+++ /dev/null
@@ -1,30 +0,0 @@
-#===-- AMDGPUGenShaderPatterns.pl - TODO: Add brief description -------===#
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===----------------------------------------------------------------------===#
-#
-# TODO: Add full description
-#
-#===----------------------------------------------------------------------===#
-
-use strict;
-use warnings;
-
-use AMDGPUConstants;
-
-my $reg_prefix = $ARGV[0];
-
-for (my $i = 0; $i < CONST_REG_COUNT * 4; $i++) {
- my $index = get_hw_index($i);
- my $chan = get_chan_str($i);
-print <<STRING;
-def : Pat <
- (int_AMDGPU_load_const $i),
- (f32 (MOV (f32 $reg_prefix$index\_$chan)))
->;
-STRING
-}
diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
index 2c1052fd8ea..2bdc8a759f2 100644
--- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
@@ -1,4 +1,4 @@
-//===-- AMDGPUISelLowering.cpp - TODO: Add brief description -------===//
+//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// This is the parent TargetLowering class for hardware code gen targets.
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h b/src/gallium/drivers/radeon/AMDGPUISelLowering.h
index 3c5beb1cdae..1b3f71006e2 100644
--- a/src/gallium/drivers/radeon/AMDGPUISelLowering.h
+++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.h
@@ -1,4 +1,4 @@
-//===-- AMDGPUISelLowering.h - TODO: Add brief description -------===//
+//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// This file contains the interface definition of the TargetLowering class
+// that is common to all AMD GPUs.
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp
index 4742283f688..ecd8ac90526 100644
--- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp
@@ -108,9 +108,4 @@ unsigned AMDGPUInstrInfo::getISAOpcode(unsigned opcode) const
}
}
-bool AMDGPUInstrInfo::isRegPreload(const MachineInstr &MI) const
-{
- return (get(MI.getOpcode()).TSFlags >> AMDGPU_TFLAG_SHIFTS::PRELOAD_REG) & 0x1;
-}
-
#include "AMDGPUInstrEnums.include"
diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
index fa009bc6302..930b41e7191 100644
--- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
+++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
@@ -1,4 +1,4 @@
-//===-- AMDGPUInstrInfo.h - TODO: Add brief description -------===//
+//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// This file contains the definition of a TargetInstrInfo class that is common
+// to all AMD GPUs.
//
//===----------------------------------------------------------------------===//
@@ -21,17 +22,17 @@
namespace llvm {
- class AMDGPUTargetMachine;
- class MachineFunction;
- class MachineInstr;
- class MachineInstrBuilder;
+class AMDGPUTargetMachine;
+class MachineFunction;
+class MachineInstr;
+class MachineInstrBuilder;
- class AMDGPUInstrInfo : public AMDILInstrInfo {
- private:
+class AMDGPUInstrInfo : public AMDILInstrInfo {
+private:
AMDGPUTargetMachine & TM;
std::map<unsigned, unsigned> amdilToISA;
- public:
+public:
explicit AMDGPUInstrInfo(AMDGPUTargetMachine &tm);
virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
@@ -41,19 +42,9 @@ namespace llvm {
virtual MachineInstr * convertToISA(MachineInstr & MI, MachineFunction &MF,
DebugLoc DL) const;
- bool isRegPreload(const MachineInstr &MI) const;
-
#include "AMDGPUInstrEnums.h.include"
- };
+};
} // End llvm namespace
-/* AMDGPU target flags are stored in bits 32-39 */
-namespace AMDGPU_TFLAG_SHIFTS {
- enum TFLAGS {
- PRELOAD_REG = 32
- };
-}
-
-
#endif // AMDGPUINSTRINFO_H_
diff --git a/src/gallium/drivers/radeon/AMDGPUInstructions.td b/src/gallium/drivers/radeon/AMDGPUInstructions.td
index 0433c8dcd95..f689356e488 100644
--- a/src/gallium/drivers/radeon/AMDGPUInstructions.td
+++ b/src/gallium/drivers/radeon/AMDGPUInstructions.td
@@ -1,4 +1,4 @@
-//===-- AMDGPUInstructions.td - TODO: Add brief description -------===//
+//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// This file contains instruction defs that are common to all hw codegen
+// targets.
//
//===----------------------------------------------------------------------===//
@@ -16,14 +17,12 @@ include "AMDGPUInstrEnums.td"
class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
field bits<16> AMDILOp = 0;
field bits<3> Gen = 0;
- field bit PreloadReg = 0;
let Namespace = "AMDIL";
let OutOperandList = outs;
let InOperandList = ins;
let AsmString = asm;
let Pattern = pattern;
- let TSFlags{32} = PreloadReg;
let TSFlags{42-40} = Gen;
let TSFlags{63-48} = AMDILOp;
}
@@ -37,42 +36,12 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
let isCodeGenOnly = 1 in {
- def EXPORT_REG : AMDGPUShaderInst <
- (outs),
- (ins GPRF32:$src),
- "EXPORT_REG $src",
- [(int_AMDGPU_export_reg GPRF32:$src)]
- >;
-
- def LOAD_INPUT : AMDGPUShaderInst <
- (outs GPRF32:$dst),
- (ins i32imm:$src),
- "LOAD_INPUT $dst, $src",
- [] >{
- let PreloadReg = 1;
- }
-
def MASK_WRITE : AMDGPUShaderInst <
(outs),
(ins GPRF32:$src),
"MASK_WRITE $src",
[]
>;
-
- def RESERVE_REG : AMDGPUShaderInst <
- (outs GPRF32:$dst),
- (ins i32imm:$src),
- "RESERVE_REG $dst, $src",
- [(set GPRF32:$dst, (int_AMDGPU_reserve_reg imm:$src))]> {
- let PreloadReg = 1;
- }
-
- def STORE_OUTPUT: AMDGPUShaderInst <
- (outs GPRF32:$dst),
- (ins GPRF32:$src0, i32imm:$src1),
- "STORE_OUTPUT $dst, $src0, $src1",
- [(set GPRF32:$dst, (int_AMDGPU_store_output GPRF32:$src0, imm:$src1))]
- >;
}
/* Generic helper patterns for intrinsics */
diff --git a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td
index d2cda0db936..398fd11431f 100644
--- a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td
+++ b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td
@@ -1,4 +1,4 @@
-//===-- AMDGPUIntrinsics.td - TODO: Add brief description -------===//
+//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,17 +7,16 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// This file defines intrinsics that are used by all hw codegen targets.
//
//===----------------------------------------------------------------------===//
let TargetPrefix = "AMDGPU", isTarget = 1 in {
- def int_AMDGPU_export_reg : Intrinsic<[], [llvm_float_ty], []>;
def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], []>;
def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], []>;
- def int_AMDGPU_reserve_reg : Intrinsic<[llvm_float_ty], [llvm_i32_ty], []>;
- def int_AMDGPU_store_output : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], []>;
+ def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], []>;
def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], []>;
@@ -26,7 +25,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], []>;
def int_AMDGPU_floor : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
- def int_AMDGPU_kill : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+ def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
def int_AMDGPU_kilp : Intrinsic<[], [], []>;
def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
@@ -35,7 +34,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
- def int_AMDGPU_sge : BinaryIntFloat;
+ def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
def int_AMDGPU_sin : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
@@ -43,9 +42,18 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+ def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], []>;
}
let TargetPrefix = "TGSI", isTarget = 1 in {
diff --git a/src/gallium/drivers/radeon/AMDGPULowerInstructions.cpp b/src/gallium/drivers/radeon/AMDGPULowerInstructions.cpp
index b49d0dddf65..2e455fea8ab 100644
--- a/src/gallium/drivers/radeon/AMDGPULowerInstructions.cpp
+++ b/src/gallium/drivers/radeon/AMDGPULowerInstructions.cpp
@@ -1,4 +1,4 @@
-//===-- AMDGPULowerInstructions.cpp - TODO: Add brief description -------===//
+//===-- AMDGPULowerInstructions.cpp - AMDGPU lowering pass ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// This pass lowers unsupported AMDIL MachineInstrs to LLVM pseudo
+// MachineInstrs for hw codegen targets.
//
//===----------------------------------------------------------------------===//
@@ -27,7 +28,7 @@ namespace {
private:
static char ID;
TargetMachine &TM;
- void lowerVCREATE_v4f32(MachineInstr &MI, MachineBasicBlock::iterator I,
+ void lowerVCREATE_v4(MachineInstr &MI, MachineBasicBlock::iterator I,
MachineBasicBlock &MBB, MachineFunction &MF);
public:
@@ -56,8 +57,9 @@ bool AMDGPULowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
switch (MI.getOpcode()) {
default: continue;
- case AMDIL::VCREATE_v4f32: lowerVCREATE_v4f32(MI, I, MBB, MF); break;
-
+ case AMDIL::VCREATE_v4f32:
+ case AMDIL::VCREATE_v4i32:
+ lowerVCREATE_v4(MI, I, MBB, MF); break;
}
MI.eraseFromParent();
}
@@ -65,7 +67,7 @@ bool AMDGPULowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
return false;
}
-void AMDGPULowerInstructionsPass::lowerVCREATE_v4f32(MachineInstr &MI,
+void AMDGPULowerInstructionsPass::lowerVCREATE_v4(MachineInstr &MI,
MachineBasicBlock::iterator I, MachineBasicBlock &MBB, MachineFunction &MF)
{
MachineRegisterInfo & MRI = MF.getRegInfo();
diff --git a/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.cpp b/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.cpp
deleted file mode 100644
index d33055ccb87..00000000000
--- a/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-//===-- AMDGPULowerShaderInstructions.cpp - TODO: Add brief description -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "AMDGPULowerShaderInstructions.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-using namespace llvm;
-
-void AMDGPULowerShaderInstructionsPass::preloadRegister(MachineFunction * MF,
- const TargetInstrInfo * TII, unsigned physReg, unsigned virtReg) const
-{
- if (!MRI->isLiveIn(physReg)) {
- MRI->addLiveIn(physReg, virtReg);
- MachineBasicBlock &EntryMBB = MF->front();
- BuildMI(MF->front(), EntryMBB.begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
- virtReg)
- .addReg(physReg);
- } else {
- /* We can't mark the same register as preloaded twice, but we still must
- * associate virtReg with the correct preloaded register. */
- unsigned newReg = MRI->getLiveInVirtReg(physReg);
- MRI->replaceRegWith(virtReg, newReg);
- }
-}
diff --git a/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.h b/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.h
deleted file mode 100644
index 5ee77fafe2b..00000000000
--- a/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===-- AMDGPULowerShaderInstructions.h - TODO: Add brief description -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef AMDGPU_LOWER_SHADER_INSTRUCTIONS
-#define AMDGPU_LOWER_SHADER_INSTRUCTIONS
-
-namespace llvm {
-
-class MachineFunction;
-class MachineRegisterInfo;
-class TargetInstrInfo;
-
-class AMDGPULowerShaderInstructionsPass {
-
- protected:
- MachineRegisterInfo * MRI;
- /**
- * @param physReg The physical register that will be preloaded.
- * @param virtReg The virtual register that currently holds the
- * preloaded value.
- */
- void preloadRegister(MachineFunction * MF, const TargetInstrInfo * TII,
- unsigned physReg, unsigned virtReg) const;
-};
-
-} // end namespace llvm
-
-
-#endif // AMDGPU_LOWER_SHADER_INSTRUCTIONS
diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp b/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp
index 162a49116a0..ad48335fd33 100644
--- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp
+++ b/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===-- AMDGPURegisterInfo.cpp - TODO: Add brief description -------===//
+//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// Parent TargetRegisterInfo class common to all hw codegen targets.
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.h b/src/gallium/drivers/radeon/AMDGPURegisterInfo.h
index f4492e9795d..d545c06f69e 100644
--- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.h
+++ b/src/gallium/drivers/radeon/AMDGPURegisterInfo.h
@@ -1,4 +1,4 @@
-//===-- AMDGPURegisterInfo.h - TODO: Add brief description -------===//
+//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// This file contains the TargetRegisterInfo interface that is implemented
+// by all hw codegen targets.
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.td b/src/gallium/drivers/radeon/AMDGPURegisterInfo.td
index 173d6622569..1707903ae7e 100644
--- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.td
+++ b/src/gallium/drivers/radeon/AMDGPURegisterInfo.td
@@ -1,4 +1,4 @@
-//===-- AMDGPURegisterInfo.td - TODO: Add brief description -------===//
+//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// Tablegen register definitions common to all hw codegen targets.
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/AMDGPUReorderPreloadInstructions.cpp b/src/gallium/drivers/radeon/AMDGPUReorderPreloadInstructions.cpp
deleted file mode 100644
index c923f19c39f..00000000000
--- a/src/gallium/drivers/radeon/AMDGPUReorderPreloadInstructions.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-//===-- AMDGPUReorderPreloadInstructions.cpp - TODO: Add brief description -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDIL.h"
-#include "AMDILInstrInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Function.h"
-
-using namespace llvm;
-
-namespace {
- class AMDGPUReorderPreloadInstructionsPass : public MachineFunctionPass {
-
- private:
- static char ID;
- TargetMachine &TM;
-
- public:
- AMDGPUReorderPreloadInstructionsPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TM(tm) { }
-
- bool runOnMachineFunction(MachineFunction &MF);
-
- const char *getPassName() const { return "AMDGPU Reorder Preload Instructions"; }
- };
-} /* End anonymous namespace */
-
-char AMDGPUReorderPreloadInstructionsPass::ID = 0;
-
-FunctionPass *llvm::createAMDGPUReorderPreloadInstructionsPass(TargetMachine &tm) {
- return new AMDGPUReorderPreloadInstructionsPass(tm);
-}
-
-/* This pass moves instructions that represent preloaded registers to the
- * start of the program. */
-bool AMDGPUReorderPreloadInstructionsPass::runOnMachineFunction(MachineFunction &MF)
-{
- const AMDGPUInstrInfo * TII =
- static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
-
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
- for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
- I != MBB.end(); I = Next, Next = llvm::next(I) ) {
- MachineInstr &MI = *I;
- if (TII->isRegPreload(MI)) {
- MF.front().insert(MF.front().begin(), MI.removeFromParent());
- }
- }
- }
- return false;
-}
diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
index 313349ce01b..c1c21abc9c1 100644
--- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
@@ -1,4 +1,4 @@
-//===-- AMDGPUTargetMachine.cpp - TODO: Add brief description -------===//
+//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// The AMDGPU target machine contains all of the hardware specific information
+// needed to emit code for R600 and SI GPUs.
//
//===----------------------------------------------------------------------===//
@@ -16,7 +17,6 @@
#include "AMDILTargetMachine.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
-#include "R600KernelParameters.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "llvm/Analysis/Passes.h"
@@ -112,31 +112,28 @@ AMDGPUPassConfig::addPreISel()
{
const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
- PM.add(createR600KernelParametersPass(
+ PM->add(createR600KernelParametersPass(
getAMDGPUTargetMachine().getTargetData()));
}
return false;
}
bool AMDGPUPassConfig::addInstSelector() {
- PM.add(createAMDILPeepholeOpt(*TM));
- PM.add(createAMDILISelDag(getAMDGPUTargetMachine()));
+ PM->add(createAMDILPeepholeOpt(*TM));
+ PM->add(createAMDILISelDag(getAMDGPUTargetMachine()));
return false;
}
bool AMDGPUPassConfig::addPreRegAlloc() {
const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
- PM.add(createAMDGPUReorderPreloadInstructionsPass(*TM));
if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
- PM.add(createR600LowerShaderInstructionsPass(*TM));
- PM.add(createR600LowerInstructionsPass(*TM));
+ PM->add(createR600LowerInstructionsPass(*TM));
} else {
- PM.add(createSILowerShaderInstructionsPass(*TM));
- PM.add(createSIAssignInterpRegsPass(*TM));
+ PM->add(createSIAssignInterpRegsPass(*TM));
}
- PM.add(createAMDGPULowerInstructionsPass(*TM));
- PM.add(createAMDGPUConvertToISAPass(*TM));
+ PM->add(createAMDGPULowerInstructionsPass(*TM));
+ PM->add(createAMDGPUConvertToISAPass(*TM));
return false;
}
@@ -150,10 +147,10 @@ bool AMDGPUPassConfig::addPreSched2() {
bool AMDGPUPassConfig::addPreEmitPass() {
const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
- PM.add(createAMDILCFGPreparationPass(*TM));
- PM.add(createAMDILCFGStructurizerPass(*TM));
+ PM->add(createAMDILCFGPreparationPass(*TM));
+ PM->add(createAMDILCFGStructurizerPass(*TM));
if (ST.device()->getGeneration() == AMDILDeviceInfo::HD7XXX) {
- PM.add(createSIPropagateImmReadsPass(*TM));
+ PM->add(createSIPropagateImmReadsPass(*TM));
}
return false;
diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.h b/src/gallium/drivers/radeon/AMDGPUTargetMachine.h
index d4165b09e84..2428fe638a7 100644
--- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.h
+++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.h
@@ -1,4 +1,4 @@
-//===-- AMDGPUTargetMachine.h - TODO: Add brief description -------===//
+//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// The AMDGPU TargetMachine interface definition for hw codegen targets.
//
//===----------------------------------------------------------------------===//
@@ -52,9 +52,6 @@ public:
formatted_raw_ostream &Out,
CodeGenFileType FileType,
bool DisableVerify);
-public:
- void dumpCode() { mDump = true; }
- bool shouldDumpCode() const { return mDump; }
};
} /* End namespace llvm */
diff --git a/src/gallium/drivers/radeon/AMDGPUUtil.cpp b/src/gallium/drivers/radeon/AMDGPUUtil.cpp
index a5045436ab4..bd8f5eef697 100644
--- a/src/gallium/drivers/radeon/AMDGPUUtil.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUUtil.cpp
@@ -1,4 +1,4 @@
-//===-- AMDGPUUtil.cpp - TODO: Add brief description -------===//
+//===-- AMDGPUUtil.cpp - AMDGPU Utility functions -------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,39 +7,39 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// Common utility functions used by hw codegen targets
//
//===----------------------------------------------------------------------===//
#include "AMDGPUUtil.h"
#include "AMDGPURegisterInfo.h"
#include "AMDIL.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
-/* Some instructions act as place holders to emulate operations that the GPU
- * hardware does automatically. This function can be used to check if
- * an opcode falls into this category. */
-bool llvm::isPlaceHolderOpcode(unsigned opcode)
+// Some instructions act as place holders to emulate operations that the GPU
+// hardware does automatically. This function can be used to check if
+// an opcode falls into this category.
+bool AMDGPU::isPlaceHolderOpcode(unsigned opcode)
{
switch (opcode) {
default: return false;
- case AMDIL::EXPORT_REG:
case AMDIL::RETURN:
case AMDIL::LOAD_INPUT:
case AMDIL::LAST:
+ case AMDIL::MASK_WRITE:
case AMDIL::RESERVE_REG:
return true;
}
}
-bool llvm::isTransOp(unsigned opcode)
+bool AMDGPU::isTransOp(unsigned opcode)
{
switch(opcode) {
default: return false;
@@ -67,10 +67,12 @@ bool llvm::isTransOp(unsigned opcode)
}
}
-bool llvm::isTexOp(unsigned opcode)
+bool AMDGPU::isTexOp(unsigned opcode)
{
switch(opcode) {
default: return false;
+ case AMDIL::TEX_LD:
+ case AMDIL::TEX_GET_TEXTURE_RESINFO:
case AMDIL::TEX_SAMPLE:
case AMDIL::TEX_SAMPLE_C:
case AMDIL::TEX_SAMPLE_L:
@@ -79,11 +81,13 @@ bool llvm::isTexOp(unsigned opcode)
case AMDIL::TEX_SAMPLE_C_LB:
case AMDIL::TEX_SAMPLE_G:
case AMDIL::TEX_SAMPLE_C_G:
+ case AMDIL::TEX_GET_GRADIENTS_H:
+ case AMDIL::TEX_GET_GRADIENTS_V:
return true;
}
}
-bool llvm::isReductionOp(unsigned opcode)
+bool AMDGPU::isReductionOp(unsigned opcode)
{
switch(opcode) {
default: return false;
@@ -93,13 +97,25 @@ bool llvm::isReductionOp(unsigned opcode)
}
}
-bool llvm::isFCOp(unsigned opcode)
+// Returns true when opcode is AMDIL::CUBE_r600 or AMDIL::CUBE_eg, and false
+// for every other opcode.
+bool AMDGPU::isCubeOp(unsigned opcode)
+{
+  return opcode == AMDIL::CUBE_r600 || opcode == AMDIL::CUBE_eg;
+}
+
+
+bool AMDGPU::isFCOp(unsigned opcode)
{
switch(opcode) {
default: return false;
case AMDIL::BREAK_LOGICALZ_f32:
case AMDIL::BREAK_LOGICALNZ_i32:
case AMDIL::BREAK_LOGICALZ_i32:
+ case AMDIL::BREAK_LOGICALNZ_f32:
case AMDIL::CONTINUE_LOGICALNZ_f32:
case AMDIL::IF_LOGICALNZ_i32:
case AMDIL::IF_LOGICALZ_f32:
@@ -112,11 +128,14 @@ bool llvm::isFCOp(unsigned opcode)
}
}
-void AMDGPU::utilAddLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
- const struct TargetInstrInfo * TII, unsigned physReg, unsigned virtReg)
+void AMDGPU::utilAddLiveIn(llvm::MachineFunction * MF,
+ llvm::MachineRegisterInfo & MRI,
+ const struct llvm::TargetInstrInfo * TII,
+ unsigned physReg, unsigned virtReg)
{
if (!MRI.isLiveIn(physReg)) {
MRI.addLiveIn(physReg, virtReg);
+ MF->front().addLiveIn(physReg);
BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
TII->get(TargetOpcode::COPY), virtReg)
.addReg(physReg);
diff --git a/src/gallium/drivers/radeon/AMDGPUUtil.h b/src/gallium/drivers/radeon/AMDGPUUtil.h
index 299146e1ba7..15f2ce57af9 100644
--- a/src/gallium/drivers/radeon/AMDGPUUtil.h
+++ b/src/gallium/drivers/radeon/AMDGPUUtil.h
@@ -1,4 +1,4 @@
-//===-- AMDGPUUtil.h - TODO: Add brief description -------===//
+//===-- AMDGPUUtil.h - AMDGPU Utility function declarations -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,43 +7,40 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// Declarations for utility functions common to all hw codegen targets.
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPU_UTIL_H
#define AMDGPU_UTIL_H
-#include "AMDGPURegisterInfo.h"
-#include "llvm/Support/DataTypes.h"
-
namespace llvm {
-class AMDILMachineFunctionInfo;
+class MachineFunction;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+
+}
-class TargetMachine;
-class TargetRegisterInfo;
+namespace AMDGPU {
bool isPlaceHolderOpcode(unsigned opcode);
bool isTransOp(unsigned opcode);
bool isTexOp(unsigned opcode);
bool isReductionOp(unsigned opcode);
+bool isCubeOp(unsigned opcode);
bool isFCOp(unsigned opcode);
-/* XXX: Move these to AMDGPUInstrInfo.h */
+// XXX: Move these to AMDGPUInstrInfo.h
#define MO_FLAG_CLAMP (1 << 0)
#define MO_FLAG_NEG (1 << 1)
#define MO_FLAG_ABS (1 << 2)
#define MO_FLAG_MASK (1 << 3)
-} /* End namespace llvm */
-
-namespace AMDGPU {
-
void utilAddLiveIn(llvm::MachineFunction * MF, llvm::MachineRegisterInfo & MRI,
const struct llvm::TargetInstrInfo * TII, unsigned physReg, unsigned virtReg);
} // End namespace AMDGPU
-#endif /* AMDGPU_UTIL_H */
+#endif // AMDGPU_UTIL_H
diff --git a/src/gallium/drivers/radeon/AMDIL.h b/src/gallium/drivers/radeon/AMDIL.h
index 317ea124f66..6759ccd9527 100644
--- a/src/gallium/drivers/radeon/AMDIL.h
+++ b/src/gallium/drivers/radeon/AMDIL.h
@@ -137,11 +137,6 @@ enum AddressSpaces {
LAST_ADDRESS = 8
};
-// We are piggybacking on the CommentFlag enum in MachineInstr.h to
-// set bits in AsmPrinterFlags of the MachineInstruction. We will
-// start at bit 16 and allocate down while LLVM will start at bit
-// 1 and allocate up.
-
// This union/struct combination is an easy way to read out the
// exact bits that are needed.
typedef union ResourceRec {
@@ -181,26 +176,6 @@ typedef union ResourceRec {
} // namespace AMDILAS
-// The OpSwizzle encodes a subset of all possible
-// swizzle combinations into a number of bits using
-// only the combinations utilized by the backend.
-// The lower 128 are for source swizzles and the
-// upper 128 or for destination swizzles.
-// The valid mappings can be found in the
-// getSrcSwizzle and getDstSwizzle functions of
-// AMDILUtilityFunctions.cpp.
-typedef union SwizzleRec {
- struct {
-#ifdef __BIG_ENDIAN__
- unsigned char dst : 1;
- unsigned char swizzle : 7;
-#else
- unsigned char swizzle : 7;
- unsigned char dst : 1;
-#endif
- } bits;
- unsigned char u8all;
-} OpSwizzle;
// Enums corresponding to AMDIL condition codes for IL. These
// values must be kept in sync with the ones in the .td file.
namespace AMDILCC {
diff --git a/src/gallium/drivers/radeon/AMDIL.td b/src/gallium/drivers/radeon/AMDIL.td
index 9bcccac2411..deee290fad5 100644
--- a/src/gallium/drivers/radeon/AMDIL.td
+++ b/src/gallium/drivers/radeon/AMDIL.td
@@ -1,4 +1,4 @@
-//===-- AMDIL.td - TODO: Add brief description -------===//
+//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp
index 6625dd77d5f..d7c96573a15 100644
--- a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp
+++ b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp
@@ -1,4 +1,4 @@
-//===-- AMDIL7XXDevice.cpp - TODO: Add brief description -------===//
+//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/src/gallium/drivers/radeon/AMDILBase.td b/src/gallium/drivers/radeon/AMDILBase.td
index 2706b211f2d..31ebed31d72 100644
--- a/src/gallium/drivers/radeon/AMDILBase.td
+++ b/src/gallium/drivers/radeon/AMDILBase.td
@@ -60,6 +60,11 @@ def FeatureDebug : SubtargetFeature<"debug",
"CapsOverride[AMDILDeviceInfo::Debug]",
"true",
"Debug mode is enabled, so disable hardware accelerated address spaces.">;
+def FeatureDumpCode : SubtargetFeature <"DumpCode",
+ "mDumpCode",
+ "true",
+ "Dump MachineInstrs in the CodeEmitter">;
+
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
diff --git a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
index 289af6f210e..cdcd5e89880 100644
--- a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
+++ b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
@@ -7,22 +7,22 @@
//
//==-----------------------------------------------------------------------===//
-#define DEBUG_TYPE "structcfg"
-#ifdef DEBUG
-#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
-#else
#define DEBUGME 0
-#endif
+#define DEBUG_TYPE "structcfg"
#include "AMDILTargetMachine.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -30,8 +30,6 @@
#define FirstNonDebugInstr(A) A->begin()
using namespace llvm;
-// bixia TODO: move this out to analysis lib. Make this work for both target
-// AMDIL and CBackend.
// TODO: move-begin.
//===----------------------------------------------------------------------===//
@@ -109,23 +107,6 @@ void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
//
//===----------------------------------------------------------------------===//
-#include "AMDILTargetMachine.h"
-#include "AMDILUtilityFunctions.h"
-#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/DominatorInternals.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
namespace llvm {
/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
@@ -3156,10 +3137,6 @@ struct CFGStructTraits<AMDILCFGStructurizer>
iterEnd = srcBlk->end();
iter != iterEnd; ++iter) {
MachineInstr *instr = func->CloneMachineInstr(iter);
- // This is a workaround for LLVM bugzilla 8420 because CloneMachineInstr
- // does not clone the AsmPrinterFlags.
- instr->setAsmPrinterFlag(
- (llvm::MachineInstr::CommentFlag)iter->getAsmPrinterFlags());
newBlk->push_back(instr);
}
return newBlk;
diff --git a/src/gallium/drivers/radeon/AMDILCodeEmitter.h b/src/gallium/drivers/radeon/AMDILCodeEmitter.h
index b0ea1455cf9..fa46cbd203d 100644
--- a/src/gallium/drivers/radeon/AMDILCodeEmitter.h
+++ b/src/gallium/drivers/radeon/AMDILCodeEmitter.h
@@ -1,23 +1,21 @@
-// The LLVM Compiler Infrastructure
+//===-- AMDILCodeEmitter.h - AMDIL Code Emitter interface -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-//===-- AMDILCodeEmitter.h - TODO: Add brief description -------===//
-//===-- AMDILCodeEmitter.h - TODO: Add brief description -------===//
-//===-- AMDILCodeEmitter.h - TODO: Add brief description -------===//
+//===----------------------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
+// CodeEmitter interface for R600 and SI codegen.
//
+//===----------------------------------------------------------------------===//
#ifndef AMDILCODEEMITTER_H
#define AMDILCODEEMITTER_H
namespace llvm {
- /* XXX: Temp HACK to work around tablegen name generation */
class AMDILCodeEmitter {
public:
uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
diff --git a/src/gallium/drivers/radeon/AMDILConversions.td b/src/gallium/drivers/radeon/AMDILConversions.td
index 0db66ae8475..1bc5e4ddf37 100644
--- a/src/gallium/drivers/radeon/AMDILConversions.td
+++ b/src/gallium/drivers/radeon/AMDILConversions.td
@@ -1,4 +1,4 @@
-//===-- AMDILConversions.td - TODO: Add brief description -------===//
+//==- AMDILConversions.td - Type conversion tablegen patterns -*-tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
diff --git a/src/gallium/drivers/radeon/AMDILDevice.cpp b/src/gallium/drivers/radeon/AMDILDevice.cpp
index aa6d8af7012..4294a8bef0c 100644
--- a/src/gallium/drivers/radeon/AMDILDevice.cpp
+++ b/src/gallium/drivers/radeon/AMDILDevice.cpp
@@ -1,4 +1,4 @@
-//===-- AMDILDevice.cpp - TODO: Add brief description -------===//
+//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp b/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp
index 89b8312c294..cbf5b512471 100644
--- a/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp
+++ b/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp
@@ -1,4 +1,4 @@
-//===-- AMDILDeviceInfo.cpp - TODO: Add brief description -------===//
+//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,11 +6,16 @@
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
+//
+// Function that creates DeviceInfo from a device name and other information.
+//
+//==-----------------------------------------------------------------------===//
#include "AMDILDevices.h"
#include "AMDILSubtarget.h"
using namespace llvm;
namespace llvm {
+namespace AMDILDeviceInfo {
AMDILDevice*
getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64bit, bool is64on32bit)
{
@@ -84,4 +89,5 @@ getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64b
return new AMDIL7XXDevice(ptr);
}
}
-}
+} // End namespace AMDILDeviceInfo
+} // End namespace llvm
diff --git a/src/gallium/drivers/radeon/AMDILDeviceInfo.h b/src/gallium/drivers/radeon/AMDILDeviceInfo.h
index c4acf9145ae..06ac4322d0f 100644
--- a/src/gallium/drivers/radeon/AMDILDeviceInfo.h
+++ b/src/gallium/drivers/radeon/AMDILDeviceInfo.h
@@ -1,4 +1,4 @@
-//===-- AMDILDeviceInfo.h - TODO: Add brief description -------===//
+//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -82,8 +82,8 @@ namespace llvm
};
+ AMDILDevice*
+ getDeviceFromName(const std::string &name, AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false);
} // namespace AMDILDeviceInfo
- llvm::AMDILDevice*
- getDeviceFromName(const std::string &name, llvm::AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false);
} // namespace llvm
#endif // _AMDILDEVICEINFO_H_
diff --git a/src/gallium/drivers/radeon/AMDILDevices.h b/src/gallium/drivers/radeon/AMDILDevices.h
index 3fc5fa05669..cfcc3304b4b 100644
--- a/src/gallium/drivers/radeon/AMDILDevices.h
+++ b/src/gallium/drivers/radeon/AMDILDevices.h
@@ -1,4 +1,4 @@
-//===-- AMDILDevices.h - TODO: Add brief description -------===//
+//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td b/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td
index 445fd608bbb..f10936b8c6c 100644
--- a/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td
+++ b/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td
@@ -1,4 +1,4 @@
-//===-- AMDILEnumeratedTypes.td - TODO: Add brief description -------===//
+//===-- AMDILEnumeratedTypes.td - IL Type definitions --*- tablegen -*-----===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp
index 7b5c52345d2..779b2d3df2f 100644
--- a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp
+++ b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp
@@ -1,4 +1,4 @@
-//===-- AMDILEvergreenDevice.cpp - TODO: Add brief description -------===//
+//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp b/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp
index ff04d9d55bf..b8898828dd6 100644
--- a/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp
+++ b/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp
@@ -13,9 +13,12 @@
#include "AMDILDevices.h"
#include "AMDILTargetMachine.h"
#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/ValueMap.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Compiler.h"
+#include <list>
+#include <queue>
using namespace llvm;
@@ -35,13 +38,21 @@ class AMDILDAGToDAGISel : public SelectionDAGISel {
public:
AMDILDAGToDAGISel(AMDILTargetMachine &TM AMDIL_OPT_LEVEL_DECL);
virtual ~AMDILDAGToDAGISel();
- inline SDValue getSmallIPtrImm(unsigned Imm);
SDNode *Select(SDNode *N);
+ virtual const char *getPassName() const;
+
+private:
+ inline SDValue getSmallIPtrImm(unsigned Imm);
+
// Complex pattern selectors
bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
+
+ static bool checkType(const Value *ptr, unsigned int addrspace);
+ static const Value *getBasePointerValue(const Value *V);
+
static bool isGlobalStore(const StoreSDNode *N);
static bool isPrivateStore(const StoreSDNode *N);
static bool isLocalStore(const StoreSDNode *N);
@@ -54,8 +65,6 @@ public:
static bool isLocalLoad(const LoadSDNode *N);
static bool isRegionLoad(const LoadSDNode *N);
- virtual const char *getPassName() const;
-private:
SDNode *xformAtomicInst(SDNode *N);
// Include the pieces autogenerated from the target description.
@@ -165,26 +174,75 @@ SDNode *AMDILDAGToDAGISel::Select(SDNode *N) {
return SelectCode(N);
}
+// Returns true when ptr is non-NULL, has a pointer type, and that pointer
+// type is in address space addrspace. Returns false in every other case.
+bool AMDILDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
+  if (!ptr) {
+    return false;
+  }
+  // dyn_cast<> yields NULL when the type is not a PointerType, so the result
+  // has to be tested before it is dereferenced.
+  const PointerType *ptrType = dyn_cast<PointerType>(ptr->getType());
+  return ptrType && ptrType->getAddressSpace() == addrspace;
+}
+
+// Searches breadth-first from V through instruction operands and the first
+// operand of constant expressions, and returns the first value reached that
+// is a pointer-typed Argument, a GlobalVariable, or an AllocaInst.
+// Returns NULL when V is NULL or when the search finds no such value.
+const Value * AMDILDAGToDAGISel::getBasePointerValue(const Value *V)
+{
+  if (!V) {
+    return NULL;
+  }
+  // Values that have already been examined, so each one is processed once.
+  ValueMap<const Value *, bool> Visited;
+  std::queue<const Value *, std::list<const Value *> > WorkList;
+  WorkList.push(V);
+  while (!WorkList.empty()) {
+    const Value *Cur = WorkList.front();
+    WorkList.pop();
+    if (Visited.find(Cur) != Visited.end()) {
+      continue;
+    }
+    Visited[Cur] = true;
+
+    if (isa<Argument>(Cur) && isa<PointerType>(Cur->getType())) {
+      return Cur;
+    }
+    // GlobalVariable must be tested before the generic Constant case below,
+    // because a GlobalVariable is itself a Constant.
+    if (isa<GlobalVariable>(Cur) || isa<AllocaInst>(Cur)) {
+      return Cur;
+    }
+    if (isa<Constant>(Cur)) {
+      // Only a constant expression is followed further, via its first
+      // operand; any other constant ends this path.
+      if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Cur)) {
+        WorkList.push(CE->getOperand(0));
+      }
+      continue;
+    }
+    if (const Instruction *I = dyn_cast<Instruction>(Cur)) {
+      uint32_t numOps = I->getNumOperands();
+      for (uint32_t x = 0; x < numOps; ++x) {
+        WorkList.push(I->getOperand(x));
+      }
+    }
+    // Any other kind of Value is skipped.
+  }
+  return NULL;
+}
+
bool AMDILDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
- return check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
+ return checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
}
bool AMDILDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
- return (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
- && !check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
- && !check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS));
+ return (!checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS));
}
bool AMDILDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
- return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
+ return checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
}
bool AMDILDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
- return check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
+ return checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
}
bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
- if (check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)) {
+ if (checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)) {
return true;
}
MachineMemOperand *MMO = N->getMemOperand();
@@ -195,27 +253,27 @@ bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
&& ((V && dyn_cast<GlobalValue>(V))
|| (BV && dyn_cast<GlobalValue>(
getBasePointerValue(MMO->getValue()))))) {
- return check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS);
+ return checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS);
} else {
return false;
}
}
bool AMDILDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
- return check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
+ return checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
}
bool AMDILDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
- return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
+ return checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
}
bool AMDILDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
- return check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
+ return checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
}
bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
MachineMemOperand *MMO = N->getMemOperand();
- if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
+ if (checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
if (MMO) {
const Value *V = MMO->getValue();
const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
@@ -228,19 +286,19 @@ bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
}
bool AMDILDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
- if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
+ if (checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
// Check to make sure we are not a constant pool load or a constant load
// that is marked as a private load
if (isCPLoad(N) || isConstantLoad(N, -1)) {
return false;
}
}
- if (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
- && !check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
- && !check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS)
- && !check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
- && !check_type(N->getSrcValue(), AMDILAS::PARAM_D_ADDRESS)
- && !check_type(N->getSrcValue(), AMDILAS::PARAM_I_ADDRESS))
+ if (!checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDILAS::PARAM_D_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDILAS::PARAM_I_ADDRESS))
{
return true;
}
diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp
index 54c6ea65065..19b12fcf72b 100644
--- a/src/gallium/drivers/radeon/AMDILISelLowering.cpp
+++ b/src/gallium/drivers/radeon/AMDILISelLowering.cpp
@@ -623,6 +623,48 @@ translateToOpcode(uint64_t CCCode, unsigned int regClass)
assert(0 && "Unknown opcode retrieved");
return 0;
}
+
+/// Helper function used by LowerFormalArguments.
+/// Maps a simple value type (one of the MVT:: enumerators listed in the
+/// switch) to the address of the corresponding AMDIL register class.
+/// A type that is not listed trips the assert; when asserts are compiled
+/// out, control falls through from `default` into the MVT::i8 case and
+/// &AMDIL::GPRI8RegClass is returned.
+static const TargetRegisterClass*
+getRegClassFromType(unsigned int type) {
+ switch (type) {
+ default:
+ assert(0 && "Passed in type does not match any register classes.");
+ // No return here: execution continues into MVT::i8 if the assert is off.
+ case MVT::i8:
+ return &AMDIL::GPRI8RegClass;
+ case MVT::i16:
+ return &AMDIL::GPRI16RegClass;
+ case MVT::i32:
+ return &AMDIL::GPRI32RegClass;
+ case MVT::f32:
+ return &AMDIL::GPRF32RegClass;
+ case MVT::i64:
+ return &AMDIL::GPRI64RegClass;
+ case MVT::f64:
+ return &AMDIL::GPRF64RegClass;
+ case MVT::v4f32:
+ return &AMDIL::GPRV4F32RegClass;
+ case MVT::v4i8:
+ return &AMDIL::GPRV4I8RegClass;
+ case MVT::v4i16:
+ return &AMDIL::GPRV4I16RegClass;
+ case MVT::v4i32:
+ return &AMDIL::GPRV4I32RegClass;
+ case MVT::v2f32:
+ return &AMDIL::GPRV2F32RegClass;
+ case MVT::v2i8:
+ return &AMDIL::GPRV2I8RegClass;
+ case MVT::v2i16:
+ return &AMDIL::GPRV2I16RegClass;
+ case MVT::v2i32:
+ return &AMDIL::GPRV2I32RegClass;
+ case MVT::v2f64:
+ return &AMDIL::GPRV2F64RegClass;
+ case MVT::v2i64:
+ return &AMDIL::GPRV2I64RegClass;
+ }
+}
+
SDValue
AMDILTargetLowering::LowerMemArgument(
SDValue Chain,
@@ -2189,6 +2231,7 @@ AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
return Result;
}
+
/// LowerFORMAL_ARGUMENTS - transform physical registers into
/// virtual registers and generate load operations for
/// arguments places on the stack.
@@ -3191,7 +3234,7 @@ AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
amdtm = reinterpret_cast<const AMDILTargetMachine*>
(&this->getTargetMachine());
const AMDILSubtarget*
- stm = dynamic_cast<const AMDILSubtarget*>(
+ stm = static_cast<const AMDILSubtarget*>(
amdtm->getSubtargetImpl());
if (RST == MVT::f64 && RHSVT.isVector()
&& stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
@@ -3248,7 +3291,7 @@ AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
amdtm = reinterpret_cast<const AMDILTargetMachine*>
(&this->getTargetMachine());
const AMDILSubtarget*
- stm = dynamic_cast<const AMDILSubtarget*>(
+ stm = static_cast<const AMDILSubtarget*>(
amdtm->getSubtargetImpl());
if (RST == MVT::f64 && RHSVT.isVector()
&& stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
@@ -3314,7 +3357,7 @@ AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
amdtm = reinterpret_cast<const AMDILTargetMachine*>
(&this->getTargetMachine());
const AMDILSubtarget*
- stm = dynamic_cast<const AMDILSubtarget*>(
+ stm = static_cast<const AMDILSubtarget*>(
amdtm->getSubtargetImpl());
if (stm->calVersion() >= CAL_VERSION_SC_135) {
// unsigned x = RHS;
@@ -3489,7 +3532,7 @@ AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
amdtm = reinterpret_cast<const AMDILTargetMachine*>
(&this->getTargetMachine());
const AMDILSubtarget*
- stm = dynamic_cast<const AMDILSubtarget*>(
+ stm = static_cast<const AMDILSubtarget*>(
amdtm->getSubtargetImpl());
if (LST == MVT::f64 && LHSVT.isVector()
&& stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
@@ -3543,7 +3586,7 @@ AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
amdtm = reinterpret_cast<const AMDILTargetMachine*>
(&this->getTargetMachine());
const AMDILSubtarget*
- stm = dynamic_cast<const AMDILSubtarget*>(
+ stm = static_cast<const AMDILSubtarget*>(
amdtm->getSubtargetImpl());
if (LST == MVT::f64 && LHSVT.isVector()
&& stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
@@ -3843,7 +3886,6 @@ SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
EVT VT = Op.getValueType();
- //printSDValue(Op, 1);
SDValue Nodes1;
SDValue second;
SDValue third;
@@ -3965,7 +4007,6 @@ AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const
{
EVT VT = Op.getValueType();
- //printSDValue(Op, 1);
const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
uint64_t swizzleNum = 0;
DebugLoc DL = Op.getDebugLoc();
@@ -4782,7 +4823,7 @@ uint32_t
AMDILTargetLowering::genVReg(uint32_t regType) const
{
return mBB->getParent()->getRegInfo().createVirtualRegister(
- getRegClassFromID(regType));
+ getTargetMachine().getRegisterInfo()->getRegClass(regType));
}
MachineInstrBuilder
diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.cpp b/src/gallium/drivers/radeon/AMDILInstrInfo.cpp
index fbc3e45b357..cd2fb48209c 100644
--- a/src/gallium/drivers/radeon/AMDILInstrInfo.cpp
+++ b/src/gallium/drivers/radeon/AMDILInstrInfo.cpp
@@ -10,13 +10,10 @@
// This file contains the AMDIL implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
-#include "AMDILInstrInfo.h"
-#include "AMDILUtilityFunctions.h"
-
-#define GET_INSTRINFO_CTOR
-#include "AMDILGenInstrInfo.inc"
#include "AMDILInstrInfo.h"
+#include "AMDIL.h"
+#include "AMDILISelLowering.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -24,6 +21,9 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Instructions.h"
+#define GET_INSTRINFO_CTOR
+#include "AMDILGenInstrInfo.inc"
+
using namespace llvm;
AMDILInstrInfo::AMDILInstrInfo(AMDILTargetMachine &tm)
@@ -36,28 +36,6 @@ const AMDILRegisterInfo &AMDILInstrInfo::getRegisterInfo() const {
return RI;
}
-/// Return true if the instruction is a register to register move and leave the
-/// source and dest operands in the passed parameters.
-bool AMDILInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg,
- unsigned int &DstReg, unsigned int &SrcSubIdx,
- unsigned int &DstSubIdx) const {
- // FIXME: we should look for:
- // add with 0
- //assert(0 && "is Move Instruction has not been implemented yet!");
- //return true;
- if (!isMove(MI.getOpcode())) {
- return false;
- }
- if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg()) {
- return false;
- }
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- DstSubIdx = 0;
- SrcSubIdx = 0;
- return true;
-}
-
bool AMDILInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const {
@@ -99,22 +77,7 @@ bool AMDILInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
// TODO: Implement this function
return false;
}
-#if 0
-void
-AMDILInstrInfo::reMaterialize(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const {
-// TODO: Implement this function
-}
-MachineInst AMDILInstrInfo::duplicate(MachineInstr *Orig,
- MachineFunction &MF) const {
-// TODO: Implement this function
- return NULL;
-}
-#endif
MachineInstr *
AMDILInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
@@ -122,25 +85,6 @@ AMDILInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// TODO: Implement this function
return NULL;
}
-#if 0
-MachineInst AMDILInstrInfo::commuteInstruction(MachineInstr *MI,
- bool NewMI = false) const {
-// TODO: Implement this function
- return NULL;
-}
-bool
-AMDILInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
- unsigned &SrcOpIdx2) const
-{
-// TODO: Implement this function
-}
-bool
-AMDILInstrInfo::produceSameValue(const MachineInstr *MI0,
- const MachineInstr *MI1) const
-{
-// TODO: Implement this function
-}
-#endif
bool AMDILInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
MachineBasicBlock &MBB) const {
while (iter != MBB.end()) {
@@ -299,43 +243,6 @@ MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
return MBB->end();
}
-bool
-AMDILInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- // If we are adding to the end of a basic block we can safely assume that the
- // move is caused by a PHI node since all move instructions that are non-PHI
- // have already been inserted into the basic blocks Therefor we call the skip
- // flow control instruction to move the iterator before the flow control
- // instructions and put the move instruction there.
- bool phi = (DestReg < 1025) || (SrcReg < 1025);
- int movInst = phi ? getMoveInstFromID(DestRC->getID())
- : getPHIMoveInstFromID(DestRC->getID());
-
- MachineBasicBlock::iterator iTemp = (I == MBB.end()) ? skipFlowControl(&MBB)
- : I;
- if (DestRC != SrcRC) {
- //int convInst;
- size_t dSize = DestRC->getSize();
- size_t sSize = SrcRC->getSize();
- if (dSize > sSize) {
- // Elements are going to get duplicated.
- BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
- } else if (dSize == sSize) {
- // Direct copy, conversions are not handled.
- BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
- } else if (dSize < sSize) {
- // Elements are going to get dropped.
- BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
- }
- } else {
- BuildMI( MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
- }
- return true;
-}
void
AMDILInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
@@ -427,15 +334,11 @@ AMDILInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
if (MI != MBB.end()) {
DL = MI->getDebugLoc();
}
- MachineInstr *nMI = BuildMI(MBB, MI, DL, get(Opc))
+ BuildMI(MBB, MI, DL, get(Opc))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FrameIndex)
.addMemOperand(MMO)
.addImm(0);
- AMDILAS::InstrResEnc curRes;
- curRes.bits.ResourceID
- = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID);
- setAsmPrinterFlags(nMI, curRes);
}
void
@@ -511,16 +414,11 @@ AMDILInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
if (MI != MBB.end()) {
DL = MI->getDebugLoc();
}
- MachineInstr* nMI = BuildMI(MBB, MI, DL, get(Opc))
+ BuildMI(MBB, MI, DL, get(Opc))
.addReg(DestReg, RegState::Define)
.addFrameIndex(FrameIndex)
.addMemOperand(MMO)
.addImm(0);
- AMDILAS::InstrResEnc curRes;
- curRes.bits.ResourceID
- = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID);
- setAsmPrinterFlags(nMI, curRes);
-
}
MachineInstr *
AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
@@ -569,65 +467,6 @@ AMDILInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
return 0;
}
-bool
-AMDILInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
- int64_t &Offset1,
- int64_t &Offset2) const {
- return false;
- if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) {
- return false;
- }
- const MachineSDNode *mload1 = dyn_cast<MachineSDNode>(Load1);
- const MachineSDNode *mload2 = dyn_cast<MachineSDNode>(Load2);
- if (!mload1 || !mload2) {
- return false;
- }
- if (mload1->memoperands_empty() ||
- mload2->memoperands_empty()) {
- return false;
- }
- MachineMemOperand *memOp1 = (*mload1->memoperands_begin());
- MachineMemOperand *memOp2 = (*mload2->memoperands_begin());
- const Value *mv1 = memOp1->getValue();
- const Value *mv2 = memOp2->getValue();
- if (!memOp1->isLoad() || !memOp2->isLoad()) {
- return false;
- }
- if (getBasePointerValue(mv1) == getBasePointerValue(mv2)) {
- if (isa<GetElementPtrInst>(mv1) && isa<GetElementPtrInst>(mv2)) {
- const GetElementPtrInst *gep1 = dyn_cast<GetElementPtrInst>(mv1);
- const GetElementPtrInst *gep2 = dyn_cast<GetElementPtrInst>(mv2);
- if (!gep1 || !gep2) {
- return false;
- }
- if (gep1->getNumOperands() != gep2->getNumOperands()) {
- return false;
- }
- for (unsigned i = 0, e = gep1->getNumOperands() - 1; i < e; ++i) {
- const Value *op1 = gep1->getOperand(i);
- const Value *op2 = gep2->getOperand(i);
- if (op1 != op2) {
- // If any value except the last one is different, return false.
- return false;
- }
- }
- unsigned size = gep1->getNumOperands()-1;
- if (!isa<ConstantInt>(gep1->getOperand(size))
- || !isa<ConstantInt>(gep2->getOperand(size))) {
- return false;
- }
- Offset1 = dyn_cast<ConstantInt>(gep1->getOperand(size))->getSExtValue();
- Offset2 = dyn_cast<ConstantInt>(gep2->getOperand(size))->getSExtValue();
- return true;
- } else if (isa<Argument>(mv1) && isa<Argument>(mv2)) {
- return false;
- } else if (isa<GlobalValue>(mv1) && isa<GlobalValue>(mv2)) {
- return false;
- }
- }
- return false;
-}
-
bool AMDILInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const {
@@ -654,16 +493,6 @@ bool AMDILInstrInfo::isPredicated(const MachineInstr *MI) const {
// TODO: Implement this function
return false;
}
-#if 0
-bool AMDILInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- // TODO: Implement this function
-}
-
-bool AMDILInstrInfo::PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const {
- // TODO: Implement this function
-}
-#endif
bool
AMDILInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
const SmallVectorImpl<MachineOperand> &Pred2)
@@ -689,21 +518,112 @@ AMDILInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
return true;
}
-unsigned AMDILInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
- // TODO: Implement this function
- return 0;
+/// Opcode-name test for loads: true when the name of MI's opcode contains
+/// "LOAD", except that any name containing "LOADCONST" is rejected.
+bool AMDILInstrInfo::isLoadInst(MachineInstr *MI) const {
+  const char *opName = getName(MI->getOpcode());
+  const bool isConstLoad = strstr(opName, "LOADCONST") != NULL;
+  return !isConstLoad && strstr(opName, "LOAD") != NULL;
}
-#if 0
-unsigned
-AMDILInstrInfo::GetFunctionSizeInBytes(const MachineFunction &MF) const {
- // TODO: Implement this function
- return 0;
+/// True when MI's opcode is one of the case labels produced by
+/// ExpandCaseToByteShortTypes for the LOCALLOAD, GLOBALLOAD, REGIONLOAD,
+/// PRIVATELOAD, CPOOLLOAD and CONSTANTLOAD opcode families (presumably the
+/// byte- and short-sized variants, going by the macro name); false for every
+/// other opcode.
+bool AMDILInstrInfo::isSWSExtLoadInst(MachineInstr *MI) const
+{
+  switch (MI->getOpcode()) {
+    ExpandCaseToByteShortTypes(AMDIL::LOCALLOAD);
+    ExpandCaseToByteShortTypes(AMDIL::GLOBALLOAD);
+    ExpandCaseToByteShortTypes(AMDIL::REGIONLOAD);
+    ExpandCaseToByteShortTypes(AMDIL::PRIVATELOAD);
+    ExpandCaseToByteShortTypes(AMDIL::CPOOLLOAD);
+    ExpandCaseToByteShortTypes(AMDIL::CONSTANTLOAD);
+    return true;
+  default:
+    return false;
+  }
}
-unsigned AMDILInstrInfo::getInlineAsmLength(const char *Str,
- const MCAsmInfo &MAI) const {
- // TODO: Implement this function
- return 0;
+/// True when the name of MI's opcode contains the substring "EXTLOAD"
+/// (which also covers names containing "SEXTLOAD", "ZEXTLOAD" or "AEXTLOAD").
+bool AMDILInstrInfo::isExtLoadInst(MachineInstr *MI) const {
+  const char *opName = getName(MI->getOpcode());
+  return strstr(opName, "EXTLOAD") != NULL;
+}
+
+/// True when the name of MI's opcode contains the substring "SEXTLOAD".
+bool AMDILInstrInfo::isSExtLoadInst(MachineInstr *MI) const {
+  const char *opName = getName(MI->getOpcode());
+  return strstr(opName, "SEXTLOAD") != NULL;
+}
+
+/// True when the name of MI's opcode contains the substring "AEXTLOAD".
+bool AMDILInstrInfo::isAExtLoadInst(MachineInstr *MI) const {
+  const char *opName = getName(MI->getOpcode());
+  return strstr(opName, "AEXTLOAD") != NULL;
+}
+
+/// True when the name of MI's opcode contains the substring "ZEXTLOAD".
+bool AMDILInstrInfo::isZExtLoadInst(MachineInstr *MI) const {
+  const char *opName = getName(MI->getOpcode());
+  return strstr(opName, "ZEXTLOAD") != NULL;
+}
+
+/// True when the name of MI's opcode contains the substring "STORE"
+/// (names containing "TRUNCSTORE" therefore match as well).
+bool AMDILInstrInfo::isStoreInst(MachineInstr *MI) const {
+  const char *opName = getName(MI->getOpcode());
+  return strstr(opName, "STORE") != NULL;
+}
+
+/// True when the name of MI's opcode contains the substring "TRUNCSTORE".
+bool AMDILInstrInfo::isTruncStoreInst(MachineInstr *MI) const {
+  const char *opName = getName(MI->getOpcode());
+  return strstr(opName, "TRUNCSTORE") != NULL;
+}
+
+/// True when the name of MI's opcode contains the substring "ATOM".
+bool AMDILInstrInfo::isAtomicInst(MachineInstr *MI) const {
+  const char *opName = getName(MI->getOpcode());
+  return strstr(opName, "ATOM") != NULL;
+}
+
+/// True when at least one memory operand attached to MI is volatile.
+/// An instruction with no memory operands is never reported as volatile.
+bool AMDILInstrInfo::isVolatileInst(MachineInstr *MI) const {
+  MachineInstr::mmo_iterator cur = MI->memoperands_begin();
+  MachineInstr::mmo_iterator last = MI->memoperands_end();
+  // An empty operand list simply skips the loop, so it needs no separate
+  // emptiness test.
+  while (cur != last) {
+    if ((*cur)->isVolatile()) {
+      return true;
+    }
+    ++cur;
+  }
+  return false;
+}
+/// True when the name of MI's opcode contains the substring "GLOBAL".
+bool AMDILInstrInfo::isGlobalInst(llvm::MachineInstr *MI) const
+{
+  const char *opName = getName(MI->getOpcode());
+  return strstr(opName, "GLOBAL") != NULL;
+}
+/// True when the name of MI's opcode contains the substring "PRIVATE".
+bool AMDILInstrInfo::isPrivateInst(llvm::MachineInstr *MI) const
+{
+  const char *opName = getName(MI->getOpcode());
+  return strstr(opName, "PRIVATE") != NULL;
+}
+/// True when the name of MI's opcode contains either "CONSTANT" or "CPOOL".
+bool AMDILInstrInfo::isConstantInst(llvm::MachineInstr *MI) const
+{
+  const char *opName = getName(MI->getOpcode());
+  if (strstr(opName, "CONSTANT") != NULL) {
+    return true;
+  }
+  return strstr(opName, "CPOOL") != NULL;
+}
+/// True when the name of MI's opcode contains the substring "REGION".
+bool AMDILInstrInfo::isRegionInst(llvm::MachineInstr *MI) const
+{
+  const char *opName = getName(MI->getOpcode());
+  return strstr(opName, "REGION") != NULL;
+}
+/// True when the name of MI's opcode contains the substring "LOCAL".
+bool AMDILInstrInfo::isLocalInst(llvm::MachineInstr *MI) const
+{
+  const char *opName = getName(MI->getOpcode());
+  return strstr(opName, "LOCAL") != NULL;
+}
+/// True when the name of MI's opcode contains the substring "IMAGE".
+bool AMDILInstrInfo::isImageInst(llvm::MachineInstr *MI) const
+{
+  const char *opName = getName(MI->getOpcode());
+  return strstr(opName, "IMAGE") != NULL;
+}
+/// True when the name of MI's opcode contains the substring "APPEND".
+bool AMDILInstrInfo::isAppendInst(llvm::MachineInstr *MI) const
+{
+  const char *opName = getName(MI->getOpcode());
+  return strstr(opName, "APPEND") != NULL;
+}
+/// True when the name of MI's opcode contains the substring "ATOM_R".
+bool AMDILInstrInfo::isRegionAtomic(llvm::MachineInstr *MI) const
+{
+  const char *opName = getName(MI->getOpcode());
+  return strstr(opName, "ATOM_R") != NULL;
+}
+/// True when the name of MI's opcode contains the substring "ATOM_L".
+bool AMDILInstrInfo::isLocalAtomic(llvm::MachineInstr *MI) const
+{
+  const char *opName = getName(MI->getOpcode());
+  return strstr(opName, "ATOM_L") != NULL;
+}
+/// True when the name of MI's opcode contains "ATOM_G", or when
+/// isArenaAtomic(MI) reports true.
+bool AMDILInstrInfo::isGlobalAtomic(llvm::MachineInstr *MI) const
+{
+  const char *opName = getName(MI->getOpcode());
+  if (strstr(opName, "ATOM_G") != NULL) {
+    return true;
+  }
+  return isArenaAtomic(MI);
+}
+/// True when the name of MI's opcode contains the substring "ATOM_A".
+bool AMDILInstrInfo::isArenaAtomic(llvm::MachineInstr *MI) const
+{
+  const char *opName = getName(MI->getOpcode());
+  return strstr(opName, "ATOM_A") != NULL;
}
-#endif
diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.h b/src/gallium/drivers/radeon/AMDILInstrInfo.h
index 88dd4e9441a..4121246e6f9 100644
--- a/src/gallium/drivers/radeon/AMDILInstrInfo.h
+++ b/src/gallium/drivers/radeon/AMDILInstrInfo.h
@@ -40,12 +40,6 @@ public:
// always be able to get register info as well (through this method).
const AMDILRegisterInfo &getRegisterInfo() const;
- // Return true if the instruction is a register to register move and leave the
- // source and dest operands in the passed parameters.
- bool isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg,
- unsigned int &DstReg, unsigned int &SrcSubIdx,
- unsigned int &DstSubIdx) const;
-
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
unsigned &DstReg, unsigned &SubIdx) const;
@@ -62,29 +56,10 @@ public:
const MachineMemOperand *&MMO,
int &FrameIndex) const;
-
-#if 0
- void reMaterialize(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const;
- MachineInstr *duplicate(MachineInstr *Orig,
- MachineFunction &MF) const;
-#endif
MachineInstr *
convertToThreeAddress(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const;
-#if 0
- MachineInstr *commuteInstruction(MachineInstr *MI,
- bool NewMI = false) const;
- bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
- unsigned &SrcOpIdx2) const;
- bool produceSameValue(const MachineInstr *MI0,
- const MachineInstr *MI1) const;
-
-#endif
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
@@ -99,12 +74,6 @@ public:
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
- bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -141,8 +110,6 @@ public:
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex = 0) const;
- bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
- int64_t &Offset1, int64_t &Offset2) const;
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const;
@@ -151,24 +118,36 @@ public:
void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
bool isPredicated(const MachineInstr *MI) const;
-#if 0
- bool isUnpredicatedTerminator(const MachineInstr *MI) const;
- bool PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const;
-#endif
bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
const SmallVectorImpl<MachineOperand> &Pred2) const;
bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const;
bool isPredicable(MachineInstr *MI) const;
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
- unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
-#if 0
- unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const;
- unsigned getInlineAsmLength(const char *Str,
- const MCAsmInfo &MAI) const;
-#endif
- };
+
+ // Helper functions that check the opcode for status information
+ bool isLoadInst(llvm::MachineInstr *MI) const;
+ bool isExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isSExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isZExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isAExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isStoreInst(llvm::MachineInstr *MI) const;
+ bool isTruncStoreInst(llvm::MachineInstr *MI) const;
+ bool isAtomicInst(llvm::MachineInstr *MI) const;
+ bool isVolatileInst(llvm::MachineInstr *MI) const;
+ bool isGlobalInst(llvm::MachineInstr *MI) const;
+ bool isPrivateInst(llvm::MachineInstr *MI) const;
+ bool isConstantInst(llvm::MachineInstr *MI) const;
+ bool isRegionInst(llvm::MachineInstr *MI) const;
+ bool isLocalInst(llvm::MachineInstr *MI) const;
+ bool isImageInst(llvm::MachineInstr *MI) const;
+ bool isAppendInst(llvm::MachineInstr *MI) const;
+ bool isRegionAtomic(llvm::MachineInstr *MI) const;
+ bool isLocalAtomic(llvm::MachineInstr *MI) const;
+ bool isGlobalAtomic(llvm::MachineInstr *MI) const;
+ bool isArenaAtomic(llvm::MachineInstr *MI) const;
+};
}
diff --git a/src/gallium/drivers/radeon/AMDILInstructions.td b/src/gallium/drivers/radeon/AMDILInstructions.td
index f824a67d7ad..db56e2121b3 100644
--- a/src/gallium/drivers/radeon/AMDILInstructions.td
+++ b/src/gallium/drivers/radeon/AMDILInstructions.td
@@ -1,4 +1,4 @@
-//===-- AMDILInstructions.td - TODO: Add brief description -------===//
+//===-- AMDILInstructions.td - AMDIL Instruction definitions --------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/src/gallium/drivers/radeon/AMDILMCCodeEmitter.cpp b/src/gallium/drivers/radeon/AMDILMCCodeEmitter.cpp
deleted file mode 100644
index 9366f2e7bcb..00000000000
--- a/src/gallium/drivers/radeon/AMDILMCCodeEmitter.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
-//===---- AMDILMCCodeEmitter.cpp - Convert AMDIL text to AMDIL binary ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-//===---------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "amdil-emitter"
-#include "AMDIL.h"
-#include "AMDILInstrInfo.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-#if 0
-namespace {
- class AMDILMCCodeEmitter : public MCCodeEmitter {
- AMDILMCCodeEmitter(const AMDILMCCodeEmitter &);// DO NOT IMPLEMENT
- void operator=(const AMDILMCCodeEmitter &); // DO NOT IMPLEMENT
- const TargetMachine &TM;
- const TargetInstrInfo &TII;
- MCContext &Ctx;
- bool Is64BitMode;
- public:
- AMDILMCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit);
- ~AMDILMCCodeEmitter();
- unsigned getNumFixupKinds() const;
- const MCFixupKindInfo& getFixupKindInfo(MCFixupKind Kind) const;
- static unsigned GetAMDILRegNum(const MCOperand &MO);
- void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const;
- void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
- raw_ostream &OS) const;
- void EmitImmediate(const MCOperand &Disp, unsigned ImmSize,
- MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &os,
- SmallVectorImpl<MCFixup> &Fixups, int ImmOffset = 0) const;
-
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups) const;
-
- }; // class AMDILMCCodeEmitter
-}; // anonymous namespace
-
-namespace llvm {
- MCCodeEmitter *createAMDILMCCodeEmitter(const Target &,
- TargetMachine &TM, MCContext &Ctx)
- {
- return new AMDILMCCodeEmitter(TM, Ctx, false);
- }
-}
-
-AMDILMCCodeEmitter::AMDILMCCodeEmitter(TargetMachine &tm, MCContext &ctx
- , bool is64Bit)
-: TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx)
-{
- Is64BitMode = is64Bit;
-}
-
-AMDILMCCodeEmitter::~AMDILMCCodeEmitter()
-{
-}
-
-unsigned
-AMDILMCCodeEmitter::getNumFixupKinds() const
-{
- return 0;
-}
-
-const MCFixupKindInfo &
-AMDILMCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const
-{
-// const static MCFixupKindInfo Infos[] = {};
- if (Kind < FirstTargetFixupKind) {
- return MCCodeEmitter::getFixupKindInfo(Kind);
- }
- assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
- "Invalid kind!");
- return MCCodeEmitter::getFixupKindInfo(Kind);
- // return Infos[Kind - FirstTargetFixupKind];
-
-}
-
-void
-AMDILMCCodeEmitter::EmitByte(unsigned char C, unsigned &CurByte,
- raw_ostream &OS) const
-{
- OS << (char) C;
- ++CurByte;
-}
-void
-AMDILMCCodeEmitter::EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
- raw_ostream &OS) const
-{
- // Output the constant in little endian byte order
- for (unsigned i = 0; i != Size; ++i) {
- EmitByte(Val & 255, CurByte, OS);
- Val >>= 8;
- }
-}
-void
-AMDILMCCodeEmitter::EmitImmediate(const MCOperand &DispOp, unsigned ImmSize,
- MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const
-{
- // If this is a simple integer displacement that doesn't require a relocation
- // emit it now.
- if (DispOp.isImm()) {
- EmitConstant(DispOp.getImm() + ImmOffset, ImmSize, CurByte, OS);
- }
-
- // If we have an immoffset, add it to the expression
- const MCExpr *Expr = DispOp.getExpr();
-
- if (ImmOffset) {
- Expr = MCBinaryExpr::CreateAdd(Expr,
- MCConstantExpr::Create(ImmOffset, Ctx), Ctx);
- }
- // Emit a symbolic constant as a fixup and 4 zeros.
- Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind));
- // TODO: Why the 4 zeros?
- EmitConstant(0, ImmSize, CurByte, OS);
-}
-
-void
-AMDILMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups) const
-{
-#if 0
- unsigned Opcode = MI.getOpcode();
- const TargetInstrDesc &Desc = TII.get(Opcode);
- unsigned TSFlags = Desc.TSFlags;
-
- // Keep track of the current byte being emitted.
- unsigned CurByte = 0;
-
- unsigned NumOps = Desc.getNumOperands();
- unsigned CurOp = 0;
-
- unsigned char BaseOpcode = 0;
-#ifndef NDEBUG
- // FIXME: Verify.
- if (// !Desc.isVariadic() &&
- CurOp != NumOps) {
- errs() << "Cannot encode all operands of: ";
- MI.dump();
- errs() << '\n';
- abort();
- }
-#endif
-#endif
-}
-#endif
diff --git a/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp b/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp
index b8e536361f0..5cb988785e2 100644
--- a/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp
+++ b/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp
@@ -8,17 +8,11 @@
//==-----------------------------------------------------------------------===//
-#define DEBUG_TYPE "machine_peephole"
-#if !defined(NDEBUG)
-#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
-#else
-#define DEBUGME (false)
-#endif
-
#include "AMDIL.h"
+#include "AMDILInstrInfo.h"
#include "AMDILSubtarget.h"
-#include "AMDILUtilityFunctions.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
@@ -56,7 +50,7 @@ namespace llvm
AMDILMachinePeephole::AMDILMachinePeephole(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
: MachineFunctionPass(ID), TM(tm)
{
- mDebug = DEBUGME;
+ mDebug = false;
}
bool
@@ -64,6 +58,8 @@ AMDILMachinePeephole::runOnMachineFunction(MachineFunction &MF)
{
bool Changed = false;
const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>();
+ const AMDILInstrInfo * AMDILII =
+ static_cast<const AMDILInstrInfo *>(TM.getInstrInfo());
for (MachineFunction::iterator MBB = MF.begin(), MBE = MF.end();
MBB != MBE; ++MBB) {
MachineBasicBlock *mb = MBB;
@@ -74,7 +70,7 @@ AMDILMachinePeephole::runOnMachineFunction(MachineFunction &MF)
name = TM.getInstrInfo()->getName(mi->getOpcode());
switch (mi->getOpcode()) {
default:
- if (isAtomicInst(TM.getInstrInfo(), mi)) {
+ if (AMDILII->isAtomicInst(mi)) {
// If we don't support the hardware accellerated address spaces,
// then the atomic needs to be transformed to the global atomic.
if (strstr(name, "_L_")
@@ -94,7 +90,8 @@ AMDILMachinePeephole::runOnMachineFunction(MachineFunction &MF)
TM.getInstrInfo()->get(
(mi->getOpcode() - AMDIL::ATOM_R_ADD) + AMDIL::ATOM_G_ADD));
}
- } else if ((isLoadInst(TM.getInstrInfo(), mi) || isStoreInst(TM.getInstrInfo(), mi)) && isVolatileInst(TM.getInstrInfo(), mi)) {
+ } else if ((AMDILII->isLoadInst(mi) || AMDILII->isStoreInst(mi))
+ && AMDILII->isVolatileInst(mi)) {
insertFence(MIB);
}
continue;
diff --git a/src/gallium/drivers/radeon/AMDILMultiClass.td b/src/gallium/drivers/radeon/AMDILMultiClass.td
index 92691db52fd..d6828178ba7 100644
--- a/src/gallium/drivers/radeon/AMDILMultiClass.td
+++ b/src/gallium/drivers/radeon/AMDILMultiClass.td
@@ -1,4 +1,4 @@
-//===-- AMDILMultiClass.td - TODO: Add brief description -------===//
+//===-- AMDILMultiClass.td - AMDIL Multiclass defs ---*- tablegen -*-------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/src/gallium/drivers/radeon/AMDILNIDevice.cpp b/src/gallium/drivers/radeon/AMDILNIDevice.cpp
index 8fda1c18ae5..d4112cda0b5 100644
--- a/src/gallium/drivers/radeon/AMDILNIDevice.cpp
+++ b/src/gallium/drivers/radeon/AMDILNIDevice.cpp
@@ -1,4 +1,4 @@
-//===-- AMDILNIDevice.cpp - TODO: Add brief description -------===//
+//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp
index 5fe9f53c8c8..b62c7ab048b 100644
--- a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp
+++ b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp
@@ -1,4 +1,4 @@
-//===-- AMDILPeepholeOptimizer.cpp - TODO: Add brief description -------===//
+//===-- AMDILPeepholeOptimizer.cpp - AMDIL Peephole optimizations ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,20 +7,14 @@
//
//==-----------------------------------------------------------------------===//
-#define DEBUG_TYPE "PeepholeOpt"
-#ifdef DEBUG
-#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
-#else
-#define DEBUGME 0
-#endif
-
#include "AMDILAlgorithms.tpp"
#include "AMDILDevices.h"
-#include "AMDILUtilityFunctions.h"
+#include "AMDILInstrInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Constants.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/Function.h"
@@ -41,6 +35,9 @@ using namespace llvm;
// The Peephole optimization pass is used to do simple last minute optimizations
// that are required for correct code or to remove redundant functions
namespace {
+
+class OpaqueType;
+
class LLVM_LIBRARY_VISIBILITY AMDILPeepholeOpt : public FunctionPass {
public:
TargetMachine &TM;
@@ -114,6 +111,19 @@ private:
// samplers at compile time.
bool propagateSamplerInst(CallInst *CI);
+ // Helper functions
+
+ // Group of functions that recursively calculate the size of a structure based
+ // on its sub-types.
+ size_t getTypeSize(Type * const T, bool dereferencePtr = false);
+ size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
+ size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
+ size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false);
+ size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
+ size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
+ size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
+ size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);
+
LLVMContext *mCTX;
Function *mF;
const AMDILSubtarget *mSTM;
@@ -134,7 +144,7 @@ namespace llvm {
AMDILPeepholeOpt::AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
: FunctionPass(ID), TM(tm)
{
- mDebug = DEBUGME;
+ mDebug = false;
optLevel = TM.getOptLevel();
}
@@ -1136,3 +1146,106 @@ AMDILPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const
FunctionPass::getAnalysisUsage(AU);
AU.setPreservesAll();
}
+
+/// Compute a byte size for an arbitrary type by dispatching on its type ID
+/// to the matching getTypeSize overload.
+///
+/// \param T               type to measure; a null pointer yields 0.
+/// \param dereferencePtr  forwarded unchanged to the per-kind overloads.
+/// \returns the size in bytes.  Type IDs without a dedicated overload use the
+/// primitive size in bits divided by eight (rounded down).
+size_t AMDILPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
+  if (!T) {
+    return 0;
+  }
+  switch (T->getTypeID()) {
+  case Type::PointerTyID:
+    return getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
+  case Type::IntegerTyID:
+    return getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
+  case Type::StructTyID:
+    return getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
+  case Type::ArrayTyID:
+    return getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
+  case Type::FunctionTyID:
+    return getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
+  case Type::VectorTyID:
+    return getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
+  case Type::X86_FP80TyID:
+  case Type::FP128TyID:
+  case Type::PPC_FP128TyID:
+  case Type::LabelTyID:
+    assert(0 && "These types are not supported by this backend");
+    // With asserts compiled out, fall through to the primitive-size path.
+  case Type::FloatTyID:
+  case Type::DoubleTyID:
+  default:
+    return T->getPrimitiveSizeInBits() >> 3;
+  }
+}
+
+/// Size of a struct: the plain sum of getTypeSize over each element type.
+/// Nothing is added here for padding or alignment.  A null struct yields 0.
+///
+/// \param dereferencePtr  forwarded to the size computation of each element.
+size_t AMDILPeepholeOpt::getTypeSize(StructType * const ST,
+    bool dereferencePtr) {
+  if (!ST) {
+    return 0;
+  }
+  size_t total = 0;
+  for (StructType::element_iterator it = ST->element_begin(),
+       end = ST->element_end(); it != end; ++it) {
+    Type *elemTy = *it;
+    total += getTypeSize(elemTy, dereferencePtr);
+  }
+  return total;
+}
+
+/// Size of an integer type: its bit width divided by eight, rounded down
+/// (so widths below eight bits give 0).  A null type yields 0.
+/// dereferencePtr is not used.
+size_t AMDILPeepholeOpt::getTypeSize(IntegerType * const IT,
+    bool dereferencePtr) {
+  if (!IT) {
+    return 0;
+  }
+  return IT->getBitWidth() >> 3;
+}
+
+/// Function types have no size: reaching this overload trips the assert.
+/// When asserts are compiled out it returns 0.  Neither parameter is used.
+size_t AMDILPeepholeOpt::getTypeSize(FunctionType * const FT,
+ bool dereferencePtr) {
+ assert(0 && "Should not be able to calculate the size of an function type");
+ return 0;
+}
+
+/// Size of an array: the element type's size multiplied by the number of
+/// elements, converted to size_t.  A null array type yields 0.
+///
+/// \param dereferencePtr  forwarded to the element size computation.
+size_t AMDILPeepholeOpt::getTypeSize(ArrayType * const AT,
+    bool dereferencePtr) {
+  if (!AT) {
+    return 0;
+  }
+  return (size_t)(getTypeSize(AT->getElementType(), dereferencePtr)
+                  * AT->getNumElements());
+}
+
+/// Size of a vector type: its total bit width divided by eight, rounded
+/// down.  A null type yields 0.  dereferencePtr is not used.
+size_t AMDILPeepholeOpt::getTypeSize(VectorType * const VT,
+    bool dereferencePtr) {
+  if (!VT) {
+    return 0;
+  }
+  return VT->getBitWidth() >> 3;
+}
+
+/// Size attributed to a pointer type.
+///
+///  - null pointer type: 0;
+///  - pointer to a struct in the private address space: the size of the
+///    pointed-to struct, computed with the default dereferencePtr value
+///    (the caller's flag is not forwarded on this path);
+///  - otherwise, with dereferencePtr set: the sum of the sizes of the
+///    pointer's contained types;
+///  - otherwise: 4.
+size_t AMDILPeepholeOpt::getTypeSize(PointerType * const PT,
+    bool dereferencePtr) {
+  if (!PT) {
+    return 0;
+  }
+  Type *pointee = PT->getElementType();
+  const bool isPrivateStruct =
+      pointee->getTypeID() == Type::StructTyID &&
+      PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS;
+  if (isPrivateStruct) {
+    return getTypeSize(dyn_cast<StructType>(pointee));
+  }
+  if (!dereferencePtr) {
+    return 4;
+  }
+  size_t total = 0;
+  const size_t numContained = PT->getNumContainedTypes();
+  for (size_t idx = 0; idx < numContained; ++idx) {
+    total += getTypeSize(PT->getContainedType(idx), dereferencePtr);
+  }
+  return total;
+}
+
+/// Opaque types are always reported as 4 bytes; neither parameter is read.
+/// The assert that would have rejected them is left commented out.
+size_t AMDILPeepholeOpt::getTypeSize(OpaqueType * const OT,
+ bool dereferencePtr) {
+ //assert(0 && "Should not be able to calculate the size of an opaque type");
+ return 4;
+}
diff --git a/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp b/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp
index 5588233378c..d7c1dc74b8b 100644
--- a/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp
+++ b/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp
@@ -20,7 +20,8 @@
#include "AMDILRegisterInfo.h"
#include "AMDIL.h"
-#include "AMDILUtilityFunctions.h"
+#include "AMDILInstrInfo.h"
+#include "AMDILTargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -109,7 +110,9 @@ AMDILRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (!MI.getOperand(x).isFI()) {
continue;
}
- bool def = isStoreInst(TM.getInstrInfo(), &MI);
+ const AMDILInstrInfo * AMDILII =
+ static_cast<const AMDILInstrInfo *>(TM.getInstrInfo());
+ bool def = AMDILII->isStoreInst(&MI);
int FrameIndex = MI.getOperand(x).getIndex();
int64_t Offset = MFI->getObjectOffset(FrameIndex);
//int64_t Size = MF.getFrameInfo()->getObjectSize(FrameIndex);
diff --git a/src/gallium/drivers/radeon/AMDILSIDevice.cpp b/src/gallium/drivers/radeon/AMDILSIDevice.cpp
index ce560984ef9..ae402a5d1f7 100644
--- a/src/gallium/drivers/radeon/AMDILSIDevice.cpp
+++ b/src/gallium/drivers/radeon/AMDILSIDevice.cpp
@@ -1,49 +1,49 @@
-//===-- AMDILSIDevice.cpp - TODO: Add brief description -------===//
+//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-//==-----------------------------------------------------------------------===//
-#include "AMDILSIDevice.h"
-#include "AMDILEvergreenDevice.h"
-#include "AMDILNIDevice.h"
-#include "AMDILSubtarget.h"
+//==-----------------------------------------------------------------------===//
+#include "AMDILSIDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+#include "AMDILSubtarget.h"
-using namespace llvm;
-
-AMDILSIDevice::AMDILSIDevice(AMDILSubtarget *ST)
- : AMDILEvergreenDevice(ST)
-{
-}
-AMDILSIDevice::~AMDILSIDevice()
-{
-}
-
-size_t
-AMDILSIDevice::getMaxLDSSize() const
-{
- if (usesHardware(AMDILDeviceInfo::LocalMem)) {
- return MAX_LDS_SIZE_900;
- } else {
- return 0;
- }
-}
-
-uint32_t
-AMDILSIDevice::getGeneration() const
-{
- return AMDILDeviceInfo::HD7XXX;
-}
-
-std::string
-AMDILSIDevice::getDataLayout() const
-{
- return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
- "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
- "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
- "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
- "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
- "-n8:16:32:64");
-}
+using namespace llvm;
+
+// Forwards the subtarget to the AMDILEvergreenDevice base; no further setup
+// is done here.
+AMDILSIDevice::AMDILSIDevice(AMDILSubtarget *ST) : AMDILEvergreenDevice(ST) {}
+// Empty destructor body; nothing is released here.
+AMDILSIDevice::~AMDILSIDevice() {}
+
+// Returns MAX_LDS_SIZE_900 when usesHardware(AMDILDeviceInfo::LocalMem) is
+// true, and 0 otherwise.
+size_t AMDILSIDevice::getMaxLDSSize() const {
+  return usesHardware(AMDILDeviceInfo::LocalMem) ? MAX_LDS_SIZE_900 : 0;
+}
+
+// Reports this device's generation as AMDILDeviceInfo::HD7XXX.
+uint32_t AMDILSIDevice::getGeneration() const {
+  return AMDILDeviceInfo::HD7XXX;
+}
+
+// Returns the data layout string used for this device. The string is
+// little-endian ("e") with 64-bit pointers ("p:64:64:64"), followed by the
+// integer, float and vector alignments and the native integer widths.
+std::string AMDILSIDevice::getDataLayout() const {
+  static const char *const layout =
+      "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
+      "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+      "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+      "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+      "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+      "-n8:16:32:64";
+  return std::string(layout);
+}
diff --git a/src/gallium/drivers/radeon/AMDILSIDevice.h b/src/gallium/drivers/radeon/AMDILSIDevice.h
index 69f35a0588d..b272af7cfcf 100644
--- a/src/gallium/drivers/radeon/AMDILSIDevice.h
+++ b/src/gallium/drivers/radeon/AMDILSIDevice.h
@@ -1,45 +1,45 @@
-//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
+//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-//==-----------------------------------------------------------------------===//
-//
-// Interface for the subtarget data classes.
-//
-//===---------------------------------------------------------------------===//
-// This file will define the interface that each generation needs to
-// implement in order to correctly answer queries on the capabilities of the
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===---------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
// specific hardware.
-//===---------------------------------------------------------------------===//
-#ifndef _AMDILSIDEVICE_H_
-#define _AMDILSIDEVICE_H_
-#include "AMDILEvergreenDevice.h"
-#include "AMDILSubtarget.h"
+//===---------------------------------------------------------------------===//
+#ifndef _AMDILSIDEVICE_H_
+#define _AMDILSIDEVICE_H_
+#include "AMDILEvergreenDevice.h"
+#include "AMDILSubtarget.h"
+
+namespace llvm {
+ class AMDILSubtarget;
+//===---------------------------------------------------------------------===//
+// SI generation of devices and their respective sub classes
+//===---------------------------------------------------------------------===//
+
+// The AMDILSIDevice is the base class for all Southern Islands series of
+// cards. It is very similar to the AMDILEvergreenDevice, with the major
+// exception being differences in wavefront size and hardware capabilities. The
+// SI devices are all 64 wide wavefronts and also add support for signed 24 bit
+// integer operations.
+
+ // Device description for the SI generation, derived from the Evergreen
+ // device. It declares no data members of its own.
+ class AMDILSIDevice : public AMDILEvergreenDevice {
+ public:
+   // Construction and destruction.
+   AMDILSIDevice(AMDILSubtarget *ST);
+   virtual ~AMDILSIDevice();
+
+   // Device queries answered by this class.
+   virtual size_t getMaxLDSSize() const;
+   virtual uint32_t getGeneration() const;
+   virtual std::string getDataLayout() const;
+ }; // AMDILSIDevice
-namespace llvm {
- class AMDILSubtarget;
-//===---------------------------------------------------------------------===//
-// SI generation of devices and their respective sub classes
-//===---------------------------------------------------------------------===//
-
-// The AMDILSIDevice is the base class for all Northern Island series of
-// cards. It is very similiar to the AMDILEvergreenDevice, with the major
-// exception being differences in wavefront size and hardware capabilities. The
-// SI devices are all 64 wide wavefronts and also add support for signed 24 bit
-// integer operations
-
- class AMDILSIDevice : public AMDILEvergreenDevice {
- public:
- AMDILSIDevice(AMDILSubtarget*);
- virtual ~AMDILSIDevice();
- virtual size_t getMaxLDSSize() const;
- virtual uint32_t getGeneration() const;
- virtual std::string getDataLayout() const;
- protected:
- }; // AMDILSIDevice
-
-} // namespace llvm
-#endif // _AMDILSIDEVICE_H_
+} // namespace llvm
+#endif // _AMDILSIDEVICE_H_
diff --git a/src/gallium/drivers/radeon/AMDILSubtarget.cpp b/src/gallium/drivers/radeon/AMDILSubtarget.cpp
index 11b6bbe0c01..249cb03f4a3 100644
--- a/src/gallium/drivers/radeon/AMDILSubtarget.cpp
+++ b/src/gallium/drivers/radeon/AMDILSubtarget.cpp
@@ -27,7 +27,8 @@ using namespace llvm;
#define GET_SUBTARGETINFO_TARGET_DESC
#include "AMDILGenSubtargetInfo.inc"
-AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS) : AMDILGenSubtargetInfo( TT, CPU, FS )
+AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS) : AMDILGenSubtargetInfo( TT, CPU, FS ),
+ mDumpCode(false)
{
memset(CapsOverride, 0, sizeof(*CapsOverride)
* AMDILDeviceInfo::MaxNumberCapabilities);
@@ -93,7 +94,7 @@ AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::St
}
#endif
mDevName = GPU;
- mDevice = getDeviceFromName(mDevName, this, mIs64bit);
+ mDevice = AMDILDeviceInfo::getDeviceFromName(mDevName, this, mIs64bit);
}
AMDILSubtarget::~AMDILSubtarget()
{
diff --git a/src/gallium/drivers/radeon/AMDILSubtarget.h b/src/gallium/drivers/radeon/AMDILSubtarget.h
index a4b0e34ada7..38fcb859ac6 100644
--- a/src/gallium/drivers/radeon/AMDILSubtarget.h
+++ b/src/gallium/drivers/radeon/AMDILSubtarget.h
@@ -42,6 +42,7 @@ namespace llvm {
uint32_t mVersion;
bool mIs64bit;
bool mIs32on64bit;
+ bool mDumpCode;
public:
AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS);
virtual ~AMDILSubtarget();
@@ -67,6 +68,7 @@ namespace llvm {
ParseSubtargetFeatures(
llvm::StringRef CPU,
llvm::StringRef FS);
+ // Returns the mDumpCode flag, which the constructor's initializer list sets
+ // to false.
+ bool dumpCode() const { return mDumpCode; }
};
diff --git a/src/gallium/drivers/radeon/AMDILTargetMachine.cpp b/src/gallium/drivers/radeon/AMDILTargetMachine.cpp
index 77fac1d97bd..0879d43ad72 100644
--- a/src/gallium/drivers/radeon/AMDILTargetMachine.cpp
+++ b/src/gallium/drivers/radeon/AMDILTargetMachine.cpp
@@ -150,8 +150,8 @@ bool AMDILPassConfig::addPreISel()
bool AMDILPassConfig::addInstSelector()
{
- PM.add(createAMDILPeepholeOpt(*TM));
- PM.add(createAMDILISelDag(getAMDILTargetMachine()));
+ PM->add(createAMDILPeepholeOpt(*TM));
+ PM->add(createAMDILISelDag(getAMDILTargetMachine()));
return false;
}
@@ -162,7 +162,7 @@ bool AMDILPassConfig::addPreRegAlloc()
llvm::RegisterScheduler::setDefault(&llvm::createSourceListDAGScheduler);
}
- PM.add(createAMDILMachinePeephole(*TM));
+ PM->add(createAMDILMachinePeephole(*TM));
return false;
}
@@ -175,8 +175,8 @@ bool AMDILPassConfig::addPostRegAlloc() {
/// true if -print-machineinstrs should print out the code after the passes.
bool AMDILPassConfig::addPreEmitPass()
{
- PM.add(createAMDILCFGPreparationPass(*TM));
- PM.add(createAMDILCFGStructurizerPass(*TM));
+ PM->add(createAMDILCFGPreparationPass(*TM));
+ PM->add(createAMDILCFGStructurizerPass(*TM));
return true;
}
diff --git a/src/gallium/drivers/radeon/AMDILTokenDesc.td b/src/gallium/drivers/radeon/AMDILTokenDesc.td
index b81f593506f..2dafb2cd559 100644
--- a/src/gallium/drivers/radeon/AMDILTokenDesc.td
+++ b/src/gallium/drivers/radeon/AMDILTokenDesc.td
@@ -1,4 +1,4 @@
-//===-- AMDILTokenDesc.td - TODO: Add brief description -------===//
+//===-- AMDILTokenDesc.td - AMDIL Token Definitions --*- tablegen -*-----===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/src/gallium/drivers/radeon/AMDILUtilityFunctions.cpp b/src/gallium/drivers/radeon/AMDILUtilityFunctions.cpp
deleted file mode 100644
index f2ef4eb7771..00000000000
--- a/src/gallium/drivers/radeon/AMDILUtilityFunctions.cpp
+++ /dev/null
@@ -1,683 +0,0 @@
-//===-- AMDILUtilityFunctions.cpp - AMDIL Utility Functions ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file provides the implementations of functions that are declared in the
-// AMDILUtilityFUnctions.h file.
-//
-//===----------------------------------------------------------------------===//
-#include "AMDILUtilityFunctions.h"
-#include "AMDILISelLowering.h"
-#include "llvm/ADT/ValueMap.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instruction.h"
-#include "llvm/Instructions.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Type.h"
-
-#include <cstdio>
-#include <list>
-#include <queue>
-
-#define GET_OPCODE_NAME(TII, MI) \
- TII->getName(MI->getOpcode())
-
-
-using namespace llvm;
-int64_t GET_SCALAR_SIZE(llvm::Type *A) {
- return A->getScalarSizeInBits();
-}
-
-const TargetRegisterClass * getRegClassFromID(unsigned int ID) {
- switch (ID) {
- default:
- assert(0 && "Passed in ID does not match any register classes.");
- return NULL;
- case AMDIL::GPRI8RegClassID:
- return &AMDIL::GPRI8RegClass;
- case AMDIL::GPRI16RegClassID:
- return &AMDIL::GPRI16RegClass;
- case AMDIL::GPRI32RegClassID:
- return &AMDIL::GPRI32RegClass;
- case AMDIL::GPRF32RegClassID:
- return &AMDIL::GPRF32RegClass;
- case AMDIL::GPRI64RegClassID:
- return &AMDIL::GPRI64RegClass;
- case AMDIL::GPRF64RegClassID:
- return &AMDIL::GPRF64RegClass;
- case AMDIL::GPRV4F32RegClassID:
- return &AMDIL::GPRV4F32RegClass;
- case AMDIL::GPRV4I8RegClassID:
- return &AMDIL::GPRV4I8RegClass;
- case AMDIL::GPRV4I16RegClassID:
- return &AMDIL::GPRV4I16RegClass;
- case AMDIL::GPRV4I32RegClassID:
- return &AMDIL::GPRV4I32RegClass;
- case AMDIL::GPRV2F32RegClassID:
- return &AMDIL::GPRV2F32RegClass;
- case AMDIL::GPRV2I8RegClassID:
- return &AMDIL::GPRV2I8RegClass;
- case AMDIL::GPRV2I16RegClassID:
- return &AMDIL::GPRV2I16RegClass;
- case AMDIL::GPRV2I32RegClassID:
- return &AMDIL::GPRV2I32RegClass;
- case AMDIL::GPRV2F64RegClassID:
- return &AMDIL::GPRV2F64RegClass;
- case AMDIL::GPRV2I64RegClassID:
- return &AMDIL::GPRV2I64RegClass;
- };
-}
-
-unsigned int getMoveInstFromID(unsigned int ID) {
- switch (ID) {
- default:
- assert(0 && "Passed in ID does not match any move instructions.");
- case AMDIL::GPRI8RegClassID:
- return AMDIL::MOVE_i8;
- case AMDIL::GPRI16RegClassID:
- return AMDIL::MOVE_i16;
- case AMDIL::GPRI32RegClassID:
- return AMDIL::MOVE_i32;
- case AMDIL::GPRF32RegClassID:
- return AMDIL::MOVE_f32;
- case AMDIL::GPRI64RegClassID:
- return AMDIL::MOVE_i64;
- case AMDIL::GPRF64RegClassID:
- return AMDIL::MOVE_f64;
- case AMDIL::GPRV4F32RegClassID:
- return AMDIL::MOVE_v4f32;
- case AMDIL::GPRV4I8RegClassID:
- return AMDIL::MOVE_v4i8;
- case AMDIL::GPRV4I16RegClassID:
- return AMDIL::MOVE_v4i16;
- case AMDIL::GPRV4I32RegClassID:
- return AMDIL::MOVE_v4i32;
- case AMDIL::GPRV2F32RegClassID:
- return AMDIL::MOVE_v2f32;
- case AMDIL::GPRV2I8RegClassID:
- return AMDIL::MOVE_v2i8;
- case AMDIL::GPRV2I16RegClassID:
- return AMDIL::MOVE_v2i16;
- case AMDIL::GPRV2I32RegClassID:
- return AMDIL::MOVE_v2i32;
- case AMDIL::GPRV2F64RegClassID:
- return AMDIL::MOVE_v2f64;
- case AMDIL::GPRV2I64RegClassID:
- return AMDIL::MOVE_v2i64;
- };
- return -1;
-}
-
-unsigned int getPHIMoveInstFromID(unsigned int ID) {
- switch (ID) {
- default:
- assert(0 && "Passed in ID does not match any move instructions.");
- case AMDIL::GPRI8RegClassID:
- return AMDIL::PHIMOVE_i8;
- case AMDIL::GPRI16RegClassID:
- return AMDIL::PHIMOVE_i16;
- case AMDIL::GPRI32RegClassID:
- return AMDIL::PHIMOVE_i32;
- case AMDIL::GPRF32RegClassID:
- return AMDIL::PHIMOVE_f32;
- case AMDIL::GPRI64RegClassID:
- return AMDIL::PHIMOVE_i64;
- case AMDIL::GPRF64RegClassID:
- return AMDIL::PHIMOVE_f64;
- case AMDIL::GPRV4F32RegClassID:
- return AMDIL::PHIMOVE_v4f32;
- case AMDIL::GPRV4I8RegClassID:
- return AMDIL::PHIMOVE_v4i8;
- case AMDIL::GPRV4I16RegClassID:
- return AMDIL::PHIMOVE_v4i16;
- case AMDIL::GPRV4I32RegClassID:
- return AMDIL::PHIMOVE_v4i32;
- case AMDIL::GPRV2F32RegClassID:
- return AMDIL::PHIMOVE_v2f32;
- case AMDIL::GPRV2I8RegClassID:
- return AMDIL::PHIMOVE_v2i8;
- case AMDIL::GPRV2I16RegClassID:
- return AMDIL::PHIMOVE_v2i16;
- case AMDIL::GPRV2I32RegClassID:
- return AMDIL::PHIMOVE_v2i32;
- case AMDIL::GPRV2F64RegClassID:
- return AMDIL::PHIMOVE_v2f64;
- case AMDIL::GPRV2I64RegClassID:
- return AMDIL::PHIMOVE_v2i64;
- };
- return -1;
-}
-
-const TargetRegisterClass* getRegClassFromType(unsigned int type) {
- switch (type) {
- default:
- assert(0 && "Passed in type does not match any register classes.");
- case MVT::i8:
- return &AMDIL::GPRI8RegClass;
- case MVT::i16:
- return &AMDIL::GPRI16RegClass;
- case MVT::i32:
- return &AMDIL::GPRI32RegClass;
- case MVT::f32:
- return &AMDIL::GPRF32RegClass;
- case MVT::i64:
- return &AMDIL::GPRI64RegClass;
- case MVT::f64:
- return &AMDIL::GPRF64RegClass;
- case MVT::v4f32:
- return &AMDIL::GPRV4F32RegClass;
- case MVT::v4i8:
- return &AMDIL::GPRV4I8RegClass;
- case MVT::v4i16:
- return &AMDIL::GPRV4I16RegClass;
- case MVT::v4i32:
- return &AMDIL::GPRV4I32RegClass;
- case MVT::v2f32:
- return &AMDIL::GPRV2F32RegClass;
- case MVT::v2i8:
- return &AMDIL::GPRV2I8RegClass;
- case MVT::v2i16:
- return &AMDIL::GPRV2I16RegClass;
- case MVT::v2i32:
- return &AMDIL::GPRV2I32RegClass;
- case MVT::v2f64:
- return &AMDIL::GPRV2F64RegClass;
- case MVT::v2i64:
- return &AMDIL::GPRV2I64RegClass;
- }
-}
-
-void printSDNode(const SDNode *N) {
- printf("Opcode: %d isTargetOpcode: %d isMachineOpcode: %d\n",
- N->getOpcode(), N->isTargetOpcode(), N->isMachineOpcode());
- printf("Empty: %d OneUse: %d Size: %d NodeID: %d\n",
- N->use_empty(), N->hasOneUse(), (int)N->use_size(), N->getNodeId());
- for (unsigned int i = 0; i < N->getNumOperands(); ++i) {
- printf("OperandNum: %d ValueCount: %d ValueType: %d\n",
- i, N->getNumValues(), N->getValueType(0) .getSimpleVT().SimpleTy);
- printSDValue(N->getOperand(i), 0);
- }
-}
-
-void printSDValue(const SDValue &Op, int level) {
- printf("\nOp: %p OpCode: %d NumOperands: %d ", (void*)&Op, Op.getOpcode(),
- Op.getNumOperands());
- printf("IsTarget: %d IsMachine: %d ", Op.isTargetOpcode(),
- Op.isMachineOpcode());
- if (Op.isMachineOpcode()) {
- printf("MachineOpcode: %d\n", Op.getMachineOpcode());
- } else {
- printf("\n");
- }
- EVT vt = Op.getValueType();
- printf("ValueType: %d \n", vt.getSimpleVT().SimpleTy);
- printf("UseEmpty: %d OneUse: %d\n", Op.use_empty(), Op.hasOneUse());
- if (level) {
- printf("Children for %d:\n", level);
- for (unsigned int i = 0; i < Op.getNumOperands(); ++i) {
- printf("Child %d->%d:", level, i);
- printSDValue(Op.getOperand(i), level - 1);
- }
- }
-}
-
-bool isPHIMove(unsigned int opcode) {
- switch (opcode) {
- default:
- return false;
- ExpandCaseToAllTypes(AMDIL::PHIMOVE);
- return true;
- }
- return false;
-}
-
-bool isMove(unsigned int opcode) {
- switch (opcode) {
- default:
- return false;
- ExpandCaseToAllTypes(AMDIL::MOVE);
- return true;
- }
- return false;
-}
-
-bool isMoveOrEquivalent(unsigned int opcode) {
- switch (opcode) {
- default:
- return isMove(opcode) || isPHIMove(opcode);
- ExpandCaseToAllScalarTypes(AMDIL::IL_ASCHAR);
- ExpandCaseToAllScalarTypes(AMDIL::IL_ASSHORT);
- ExpandCaseToAllScalarTypes(AMDIL::IL_ASINT);
- ExpandCaseToAllScalarTypes(AMDIL::IL_ASLONG);
- ExpandCaseToAllScalarTypes(AMDIL::IL_ASDOUBLE);
- ExpandCaseToAllScalarTypes(AMDIL::IL_ASFLOAT);
- ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2CHAR);
- ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2SHORT);
- ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2INT);
- ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2FLOAT);
- ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2LONG);
- ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2DOUBLE);
- ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4CHAR);
- ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4SHORT);
- ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4INT);
- ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4FLOAT);
- case AMDIL::INTTOANY_i8:
- case AMDIL::INTTOANY_i16:
- case AMDIL::INTTOANY_i32:
- case AMDIL::INTTOANY_f32:
- case AMDIL::DLO:
- case AMDIL::LLO:
- case AMDIL::LLO_v2i64:
- return true;
- };
- return false;
-}
-
-bool check_type(const Value *ptr, unsigned int addrspace) {
- if (!ptr) {
- return false;
- }
- Type *ptrType = ptr->getType();
- return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
-}
-
-size_t getTypeSize(Type * const T, bool dereferencePtr) {
- size_t size = 0;
- if (!T) {
- return size;
- }
- switch (T->getTypeID()) {
- case Type::X86_FP80TyID:
- case Type::FP128TyID:
- case Type::PPC_FP128TyID:
- case Type::LabelTyID:
- assert(0 && "These types are not supported by this backend");
- default:
- case Type::FloatTyID:
- case Type::DoubleTyID:
- size = T->getPrimitiveSizeInBits() >> 3;
- break;
- case Type::PointerTyID:
- size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
- break;
- case Type::IntegerTyID:
- size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
- break;
- case Type::StructTyID:
- size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
- break;
- case Type::ArrayTyID:
- size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
- break;
- case Type::FunctionTyID:
- size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
- break;
- case Type::VectorTyID:
- size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
- break;
- };
- return size;
-}
-
-size_t getTypeSize(StructType * const ST, bool dereferencePtr) {
- size_t size = 0;
- if (!ST) {
- return size;
- }
- Type *curType;
- StructType::element_iterator eib;
- StructType::element_iterator eie;
- for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
- curType = *eib;
- size += getTypeSize(curType, dereferencePtr);
- }
- return size;
-}
-
-size_t getTypeSize(IntegerType * const IT, bool dereferencePtr) {
- return IT ? (IT->getBitWidth() >> 3) : 0;
-}
-
-size_t getTypeSize(FunctionType * const FT, bool dereferencePtr) {
- assert(0 && "Should not be able to calculate the size of an function type");
- return 0;
-}
-
-size_t getTypeSize(ArrayType * const AT, bool dereferencePtr) {
- return (size_t)(AT ? (getTypeSize(AT->getElementType(),
- dereferencePtr) * AT->getNumElements())
- : 0);
-}
-
-size_t getTypeSize(VectorType * const VT, bool dereferencePtr) {
- return VT ? (VT->getBitWidth() >> 3) : 0;
-}
-
-size_t getTypeSize(PointerType * const PT, bool dereferencePtr) {
- if (!PT) {
- return 0;
- }
- Type *CT = PT->getElementType();
- if (CT->getTypeID() == Type::StructTyID &&
- PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
- return getTypeSize(dyn_cast<StructType>(CT));
- } else if (dereferencePtr) {
- size_t size = 0;
- for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
- size += getTypeSize(PT->getContainedType(x), dereferencePtr);
- }
- return size;
- } else {
- return 4;
- }
-}
-
-size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr) {
- //assert(0 && "Should not be able to calculate the size of an opaque type");
- return 4;
-}
-
-size_t getNumElements(Type * const T) {
- size_t size = 0;
- if (!T) {
- return size;
- }
- switch (T->getTypeID()) {
- case Type::X86_FP80TyID:
- case Type::FP128TyID:
- case Type::PPC_FP128TyID:
- case Type::LabelTyID:
- assert(0 && "These types are not supported by this backend");
- default:
- case Type::FloatTyID:
- case Type::DoubleTyID:
- size = 1;
- break;
- case Type::PointerTyID:
- size = getNumElements(dyn_cast<PointerType>(T));
- break;
- case Type::IntegerTyID:
- size = getNumElements(dyn_cast<IntegerType>(T));
- break;
- case Type::StructTyID:
- size = getNumElements(dyn_cast<StructType>(T));
- break;
- case Type::ArrayTyID:
- size = getNumElements(dyn_cast<ArrayType>(T));
- break;
- case Type::FunctionTyID:
- size = getNumElements(dyn_cast<FunctionType>(T));
- break;
- case Type::VectorTyID:
- size = getNumElements(dyn_cast<VectorType>(T));
- break;
- };
- return size;
-}
-
-size_t getNumElements(StructType * const ST) {
- size_t size = 0;
- if (!ST) {
- return size;
- }
- Type *curType;
- StructType::element_iterator eib;
- StructType::element_iterator eie;
- for (eib = ST->element_begin(), eie = ST->element_end();
- eib != eie; ++eib) {
- curType = *eib;
- size += getNumElements(curType);
- }
- return size;
-}
-
-size_t getNumElements(IntegerType * const IT) {
- return (!IT) ? 0 : 1;
-}
-
-size_t getNumElements(FunctionType * const FT) {
- assert(0 && "Should not be able to calculate the number of "
- "elements of a function type");
- return 0;
-}
-
-size_t getNumElements(ArrayType * const AT) {
- return (!AT) ? 0
- : (size_t)(getNumElements(AT->getElementType()) *
- AT->getNumElements());
-}
-
-size_t getNumElements(VectorType * const VT) {
- return (!VT) ? 0
- : VT->getNumElements() * getNumElements(VT->getElementType());
-}
-
-size_t getNumElements(PointerType * const PT) {
- size_t size = 0;
- if (!PT) {
- return size;
- }
- for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
- size += getNumElements(PT->getContainedType(x));
- }
- return size;
-}
-
-const llvm::Value *getBasePointerValue(const llvm::Value *V)
-{
- if (!V) {
- return NULL;
- }
- const Value *ret = NULL;
- ValueMap<const Value *, bool> ValueBitMap;
- std::queue<const Value *, std::list<const Value *> > ValueQueue;
- ValueQueue.push(V);
- while (!ValueQueue.empty()) {
- V = ValueQueue.front();
- if (ValueBitMap.find(V) == ValueBitMap.end()) {
- ValueBitMap[V] = true;
- if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) {
- ret = V;
- break;
- } else if (dyn_cast<GlobalVariable>(V)) {
- ret = V;
- break;
- } else if (dyn_cast<Constant>(V)) {
- const ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
- if (CE) {
- ValueQueue.push(CE->getOperand(0));
- }
- } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
- ret = AI;
- break;
- } else if (const Instruction *I = dyn_cast<Instruction>(V)) {
- uint32_t numOps = I->getNumOperands();
- for (uint32_t x = 0; x < numOps; ++x) {
- ValueQueue.push(I->getOperand(x));
- }
- } else {
- // assert(0 && "Found a Value that we didn't know how to handle!");
- }
- }
- ValueQueue.pop();
- }
- return ret;
-}
-
-const llvm::Value *getBasePointerValue(const llvm::MachineInstr *MI) {
- const Value *moVal = NULL;
- if (!MI->memoperands_empty()) {
- const MachineMemOperand *memOp = (*MI->memoperands_begin());
- moVal = memOp ? memOp->getValue() : NULL;
- moVal = getBasePointerValue(moVal);
- }
- return moVal;
-}
-
-bool commaPrint(int i, llvm::raw_ostream &O) {
- O << ":" << i;
- return false;
-}
-
-bool isLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
- if (strstr(GET_OPCODE_NAME(TII, MI), "LOADCONST")) {
- return false;
- }
- return strstr(GET_OPCODE_NAME(TII, MI), "LOAD");
-}
-
-bool isSWSExtLoadInst(MachineInstr *MI)
-{
-switch (MI->getOpcode()) {
- default:
- break;
- ExpandCaseToByteShortTypes(AMDIL::LOCALLOAD);
- ExpandCaseToByteShortTypes(AMDIL::GLOBALLOAD);
- ExpandCaseToByteShortTypes(AMDIL::REGIONLOAD);
- ExpandCaseToByteShortTypes(AMDIL::PRIVATELOAD);
- ExpandCaseToByteShortTypes(AMDIL::CPOOLLOAD);
- ExpandCaseToByteShortTypes(AMDIL::CONSTANTLOAD);
- return true;
- };
- return false;
-}
-
-bool isExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
- return strstr(GET_OPCODE_NAME(TII, MI), "EXTLOAD");
-}
-
-bool isSExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
- return strstr(GET_OPCODE_NAME(TII, MI), "SEXTLOAD");
-}
-
-bool isAExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
- return strstr(GET_OPCODE_NAME(TII, MI), "AEXTLOAD");
-}
-
-bool isZExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
- return strstr(GET_OPCODE_NAME(TII, MI), "ZEXTLOAD");
-}
-
-bool isStoreInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
- return strstr(GET_OPCODE_NAME(TII, MI), "STORE");
-}
-
-bool isTruncStoreInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
- return strstr(GET_OPCODE_NAME(TII, MI), "TRUNCSTORE");
-}
-
-bool isAtomicInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
- return strstr(GET_OPCODE_NAME(TII, MI), "ATOM");
-}
-
-bool isVolatileInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
- if (!MI->memoperands_empty()) {
- for (MachineInstr::mmo_iterator mob = MI->memoperands_begin(),
- moe = MI->memoperands_end(); mob != moe; ++mob) {
- // If there is a volatile mem operand, this is a volatile instruction.
- if ((*mob)->isVolatile()) {
- return true;
- }
- }
- }
- return false;
-}
-bool isGlobalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
- return strstr(GET_OPCODE_NAME(TII, MI), "GLOBAL");
-}
-bool isPrivateInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
- return strstr(GET_OPCODE_NAME(TII, MI), "PRIVATE");
-}
-bool isConstantInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
- return strstr(GET_OPCODE_NAME(TII, MI), "CONSTANT")
- || strstr(GET_OPCODE_NAME(TII, MI), "CPOOL");
-}
-bool isRegionInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
- return strstr(GET_OPCODE_NAME(TII, MI), "REGION");
-}
-bool isLocalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
- return strstr(GET_OPCODE_NAME(TII, MI), "LOCAL");
-}
-bool isImageInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
- return strstr(GET_OPCODE_NAME(TII, MI), "IMAGE");
-}
-bool isAppendInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
- return strstr(GET_OPCODE_NAME(TII, MI), "APPEND");
-}
-bool isRegionAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
- return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_R");
-}
-bool isLocalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
- return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_L");
-}
-bool isGlobalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
- return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_G")
- || isArenaAtomic(TII, MI);
-}
-bool isArenaAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
- return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_A");
-}
-
-const char* getSrcSwizzle(unsigned idx) {
- const char *srcSwizzles[] = {
- "", ".x000", ".0x00", ".00x0", ".000x", ".y000", ".0y00", ".00y0", ".000y",
- ".z000", ".0z00", ".00z0", ".000z", ".w000", ".0w00", ".00w0", ".000w",
- ".xy00", ".00xy", ".zw00", ".00zw", ".xyz0", ".0xyz", ".xyzw", ".0000",
- ".xxxx", ".yyyy", ".zzzz", ".wwww", ".xyxy", ".zwzw", ".xzxz", ".ywyw",
- ".x0y0", ".0x0y", ".xy_neg(y)", "_neg(yw)", "_neg(x)", ".xy_neg(xy)",
- "_neg(xyzw)", ".0yzw", ".x0zw", ".xy0w", ".x", ".y", ".z", ".w", ".xy",
- ".zw"
- };
- assert(idx < sizeof(srcSwizzles)/sizeof(srcSwizzles[0])
- && "Idx passed in is invalid!");
- return srcSwizzles[idx];
-}
-const char* getDstSwizzle(unsigned idx) {
- const char *dstSwizzles[] = {
- "", ".x___", ".xy__", ".xyz_", ".xyzw", "._y__", "._yz_", "._yzw", ".__z_",
- ".__zw", ".___w", ".x_zw", ".xy_w", ".x_z_", ".x__w", "._y_w",
- };
- assert(idx < sizeof(dstSwizzles)/sizeof(dstSwizzles[0])
- && "Idx passed in is invalid!");
- return dstSwizzles[idx];
-}
-/// Helper function to get the currently set flags
-void getAsmPrinterFlags(MachineInstr *MI, AMDILAS::InstrResEnc &curRes)
-{
- // We need 16 bits of information, but LLVMr127097 cut the field in half.
- // So we have to use two different fields to store all of our information.
- uint16_t upper = MI->getFlags() << 8;
- uint16_t lower = MI->getAsmPrinterFlags();
- curRes.u16all = upper | lower;
-}
-/// Helper function to clear the currently set flags and add the new flags.
-void setAsmPrinterFlags(MachineInstr *MI, AMDILAS::InstrResEnc &curRes)
-{
- // We need 16 bits of information, but LLVMr127097 cut the field in half.
- // So we have to use two different fields to store all of our information.
- MI->clearAsmPrinterFlags();
- MI->setFlags(0);
- uint8_t lower = curRes.u16all & 0xFF;
- uint8_t upper = (curRes.u16all >> 8) & 0xFF;
- MI->setFlags(upper);
- MI->setAsmPrinterFlag((llvm::MachineInstr::CommentFlag)lower);
-}
diff --git a/src/gallium/drivers/radeon/AMDILUtilityFunctions.h b/src/gallium/drivers/radeon/AMDILUtilityFunctions.h
index 637c868b55c..66af706bbb3 100644
--- a/src/gallium/drivers/radeon/AMDILUtilityFunctions.h
+++ b/src/gallium/drivers/radeon/AMDILUtilityFunctions.h
@@ -7,191 +7,12 @@
//
//==-----------------------------------------------------------------------===//
//
-// This file provides declarations for functions that are used across different
-// classes and provide various conversions or utility to shorten the code
+// This file provides helper macros for expanding case statements.
//
//===----------------------------------------------------------------------===//
#ifndef AMDILUTILITYFUNCTIONS_H_
#define AMDILUTILITYFUNCTIONS_H_
-#include "AMDIL.h"
-#include "AMDILTargetMachine.h"
-#include "llvm/ADT/SmallVector.h"
-
-// Utility functions from ID
-//
-namespace llvm {
-class TargetRegisterClass;
-class SDValue;
-class SDNode;
-class Value;
-class Type;
-class StructType;
-class IntegerType;
-class FunctionType;
-class VectorType;
-class ArrayType;
-class PointerType;
-class OpaqueType;
-class MachineInstr;
-
-}
-enum SrcSwizzles {
- AMDIL_SRC_SWIZZLE_DEFAULT = 0,
- AMDIL_SRC_SWIZZLE_X000,
- AMDIL_SRC_SWIZZLE_0X00,
- AMDIL_SRC_SWIZZLE_00X0,
- AMDIL_SRC_SWIZZLE_000X,
- AMDIL_SRC_SWIZZLE_Y000,
- AMDIL_SRC_SWIZZLE_0Y00,
- AMDIL_SRC_SWIZZLE_00Y0,
- AMDIL_SRC_SWIZZLE_000Y,
- AMDIL_SRC_SWIZZLE_Z000,
- AMDIL_SRC_SWIZZLE_0Z00,
- AMDIL_SRC_SWIZZLE_00Z0,
- AMDIL_SRC_SWIZZLE_000Z,
- AMDIL_SRC_SWIZZLE_W000,
- AMDIL_SRC_SWIZZLE_0W00,
- AMDIL_SRC_SWIZZLE_00W0,
- AMDIL_SRC_SWIZZLE_000W,
- AMDIL_SRC_SWIZZLE_XY00,
- AMDIL_SRC_SWIZZLE_00XY,
- AMDIL_SRC_SWIZZLE_ZW00,
- AMDIL_SRC_SWIZZLE_00ZW,
- AMDIL_SRC_SWIZZLE_XYZ0,
- AMDIL_SRC_SWIZZLE_0XYZ,
- AMDIL_SRC_SWIZZLE_XYZW,
- AMDIL_SRC_SWIZZLE_0000,
- AMDIL_SRC_SWIZZLE_XXXX,
- AMDIL_SRC_SWIZZLE_YYYY,
- AMDIL_SRC_SWIZZLE_ZZZZ,
- AMDIL_SRC_SWIZZLE_WWWW,
- AMDIL_SRC_SWIZZLE_XYXY,
- AMDIL_SRC_SWIZZLE_ZWZW,
- AMDIL_SRC_SWIZZLE_XZXZ,
- AMDIL_SRC_SWIZZLE_YWYW,
- AMDIL_SRC_SWIZZLE_X0Y0,
- AMDIL_SRC_SWIZZLE_0X0Y,
- AMDIL_SRC_SWIZZLE_XY_NEGY,
- AMDIL_SRC_SWIZZLE_NEGYW,
- AMDIL_SRC_SWIZZLE_NEGX,
- AMDIL_SRC_SWIZZLE_XY_NEGXY,
- AMDIL_SRC_SWIZZLE_NEG_XYZW,
- AMDIL_SRC_SWIZZLE_0YZW,
- AMDIL_SRC_SWIZZLE_X0ZW,
- AMDIL_SRC_SWIZZLE_XY0W,
- AMDIL_SRC_SWIZZLE_X,
- AMDIL_SRC_SWIZZLE_Y,
- AMDIL_SRC_SWIZZLE_Z,
- AMDIL_SRC_SWIZZLE_W,
- AMDIL_SRC_SWIZZLE_XY,
- AMDIL_SRC_SWIZZLE_ZW,
- AMDIL_SRC_SWIZZLE_LAST
-};
-enum DstSwizzles {
- AMDIL_DST_SWIZZLE_DEFAULT = 0,
- AMDIL_DST_SWIZZLE_X___,
- AMDIL_DST_SWIZZLE_XY__,
- AMDIL_DST_SWIZZLE_XYZ_,
- AMDIL_DST_SWIZZLE_XYZW,
- AMDIL_DST_SWIZZLE__Y__,
- AMDIL_DST_SWIZZLE__YZ_,
- AMDIL_DST_SWIZZLE__YZW,
- AMDIL_DST_SWIZZLE___Z_,
- AMDIL_DST_SWIZZLE___ZW,
- AMDIL_DST_SWIZZLE____W,
- AMDIL_DST_SWIZZLE_X_ZW,
- AMDIL_DST_SWIZZLE_XY_W,
- AMDIL_DST_SWIZZLE_X_Z_,
- AMDIL_DST_SWIZZLE_X__W,
- AMDIL_DST_SWIZZLE__Y_W,
- AMDIL_DST_SWIZZLE_LAST
-};
-// Function to get the correct src swizzle string from ID
-const char *getSrcSwizzle(unsigned);
-
-// Function to get the correct dst swizzle string from ID
-const char *getDstSwizzle(unsigned);
-
-const llvm::TargetRegisterClass *getRegClassFromID(unsigned int ID);
-
-unsigned int getMoveInstFromID(unsigned int ID);
-unsigned int getPHIMoveInstFromID(unsigned int ID);
-
-// Utility functions from Type.
-const llvm::TargetRegisterClass *getRegClassFromType(unsigned int type);
-unsigned int getTargetIndependentMoveFromType(unsigned int type);
-
-// Debug functions for SDNode and SDValue.
-void printSDValue(const llvm::SDValue &Op, int level);
-void printSDNode(const llvm::SDNode *N);
-
-// Functions to check if an opcode is a specific type.
-bool isMove(unsigned int opcode);
-bool isPHIMove(unsigned int opcode);
-bool isMoveOrEquivalent(unsigned int opcode);
-
-// Function to check address space
-bool check_type(const llvm::Value *ptr, unsigned int addrspace);
-
-// Group of functions that recursively calculate the size of a structure based
-// on it's sub-types.
-size_t getTypeSize(llvm::Type * const T, bool dereferencePtr = false);
-size_t
-getTypeSize(llvm::StructType * const ST, bool dereferencePtr = false);
-size_t
-getTypeSize(llvm::IntegerType * const IT, bool dereferencePtr = false);
-size_t
-getTypeSize(llvm::FunctionType * const FT, bool dereferencePtr = false);
-size_t
-getTypeSize(llvm::ArrayType * const AT, bool dereferencePtr = false);
-size_t
-getTypeSize(llvm::VectorType * const VT, bool dereferencePtr = false);
-size_t
-getTypeSize(llvm::PointerType * const PT, bool dereferencePtr = false);
-size_t
-getTypeSize(llvm::OpaqueType * const OT, bool dereferencePtr = false);
-
-// Group of functions that recursively calculate the number of elements of a
-// structure based on it's sub-types.
-size_t getNumElements(llvm::Type * const T);
-size_t getNumElements(llvm::StructType * const ST);
-size_t getNumElements(llvm::IntegerType * const IT);
-size_t getNumElements(llvm::FunctionType * const FT);
-size_t getNumElements(llvm::ArrayType * const AT);
-size_t getNumElements(llvm::VectorType * const VT);
-size_t getNumElements(llvm::PointerType * const PT);
-size_t getNumElements(llvm::OpaqueType * const OT);
-const llvm::Value *getBasePointerValue(const llvm::Value *V);
-const llvm::Value *getBasePointerValue(const llvm::MachineInstr *MI);
-
-
-int64_t GET_SCALAR_SIZE(llvm::Type* A);
-
-// Helper functions that check the opcode for status information
-bool isLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isSWSExtLoadInst(llvm::MachineInstr *MI);
-bool isSExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isZExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isAExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isStoreInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isTruncStoreInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isAtomicInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isVolatileInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isGlobalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isPrivateInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isConstantInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isRegionInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isLocalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isImageInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isAppendInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isRegionAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isLocalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isGlobalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isArenaAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-
-
// Macros that are used to help with switch statements for various data types
// However, these macro's do not return anything unlike the second set below.
#define ExpandCaseTo32bitIntTypes(Instr) \
@@ -354,9 +175,4 @@ case Instr##_v4f32: \
case Instr##_v2i64: \
case Instr##_v2f64:
-bool commaPrint(int i, llvm::raw_ostream &O);
-/// Helper function to get the currently get/set flags.
-void getAsmPrinterFlags(llvm::MachineInstr *MI, llvm::AMDILAS::InstrResEnc &curRes);
-void setAsmPrinterFlags(llvm::MachineInstr *MI, llvm::AMDILAS::InstrResEnc &curRes);
-
#endif // AMDILUTILITYFUNCTIONS_H_
diff --git a/src/gallium/drivers/radeon/AMDILVersion.td b/src/gallium/drivers/radeon/AMDILVersion.td
index b8b02608d3b..d863b068131 100644
--- a/src/gallium/drivers/radeon/AMDILVersion.td
+++ b/src/gallium/drivers/radeon/AMDILVersion.td
@@ -1,4 +1,4 @@
-//===-- AMDILVersion.td - TODO: Add brief description -------===//
+//===-- AMDILVersion.td - Barrier Instruction/Intrinsic definitions------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/src/gallium/drivers/radeon/Makefile b/src/gallium/drivers/radeon/Makefile
index 807dc781c7c..cc409645a6e 100644
--- a/src/gallium/drivers/radeon/Makefile
+++ b/src/gallium/drivers/radeon/Makefile
@@ -18,6 +18,8 @@ CXXFLAGS := $(filter-out -DDEBUG, $(CXXFLAGS))
tablegen = $(TBLGEN) -I $(LLVM_INCLUDEDIR) $1 $2 -o $3
+HAVE_LLVM_INTRINSICS = $(shell grep IntrinsicsR600.td $(LLVM_INCLUDEDIR)/llvm/Intrinsics.td)
+
gen: $(GENERATED_SOURCES)
SIRegisterInfo.td: SIGenRegisterInfo.pl
@@ -26,9 +28,13 @@ SIRegisterInfo.td: SIGenRegisterInfo.pl
SIRegisterGetHWRegNum.inc: SIGenRegisterInfo.pl
$(PERL) $^ $@ > /dev/null
-R600ShaderPatterns.td: AMDGPUGenShaderPatterns.pl
- $(PERL) $^ C > $@
-
+# R600Intrinsics.td is a copy of one of two variants. HAVE_LLVM_INTRINSICS
+# holds the output of grepping LLVM's Intrinsics.td for IntrinsicsR600.td:
+# when it is empty the NoOpenCL variant is copied, otherwise the OpenCL one.
+R600Intrinsics.td: R600IntrinsicsNoOpenCL.td R600IntrinsicsOpenCL.td
+ifeq ($(HAVE_LLVM_INTRINSICS),)
+ cp R600IntrinsicsNoOpenCL.td R600Intrinsics.td
+else
+ cp R600IntrinsicsOpenCL.td R600Intrinsics.td
+endif
+
R600RegisterInfo.td: R600GenRegisterInfo.pl
$(PERL) $^ > $@
diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources
index 7d2932b4dbd..6dc62320f40 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -1,6 +1,6 @@
GENERATED_SOURCES := \
- R600ShaderPatterns.td \
+ R600Intrinsics.td \
R600RegisterInfo.td \
AMDGPUInstrEnums.td \
SIRegisterInfo.td \
@@ -29,20 +29,16 @@ CPP_SOURCES := \
AMDILISelDAGToDAG.cpp \
AMDILISelLowering.cpp \
AMDILMachinePeephole.cpp \
- AMDILMCCodeEmitter.cpp \
AMDILNIDevice.cpp \
AMDILPeepholeOptimizer.cpp \
AMDILRegisterInfo.cpp \
AMDILSIDevice.cpp \
AMDILSubtarget.cpp \
AMDILTargetMachine.cpp \
- AMDILUtilityFunctions.cpp \
AMDGPUTargetMachine.cpp \
AMDGPUISelLowering.cpp \
AMDGPUConvertToISA.cpp \
AMDGPULowerInstructions.cpp \
- AMDGPULowerShaderInstructions.cpp \
- AMDGPUReorderPreloadInstructions.cpp \
AMDGPUInstrInfo.cpp \
AMDGPURegisterInfo.cpp \
AMDGPUUtil.cpp \
@@ -51,13 +47,12 @@ CPP_SOURCES := \
R600InstrInfo.cpp \
R600KernelParameters.cpp \
R600LowerInstructions.cpp \
- R600LowerShaderInstructions.cpp \
+ R600MachineFunctionInfo.cpp \
R600RegisterInfo.cpp \
SIAssignInterpRegs.cpp \
SICodeEmitter.cpp \
SIInstrInfo.cpp \
SIISelLowering.cpp \
- SILowerShaderInstructions.cpp \
SIMachineFunctionInfo.cpp \
SIPropagateImmReads.cpp \
SIRegisterInfo.cpp \
diff --git a/src/gallium/drivers/radeon/R600CodeEmitter.cpp b/src/gallium/drivers/radeon/R600CodeEmitter.cpp
index 8faf0deb8c5..421562255f6 100644
--- a/src/gallium/drivers/radeon/R600CodeEmitter.cpp
+++ b/src/gallium/drivers/radeon/R600CodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- R600CodeEmitter.cpp - TODO: Add brief description -------===//
+//===-- R600CodeEmitter.cpp - Code Emitter for R600->Cayman GPU families --===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,12 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// This code emitter outputs bytecode that is understood by the r600g driver
+// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
+// except that the sizes of the instruction fields are rounded up to the
+// nearest byte.
+//
+// [1] http://www.mesa3d.org/
//
//===----------------------------------------------------------------------===//
@@ -44,8 +49,9 @@ namespace {
const R600RegisterInfo * TRI;
bool evergreenEncoding;
+ bool isCube;
bool isReduction;
- unsigned reductionElement;
+ unsigned currentElement;
bool isLast;
unsigned section_start;
@@ -53,7 +59,7 @@ namespace {
public:
R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
- _OS(OS), TM(NULL), evergreenEncoding(false), isReduction(false),
+ _OS(OS), TM(NULL), evergreenEncoding(false), isCube(false), isReduction(false),
isLast(true) { }
const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
@@ -65,7 +71,7 @@ namespace {
private:
void emitALUInstr(MachineInstr &MI);
- void emitSrc(const MachineOperand & MO);
+ void emitSrc(const MachineOperand & MO, int chan_override = -1);
void emitDst(const MachineOperand & MO);
void emitALU(MachineInstr &MI, unsigned numSrc);
void emitTexInstr(MachineInstr &MI);
@@ -155,10 +161,8 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
} else {
evergreenEncoding = true;
}
- const AMDGPUTargetMachine *amdtm =
- static_cast<const AMDGPUTargetMachine *>(&MF.getTarget());
- if (amdtm->shouldDumpCode()) {
+ if (STM.dumpCode()) {
MF.dump();
}
@@ -171,18 +175,26 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) {
continue;
}
- if (isTexOp(MI.getOpcode())) {
+ if (AMDGPU::isTexOp(MI.getOpcode())) {
emitTexInstr(MI);
- } else if (isFCOp(MI.getOpcode())){
+ } else if (AMDGPU::isFCOp(MI.getOpcode())){
emitFCInstr(MI);
- } else if (isReductionOp(MI.getOpcode())) {
+ } else if (AMDGPU::isReductionOp(MI.getOpcode())) {
isReduction = true;
isLast = false;
- for (reductionElement = 0; reductionElement < 4; reductionElement++) {
- isLast = (reductionElement == 3);
+ for (currentElement = 0; currentElement < 4; currentElement++) {
+ isLast = (currentElement == 3);
emitALUInstr(MI);
}
isReduction = false;
+ } else if (AMDGPU::isCubeOp(MI.getOpcode())) {
+ isCube = true;
+ isLast = false;
+ for (currentElement = 0; currentElement < 4; currentElement++) {
+ isLast = (currentElement == 3);
+ emitALUInstr(MI);
+ }
+ isCube = false;
} else if (MI.getOpcode() == AMDIL::RETURN ||
MI.getOpcode() == AMDIL::BUNDLE ||
MI.getOpcode() == AMDIL::KILL) {
@@ -191,12 +203,7 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
switch(MI.getOpcode()) {
case AMDIL::RAT_WRITE_CACHELESS_eg:
{
- /* XXX: Support for autoencoding 64-bit instructions was added
- * in LLVM 3.1. Until we drop support for 3.0, we will use Magic
- * numbers for the high bits. */
- uint64_t high = 0x95c0100000000000;
uint64_t inst = getBinaryCodeForInstr(MI);
- inst |= high;
/* Set End Of Program bit */
/* XXX: Need better check of end of program. EOP should be
* encoded in one of the operands of the MI, and it should be
@@ -286,7 +293,7 @@ void R600CodeEmitter::emitALUInstr(MachineInstr &MI)
/* Some instructions are just place holder instructions that represent
* operations that the GPU does automatically. They should be ignored. */
- if (isPlaceHolderOpcode(MI.getOpcode())) {
+ if (AMDGPU::isPlaceHolderOpcode(MI.getOpcode())) {
return;
}
@@ -309,18 +316,25 @@ void R600CodeEmitter::emitALUInstr(MachineInstr &MI)
/* Emit instruction type */
emitByte(0);
- unsigned int opIndex;
- for (opIndex = 1; opIndex < numOperands; opIndex++) {
- /* Literal constants are always stored as the last operand. */
- if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
- break;
+ if (isCube) {
+ static const int cube_src_swz[] = {2, 2, 0, 1};
+ emitSrc(MI.getOperand(1), cube_src_swz[currentElement]);
+ emitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]);
+ emitNullBytes(SRC_BYTE_COUNT);
+ } else {
+ unsigned int opIndex;
+ for (opIndex = 1; opIndex < numOperands; opIndex++) {
+ /* Literal constants are always stored as the last operand. */
+ if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
+ break;
+ }
+ emitSrc(MI.getOperand(opIndex));
}
- emitSrc(MI.getOperand(opIndex));
- }
/* Emit zeros for unused sources */
- for ( ; opIndex < 4; opIndex++) {
- emitNullBytes(SRC_BYTE_COUNT);
+ for ( ; opIndex < 4; opIndex++) {
+ emitNullBytes(SRC_BYTE_COUNT);
+ }
}
emitDst(dstOp);
@@ -328,7 +342,7 @@ void R600CodeEmitter::emitALUInstr(MachineInstr &MI)
emitALU(MI, numOperands - 1);
}
-void R600CodeEmitter::emitSrc(const MachineOperand & MO)
+void R600CodeEmitter::emitSrc(const MachineOperand & MO, int chan_override /* = -1 */)
{
uint32_t value = 0;
/* Emit the source select (2 bytes). For GPRs, this is the register index.
@@ -354,8 +368,10 @@ void R600CodeEmitter::emitSrc(const MachineOperand & MO)
}
/* Emit the source channel (1 byte) */
- if (isReduction) {
- emitByte(reductionElement);
+ if (chan_override != -1) {
+ emitByte(chan_override);
+ } else if (isReduction) {
+ emitByte(currentElement);
} else if (MO.isReg()) {
emitByte(TRI->getHWRegChan(MO.getReg()));
} else {
@@ -397,8 +413,8 @@ void R600CodeEmitter::emitDst(const MachineOperand & MO)
emitByte(getHWReg(MO.getReg()));
/* Emit the element of the destination register (1 byte)*/
- if (isReduction) {
- emitByte(reductionElement);
+ if (isReduction || isCube) {
+ emitByte(currentElement);
} else {
emitByte(TRI->getHWRegChan(MO.getReg()));
}
@@ -411,7 +427,7 @@ void R600CodeEmitter::emitDst(const MachineOperand & MO)
}
/* Emit writemask (1 byte). */
- if ((isReduction && reductionElement != TRI->getHWRegChan(MO.getReg()))
+ if ((isReduction && currentElement != TRI->getHWRegChan(MO.getReg()))
|| MO.getTargetFlags() & MO_FLAG_MASK) {
emitByte(0);
} else {
@@ -570,6 +586,7 @@ void R600CodeEmitter::emitFCInstr(MachineInstr &MI)
case AMDIL::BREAK_LOGICALZ_f32:
instr = FC_BREAK;
break;
+ case AMDIL::BREAK_LOGICALNZ_f32:
case AMDIL::BREAK_LOGICALNZ_i32:
instr = FC_BREAK_NZ_INT;
break;
@@ -577,6 +594,7 @@ void R600CodeEmitter::emitFCInstr(MachineInstr &MI)
instr = FC_BREAK_Z_INT;
break;
case AMDIL::CONTINUE_LOGICALNZ_f32:
+ case AMDIL::CONTINUE_LOGICALNZ_i32:
instr = FC_CONTINUE;
break;
/* XXX: This assumes that all IFs will be if (x != 0). If we add
@@ -706,44 +724,5 @@ RegElement maskBitToElement(unsigned int maskBit)
}
}
-unsigned int dstSwizzleToWriteMask(unsigned swizzle)
-{
- switch(swizzle) {
- default:
- case AMDIL_DST_SWIZZLE_DEFAULT:
- return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W;
- case AMDIL_DST_SWIZZLE_X___:
- return WRITE_MASK_X;
- case AMDIL_DST_SWIZZLE_XY__:
- return WRITE_MASK_X | WRITE_MASK_Y;
- case AMDIL_DST_SWIZZLE_XYZ_:
- return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z;
- case AMDIL_DST_SWIZZLE_XYZW:
- return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W;
- case AMDIL_DST_SWIZZLE__Y__:
- return WRITE_MASK_Y;
- case AMDIL_DST_SWIZZLE__YZ_:
- return WRITE_MASK_Y | WRITE_MASK_Z;
- case AMDIL_DST_SWIZZLE__YZW:
- return WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W;
- case AMDIL_DST_SWIZZLE___Z_:
- return WRITE_MASK_Z;
- case AMDIL_DST_SWIZZLE___ZW:
- return WRITE_MASK_Z | WRITE_MASK_W;
- case AMDIL_DST_SWIZZLE____W:
- return WRITE_MASK_W;
- case AMDIL_DST_SWIZZLE_X_ZW:
- return WRITE_MASK_X | WRITE_MASK_Z | WRITE_MASK_W;
- case AMDIL_DST_SWIZZLE_XY_W:
- return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_W;
- case AMDIL_DST_SWIZZLE_X_Z_:
- return WRITE_MASK_X | WRITE_MASK_Z;
- case AMDIL_DST_SWIZZLE_X__W:
- return WRITE_MASK_X | WRITE_MASK_W;
- case AMDIL_DST_SWIZZLE__Y_W:
- return WRITE_MASK_Y | WRITE_MASK_W;
- }
-}
-
#include "AMDILGenCodeEmitter.inc"
diff --git a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl
index cbded115766..406f3dfdd39 100644
--- a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl
+++ b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl
@@ -1,20 +1,23 @@
-#===-- R600GenRegisterInfo.pl - TODO: Add brief description -------===#
+#===-- R600GenRegisterInfo.pl - Script for generating register info files --===#
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
-#===----------------------------------------------------------------------===#
+#===------------------------------------------------------------------------===#
#
-# TODO: Add full description
+# This perl script prints to stdout .td code to be used as R600RegisterInfo.td.
+# It also generates a file called R600HwRegInfo.include, which contains helper
+# functions for determining the hw encoding of registers.
#
-#===----------------------------------------------------------------------===#
+#===------------------------------------------------------------------------===#
use strict;
use warnings;
-use AMDGPUConstants;
+use constant CONST_REG_COUNT => 256;
+use constant TEMP_REG_COUNT => 128;
my $CREG_MAX = CONST_REG_COUNT - 1;
my $TREG_MAX = TEMP_REG_COUNT - 1;
@@ -81,7 +84,7 @@ def R600_Reg32 : RegisterClass <"AMDIL", [f32, i32], 32, (add
R600_CReg32,
ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
-def R600_Reg128 : RegisterClass<"AMDIL", [v4f32], 128, (add
+def R600_Reg128 : RegisterClass<"AMDIL", [v4f32, v4i32], 128, (add
$t128_string)>
{
let SubRegClasses = [(R600_TReg32 sel_x, sel_y, sel_z, sel_w)];
@@ -170,3 +173,24 @@ sub print_reg_defs {
return @reg_list;
}
+#Helper functions
+# Map a flat register index to its hardware register number: the index
+# divided by 4, truncated to an integer.
+sub get_hw_index {
+  my $flat_index = shift;
+  my $hw_index = int($flat_index / 4);
+  return $hw_index;
+}
+
+# Return the channel letter ('X', 'Y', 'Z' or 'W') for a flat register index,
+# selected by the index modulo 4. Dies if the computed channel has no letter.
+sub get_chan_str {
+  my ($index) = @_;
+  my @chan_names = ('X', 'Y', 'Z', 'W');
+  my $chan = $index % 4;
+  if (!defined $chan_names[$chan]) {
+    die("Unknown chan value: $chan");
+  }
+  return $chan_names[$chan];
+}
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp
index f92fe2641a5..e85ac31b34c 100644
--- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
+++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
@@ -1,4 +1,4 @@
-//===-- R600ISelLowering.cpp - TODO: Add brief description -------===//
+//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,12 +7,14 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// Most of the DAG lowering is handled in AMDILISelLowering.cpp. This file
+// is mostly EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
@@ -25,9 +27,13 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
// setSchedulingPreference(Sched::VLIW);
addRegisterClass(MVT::v4f32, &AMDIL::R600_Reg128RegClass);
addRegisterClass(MVT::f32, &AMDIL::R600_Reg32RegClass);
+ addRegisterClass(MVT::v4i32, &AMDIL::R600_Reg128RegClass);
+ addRegisterClass(MVT::i32, &AMDIL::R600_Reg32RegClass);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
}
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
@@ -35,10 +41,10 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
{
MachineFunction * MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineBasicBlock::iterator I = *MI;
switch (MI->getOpcode()) {
default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
- /* XXX: Use helper function from AMDGPULowerShaderInstructions here */
case AMDIL::TGID_X:
addLiveIn(MI, MF, MRI, TII, AMDIL::T1_X);
break;
@@ -84,7 +90,49 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
case AMDIL::LOCAL_SIZE_Z:
lowerImplicitParameter(MI, *BB, MRI, 8);
break;
+
+ case AMDIL::R600_LOAD_CONST:
+ {
+ int64_t RegIndex = MI->getOperand(1).getImm();
+ unsigned ConstantReg = AMDIL::R600_CReg32RegClass.getRegister(RegIndex);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::COPY))
+ .addOperand(MI->getOperand(0))
+ .addReg(ConstantReg);
+ break;
+ }
+
+ case AMDIL::LOAD_INPUT:
+ {
+ int64_t RegIndex = MI->getOperand(1).getImm();
+ addLiveIn(MI, MF, MRI, TII,
+ AMDIL::R600_TReg32RegClass.getRegister(RegIndex));
+ break;
+ }
+ case AMDIL::STORE_OUTPUT:
+ {
+ int64_t OutputIndex = MI->getOperand(1).getImm();
+ unsigned OutputReg = AMDIL::R600_TReg32RegClass.getRegister(OutputIndex);
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::COPY), OutputReg)
+ .addOperand(MI->getOperand(0));
+
+ if (!MRI.isLiveOut(OutputReg)) {
+ MRI.addLiveOut(OutputReg);
+ }
+ break;
+ }
+
+ case AMDIL::RESERVE_REG:
+ {
+ R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
+ int64_t ReservedIndex = MI->getOperand(0).getImm();
+ unsigned ReservedReg =
+ AMDIL::R600_TReg32RegClass.getRegister(ReservedIndex);
+ MFI->ReservedRegs.push_back(ReservedReg);
+ break;
+ }
}
+
MI->eraseFromParent();
return BB;
}
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.h b/src/gallium/drivers/radeon/R600ISelLowering.h
index fd26bf538c4..fdd552a172d 100644
--- a/src/gallium/drivers/radeon/R600ISelLowering.h
+++ b/src/gallium/drivers/radeon/R600ISelLowering.h
@@ -1,4 +1,4 @@
-//===-- R600ISelLowering.h - TODO: Add brief description -------===//
+//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// R600 DAG Lowering interface definition
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp
index 0c7ffc4334d..2bd59fd5e1b 100644
--- a/src/gallium/drivers/radeon/R600InstrInfo.cpp
+++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===-- R600InstrInfo.cpp - TODO: Add brief description -------===//
+//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//
@@ -73,10 +73,22 @@ unsigned R600InstrInfo::getISAOpcode(unsigned opcode) const
case AMDIL::MOVE_i32:
return AMDIL::MOV;
case AMDIL::SHR_i32:
+ return getASHRop();
+ case AMDIL::USHR_i32:
return getLSHRop();
}
}
+/// Pick the ASHR opcode for the current device generation: ASHR_r600 for
+/// generations older than HD5XXX, ASHR_eg for HD5XXX and newer.
+unsigned R600InstrInfo::getASHRop() const
+{
+  const unsigned generation =
+      TM.getSubtarget<AMDILSubtarget>().device()->getGeneration();
+  return (generation < AMDILDeviceInfo::HD5XXX) ? AMDIL::ASHR_r600
+                                                : AMDIL::ASHR_eg;
+}
+
unsigned R600InstrInfo::getLSHRop() const
{
unsigned gen = TM.getSubtarget<AMDILSubtarget>().device()->getGeneration();
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h
index aedaa9f47f3..014eeb0b9f7 100644
--- a/src/gallium/drivers/radeon/R600InstrInfo.h
+++ b/src/gallium/drivers/radeon/R600InstrInfo.h
@@ -1,4 +1,4 @@
-//===-- R600InstrInfo.h - TODO: Add brief description -------===//
+//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// Interface definition for R600InstrInfo
//
//===----------------------------------------------------------------------===//
@@ -52,6 +52,7 @@ namespace llvm {
bool isTrig(const MachineInstr &MI) const;
unsigned getLSHRop() const;
+ unsigned getASHRop() const;
unsigned getMULHI_UINT() const;
unsigned getMULLO_UINT() const;
unsigned getRECIP_UINT() const;
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
index 02043fdeea5..a18240f09bd 100644
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -1,4 +1,4 @@
-//===-- R600Instructions.td - TODO: Add brief description -------===//
+//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// R600 Tablegen instruction definitions
//
//===----------------------------------------------------------------------===//
@@ -84,7 +84,7 @@ class R600_3OP <bits<32> inst, string opName, list<dag> pattern,
InstR600 <inst,
(outs R600_Reg32:$dst),
(ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2, variable_ops),
- !strconcat(opName, "$dst $src0, $src1, $src2"),
+ !strconcat(opName, " $dst, $src0, $src1, $src2"),
pattern,
itin>{
@@ -92,7 +92,7 @@ class R600_3OP <bits<32> inst, string opName, list<dag> pattern,
}
class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern,
- InstrItinClass itin = AnyALU> :
+ InstrItinClass itin = VecALU> :
InstR600 <inst,
(outs R600_Reg32:$dst),
ins,
@@ -152,8 +152,6 @@ class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, dag outs, dag ins,
let Inst{31-30} = ELEM_SIZE;
/* CF_ALLOC_EXPORT_WORD1_BUF */
-/* XXX: We can't have auto encoding of 64-bit instructions until LLVM 3.1 :( */
-/*
let Inst{43-32} = ARRAY_SIZE;
let Inst{47-44} = COMP_MASK;
let Inst{51-48} = BURST_COUNT;
@@ -162,7 +160,6 @@ class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, dag outs, dag ins,
let Inst{61-54} = cf_inst;
let Inst{62} = MARK;
let Inst{63} = BARRIER;
-*/
}
/*
@@ -311,6 +308,18 @@ def TRUNC : R600_1OP <
[(set R600_Reg32:$dst, (int_AMDGPU_trunc R600_Reg32:$src))]
>;
+// CEIL rounds toward +infinity, so it is matched against the round_posinf
+// intrinsic and AMDIL opcode. round_neginf is the floor direction and must
+// not select this instruction.
+def CEIL : R600_1OP <
+ 0x12, "CEIL",
+ [(set R600_Reg32:$dst, (int_AMDIL_round_posinf R600_Reg32:$src))]> {
+ let AMDILOp = AMDILInst.ROUND_POSINF_f32;
+}
+
+def RNDNE : R600_1OP <
+ 0x13, "RNDNE",
+ [(set R600_Reg32:$dst, (int_AMDIL_round_nearest R600_Reg32:$src))]> {
+ let AMDILOp = AMDILInst.ROUND_NEAREST_f32;
+}
+
def FLOOR : R600_1OP <
0x14, "FLOOR",
[(set R600_Reg32:$dst, (int_AMDGPU_floor R600_Reg32:$src))]
@@ -329,64 +338,114 @@ def AND_INT : R600_2OP <
let AMDILOp = AMDILInst.AND_i32;
}
+def OR_INT : R600_2OP <
+ 0x31, "OR_INT",
+ []>{
+ let AMDILOp = AMDILInst.BINARY_OR_i32;
+}
+
def XOR_INT : R600_2OP <
0x32, "XOR_INT",
[]
>;
+def NOT_INT : R600_1OP <
+ 0x33, "NOT_INT",
+ []>{
+ let AMDILOp = AMDILInst.BINARY_NOT_i32;
+}
+
def ADD_INT : R600_2OP <
- 0x34, "ADD_INT $dst, $src0, $src1",
+ 0x34, "ADD_INT",
[]>{
let AMDILOp = AMDILInst.ADD_i32;
}
def SUB_INT : R600_2OP <
- 0x35, "SUB_INT $dst, $src0, $src1",
+ 0x35, "SUB_INT",
[]
>;
+def MAX_INT : R600_2OP <
+ 0x36, "MAX_INT",
+ [(set R600_Reg32:$dst, (int_AMDGPU_imax R600_Reg32:$src0, R600_Reg32:$src1))]>;
+
+def MIN_INT : R600_2OP <
+ 0x37, "MIN_INT",
+ [(set R600_Reg32:$dst, (int_AMDGPU_imin R600_Reg32:$src0, R600_Reg32:$src1))]>;
+
+def MAX_UINT : R600_2OP <
+ 0x38, "MAX_UINT",
+ [(set R600_Reg32:$dst, (int_AMDGPU_umax R600_Reg32:$src0, R600_Reg32:$src1))]>;
+
+def MIN_UINT : R600_2OP <
+ 0x39, "MIN_UINT",
+ [(set R600_Reg32:$dst, (int_AMDGPU_umin R600_Reg32:$src0, R600_Reg32:$src1))]>;
+
+
def SETE_INT : R600_2OP <
- 0x3A, "SETE_INT $dst, $src0, $src1",
+ 0x3A, "SETE_INT",
[]>{
let AMDILOp = AMDILInst.IEQ;
}
def SETGT_INT : R600_2OP <
- 0x3B, "SGT_INT $dst, $src0, $src1",
+ 0x3B, "SGT_INT",
[]
>;
def SETGE_INT : R600_2OP <
- 0x3C, "SETGE_INT $dst, $src0, $src1",
+ 0x3C, "SETGE_INT",
[]>{
let AMDILOp = AMDILInst.IGE;
}
def SETNE_INT : R600_2OP <
- 0x3D, "SETNE_INT $dst, $src0, $src1",
+ 0x3D, "SETNE_INT",
[]>{
let AMDILOp = AMDILInst.INE;
}
def SETGT_UINT : R600_2OP <
- 0x3E, "SETGT_UINT $dst, $src0, $src1",
+ 0x3E, "SETGT_UINT",
[]>{
let AMDILOp = AMDILInst.UGT;
}
def SETGE_UINT : R600_2OP <
- 0x3F, "SETGE_UINT $dst, $src0, $src1",
+ 0x3F, "SETGE_UINT",
[]>{
let AMDILOp = AMDILInst.UGE;
}
def CNDE_INT : R600_3OP <
- 0x1C, "CNDE_INT $dst, $src0, $src1, $src2",
+ 0x1C, "CNDE_INT",
[]
>;
/* Texture instructions */
+
+def TEX_LD : R600_TEX <
+ 0x03, "TEX_LD",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2))]
+>;
+
+def TEX_GET_TEXTURE_RESINFO : R600_TEX <
+ 0x04, "TEX_GET_TEXTURE_RESINFO",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$src1, imm:$src2))]
+>;
+
+def TEX_GET_GRADIENTS_H : R600_TEX <
+ 0x07, "TEX_GET_GRADIENTS_H",
+ [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$src1, imm:$src2))]
+>;
+
+def TEX_GET_GRADIENTS_V : R600_TEX <
+ 0x08, "TEX_GET_GRADIENTS_V",
+ [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$src1, imm:$src2))]
+>;
+
def TEX_SAMPLE : R600_TEX <
0x10, "TEX_SAMPLE",
[(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, imm:$src2))]
@@ -434,6 +493,11 @@ def KILP : Pat <
(MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
>;
+def KIL : Pat <
+ (int_AMDGPU_kill R600_Reg32:$src0),
+ (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0)))
+>;
+
/* Helper classes for common instructions */
class MUL_LIT_Common <bits<32> inst> : R600_3OP <
@@ -470,6 +534,15 @@ class DOT4_Common <bits<32> inst> : R600_REDUCTION <
[(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
>;
+class CUBE_Common <bits<32> inst> : InstR600 < // Shared CUBE definition; 'inst' is supplied per generation (CUBE_r600, CUBE_eg).
+ inst,
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src),
+ "CUBE $dst, $src", // operands are comma-separated, matching the other asm strings in this file
+ [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))], // selected for the int_AMDGPU_cube intrinsic
+ VecALU
+>;
+
class EXP_IEEE_Common <bits<32> inst> : R600_1OP <
inst, "EXP_IEEE",
[]> {
@@ -509,6 +582,12 @@ class LSHR_Common <bits<32> inst> : R600_2OP <
let AMDILOp = AMDILInst.USHR_i32;
}
+class ASHR_Common <bits<32> inst> : R600_2OP < // Shared ASHR definition; instantiated as ASHR_r600 and ASHR_eg.
+ inst, "ASHR $dst, $src0, $src1", // NOTE(review): other R600_2OP defs in this change now pass only the mnemonic -- confirm the operand list is still wanted here.
+ [] >{ // no selection pattern
+ let AMDILOp = AMDILInst.SHR_i32; // associates this instruction with AMDILInst.SHR_i32
+}
+
class MULHI_INT_Common <bits<32> inst> : R600_2OP <
inst, "MULHI_INT $dst, $src0, $src1",
[] >{
@@ -608,6 +687,7 @@ let Gen = AMDGPUGen.R600 in {
def CNDGT_r600 : CNDGT_Common<0x19>;
def CNDGE_r600 : CNDGE_Common<0x1A>;
def DOT4_r600 : DOT4_Common<0x50>;
+ def CUBE_r600 : CUBE_Common<0x52>;
def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
@@ -619,6 +699,7 @@ let Gen = AMDGPUGen.R600 in {
def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
def SIN_r600 : SIN_Common<0x6E>;
def COS_r600 : COS_Common<0x6F>;
+ def ASHR_r600 : ASHR_Common<0x70>;
def LSHR_r600 : LSHR_Common<0x71>;
def LSHL_r600 : LSHL_Common<0x72>;
def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
@@ -661,20 +742,12 @@ def RAT_WRITE_CACHELESS_eg :
EG_CF_RAT <0x57, 0x2, (outs), (ins R600_TReg32_X:$rw_gpr,
R600_TReg32_X:$index_gpr, i32imm:$rat_id), "">
{
-/*
- let Inst{3-0} = RAT_ID;
- let Inst{21-15} = RW_GPR;
- let Inst{29-23} = INDEX_GPR;
- /* Propery of the UAV */
- let Inst{31-30} = ELEM_SIZE;
-*/
let RIM = 0;
/* XXX: Have a separate instruction for non-indexed writes. */
let TYPE = 1;
let RW_REL = 0;
let ELEM_SIZE = 0;
-/*
let ARRAY_SIZE = 0;
let COMP_MASK = 1;
let BURST_COUNT = 0;
@@ -682,7 +755,6 @@ def RAT_WRITE_CACHELESS_eg :
let EOP = 0;
let MARK = 0;
let BARRIER = 1;
-*/
}
def VTX_READ_eg : InstR600ISA < (outs R600_TReg32_X:$dst),
@@ -789,6 +861,7 @@ class TRIG_eg <InstR600 trig, Intrinsic intr> : Pat<
let Gen = AMDGPUGen.EG_CAYMAN in {
def MULADD_eg : MULADD_Common<0x14>;
+ def ASHR_eg : ASHR_Common<0x15>;
def LSHR_eg : LSHR_Common<0x16>;
def LSHL_eg : LSHL_Common<0x17>;
def CNDE_eg : CNDE_Common<0x19>;
@@ -812,6 +885,7 @@ let Gen = AMDGPUGen.EG_CAYMAN in {
def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
def DOT4_eg : DOT4_Common<0xBE>;
+ def CUBE_eg : CUBE_Common<0xC0>;
} // End AMDGPUGen.EG_CAYMAN
@@ -905,6 +979,34 @@ def LOCAL_SIZE_Y : R600PreloadInst <"LOCAL_SIZE_Y",
def LOCAL_SIZE_Z : R600PreloadInst <"LOCAL_SIZE_Z",
int_r600_read_local_size_z>;
+def R600_LOAD_CONST : AMDGPUShaderInst < // Pseudo instruction (custom inserter) selected for int_AMDGPU_load_const.
+ (outs R600_Reg32:$dst),
+ (ins i32imm:$src0), // immediate operand; presumably the constant's index -- confirm
+ "R600_LOAD_CONST $dst, $src0",
+ [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
+>;
+
+def LOAD_INPUT : AMDGPUShaderInst < // Pseudo instruction (custom inserter) selected for int_R600_load_input.
+ (outs R600_Reg32:$dst),
+ (ins i32imm:$src), // immediate operand; presumably the input's index -- confirm
+ "LOAD_INPUT $dst, $src",
+ [(set R600_Reg32:$dst, (int_R600_load_input imm:$src))]
+>;
+
+def RESERVE_REG : AMDGPUShaderInst < // Pseudo instruction (custom inserter) selected for int_AMDGPU_reserve_reg.
+ (outs), // produces no result
+ (ins i32imm:$src), // immediate operand; presumably identifies the register to reserve -- confirm
+ "RESERVE_REG $src",
+ [(int_AMDGPU_reserve_reg imm:$src)]
+>;
+
+def STORE_OUTPUT: AMDGPUShaderInst < // Pseudo instruction (custom inserter) selected for int_AMDGPU_store_output.
+ (outs), // produces no result
+ (ins R600_Reg32:$src0, i32imm:$src1), // $src0 is a register operand; $src1 is an immediate (presumably the output index -- confirm)
+ "STORE_OUTPUT $src0, $src1",
+ [(int_AMDGPU_store_output R600_Reg32:$src0, imm:$src1)]
+>;
+
} // End usesCustomInserter = 1, isPseudo = 1
} // End isCodeGenOnly = 1
@@ -933,15 +1035,14 @@ def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 5, sel_y>;
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 6, sel_z>;
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 7, sel_w>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>;
-include "R600ShaderPatterns.td"
-
-// We need this pattern to avoid having real registers in PHI nodes.
-// For some reason this pattern only works when it comes after the other
-// instruction defs.
-def : Pat <
- (int_R600_load_input imm:$src),
- (LOAD_INPUT imm:$src)
->;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 4, sel_x>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 5, sel_y>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 6, sel_z>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 7, sel_w>;
} // End isR600toCayman Predicate
diff --git a/src/gallium/drivers/radeon/R600Intrinsics.td b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td
index 8038fee1a3c..73ef4aae234 100644
--- a/src/gallium/drivers/radeon/R600Intrinsics.td
+++ b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td
@@ -1,4 +1,4 @@
-//===-- R600Intrinsics.td - TODO: Add brief description -------===//
+//===-- R600Intrinsics.td - R600 Intrinsic defs --------*- tablegen -*-----===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// R600 Intrinsic Definitions
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/R600InstrFormats.td b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td
index 0890eb64509..cd761358475 100644
--- a/src/gallium/drivers/radeon/R600InstrFormats.td
+++ b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td
@@ -1,4 +1,4 @@
-//===-- R600InstrFormats.td - TODO: Add brief description -------===//
+//===-- R600Intrinsics.td - TODO: Add brief description -------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,6 +11,6 @@
//
//===----------------------------------------------------------------------===//
-
-class ALUInst <bits<10> op, dag outs, dag ins, string asm, list<dag> pattern>
- : InstR600 <, outs, ins , asm, pattern>
+let TargetPrefix = "R600", isTarget = 1 in { // both settings apply to every intrinsic defined inside the braces
+ def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadWriteArgMem]>; // returns a float, takes one i32; the LOAD_INPUT instruction is selected for it
+}
diff --git a/src/gallium/drivers/radeon/R600KernelParameters.cpp b/src/gallium/drivers/radeon/R600KernelParameters.cpp
index 3fdf48a2bf2..53bfebc7364 100644
--- a/src/gallium/drivers/radeon/R600KernelParameters.cpp
+++ b/src/gallium/drivers/radeon/R600KernelParameters.cpp
@@ -1,4 +1,4 @@
-//===-- R600KernelParameters.cpp - TODO: Add brief description -------===//
+//===-- R600KernelParameters.cpp - Lower kernel function arguments --------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,89 +7,83 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// This pass lowers kernel function arguments to loads from the vertex buffer.
+//
+// Kernel arguments are stored in the vertex buffer at an offset of 9 dwords,
+// so arg0 needs to be loaded from VTX_BUFFER[9] and arg1 is loaded from
+// VTX_BUFFER[10], etc.
//
//===----------------------------------------------------------------------===//
-#include <llvm-c/Core.h>
-#include "R600KernelParameters.h"
-#include "R600OpenCLUtils.h"
+#include "AMDGPU.h"
+#include "AMDIL.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Constants.h"
+#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetData.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/TypeBuilder.h"
-// #include "llvm/CodeGen/Function.h"
-
-namespace AMDILAS {
-enum AddressSpaces {
- PRIVATE_ADDRESS = 0, // Address space for private memory.
- GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0).
- CONSTANT_ADDRESS = 2, // Address space for constant memory.
- LOCAL_ADDRESS = 3, // Address space for local memory.
- REGION_ADDRESS = 4, // Address space for region memory.
- ADDRESS_NONE = 5, // Address space for unknown memory.
- PARAM_D_ADDRESS = 6, // Address space for direct addressible parameter memory (CONST0)
- PARAM_I_ADDRESS = 7, // Address space for indirect addressible parameter memory (VTX1)
- LAST_ADDRESS = 8
-};
-}
-
#include <map>
#include <set>
using namespace llvm;
-using namespace std;
+
+namespace {
#define CONSTANT_CACHE_SIZE_DW 127
-class R600KernelParameters : public llvm::FunctionPass
+class R600KernelParameters : public FunctionPass
{
- const llvm::TargetData * TD;
+ const TargetData * TD;
LLVMContext* Context;
Module *mod;
-
+
struct param
{
- param() : val(NULL), ptr_val(NULL), offset_in_dw(0), size_in_dw(0), indirect(false), specialID(0) {}
-
- llvm::Value* val;
- llvm::Value* ptr_val;
+ param() : val(NULL), ptr_val(NULL), offset_in_dw(0), size_in_dw(0),
+ indirect(false), specialID(0) {}
+
+ Value* val;
+ Value* ptr_val;
int offset_in_dw;
int size_in_dw;
bool indirect;
-
- string specialType;
+
+ std::string specialType;
int specialID;
-
+
int end() { return offset_in_dw + size_in_dw; }
- /* The first 9 dwords are reserved for the grid sizes. */
+ // The first 9 dwords are reserved for the grid sizes.
int get_rat_offset() { return 9 + offset_in_dw; }
};
std::vector<param> params;
- int getLastSpecialID(const string& TypeName);
-
+ bool isOpenCLKernel(const Function* fun);
+ int getLastSpecialID(const std::string& TypeName);
+
int getListSize();
- void AddParam(llvm::Argument* arg);
- int calculateArgumentSize(llvm::Argument* arg);
- void RunAna(llvm::Function* fun);
- void Replace(llvm::Function* fun);
- bool isIndirect(Value* val, set<Value*>& visited);
- void Propagate(llvm::Function* fun);
- void Propagate(llvm::Value* v, const llvm::Twine& name, bool indirect = false);
+ void AddParam(Argument* arg);
+ int calculateArgumentSize(Argument* arg);
+ void RunAna(Function* fun);
+ void Replace(Function* fun);
+ bool isIndirect(Value* val, std::set<Value*>& visited);
+ void Propagate(Function* fun);
+ void Propagate(Value* v, const Twine& name, bool indirect = false);
Value* ConstantRead(Function* fun, param& p);
Value* handleSpecial(Function* fun, param& p);
bool isSpecialType(Type*);
- string getSpecialTypeName(Type*);
+ std::string getSpecialTypeName(Type*);
public:
static char ID;
R600KernelParameters() : FunctionPass(ID) {};
- R600KernelParameters(const llvm::TargetData* TD) : FunctionPass(ID), TD(TD) {}
-// bool runOnFunction (llvm::Function &F);
- bool runOnFunction (llvm::Function &F);
+ R600KernelParameters(const TargetData* TD) : FunctionPass(ID), TD(TD) {}
+ bool runOnFunction (Function &F);
void getAnalysisUsage(AnalysisUsage &AU) const;
const char *getPassName() const;
bool doInitialization(Module &M);
@@ -98,13 +92,42 @@ public:
char R600KernelParameters::ID = 0;
-static RegisterPass<R600KernelParameters> X("kerparam", "OpenCL Kernel Parameter conversion", false, false);
+static RegisterPass<R600KernelParameters> X("kerparam",
+ "OpenCL Kernel Parameter conversion", false, false);
-int R600KernelParameters::getLastSpecialID(const string& TypeName)
+// Returns true when 'fun' is named by an entry of the module's
+// "opencl.kernels" metadata. Read-only: a missing node is never created.
+bool R600KernelParameters::isOpenCLKernel(const Function* fun)
+{
+  const Module *parent = fun->getParent();
+  NamedMDNode *md = parent->getNamedMetadata("opencl.kernels");
+  if (!md)
+  {
+    return false;
+  }
+
+  for (unsigned i = 0, e = md->getNumOperands(); i != e; i++)
+  {
+    MDNode *node = md->getOperand(i);
+    // Skip null or empty entries before operand 0 is read.
+    if (!node or !node->getNumOperands() or !node->getOperand(0))
+    {
+      continue;
+    }
+    assert(node->getNumOperands() == 1);
+    if (node->getOperand(0)->getName() == fun->getName())
+    {
+      return true;
+    }
+  }
+  return false;
+}
+
+int R600KernelParameters::getLastSpecialID(const std::string& TypeName)
{
int lastID = -1;
-
- for (vector<param>::iterator i = params.begin(); i != params.end(); i++)
+
+ for (std::vector<param>::iterator i = params.begin(); i != params.end(); i++)
{
if (i->specialType == TypeName)
{
@@ -125,7 +148,7 @@ int R600KernelParameters::getListSize()
return params.back().end();
}
-bool R600KernelParameters::isIndirect(Value* val, set<Value*>& visited)
+bool R600KernelParameters::isIndirect(Value* val, std::set<Value*>& visited)
{
if (isa<LoadInst>(val))
{
@@ -144,7 +167,7 @@ bool R600KernelParameters::isIndirect(Value* val, set<Value*>& visited)
}
visited.insert(val);
-
+
if (isa<GetElementPtrInst>(val))
{
GetElementPtrInst* GEP = dyn_cast<GetElementPtrInst>(val);
@@ -158,7 +181,7 @@ bool R600KernelParameters::isIndirect(Value* val, set<Value*>& visited)
}
}
}
-
+
for (Value::use_iterator i = val->use_begin(); i != val->use_end(); i++)
{
Value* v2 = dyn_cast<Value>(*i);
@@ -175,24 +198,24 @@ bool R600KernelParameters::isIndirect(Value* val, set<Value*>& visited)
return false;
}
-void R600KernelParameters::AddParam(llvm::Argument* arg)
+void R600KernelParameters::AddParam(Argument* arg)
{
param p;
-
+
p.val = dyn_cast<Value>(arg);
p.offset_in_dw = getListSize();
p.size_in_dw = calculateArgumentSize(arg);
if (isa<PointerType>(arg->getType()) and arg->hasByValAttr())
{
- set<Value*> visited;
+ std::set<Value*> visited;
p.indirect = isIndirect(p.val, visited);
}
-
+
params.push_back(p);
}
-int R600KernelParameters::calculateArgumentSize(llvm::Argument* arg)
+int R600KernelParameters::calculateArgumentSize(Argument* arg)
{
Type* t = arg->getType();
@@ -200,16 +223,16 @@ int R600KernelParameters::calculateArgumentSize(llvm::Argument* arg)
{
t = dyn_cast<PointerType>(t)->getElementType();
}
-
+
int store_size_in_dw = (TD->getTypeStoreSize(t) + 3)/4;
assert(store_size_in_dw);
-
+
return store_size_in_dw;
}
-void R600KernelParameters::RunAna(llvm::Function* fun)
+void R600KernelParameters::RunAna(Function* fun)
{
assert(isOpenCLKernel(fun));
@@ -220,7 +243,7 @@ void R600KernelParameters::RunAna(llvm::Function* fun)
}
-void R600KernelParameters::Replace(llvm::Function* fun)
+void R600KernelParameters::Replace(Function* fun)
{
for (std::vector<param>::iterator i = params.begin(); i != params.end(); i++)
{
@@ -237,11 +260,11 @@ void R600KernelParameters::Replace(llvm::Function* fun)
if (new_val)
{
i->val->replaceAllUsesWith(new_val);
- }
+ }
}
}
-void R600KernelParameters::Propagate(llvm::Function* fun)
+void R600KernelParameters::Propagate(Function* fun)
{
for (std::vector<param>::iterator i = params.begin(); i != params.end(); i++)
{
@@ -256,8 +279,8 @@ void R600KernelParameters::Propagate(Value* v, const Twine& name, bool indirect)
{
LoadInst* load = dyn_cast<LoadInst>(v);
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(v);
-
- unsigned addrspace;
+
+ unsigned addrspace;
if (indirect)
{
@@ -274,49 +297,54 @@ void R600KernelParameters::Propagate(Value* v, const Twine& name, bool indirect)
if (dyn_cast<PointerType>(op->getType())->getAddressSpace() != addrspace)
{
- op = new BitCastInst(op, PointerType::get(dyn_cast<PointerType>(op->getType())->getElementType(), addrspace), name, dyn_cast<Instruction>(v));
+ op = new BitCastInst(op, PointerType::get(dyn_cast<PointerType>(
+ op->getType())->getElementType(), addrspace),
+ name, dyn_cast<Instruction>(v));
}
- vector<Value*> params(GEP->idx_begin(), GEP->idx_end());
-
- GetElementPtrInst* GEP2 = GetElementPtrInst::Create(op, params, name, dyn_cast<Instruction>(v));
+ std::vector<Value*> params(GEP->idx_begin(), GEP->idx_end());
+
+ GetElementPtrInst* GEP2 = GetElementPtrInst::Create(op, params, name,
+ dyn_cast<Instruction>(v));
GEP2->setIsInBounds(GEP->isInBounds());
v = dyn_cast<Value>(GEP2);
GEP->replaceAllUsesWith(GEP2);
GEP->eraseFromParent();
load = NULL;
}
-
+
if (load)
{
- if (load->getPointerAddressSpace() != addrspace) ///normally at this point we have the right address space
+ ///normally at this point we have the right address space
+ if (load->getPointerAddressSpace() != addrspace)
{
Value *orig_ptr = load->getPointerOperand();
PointerType *orig_ptr_type = dyn_cast<PointerType>(orig_ptr->getType());
-
- Type* new_ptr_type = PointerType::get(orig_ptr_type->getElementType(), addrspace);
+
+ Type* new_ptr_type = PointerType::get(orig_ptr_type->getElementType(),
+ addrspace);
Value* new_ptr = orig_ptr;
-
+
if (orig_ptr->getType() != new_ptr_type)
{
new_ptr = new BitCastInst(orig_ptr, new_ptr_type, "prop_cast", load);
}
-
+
Value* new_load = new LoadInst(new_ptr, name, load);
load->replaceAllUsesWith(new_load);
load->eraseFromParent();
}
-
+
return;
}
- vector<User*> users(v->use_begin(), v->use_end());
-
+ std::vector<User*> users(v->use_begin(), v->use_end());
+
for (int i = 0; i < int(users.size()); i++)
{
Value* v2 = dyn_cast<Value>(users[i]);
-
+
if (v2)
{
Propagate(v2, name, indirect);
@@ -327,7 +355,7 @@ void R600KernelParameters::Propagate(Value* v, const Twine& name, bool indirect)
Value* R600KernelParameters::ConstantRead(Function* fun, param& p)
{
assert(fun->front().begin() != fun->front().end());
-
+
Instruction *first_inst = fun->front().begin();
IRBuilder <> builder (first_inst);
/* First 3 dwords are reserved for the dimmension info */
@@ -346,43 +374,54 @@ Value* R600KernelParameters::ConstantRead(Function* fun, param& p)
{
addrspace = AMDILAS::PARAM_D_ADDRESS;
}
-
+
Argument *arg = dyn_cast<Argument>(p.val);
Type * argType = p.val->getType();
PointerType * argPtrType = dyn_cast<PointerType>(p.val->getType());
-
+
if (argPtrType and arg->hasByValAttr())
{
- Value* param_addr_space_ptr = ConstantPointerNull::get(PointerType::get(Type::getInt32Ty(*Context), addrspace));
- Value* param_ptr = GetElementPtrInst::Create(param_addr_space_ptr, ConstantInt::get(Type::getInt32Ty(*Context), p.get_rat_offset()), arg->getName(), first_inst);
- param_ptr = new BitCastInst(param_ptr, PointerType::get(argPtrType->getElementType(), addrspace), arg->getName(), first_inst);
+ Value* param_addr_space_ptr = ConstantPointerNull::get(
+ PointerType::get(Type::getInt32Ty(*Context),
+ addrspace));
+ Value* param_ptr = GetElementPtrInst::Create(param_addr_space_ptr,
+ ConstantInt::get(Type::getInt32Ty(*Context),
+ p.get_rat_offset()), arg->getName(),
+ first_inst);
+ param_ptr = new BitCastInst(param_ptr,
+ PointerType::get(argPtrType->getElementType(),
+ addrspace),
+ arg->getName(), first_inst);
p.ptr_val = param_ptr;
return param_ptr;
}
else
{
- Value* param_addr_space_ptr = ConstantPointerNull::get(PointerType::get(argType, addrspace));
-
+ Value* param_addr_space_ptr = ConstantPointerNull::get(PointerType::get(
+ argType, addrspace));
+
Value* param_ptr = builder.CreateGEP(param_addr_space_ptr,
- ConstantInt::get(Type::getInt32Ty(*Context), p.get_rat_offset()), arg->getName());
-
+ ConstantInt::get(Type::getInt32Ty(*Context), p.get_rat_offset()),
+ arg->getName());
+
Value* param_value = builder.CreateLoad(param_ptr, arg->getName());
-
+
return param_value;
}
}
Value* R600KernelParameters::handleSpecial(Function* fun, param& p)
{
- string name = getSpecialTypeName(p.val->getType());
+ std::string name = getSpecialTypeName(p.val->getType());
int ID;
assert(!name.empty());
-
+
if (name == "image2d_t" or name == "image3d_t")
{
- int lastID = max(getLastSpecialID("image2d_t"), getLastSpecialID("image3d_t"));
-
+ int lastID = std::max(getLastSpecialID("image2d_t"),
+ getLastSpecialID("image3d_t"));
+
if (lastID == -1)
{
ID = 2; ///ID0 and ID1 are used internally by the driver
@@ -403,20 +442,22 @@ Value* R600KernelParameters::handleSpecial(Function* fun, param& p)
else
{
ID = lastID + 1;
- }
+ }
}
else
{
///TODO: give some error message
return NULL;
}
-
+
p.specialType = name;
p.specialID = ID;
Instruction *first_inst = fun->front().begin();
- return new IntToPtrInst(ConstantInt::get(Type::getInt32Ty(*Context), p.specialID), p.val->getType(), "resourceID", first_inst);
+ return new IntToPtrInst(ConstantInt::get(Type::getInt32Ty(*Context),
+ p.specialID), p.val->getType(),
+ "resourceID", first_inst);
}
@@ -425,7 +466,7 @@ bool R600KernelParameters::isSpecialType(Type* t)
return !getSpecialTypeName(t).empty();
}
-string R600KernelParameters::getSpecialTypeName(Type* t)
+std::string R600KernelParameters::getSpecialTypeName(Type* t)
{
PointerType *pt = dyn_cast<PointerType>(t);
StructType *st = NULL;
@@ -437,9 +478,9 @@ string R600KernelParameters::getSpecialTypeName(Type* t)
if (st)
{
- string prefix = "struct.opencl_builtin_type_";
-
- string name = st->getName().str();
+ std::string prefix = "struct.opencl_builtin_type_";
+
+ std::string name = st->getName().str();
if (name.substr(0, prefix.length()) == prefix)
{
@@ -458,19 +499,15 @@ bool R600KernelParameters::runOnFunction (Function &F)
return false;
}
-// F.dump();
-
RunAna(&F);
Replace(&F);
Propagate(&F);
-
- mod->dump();
+
return false;
}
void R600KernelParameters::getAnalysisUsage(AnalysisUsage &AU) const
{
-// AU.addRequired<FunctionAnalysis>();
FunctionPass::getAnalysisUsage(AU);
AU.setPreservesAll();
}
@@ -484,7 +521,7 @@ bool R600KernelParameters::doInitialization(Module &M)
{
Context = &M.getContext();
mod = &M;
-
+
return false;
}
@@ -493,10 +530,12 @@ bool R600KernelParameters::doFinalization(Module &M)
return false;
}
-llvm::FunctionPass* createR600KernelParametersPass(const llvm::TargetData* TD)
+} // End anonymous namespace
+
+FunctionPass* llvm::createR600KernelParametersPass(const TargetData* TD)
{
FunctionPass *p = new R600KernelParameters(TD);
-
+
return p;
}
diff --git a/src/gallium/drivers/radeon/R600KernelParameters.h b/src/gallium/drivers/radeon/R600KernelParameters.h
deleted file mode 100644
index 904a469a5f0..00000000000
--- a/src/gallium/drivers/radeon/R600KernelParameters.h
+++ /dev/null
@@ -1,28 +0,0 @@
-//===-- R600KernelParameters.h - TODO: Add brief description -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef KERNELPARAMETERS_H
-#define KERNELPARAMETERS_H
-
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Function.h"
-#include "llvm/Pass.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Value.h"
-
-#include <vector>
-
-llvm::FunctionPass* createR600KernelParametersPass(const llvm::TargetData* TD);
-
-
-#endif
diff --git a/src/gallium/drivers/radeon/R600LowerInstructions.cpp b/src/gallium/drivers/radeon/R600LowerInstructions.cpp
index fb5431d0eef..dca1fe195cc 100644
--- a/src/gallium/drivers/radeon/R600LowerInstructions.cpp
+++ b/src/gallium/drivers/radeon/R600LowerInstructions.cpp
@@ -1,4 +1,4 @@
-//===-- R600LowerInstructions.cpp - TODO: Add brief description -------===//
+//===-- R600LowerInstructions.cpp - Lower unsupported AMDIL instructions --===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// This pass lowers AMDIL MachineInstrs that aren't supported by the R600
+// target to either supported AMDIL MachineInstrs or R600 MachineInstrs.
//
//===----------------------------------------------------------------------===//
@@ -93,8 +94,8 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
&AMDIL::R600_TReg32RegClass);
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT),
setgt)
- .addOperand(MI.getOperand(1))
- .addReg(AMDIL::ZERO);
+ .addReg(AMDIL::ZERO)
+ .addOperand(MI.getOperand(1));
unsigned add_int = MRI->createVirtualRegister(
&AMDIL::R600_TReg32RegClass);
@@ -311,7 +312,8 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
MachineInstr * defInstr = MRI->getVRegDef(maskedRegister);
MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
def->addTargetFlag(MO_FLAG_MASK);
- break;
+ /* Continue so the instruction is not erased */
+ continue;
}
case AMDIL::NEGATE_i32:
@@ -342,6 +344,13 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
break;
}
+ case AMDIL::ULT:
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGT_UINT))
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(2))
+ .addOperand(MI.getOperand(1));
+ break;
+
default:
continue;
}
diff --git a/src/gallium/drivers/radeon/R600LowerShaderInstructions.cpp b/src/gallium/drivers/radeon/R600LowerShaderInstructions.cpp
deleted file mode 100644
index 394ee7006ce..00000000000
--- a/src/gallium/drivers/radeon/R600LowerShaderInstructions.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-//===-- R600LowerShaderInstructions.cpp - TODO: Add brief description -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPULowerShaderInstructions.h"
-#include "AMDIL.h"
-#include "AMDILInstrInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-namespace {
- class R600LowerShaderInstructionsPass : public MachineFunctionPass,
- public AMDGPULowerShaderInstructionsPass {
-
- private:
- static char ID;
- TargetMachine &TM;
-
- void lowerEXPORT_REG_FAKE(MachineInstr &MI, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I);
- void lowerLOAD_INPUT(MachineInstr & MI);
- bool lowerSTORE_OUTPUT(MachineInstr & MI, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I);
-
- public:
- R600LowerShaderInstructionsPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TM(tm) { }
-
- bool runOnMachineFunction(MachineFunction &MF);
-
- const char *getPassName() const { return "R600 Lower Shader Instructions"; }
- };
-} /* End anonymous namespace */
-
-char R600LowerShaderInstructionsPass::ID = 0;
-
-FunctionPass *llvm::createR600LowerShaderInstructionsPass(TargetMachine &tm) {
- return new R600LowerShaderInstructionsPass(tm);
-}
-
-#define INSTR_CASE_FLOAT_V(inst) \
- case AMDIL:: inst##_v4f32: \
-
-#define INSTR_CASE_FLOAT_S(inst) \
- case AMDIL:: inst##_f32:
-
-#define INSTR_CASE_FLOAT(inst) \
- INSTR_CASE_FLOAT_V(inst) \
- INSTR_CASE_FLOAT_S(inst)
-bool R600LowerShaderInstructionsPass::runOnMachineFunction(MachineFunction &MF)
-{
- MRI = &MF.getRegInfo();
-
-
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
- MachineInstr &MI = *I;
- bool deleteInstr = false;
- switch (MI.getOpcode()) {
-
- default: break;
-
- case AMDIL::RESERVE_REG:
- case AMDIL::EXPORT_REG:
- deleteInstr = true;
- break;
-
- case AMDIL::LOAD_INPUT:
- lowerLOAD_INPUT(MI);
- deleteInstr = true;
- break;
-
- case AMDIL::STORE_OUTPUT:
- deleteInstr = lowerSTORE_OUTPUT(MI, MBB, I);
- break;
-
- }
-
- ++I;
-
- if (deleteInstr) {
- MI.eraseFromParent();
- }
- }
- }
-
- return false;
-}
-
-/* The goal of this function is to replace the virutal destination register of
- * a LOAD_INPUT instruction with the correct physical register that will.
- *
- * XXX: I don't think this is the right way things assign physical registers,
- * but I'm not sure of another way to do this.
- */
-void R600LowerShaderInstructionsPass::lowerLOAD_INPUT(MachineInstr &MI)
-{
- MachineOperand &dst = MI.getOperand(0);
- MachineOperand &arg = MI.getOperand(1);
- int64_t inputIndex = arg.getImm();
- const TargetRegisterClass * inputClass = TM.getRegisterInfo()->getRegClass(AMDIL::R600_TReg32RegClassID);
- unsigned newRegister = inputClass->getRegister(inputIndex);
- unsigned dstReg = dst.getReg();
-
- preloadRegister(MI.getParent()->getParent(), TM.getInstrInfo(), newRegister,
- dstReg);
-}
-
-bool R600LowerShaderInstructionsPass::lowerSTORE_OUTPUT(MachineInstr &MI,
- MachineBasicBlock &MBB, MachineBasicBlock::iterator I)
-{
- MachineOperand &valueOp = MI.getOperand(1);
- MachineOperand &indexOp = MI.getOperand(2);
- unsigned valueReg = valueOp.getReg();
- int64_t outputIndex = indexOp.getImm();
- const TargetRegisterClass * outputClass = TM.getRegisterInfo()->getRegClass(AMDIL::R600_TReg32RegClassID);
- unsigned newRegister = outputClass->getRegister(outputIndex);
-
- BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::COPY),
- newRegister)
- .addReg(valueReg);
-
- if (!MRI->isLiveOut(newRegister))
- MRI->addLiveOut(newRegister);
-
- return true;
-
-}
diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
new file mode 100644
index 00000000000..48443fb57d8
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
@@ -0,0 +1,16 @@
+//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600MachineFunctionInfo.h"
+
+using namespace llvm;
+
+R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
+  : MachineFunctionInfo(), ReservedRegs() // MF is not consulted; the list starts empty
+{ }
diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
new file mode 100644
index 00000000000..948e1924272
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
@@ -0,0 +1,33 @@
+//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600MachineFunctionInfo is used for keeping track of which registers have
+// been reserved by the llvm.AMDGPU.reserve.reg intrinsic.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600MACHINEFUNCTIONINFO_H
+#define R600MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include <vector>
+
+namespace llvm {
+
+class R600MachineFunctionInfo : public MachineFunctionInfo { // Per-function state for the R600 target.
+
+public:
+ R600MachineFunctionInfo(const MachineFunction &MF); // defined in R600MachineFunctionInfo.cpp
+ std::vector<unsigned> ReservedRegs; // registers to reserve; R600RegisterInfo::getReservedRegs() sets each entry in the BitVector it returns
+
+};
+
+} // End llvm namespace
+
+#endif //R600MACHINEFUNCTIONINFO_H
diff --git a/src/gallium/drivers/radeon/R600OpenCLUtils.h b/src/gallium/drivers/radeon/R600OpenCLUtils.h
deleted file mode 100644
index 91e41d63d0d..00000000000
--- a/src/gallium/drivers/radeon/R600OpenCLUtils.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===-- OpenCLUtils.h - TODO: Add brief description -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-#ifndef OPENCLUTILS_H
-#define OPENCLUTILS_H
-
-#include "llvm/Function.h"
-
-#include <llvm/Module.h>
-
-static bool isOpenCLKernel(const llvm::Function* fun)
-{
- llvm::Module *mod = const_cast<llvm::Function*>(fun)->getParent();
- llvm::NamedMDNode * md = mod->getOrInsertNamedMetadata("opencl.kernels");
-
- if (!md or !md->getNumOperands())
- {
- return false;
- }
-
- for (int i = 0; i < int(md->getNumOperands()); i++)
- {
- if (!md->getOperand(i) or !md->getOperand(i)->getOperand(0))
- {
- continue;
- }
-
- assert(md->getOperand(i)->getNumOperands() == 1);
-
- if (md->getOperand(i)->getOperand(0)->getName() == fun->getName())
- {
- return true;
- }
- }
-
- return false;
-}
-
-
-#endif
diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.cpp b/src/gallium/drivers/radeon/R600RegisterInfo.cpp
index 96507b104cf..de559bd2dfa 100644
--- a/src/gallium/drivers/radeon/R600RegisterInfo.cpp
+++ b/src/gallium/drivers/radeon/R600RegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===-- R600RegisterInfo.cpp - TODO: Add brief description -------===//
+//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,12 +7,13 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// The file contains the R600 implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//
#include "R600RegisterInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "R600MachineFunctionInfo.h"
using namespace llvm;
@@ -26,6 +27,8 @@ R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const
{
BitVector Reserved(getNumRegs());
+ const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>();
+
Reserved.set(AMDIL::ZERO);
Reserved.set(AMDIL::HALF);
Reserved.set(AMDIL::ONE);
@@ -40,19 +43,11 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const
Reserved.set(*I);
}
- for (MachineFunction::const_iterator BB = MF.begin(),
- BB_E = MF.end(); BB != BB_E; ++BB) {
- const MachineBasicBlock &MBB = *BB;
- for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- const MachineInstr &MI = *I;
- if (MI.getOpcode() == AMDIL::RESERVE_REG) {
- if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) {
- Reserved.set(MI.getOperand(0).getReg());
- }
- }
- }
+ for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
+ E = MFI->ReservedRegs.end(); I != E; ++I) {
+ Reserved.set(*I);
}
+
return Reserved;
}
diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.h b/src/gallium/drivers/radeon/R600RegisterInfo.h
index 95a44f971a0..89a11f9333b 100644
--- a/src/gallium/drivers/radeon/R600RegisterInfo.h
+++ b/src/gallium/drivers/radeon/R600RegisterInfo.h
@@ -1,4 +1,4 @@
-//===-- R600RegisterInfo.h - TODO: Add brief description -------===//
+//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// Interface definition for R600RegisterInfo
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/R600Schedule.td b/src/gallium/drivers/radeon/R600Schedule.td
index c6b1ca61bb5..d1957903d87 100644
--- a/src/gallium/drivers/radeon/R600Schedule.td
+++ b/src/gallium/drivers/radeon/R600Schedule.td
@@ -1,4 +1,4 @@
-//===-- R600Schedule.td - TODO: Add brief description -------===//
+//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,9 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// R600 has a VLIW architecture. On pre-cayman cards there are 5 instruction
+// slots ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. For cayman cards, the TRANS
+// slot has been removed.
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp b/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp
index b0bdf701a74..1ef097f7b1e 100644
--- a/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp
+++ b/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp
@@ -1,4 +1,4 @@
-//===-- SIAssignInterpRegs.cpp - TODO: Add brief description -------===//
+//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,12 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// This pass maps the pseudo interpolation registers to the correct physical
+// registers. Prior to executing a fragment shader, the GPU loads interpolation
+// parameters into physical registers. The specific physical register that each
+// interpolation parameter ends up in depends on the type of the interpolation
+// parameter as well as how many interpolation parameters are used by the
+// shader.
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/SICodeEmitter.cpp b/src/gallium/drivers/radeon/SICodeEmitter.cpp
index ad494fae7c6..6970d9f0875 100644
--- a/src/gallium/drivers/radeon/SICodeEmitter.cpp
+++ b/src/gallium/drivers/radeon/SICodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- SICodeEmitter.cpp - TODO: Add brief description -------===//
+//===-- SICodeEmitter.cpp - SI Code Emitter -------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// The SI code emitter produces machine code that can be executed directly on
+// the GPU device.
//
//===----------------------------------------------------------------------===//
@@ -144,8 +145,6 @@ bool SICodeEmitter::runOnMachineFunction(MachineFunction &MF)
{
MF.dump();
TM = &MF.getTarget();
- const AMDGPUInstrInfo * TII =
- static_cast<const AMDGPUInstrInfo*>(TM->getInstrInfo());
emitState(MF);
@@ -155,8 +154,7 @@ bool SICodeEmitter::runOnMachineFunction(MachineFunction &MF)
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
MachineInstr &MI = *I;
- if (!TII->isRegPreload(MI) && MI.getOpcode() != AMDIL::KILL
- && MI.getOpcode() != AMDIL::RETURN) {
+ if (MI.getOpcode() != AMDIL::KILL && MI.getOpcode() != AMDIL::RETURN) {
emitInstr(MI);
}
}
diff --git a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl
index 644daa1bc22..bb5ebbd67e6 100644
--- a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl
+++ b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl
@@ -1,16 +1,17 @@
-#===-- SIGenRegisterInfo.pl - TODO: Add brief description -------===#
+#===-- SIGenRegisterInfo.pl - Script for generating register info files ----===#
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
-#===----------------------------------------------------------------------===#
+#===------------------------------------------------------------------------===#
#
-# TODO: Add full description
+# This perl script prints to stdout .td code to be used as SIRegisterInfo.td.
+# It also generates a file called SIHwRegInfo.include, which contains helper
+# functions for determining the hw encoding of registers.
#
-#===----------------------------------------------------------------------===#
-
+#===------------------------------------------------------------------------===#
use strict;
use warnings;
diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp
index 1a4b47ecbf5..441a4a07290 100644
--- a/src/gallium/drivers/radeon/SIISelLowering.cpp
+++ b/src/gallium/drivers/radeon/SIISelLowering.cpp
@@ -1,4 +1,4 @@
-//===-- SIISelLowering.cpp - TODO: Add brief description -------===//
+//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// Most of the DAG lowering is handled in AMDILISelLowering.cpp. This file is
+// mostly EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/SIISelLowering.h b/src/gallium/drivers/radeon/SIISelLowering.h
index e7a79f8e215..229e682ef51 100644
--- a/src/gallium/drivers/radeon/SIISelLowering.h
+++ b/src/gallium/drivers/radeon/SIISelLowering.h
@@ -1,4 +1,4 @@
-//===-- SIISelLowering.h - TODO: Add brief description -------===//
+//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// SI DAG Lowering interface definition
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/SIInstrFormats.td b/src/gallium/drivers/radeon/SIInstrFormats.td
index caf9b0ef120..de0d4fa39d2 100644
--- a/src/gallium/drivers/radeon/SIInstrFormats.td
+++ b/src/gallium/drivers/radeon/SIInstrFormats.td
@@ -1,4 +1,4 @@
-//===-- SIInstrFormats.td - TODO: Add brief description -------===//
+//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,17 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// SI Instruction format definitions.
+//
+// Instructions with _32 take 32-bit operands.
+// Instructions with _64 take 64-bit operands.
+//
+// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
+// encoding is the standard encoding, but instructions that make use of
+// any of the instruction modifiers must use the 64-bit encoding.
+//
+// Instructions with _e32 use the 32-bit encoding.
+// Instructions with _e64 use the 64-bit encoding.
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.cpp b/src/gallium/drivers/radeon/SIInstrInfo.cpp
index 6f92e96c6e7..0cb97643a7f 100644
--- a/src/gallium/drivers/radeon/SIInstrInfo.cpp
+++ b/src/gallium/drivers/radeon/SIInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===-- SIInstrInfo.cpp - TODO: Add brief description -------===//
+//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//
@@ -107,6 +107,8 @@ unsigned SIInstrInfo::getISAOpcode(unsigned AMDILopcode) const
{
switch (AMDILopcode) {
case AMDIL::MAD_f32: return AMDIL::V_MAD_LEGACY_F32;
+ //XXX We need a better way of detecting end of program
+ case AMDIL::RETURN: return AMDIL::S_ENDPGM;
default: return AMDGPUInstrInfo::getISAOpcode(AMDILopcode);
}
}
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.h b/src/gallium/drivers/radeon/SIInstrInfo.h
index bd76c3f94aa..68940ea3ca4 100644
--- a/src/gallium/drivers/radeon/SIInstrInfo.h
+++ b/src/gallium/drivers/radeon/SIInstrInfo.h
@@ -1,4 +1,4 @@
-//===-- SIInstrInfo.h - TODO: Add brief description -------===//
+//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// Interface definition for SIInstrInfo.
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.td b/src/gallium/drivers/radeon/SIInstrInfo.td
index 65b28ec84ad..435948ff1de 100644
--- a/src/gallium/drivers/radeon/SIInstrInfo.td
+++ b/src/gallium/drivers/radeon/SIInstrInfo.td
@@ -1,4 +1,4 @@
-//===-- SIInstrInfo.td - TODO: Add brief description -------===//
+//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,10 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
@@ -464,11 +460,4 @@ def IMM12bit : ImmLeaf <
include "SIInstrFormats.td"
-def LOAD_CONST : AMDGPUShaderInst <
- (outs GPRF32:$dst),
- (ins i32imm:$src),
- "LOAD_CONST $dst, $src",
- [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
->;
-
include "SIInstructions.td"
diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td
index 27a8b31757a..57bbc7a5d5a 100644
--- a/src/gallium/drivers/radeon/SIInstructions.td
+++ b/src/gallium/drivers/radeon/SIInstructions.td
@@ -1,4 +1,4 @@
-//===-- SIInstructions.td - TODO: Add brief description -------===//
+//===-- SIInstructions.td - SI Instruction Definitions --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,10 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
def isSI : Predicate<"Subtarget.device()"
@@ -800,6 +796,13 @@ def CONFIG_WRITE : InstSI <
field bits<32> Inst = 0;
}
+def LOAD_CONST : AMDGPUShaderInst <
+ (outs GPRF32:$dst),
+ (ins i32imm:$src),
+ "LOAD_CONST $dst, $src",
+ [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
+>;
+
let usesCustomInserter = 1 in {
def SI_V_CNDLT : InstSI <
@@ -833,7 +836,6 @@ def USE_SGPR_32 : InstSI <
> {
field bits<32> Inst = 0;
- let PreloadReg = 1;
}
def USE_SGPR_64 : InstSI <
@@ -844,7 +846,6 @@ def USE_SGPR_64 : InstSI <
> {
field bits<32> Inst = 0;
- let PreloadReg = 1;
}
def VS_LOAD_BUFFER_INDEX : InstSI <
@@ -854,7 +855,6 @@ def VS_LOAD_BUFFER_INDEX : InstSI <
[(set VReg_32:$dst, (int_SI_vs_load_buffer_index))]> {
field bits<32> Inst = 0;
- let PreloadReg = 1;
}
} // end usesCustomInserter
diff --git a/src/gallium/drivers/radeon/SIIntrinsics.td b/src/gallium/drivers/radeon/SIIntrinsics.td
index e3014e13916..4d23072d4f1 100644
--- a/src/gallium/drivers/radeon/SIIntrinsics.td
+++ b/src/gallium/drivers/radeon/SIIntrinsics.td
@@ -1,4 +1,4 @@
-//===-- SIIntrinsics.td - TODO: Add brief description -------===//
+//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// SI Intrinsic Definitions
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/SILowerShaderInstructions.cpp b/src/gallium/drivers/radeon/SILowerShaderInstructions.cpp
deleted file mode 100644
index 5d49d88dc7c..00000000000
--- a/src/gallium/drivers/radeon/SILowerShaderInstructions.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-//===-- SILowerShaderInstructions.cpp - TODO: Add brief description -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "AMDGPU.h"
-#include "AMDGPULowerShaderInstructions.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-namespace {
- class SILowerShaderInstructionsPass : public MachineFunctionPass,
- public AMDGPULowerShaderInstructionsPass {
-
- private:
- static char ID;
- TargetMachine &TM;
-
- public:
- SILowerShaderInstructionsPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TM(tm) { }
-
- bool runOnMachineFunction(MachineFunction &MF);
-
- const char *getPassName() const { return "SI Lower Shader Instructions"; }
-
- void lowerRETURN(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
- void lowerSET_M0(MachineInstr &MI, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I);
- };
-} /* End anonymous namespace */
-
-char SILowerShaderInstructionsPass::ID = 0;
-
-FunctionPass *llvm::createSILowerShaderInstructionsPass(TargetMachine &tm) {
- return new SILowerShaderInstructionsPass(tm);
-}
-
-bool SILowerShaderInstructionsPass::runOnMachineFunction(MachineFunction &MF)
-{
- MRI = &MF.getRegInfo();
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
- for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
- I != MBB.end(); I = Next, Next = llvm::next(I) ) {
- MachineInstr &MI = *I;
- switch (MI.getOpcode()) {
- case AMDIL::RETURN:
- lowerRETURN(MBB, I);
- break;
- case AMDIL::SET_M0:
- lowerSET_M0(MI, MBB, I);
- break;
- default: continue;
- }
- MI.removeFromParent();
- }
- }
-
- return false;
-}
-
-void SILowerShaderInstructionsPass::lowerRETURN(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I)
-{
- const struct TargetInstrInfo * TII = TM.getInstrInfo();
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::S_ENDPGM));
-}
-
-void SILowerShaderInstructionsPass::lowerSET_M0(MachineInstr &MI,
- MachineBasicBlock &MBB, MachineBasicBlock::iterator I)
-{
- const struct TargetInstrInfo * TII = TM.getInstrInfo();
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::S_MOV_IMM_I32))
- .addReg(AMDIL::M0)
- .addOperand(MI.getOperand(1));
-}
diff --git a/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp b/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp
index eace40c226c..40ba76f1f86 100644
--- a/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp
+++ b/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp
@@ -1,4 +1,4 @@
-//===-- SIMachineFunctionInfo.cpp - TODO: Add brief description -------===//
+//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,10 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
#include "SIMachineFunctionInfo.h"
diff --git a/src/gallium/drivers/radeon/SIMachineFunctionInfo.h b/src/gallium/drivers/radeon/SIMachineFunctionInfo.h
index 5647de9d81f..46a021f3613 100644
--- a/src/gallium/drivers/radeon/SIMachineFunctionInfo.h
+++ b/src/gallium/drivers/radeon/SIMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//===-- SIMachineFunctionInfo.h - TODO: Add brief description -------===//
+//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,9 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// SIMachineFunctionInfo is used to keep track of the spi_sp_input_addr config
+// register, which tells the hardware which interpolation parameters to
+// load.
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/SIPropagateImmReads.cpp b/src/gallium/drivers/radeon/SIPropagateImmReads.cpp
index 4f925d5de1c..6a165488831 100644
--- a/src/gallium/drivers/radeon/SIPropagateImmReads.cpp
+++ b/src/gallium/drivers/radeon/SIPropagateImmReads.cpp
@@ -1,4 +1,4 @@
-//===-- SIPropagateImmReads.cpp - TODO: Add brief description -------===//
+//===-- SIPropagateImmReads.cpp - Lower Immediate Reads Pass --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,9 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// We can't do this in the ConvertToISA pass, because later passes might
+// create LOADCONST_* instructions that we would miss. This is why we need
+// a separate pass for this.
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/SIRegisterInfo.cpp b/src/gallium/drivers/radeon/SIRegisterInfo.cpp
index da2ec36a773..2d530a4f022 100644
--- a/src/gallium/drivers/radeon/SIRegisterInfo.cpp
+++ b/src/gallium/drivers/radeon/SIRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===-- SIRegisterInfo.cpp - TODO: Add brief description -------===//
+//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// This file contains the SI implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/SIRegisterInfo.h b/src/gallium/drivers/radeon/SIRegisterInfo.h
index c797e3c8ace..77f3261efc5 100644
--- a/src/gallium/drivers/radeon/SIRegisterInfo.h
+++ b/src/gallium/drivers/radeon/SIRegisterInfo.h
@@ -1,4 +1,4 @@
-//===-- SIRegisterInfo.h - TODO: Add brief description -------===//
+//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// Interface definition for SIRegisterInfo
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/SISchedule.td b/src/gallium/drivers/radeon/SISchedule.td
index 9e99268e9ca..28b65b82585 100644
--- a/src/gallium/drivers/radeon/SISchedule.td
+++ b/src/gallium/drivers/radeon/SISchedule.td
@@ -1,4 +1,4 @@
-//===-- SISchedule.td - TODO: Add brief description -------===//
+//===-- SISchedule.td - SI Scheduling definitions -------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: Add full description
+// TODO: This is just a place holder for now.
//
//===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index 9be7f90c3e6..4a706397fdd 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -36,6 +36,8 @@
#define RADEON_LLVM_MAX_BRANCH_DEPTH 16
#define RADEON_LLVM_MAX_LOOP_DEPTH 16
+#define RADEON_LLVM_MAX_SYSTEM_VALUES 4
+
struct radeon_llvm_branch {
LLVMBasicBlockRef endif_block;
LLVMBasicBlockRef if_block;
@@ -78,6 +80,9 @@ struct radeon_llvm_context {
unsigned input_index,
const struct tgsi_full_declaration *decl);
+ void (*load_system_value)(struct radeon_llvm_context *,
+ unsigned index,
+ const struct tgsi_full_declaration *decl);
/** User data to use with the callbacks */
void * userdata;
@@ -90,6 +95,8 @@ struct radeon_llvm_context {
LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS];
unsigned output_reg_count;
+ LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES];
+
unsigned reserved_reg_count;
/*=== Private Members ===*/
@@ -105,6 +112,37 @@ struct radeon_llvm_context {
struct gallivm_state gallivm;
};
+/**
+ * Reinterpret a value as the LLVM scalar type that corresponds to a TGSI
+ * opcode type.
+ *
+ * TGSI_TYPE_UNSIGNED and TGSI_TYPE_SIGNED select a 32-bit integer type,
+ * TGSI_TYPE_UNTYPED and TGSI_TYPE_FLOAT select the float type.  For every
+ * other type no cast is emitted and the value is returned unchanged.
+ */
+static inline LLVMValueRef bitcast(
+	struct lp_build_tgsi_context * bld_base,
+	enum tgsi_opcode_type type,
+	LLVMValueRef value
+)
+{
+	struct gallivm_state * gallivm = bld_base->base.gallivm;
+	LLVMTypeRef target;
+
+	if (type == TGSI_TYPE_UNSIGNED || type == TGSI_TYPE_SIGNED) {
+		target = LLVMInt32TypeInContext(gallivm->context);
+	} else if (type == TGSI_TYPE_UNTYPED || type == TGSI_TYPE_FLOAT) {
+		target = LLVMFloatTypeInContext(gallivm->context);
+	} else {
+		/* No matching scalar type: hand the value back untouched. */
+		return value;
+	}
+
+	return LLVMBuildBitCast(gallivm->builder, value, target, "");
+}
+
+
void radeon_llvm_context_init(struct radeon_llvm_context * ctx);
void radeon_llvm_dispose(struct radeon_llvm_context * ctx);
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.cpp b/src/gallium/drivers/radeon/radeon_llvm_emit.cpp
index b409cb2175e..ebc32106b52 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.cpp
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.cpp
@@ -93,19 +93,20 @@ radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes,
AMDGPUTriple.setArch(Arch);
Module * mod = unwrap(M);
- std::string FS = gpu_family;
+ std::string FS;
TargetOptions TO;
+ if (dump) {
+ mod->dump();
+ FS += "+DumpCode";
+ }
+
std::auto_ptr<TargetMachine> tm(AMDGPUTarget->createTargetMachine(
- AMDGPUTriple.getTriple(), gpu_family, "" /* Features */,
+ AMDGPUTriple.getTriple(), gpu_family, FS,
TO, Reloc::Default, CodeModel::Default,
CodeGenOpt::Default
));
TargetMachine &AMDGPUTargetMachine = *tm.get();
- /* XXX: Use TargetMachine.Options in 3.0 */
- if (dump) {
- mod->dump();
- }
PassManager PM;
PM.add(new TargetData(*AMDGPUTargetMachine.getTargetData()));
PM.add(createPromoteMemoryToRegisterPass());
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 62de9da28de..6e6fc3d12cd 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -29,6 +29,7 @@
#include "gallivm/lp_bld_gather.h"
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_swizzle.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
@@ -112,8 +113,25 @@ emit_fetch_immediate(
enum tgsi_opcode_type type,
unsigned swizzle)
{
+ LLVMTypeRef ctype;
+ LLVMContextRef ctx = bld_base->base.gallivm->context;
+
+ switch (type) {
+ case TGSI_TYPE_UNSIGNED:
+ case TGSI_TYPE_SIGNED:
+ ctype = LLVMInt32TypeInContext(ctx);
+ break;
+ case TGSI_TYPE_UNTYPED:
+ case TGSI_TYPE_FLOAT:
+ ctype = LLVMFloatTypeInContext(ctx);
+ break;
+ default:
+ ctype = 0;
+ break;
+ }
+
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
- return bld->immediates[reg->Register.Index][swizzle];
+ return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
}
static LLVMValueRef
@@ -134,7 +152,7 @@ emit_fetch_input(
return lp_build_gather_values(bld_base->base.gallivm, values,
TGSI_NUM_CHANNELS);
} else {
- return ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
+ return bitcast(bld_base, type, ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)]);
}
}
@@ -155,7 +173,7 @@ emit_fetch_temporary(
} else {
LLVMValueRef temp_ptr;
temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
- return LLVMBuildLoad(builder, temp_ptr, "");
+ return bitcast(bld_base,type,LLVMBuildLoad(builder, temp_ptr, ""));
}
}
@@ -213,6 +231,15 @@ static void emit_declaration(
}
break;
+ case TGSI_FILE_SYSTEM_VALUE:
+ {
+ unsigned idx;
+ for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
+ ctx->load_system_value(ctx, idx, decl);
+ }
+ }
+ break;
+
case TGSI_FILE_OUTPUT:
{
unsigned idx;
@@ -304,6 +331,9 @@ emit_store(
default:
return;
}
+
+ value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
+
LLVMBuildStore(builder, value, temp_ptr);
}
}
@@ -444,8 +474,10 @@ static void if_emit(
struct gallivm_state * gallivm = bld_base->base.gallivm;
LLVMValueRef cond;
LLVMBasicBlockRef if_block, else_block, endif_block;
- cond = LLVMBuildFCmp(gallivm->builder, LLVMRealOEQ, emit_data->args[0],
- bld_base->base.one, "");
+
+ cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
+ bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
+ bld_base->int_bld.zero, "");
endif_block = LLVMAppendBasicBlockInContext(gallivm->context,
ctx->main_fn, "ENDIF");
@@ -463,6 +495,101 @@ static void if_emit(
ctx->branch[ctx->branch_depth - 1].has_else = 0;
}
+/**
+ * Emit one unary intrinsic call, named by action->intr_name, for each of the
+ * emit_data->arg_count arguments and store the result in the output slot
+ * with the same index.
+ */
+static void kil_emit(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+	unsigned arg;
+
+	for (arg = 0; arg < emit_data->arg_count; ++arg) {
+		LLVMValueRef src = emit_data->args[arg];
+
+		emit_data->output[arg] = lp_build_intrinsic_unary(builder,
+						action->intr_name,
+						emit_data->dst_type, src);
+	}
+}
+
+
+/**
+ * Replace emit_data->args[0] with the coordinate vector prepared for cube
+ * map sampling.
+ *
+ * args[0] is passed through the llvm.AMDGPU.cube intrinsic and its four
+ * components are extracted.  The third component is run through
+ * llvm.AMDIL.fabs. and llvm.AMDGPU.rcp, and the first two components are
+ * each combined with that result and the constant 1.5 through
+ * llvm.AMDIL.mad. before the values are gathered back with a yxwy swizzle.
+ */
+static void emit_prepare_cube_coords(
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	struct gallivm_state * gallivm = bld_base->base.gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
+	LLVMTypeRef type = bld_base->base.elem_type;
+	LLVMValueRef coords[4];
+	LLVMValueRef mad_args[3];
+	unsigned i;
+
+	LLVMValueRef v = lp_build_intrinsic(builder, "llvm.AMDGPU.cube",
+				LLVMVectorType(type, 4),
+				&emit_data->args[0],1);
+
+	/* Always extract all four components.  Stopping after three for
+	 * TGSI_TEXTURE_SHADOWCUBE left coords[3] uninitialized although the
+	 * swizzle below reads it.
+	 *
+	 * NOTE(review): nothing in this function forwards the w component of
+	 * the original source operand; confirm whether shadow cube lookups
+	 * need it re-inserted by the caller.
+	 */
+	for (i = 0; i < 4; ++i) {
+		LLVMValueRef idx = lp_build_const_int32(gallivm, i);
+		coords[i] = LLVMBuildExtractElement(builder, v, idx, "");
+	}
+
+	coords[2] = lp_build_intrinsic(builder, "llvm.AMDIL.fabs.",
+				type, &coords[2], 1);
+	coords[2] = lp_build_intrinsic(builder, "llvm.AMDGPU.rcp",
+				type, &coords[2], 1);
+
+	/* The last two mad operands are shared by both calls below. */
+	mad_args[1] = coords[2];
+	mad_args[2] = LLVMConstReal(type, 1.5);
+
+	mad_args[0] = coords[0];
+	coords[0] = lp_build_intrinsic(builder, "llvm.AMDIL.mad.",
+				type, mad_args, 3);
+
+	mad_args[0] = coords[1];
+	coords[1] = lp_build_intrinsic(builder, "llvm.AMDIL.mad.",
+				type, mad_args, 3);
+
+	/* apply yxwy swizzle to coords; the assignment order matters because
+	 * coords[3] is used as scratch for the old coords[1] */
+	coords[2] = coords[3];
+	coords[3] = coords[1];
+	coords[1] = coords[0];
+	coords[0] = coords[3];
+
+	emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
+						coords, 4);
+}
+
+/**
+ * Fetch the arguments of a projected texture instruction.
+ *
+ * The x, y and z channels of source operand 0 are each divided by its w
+ * channel (through TGSI_OPCODE_DIV) and the fourth coordinate is set to
+ * one.  The four values are gathered into args[0]; for cube and shadow cube
+ * targets the cube coordinate preparation is applied afterwards.
+ */
+static void txp_fetch_args(
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	const struct tgsi_full_instruction * inst = emit_data->inst;
+	const unsigned target = inst->Texture.Texture;
+	LLVMValueRef projected[4];
+	LLVMValueRef divisor;
+	unsigned c;
+
+	emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+	divisor = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_W);
+
+	for (c = 0; c < 3; c++) {
+		LLVMValueRef numerator = lp_build_emit_fetch(bld_base, inst, 0, c);
+
+		projected[c] = lp_build_emit_llvm_binary(bld_base,
+					TGSI_OPCODE_DIV, numerator, divisor);
+	}
+	projected[3] = bld_base->base.one;
+
+	emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
+						projected, 4);
+	emit_data->arg_count = 1;
+
+	/* Only cube-type targets of non-TXQ instructions need more work. */
+	if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ)
+		return;
+	if (target != TGSI_TEXTURE_CUBE && target != TGSI_TEXTURE_SHADOWCUBE)
+		return;
+
+	emit_prepare_cube_coords(bld_base, emit_data);
+}
+
static void tex_fetch_args(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
@@ -475,16 +602,261 @@ static void tex_fetch_args(
*/
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+
LLVMValueRef coords[4];
unsigned chan;
for (chan = 0; chan < 4; chan++) {
- coords[chan] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, chan);
+ coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
}
emit_data->arg_count = 1;
emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
coords, 4);
emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+
+ if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
+ inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
+ inst->Instruction.Opcode != TGSI_OPCODE_TXQ) {
+ emit_prepare_cube_coords(bld_base, emit_data);
+ }
+}
+
+/**
+ * Emit an integer comparison for USEQ, USNE, USGE, USLT, ISGE and ISLT.
+ *
+ * The opcode selects the LLVM integer predicate.  args[0] is compared
+ * against args[1] and the result is widened with a sign-extending cast to
+ * a 32-bit integer before being stored in the output slot of the current
+ * channel.
+ *
+ * For any other opcode the function asserts; when assertions are compiled
+ * out it returns without emitting anything.
+ */
+static void emit_icmp(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	unsigned pred;
+	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+	LLVMContextRef context = bld_base->base.gallivm->context;
+
+	switch (emit_data->inst->Instruction.Opcode) {
+	case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break;
+	case TGSI_OPCODE_USNE: pred = LLVMIntNE; break;
+	case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break;
+	case TGSI_OPCODE_USLT: pred = LLVMIntULT; break;
+	case TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break;
+	case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break;
+	default:
+		assert(!"unknown instruction");
+		/* Do not build a compare from an uninitialized predicate
+		 * when the assert above is compiled out. */
+		return;
+	}
+
+	LLVMValueRef v = LLVMBuildICmp(builder, pred,
+			emit_data->args[0], emit_data->args[1],"");
+
+	v = LLVMBuildSExtOrBitCast(builder, v,
+			LLVMInt32TypeInContext(context), "");
+
+	emit_data->output[emit_data->chan] = v;
+}
+
+/* Bitwise NOT: args[0] is bitcast to the unsigned TGSI type and inverted. */
+static void emit_not(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+	LLVMValueRef as_uint;
+
+	as_uint = bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]);
+	emit_data->output[emit_data->chan] = LLVMBuildNot(builder, as_uint, "");
+}
+
+/* Bitwise AND of args[0] and args[1], stored for the current channel. */
+static void emit_and(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMValueRef lhs = emit_data->args[0];
+	LLVMValueRef rhs = emit_data->args[1];
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildAnd(bld_base->base.gallivm->builder, lhs, rhs, "");
+}
+
+/* Bitwise OR of args[0] and args[1], stored for the current channel. */
+static void emit_or(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMValueRef lhs = emit_data->args[0];
+	LLVMValueRef rhs = emit_data->args[1];
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildOr(bld_base->base.gallivm->builder, lhs, rhs, "");
+}
+
+/* Integer addition of args[0] and args[1], stored for the current channel. */
+static void emit_uadd(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMValueRef lhs = emit_data->args[0];
+	LLVMValueRef rhs = emit_data->args[1];
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildAdd(bld_base->base.gallivm->builder, lhs, rhs, "");
+}
+
+/* Unsigned integer division: args[0] / args[1]. */
+static void emit_udiv(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMValueRef dividend = emit_data->args[0];
+	LLVMValueRef divisor = emit_data->args[1];
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildUDiv(bld_base->base.gallivm->builder,
+				dividend, divisor, "");
+}
+
+/* Signed integer division: args[0] / args[1]. */
+static void emit_idiv(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMValueRef dividend = emit_data->args[0];
+	LLVMValueRef divisor = emit_data->args[1];
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildSDiv(bld_base->base.gallivm->builder,
+				dividend, divisor, "");
+}
+
+/* Signed integer remainder: args[0] % args[1]. */
+static void emit_mod(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMValueRef dividend = emit_data->args[0];
+	LLVMValueRef divisor = emit_data->args[1];
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildSRem(bld_base->base.gallivm->builder,
+				dividend, divisor, "");
+}
+
+/* Unsigned integer remainder: args[0] % args[1]. */
+static void emit_umod(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMValueRef dividend = emit_data->args[0];
+	LLVMValueRef divisor = emit_data->args[1];
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildURem(bld_base->base.gallivm->builder,
+				dividend, divisor, "");
+}
+
+/* Left shift of args[0] by args[1] bits. */
+static void emit_shl(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMValueRef value = emit_data->args[0];
+	LLVMValueRef amount = emit_data->args[1];
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildShl(bld_base->base.gallivm->builder, value, amount, "");
+}
+
+/* Logical (zero-filling) right shift of args[0] by args[1] bits. */
+static void emit_ushr(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMValueRef value = emit_data->args[0];
+	LLVMValueRef amount = emit_data->args[1];
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildLShr(bld_base->base.gallivm->builder, value, amount, "");
+}
+/* Arithmetic (sign-preserving) right shift of args[0] by args[1] bits. */
+static void emit_ishr(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMValueRef value = emit_data->args[0];
+	LLVMValueRef amount = emit_data->args[1];
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildAShr(bld_base->base.gallivm->builder, value, amount, "");
+}
+
+/* Bitwise XOR of args[0] and args[1], stored for the current channel. */
+static void emit_xor(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMValueRef lhs = emit_data->args[0];
+	LLVMValueRef rhs = emit_data->args[1];
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildXor(bld_base->base.gallivm->builder, lhs, rhs, "");
+}
+
+/**
+ * Emit the sign of args[0] for the current channel.
+ *
+ * TGSI_OPCODE_ISSG takes the integer path; every other opcode routed here
+ * takes the float path.  Both paths use two compare/select pairs:
+ *   1. values greater than zero are replaced by one;
+ *   2. of what remains, values below zero are replaced by minus one.
+ *
+ * The float path uses unordered comparison predicates (UGT/UGE).
+ */
+static void emit_ssg(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+
+	LLVMValueRef cmp, val;
+
+	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
+		cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
+		val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
+		cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
+		val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), "");
+	} else { /* float SSG */
+		/* Compare against the float zero: both operands of an fcmp
+		 * must have the same floating-point type, so the integer
+		 * zero must not be used here. */
+		cmp = LLVMBuildFCmp(builder, LLVMRealUGT, emit_data->args[0], bld_base->base.zero, "");
+		val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], "");
+		cmp = LLVMBuildFCmp(builder, LLVMRealUGE, val, bld_base->base.zero, "");
+		val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), "");
+	}
+
+	emit_data->output[emit_data->chan] = val;
+}
+
+/* Integer negation of args[0], stored for the current channel. */
+static void emit_ineg(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMValueRef operand = emit_data->args[0];
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildNeg(bld_base->base.gallivm->builder, operand, "");
+}
+
+/* Convert the float in args[0] to a signed integer of the int_bld element type. */
+static void emit_f2i(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMTypeRef dst_type = bld_base->int_bld.elem_type;
+	LLVMValueRef src = emit_data->args[0];
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildFPToSI(bld_base->base.gallivm->builder,
+				src, dst_type, "");
+}
+
+/* Convert the float in args[0] to an unsigned integer of the uint_bld element type. */
+static void emit_f2u(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMTypeRef dst_type = bld_base->uint_bld.elem_type;
+	LLVMValueRef src = emit_data->args[0];
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildFPToUI(bld_base->base.gallivm->builder,
+				src, dst_type, "");
+}
+
+/* Convert the signed integer in args[0] to the base float element type. */
+static void emit_i2f(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMTypeRef dst_type = bld_base->base.elem_type;
+	LLVMValueRef src = emit_data->args[0];
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildSIToFP(bld_base->base.gallivm->builder,
+				src, dst_type, "");
+}
+
+/* Convert the unsigned integer in args[0] to the base float element type. */
+static void emit_u2f(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	LLVMTypeRef dst_type = bld_base->base.elem_type;
+	LLVMValueRef src = emit_data->args[0];
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildUIToFP(bld_base->base.gallivm->builder,
+				src, dst_type, "");
+}
+
+/**
+ * Record a TGSI immediate in the radeon LLVM context.
+ *
+ * Each of the four channels of imm is stored, as an unsigned integer
+ * constant built from its Uint field, in the next free slot of
+ * ctx->soa.immediates; the slot counter is then advanced by one.
+ */
+static void emit_immediate(struct lp_build_tgsi_context * bld_base,
+	const struct tgsi_full_immediate *imm)
+{
+	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
+	unsigned chan;
+
+	for (chan = 0; chan < 4; chan++) {
+		LLVMValueRef bits = LLVMConstInt(bld_base->uint_bld.elem_type,
+				imm->u[chan].Uint, false );
+
+		ctx->soa.immediates[ctx->soa.num_immediates][chan] = bits;
+	}
+
+	ctx->soa.num_immediates++;
}
void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
@@ -526,12 +898,13 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
+ lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
bld_base->soa = 1;
bld_base->emit_store = emit_store;
bld_base->emit_swizzle = emit_swizzle;
bld_base->emit_declaration = emit_declaration;
- bld_base->emit_immediate = lp_emit_immediate_soa;
+ bld_base->emit_immediate = emit_immediate;
bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
@@ -545,6 +918,60 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
lp_set_default_actions(bld_base);
+ bld_base->op_actions[TGSI_OPCODE_IABS].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs.";
+ bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
+ bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
+ bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
+ bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
+ bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
+ bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
+ bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
+ bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
+ bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
+ bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
+ bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
+ bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
+ bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
+ bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
+ bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
+ bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
+ bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
+ bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
+ bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
+ bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
+ bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
+ bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
+ bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
+ bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
+ bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
+ bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
+ bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
+ bld_base->op_actions[TGSI_OPCODE_ROUND].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest.";
+ bld_base->op_actions[TGSI_OPCODE_MIN].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.AMDIL.min.";
+ bld_base->op_actions[TGSI_OPCODE_MAX].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.AMDIL.max.";
+ bld_base->op_actions[TGSI_OPCODE_IMIN].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_IMIN].intr_name = "llvm.AMDGPU.imin";
+ bld_base->op_actions[TGSI_OPCODE_IMAX].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_IMAX].intr_name = "llvm.AMDGPU.imax";
+ bld_base->op_actions[TGSI_OPCODE_UMIN].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_UMIN].intr_name = "llvm.AMDGPU.umin";
+ bld_base->op_actions[TGSI_OPCODE_UMAX].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_UMAX].intr_name = "llvm.AMDGPU.umax";
+ bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf";
+ bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq";
+ bld_base->op_actions[TGSI_OPCODE_CEIL].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.AMDIL.round.neginf.";
+
+
+
bld_base->op_actions[TGSI_OPCODE_ABS].emit = lp_build_tgsi_intrinsic;
bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.AMDIL.fabs.";
bld_base->op_actions[TGSI_OPCODE_ARL].emit = lp_build_tgsi_intrinsic;
@@ -558,10 +985,6 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDGPU.cndlt";
bld_base->op_actions[TGSI_OPCODE_COS].emit = lp_build_tgsi_intrinsic;
bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.AMDGPU.cos";
- bld_base->op_actions[TGSI_OPCODE_DDX].emit = lp_build_tgsi_intrinsic;
- bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
- bld_base->op_actions[TGSI_OPCODE_DDY].emit = lp_build_tgsi_intrinsic;
- bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
bld_base->op_actions[TGSI_OPCODE_DIV].emit = lp_build_tgsi_intrinsic;
bld_base->op_actions[TGSI_OPCODE_DIV].intr_name = "llvm.AMDGPU.div";
bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
@@ -574,7 +997,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_FRC].emit = lp_build_tgsi_intrinsic;
bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction.";
bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
- bld_base->op_actions[TGSI_OPCODE_KIL].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
bld_base->op_actions[TGSI_OPCODE_KIL].intr_name = "llvm.AMDGPU.kill";
bld_base->op_actions[TGSI_OPCODE_KILP].emit = lp_build_tgsi_intrinsic;
bld_base->op_actions[TGSI_OPCODE_KILP].intr_name = "llvm.AMDGPU.kilp";
@@ -597,7 +1020,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_SSG].emit = lp_build_tgsi_intrinsic;
bld_base->op_actions[TGSI_OPCODE_SSG].intr_name = "llvm.AMDGPU.ssg";
bld_base->op_actions[TGSI_OPCODE_SGE].emit = lp_build_tgsi_intrinsic;
- bld_base->op_actions[TGSI_OPCODE_SGE].intr_name = "llvm.AMDGPU.sge.";
+ bld_base->op_actions[TGSI_OPCODE_SGE].intr_name = "llvm.AMDGPU.sge";
bld_base->op_actions[TGSI_OPCODE_SEQ].emit = lp_build_tgsi_intrinsic;
bld_base->op_actions[TGSI_OPCODE_SEQ].intr_name = "llvm.AMDGPU.seq";
bld_base->op_actions[TGSI_OPCODE_SLE].fetch_args = radeon_llvm_fetch_args_2_reverse_soa;
@@ -620,6 +1043,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd";
bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl";
+ bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex";
bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = lp_build_tgsi_intrinsic;
bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc";