1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
|
//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass maps the pseudo interpolation registers to the correct physical
// registers. Prior to executing a fragment shader, the GPU loads interpolation
// parameters into physical registers. The specific physical register that each
// interpolation parameter ends up in depends on the type of the interpolation
// parameter as well as how many interpolation parameters are used by the
// shader.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDIL.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
namespace {
/// Machine function pass that rewrites the pseudo interpolation registers
/// used by a pixel shader into virtual registers that are copied from the
/// physical VGPRs at function entry.
class SIAssignInterpRegsPass : public MachineFunctionPass {

private:
  /// Pass identifier handed to the MachineFunctionPass constructor.
  static char ID;

  /// Target machine this pass was created for; used to look up the
  /// instruction info when building COPY instructions.
  TargetMachine &TM;

  /// Make \p physReg a live-in of \p MF and route its value into \p virtReg.
  /// If \p physReg is already a live-in, \p virtReg is replaced by the
  /// virtual register that is already associated with it.
  void addLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
                 unsigned physReg, unsigned virtReg);

public:
  SIAssignInterpRegsPass(TargetMachine &tm) :
    MachineFunctionPass(ID), TM(tm) { }

  /// Assign interpolation registers for \p MF (pixel shaders only).
  virtual bool runOnMachineFunction(MachineFunction &MF);

  /// Human-readable name of this pass.
  const char *getPassName() const { return "SI Assign interpolation registers"; }
};
} // End anonymous namespace
// Storage for the pass identifier that the constructor passes to
// MachineFunctionPass.
char SIAssignInterpRegsPass::ID = 0;
// Number of entries in the interpolation table built by runOnMachineFunction();
// also the number of bit positions that may be set in SPIPSInputAddr.
#define INTERP_VALUES 16
// Highest table index that counts as an "interpolation mode". If no entry with
// an index in [0, REQUIRED_VALUE_MAX_INDEX] is in use, entry 0 is force-enabled
// because the GPU hangs when none of them is enabled.
#define REQUIRED_VALUE_MAX_INDEX 7
// Describes one interpolation value that the hardware can preload for a pixel
// shader. The field order matters: the table in runOnMachineFunction() is
// filled with positional brace initializers.
struct InterpInfo {
// True once any register in Regs is found to have a use (or when the entry is
// force-enabled); enabled entries get VGPRs and a bit in SPIPSInputAddr.
bool Enabled;
// Pseudo registers that make up this value; only the first RegCount entries
// are meaningful.
unsigned Regs[3];
// Number of valid entries in Regs (1 to 3 in the table used by this pass).
unsigned RegCount;
};
/// Factory for the SI interpolation register assignment pass.
///
/// \param tm Target machine the new pass keeps a reference to.
/// \returns A newly allocated pass object, returned as a FunctionPass pointer.
FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
  SIAssignInterpRegsPass *NewPass = new SIAssignInterpRegsPass(tm);
  return NewPass;
}
/// Replace the pseudo interpolation registers used by a pixel shader with
/// virtual registers that are copied from consecutive VGPRs at function entry,
/// and record which interpolation values are enabled in
/// SIMachineFunctionInfo::SPIPSInputAddr.
///
/// \param MF The machine function to process.
/// \returns false for non-pixel shaders (nothing is touched), true otherwise,
///          since at least one interpolation value is always enabled and
///          rewritten for a pixel shader.
bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF)
{
  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();

  // This pass is only needed for pixel shaders. Bail out before building the
  // table below so other shader types pay nothing.
  if (MFI->ShaderType != ShaderType::PIXEL) {
    return false;
  }

  // One entry per interpolation value. The position of an entry in this table
  // is also its bit position in SPIPSInputAddr, so the order must not change.
  struct InterpInfo InterpUse[INTERP_VALUES] = {
    {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
    {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
    {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
    {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
    {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
    {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
    {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
    {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
    {false, {AMDGPU::POS_X_FLOAT}, 1},
    {false, {AMDGPU::POS_Y_FLOAT}, 1},
    {false, {AMDGPU::POS_Z_FLOAT}, 1},
    {false, {AMDGPU::POS_W_FLOAT}, 1},
    {false, {AMDGPU::FRONT_FACE}, 1},
    {false, {AMDGPU::ANCILLARY}, 1},
    {false, {AMDGPU::SAMPLE_COVERAGE}, 1},
    {false, {AMDGPU::POS_FIXED_PT}, 1}
  };

  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool ForceEnable = true;

  // First pass, mark the interpolation values that are used. A value is used
  // as soon as any one of its registers has a use.
  for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
    InterpInfo &Info = InterpUse[InterpIdx];
    for (unsigned RegIdx = 0; RegIdx < Info.RegCount; RegIdx++) {
      if (!MRI.use_empty(Info.Regs[RegIdx])) {
        Info.Enabled = true;
        break;
      }
    }
    if (Info.Enabled && InterpIdx <= REQUIRED_VALUE_MAX_INDEX) {
      ForceEnable = false;
    }
  }

  // At least one interpolation mode must be enabled or else the GPU will hang.
  if (ForceEnable) {
    InterpUse[0].Enabled = true;
  }

  unsigned UsedVgprs = 0;

  // Second pass, replace with VGPRs. Enabled values are packed into
  // consecutive VGPRs in table order; disabled values consume no register.
  for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
    InterpInfo &Info = InterpUse[InterpIdx];
    if (!Info.Enabled) {
      continue;
    }
    MFI->SPIPSInputAddr |= (1u << InterpIdx);

    for (unsigned RegIdx = 0; RegIdx < Info.RegCount; RegIdx++, UsedVgprs++) {
      unsigned NewReg = AMDGPU::VReg_32RegClass.getRegister(UsedVgprs);
      unsigned VirtReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
      MRI.replaceRegWith(Info.Regs[RegIdx], VirtReg);
      addLiveIn(&MF, MRI, NewReg, VirtReg);
    }
  }

  // Registers were rewritten and live-in copies were inserted, so report the
  // function as modified.
  return true;
}
/// Bind \p virtReg to the value that arrives in \p physReg at function entry.
///
/// When \p physReg is not yet a live-in, it is registered as one (on both the
/// register info and the first basic block) and a COPY from \p physReg into
/// \p virtReg is inserted at the start of the first block. When it already is
/// a live-in, every occurrence of \p virtReg is instead replaced with the
/// virtual register previously associated with \p physReg.
///
/// \param MF      Function whose first block receives the live-in and COPY.
/// \param MRI     Register info of \p MF.
/// \param physReg Physical register holding the incoming value.
/// \param virtReg Virtual register that should carry the value.
void SIAssignInterpRegsPass::addLiveIn(MachineFunction * MF,
                                       MachineRegisterInfo & MRI,
                                       unsigned physReg, unsigned virtReg)
{
  // Already known live-in: reuse the existing virtual register.
  if (MRI.isLiveIn(physReg)) {
    unsigned ExistingVReg = MRI.getLiveInVirtReg(physReg);
    MRI.replaceRegWith(virtReg, ExistingVReg);
    return;
  }

  // New live-in: record it, then copy it into the virtual register at the
  // very beginning of the entry block.
  MRI.addLiveIn(physReg, virtReg);
  MF->front().addLiveIn(physReg);

  const TargetInstrInfo * InstrInfo = TM.getInstrInfo();
  BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
          InstrInfo->get(TargetOpcode::COPY), virtReg)
      .addReg(physReg);
}
|