mesa: Changes to 'upstream-experimental'
Rebased ref, commits from common ancestor:
commit 1597176f7090eea73f41b3114ae2a02a50ac7a12
Author: Vinson Lee <vlee@freedesktop.org>
Date: Sun Aug 12 00:20:21 2012 -0700
llvmpipe: Silence Coverity incorrect sizeof expression defect.
Signed-off-by: Vinson Lee <vlee@freedesktop.org>
Reviewed-by: José Fonseca <jfonseca@vmware.com>
diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c
index 1d5e50b..469a459 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -585,7 +585,7 @@ set_noalias(LLVMBuilderRef builder,
int nr_args)
{
int i;
- for(i = 0; i < Elements(arg_types); ++i)
+ for(i = 0; i < nr_args; ++i)
if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
LLVMAddAttribute(LLVMGetParam(function, i),
LLVMNoAliasAttribute);
commit 3d6892c4793ae1fd1ef82d40e4af8004393b0480
Author: Vinson Lee <vlee@freedesktop.org>
Date: Sun Aug 12 00:02:32 2012 -0700
scons: Add option to enable floating-point textures.
Signed-off-by: Vinson Lee <vlee@freedesktop.org>
Reviewed-by: José Fonseca <jfonseca@vmware.com>
diff --git a/common.py b/common.py
index 6121a41..6ff9608 100644
--- a/common.py
+++ b/common.py
@@ -98,5 +98,6 @@ def AddOptions(opts):
opts.Add(BoolOption('debug', 'DEPRECATED: debug build', 'yes'))
opts.Add(BoolOption('profile', 'DEPRECATED: profile build', 'no'))
opts.Add(BoolOption('quiet', 'DEPRECATED: profile build', 'yes'))
+ opts.Add(BoolOption('texture_float', 'enable floating-point textures and renderbuffers', 'no'))
if host_platform == 'windows':
opts.Add(EnumOption('MSVS_VERSION', 'MS Visual C++ version', None, allowed_values=('7.1', '8.0', '9.0')))
diff --git a/scons/gallium.py b/scons/gallium.py
index f4776d8..8fc17b3 100755
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -298,6 +298,10 @@ def generate(env):
cppdefines += ['BEOS_THREADS']
if env['embedded']:
cppdefines += ['PIPE_SUBSYSTEM_EMBEDDED']
+ if env['texture_float']:
+ print 'warning: Floating-point textures enabled.'
+ print 'warning: Please consult docs/patents.txt with your lawyer before building Mesa.'
+ cppdefines += ['TEXTURE_FLOAT_ENABLED']
env.Append(CPPDEFINES = cppdefines)
# C compiler options
commit 6a3ac03f2b80c80655d66b31c0218754f70156de
Author: Dave Airlie <airlied@redhat.com>
Date: Fri Mar 12 16:09:23 2010 +1000
glx/dri2: add dri2 prime support.
This adds support for having libGL pick a different driver for prime support.
The calling process sets the DRI_PRIME environment variable to the value
retrieved from the server's RandR provider calls (generally, DRI_PRIME=1 will
be the right answer).
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/src/glx/dri2.c b/src/glx/dri2.c
index b1b5013..d6b99db 100644
--- a/src/glx/dri2.c
+++ b/src/glx/dri2.c
@@ -269,6 +269,7 @@ DRI2Connect(Display * dpy, XID window, char **driverName, char **deviceName)
XExtDisplayInfo *info = DRI2FindDisplay(dpy);
xDRI2ConnectReply rep;
xDRI2ConnectReq *req;
+ char *prime;
XextCheckExtension(dpy, info, dri2ExtensionName, False);
@@ -277,7 +278,19 @@ DRI2Connect(Display * dpy, XID window, char **driverName, char **deviceName)
req->reqType = info->codes->major_opcode;
req->dri2ReqType = X_DRI2Connect;
req->window = window;
+
req->driverType = DRI2DriverDRI;
+#ifdef DRI2DriverPrimeShift
+ prime = getenv("DRI_PRIME");
+ if (prime) {
+ uint32_t primeid;
+ errno = 0;
+ primeid = strtoul(prime, NULL, 0);
+ if (errno == 0)
+ req->driverType |= ((primeid & DRI2DriverPrimeMask) << DRI2DriverPrimeShift);
+ }
+#endif
+
if (!_XReply(dpy, (xReply *) & rep, 0, xFalse)) {
UnlockDisplay(dpy);
SyncHandle();
commit 565a4e2a8625c79bde0eacf674a4f633151eeb0e
Author: Vincent Lejeune <vljn@ovi.com>
Date: Wed Aug 1 22:49:44 2012 +0200
radeon/llvm: Enable if-cvt
Signed-off-by: Tom Stellard <thomas.stellard@amd.com>
diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
index 6b50128..b97c0fe 100644
--- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
+#include <llvm/CodeGen/Passes.h>
using namespace llvm;
@@ -148,6 +149,8 @@ bool AMDGPUPassConfig::addPostRegAlloc() {
}
bool AMDGPUPassConfig::addPreSched2() {
+
+ addPass(IfConverterID);
return false;
}
commit a614979286f8d329af318c1e9fb067e17cab4315
Author: Vincent Lejeune <vljn@ovi.com>
Date: Wed Aug 1 22:49:43 2012 +0200
radeon/llvm: Add callbacks needed by if-cvt
Signed-off-by: Tom Stellard <thomas.stellard@amd.com>
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp
index 4a396ef..7a8a58e 100644
--- a/src/gallium/drivers/radeon/R600InstrInfo.cpp
+++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp
@@ -17,7 +17,6 @@
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "AMDILUtilityFunctions.h"
-#include "AMDGPUUtil.h"
#define GET_INSTRINFO_CTOR
#include "AMDGPUGenDFAPacketizer.inc"
@@ -391,7 +390,6 @@ R600InstrInfo::isPredicated(const MachineInstr *MI) const
if (idx < 0)
return false;
- MI->dump();
unsigned Reg = MI->getOperand(idx).getReg();
switch (Reg) {
default: return false;
@@ -407,3 +405,117 @@ R600InstrInfo::isPredicable(MachineInstr *MI) const
{
return AMDGPUInstrInfo::isPredicable(MI);
}
+
+
+bool
+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCyles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const{
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles,
+ unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles,
+ unsigned ExtraFCycles,
+ const BranchProbability &Probability) const
+{
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCyles,
+ const BranchProbability &Probability)
+ const
+{
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const
+{
+ return false;
+}
+
+
+bool
+R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
+{
+ MachineOperand &MO = Cond[1];
+ switch (MO.getImm()) {
+ case OPCODE_IS_ZERO_INT:
+ MO.setImm(OPCODE_IS_NOT_ZERO_INT);
+ break;
+ case OPCODE_IS_NOT_ZERO_INT:
+ MO.setImm(OPCODE_IS_ZERO_INT);
+ break;
+ case OPCODE_IS_ZERO:
+ MO.setImm(OPCODE_IS_NOT_ZERO);
+ break;
+ case OPCODE_IS_NOT_ZERO:
+ MO.setImm(OPCODE_IS_ZERO);
+ break;
+ default:
+ return true;
+ }
+
+ MachineOperand &MO2 = Cond[2];
+ switch (MO2.getReg()) {
+ case AMDGPU::PRED_SEL_ZERO:
+ MO2.setReg(AMDGPU::PRED_SEL_ONE);
+ break;
+ case AMDGPU::PRED_SEL_ONE:
+ MO2.setReg(AMDGPU::PRED_SEL_ZERO);
+ break;
+ default:
+ return true;
+ }
+ return false;
+}
+
+bool
+R600InstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const
+{
+ return isPredicateSetter(MI->getOpcode());
+}
+
+
+bool
+R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const
+{
+ return false;
+}
+
+
+bool
+R600InstrInfo::PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const
+{
+ int PIdx = MI->findFirstPredOperandIdx();
+
+ if (PIdx != -1) {
+ MachineOperand &PMO = MI->getOperand(PIdx);
+ PMO.setReg(Pred[2].getReg());
+ MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
+ return true;
+ }
+
+ return false;
+}
+
+int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const
+{
+ if (PredCost)
+ *PredCost = 2;
+ return 2;
+}
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h
index 2819b0b..6e184cc 100644
--- a/src/gallium/drivers/radeon/R600InstrInfo.h
+++ b/src/gallium/drivers/radeon/R600InstrInfo.h
@@ -64,6 +64,9 @@ namespace llvm {
DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
@@ -74,6 +77,40 @@ namespace llvm {
bool isPredicated(const MachineInstr *MI) const;
bool isPredicable(MachineInstr *MI) const;
+
+ bool
+ isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ const BranchProbability &Probability) const;
+
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const ;
+
+ bool
+ isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ const BranchProbability &Probability) const;
+
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const;
+
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ int getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = 0) const;
+
+ virtual int getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const { return 1;}
};
} // End llvm namespace
commit 0eca5fd919b0a31ea926b5f5072e5e56f7a55269
Author: Vincent Lejeune <vljn@ovi.com>
Date: Wed Aug 1 22:49:42 2012 +0200
radeon/llvm: Lower branch/branch_cond into predicated jump
Signed-off-by: Tom Stellard <thomas.stellard@amd.com>
diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp
index 2af0367..03a647e 100644
--- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp
@@ -97,124 +97,6 @@ bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
return false;
}
-bool AMDGPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
- bool retVal = true;
- return retVal;
- MachineBasicBlock::iterator iter = MBB.begin();
- if (!getNextBranchInstr(iter, MBB)) {
- retVal = false;
- } else {
- MachineInstr *firstBranch = iter;
- if (!getNextBranchInstr(++iter, MBB)) {
- if (firstBranch->getOpcode() == AMDGPU::BRANCH) {
- TBB = firstBranch->getOperand(0).getMBB();
- firstBranch->eraseFromParent();
- retVal = false;
- } else {
- TBB = firstBranch->getOperand(0).getMBB();
- FBB = *(++MBB.succ_begin());
- if (FBB == TBB) {
- FBB = *(MBB.succ_begin());
- }
- Cond.push_back(firstBranch->getOperand(1));
- retVal = false;
- }
- } else {
- MachineInstr *secondBranch = iter;
- if (!getNextBranchInstr(++iter, MBB)) {
- if (secondBranch->getOpcode() == AMDGPU::BRANCH) {
- TBB = firstBranch->getOperand(0).getMBB();
- Cond.push_back(firstBranch->getOperand(1));
- FBB = secondBranch->getOperand(0).getMBB();
- secondBranch->eraseFromParent();
- retVal = false;
- } else {
- assert(0 && "Should not have two consecutive conditional branches");
- }
- } else {
- MBB.getParent()->viewCFG();
- assert(0 && "Should not have three branch instructions in"
- " a single basic block");
- retVal = false;
- }
- }
- }
- return retVal;
-}
-
-unsigned int AMDGPUInstrInfo::getBranchInstr(const MachineOperand &op) const {
- const MachineInstr *MI = op.getParent();
-
- switch (MI->getDesc().OpInfo->RegClass) {
- default: // FIXME: fallthrough??
- case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
- case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
- };
-}
-
-unsigned int
-AMDGPUInstrInfo::InsertBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const
-{
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
- for (unsigned int x = 0; x < Cond.size(); ++x) {
- Cond[x].getParent()->dump();
- }
- if (FBB == 0) {
- if (Cond.empty()) {
- BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(TBB);
- } else {
- BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
- .addMBB(TBB).addReg(Cond[0].getReg());
- }
- return 1;
- } else {
- BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
- .addMBB(TBB).addReg(Cond[0].getReg());
- BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(FBB);
- }
- assert(0 && "Inserting two branches not supported");
- return 0;
-}
-
-unsigned int AMDGPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin()) {
- return 0;
- }
- --I;
- switch (I->getOpcode()) {
- default:
- return 0;
- ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
- case AMDGPU::BRANCH:
- I->eraseFromParent();
- break;
- }
- I = MBB.end();
-
- if (I == MBB.begin()) {
- return 1;
- }
- --I;
- switch (I->getOpcode()) {
- // FIXME: only one case??
- default:
- return 1;
- ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
- I->eraseFromParent();
- break;
- }
- return 2;
-}
-
MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
MachineBasicBlock::iterator tmp = MBB->end();
if (!MBB->size()) {
diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
index 28952cf..31400a7 100644
--- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
+++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
@@ -49,8 +49,6 @@ private:
TargetMachine &TM;
bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
MachineBasicBlock &MBB) const;
- unsigned int getBranchInstr(const MachineOperand &op) const;
-
public:
explicit AMDGPUInstrInfo(TargetMachine &tm);
@@ -77,18 +75,6 @@ public:
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const;
- bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
-
- unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-
- unsigned
- InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
diff --git a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
index 95a75ac..b167d62 100644
--- a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
+++ b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
@@ -301,6 +301,7 @@ public:
bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
private:
+ void reversePredicateSetter(typename BlockT::iterator);
void orderBlocks();
void printOrderedBlocks(llvm::raw_ostream &OS);
int patternMatch(BlockT *CurBlock);
@@ -1664,6 +1665,31 @@ void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
} //mergeLooplandBlock
template<class PassT>
+void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I)
+{
+ while (I--) {
+ if (I->getOpcode() == AMDGPU::PRED_X) {
+ switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
+ case OPCODE_IS_ZERO_INT:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
+ return;
+ case OPCODE_IS_NOT_ZERO_INT:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
+ return;
+ case OPCODE_IS_ZERO:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
+ return;
+ case OPCODE_IS_NOT_ZERO:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
+ return;
+ default:
+ assert(0 && "PRED_X Opcode invalid!");
+ }
+ }
+ }
+}
+
+template<class PassT>
void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
BlockT *exitBlk,
BlockT *exitLandBlk,
@@ -1695,14 +1721,17 @@ void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
//break_logical
- int newOpcode =
- (trueBranch == exitBlk) ? CFGTraits::getBreakNzeroOpcode(oldOpcode)
- : CFGTraits::getBreakZeroOpcode(oldOpcode);
+
+ if (trueBranch != exitBlk) {
+ reversePredicateSetter(branchInstrPos);
+ }
+ int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
} else {
- int newOpcode =
- (trueBranch == exitBlk) ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
- : CFGTraits::getBranchZeroOpcode(oldOpcode);
+ if (trueBranch != exitBlk) {
+ reversePredicateSetter(branchInstr);
+ }
+ int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
if (exitBlk != exitLandBlk) {
//splice is insert-before ...
@@ -2765,7 +2794,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
static int getBreakNzeroOpcode(int oldOpcode) {
switch(oldOpcode) {
- ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALNZ);
+ case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALNZ_i32;
default:
assert(0 && "internal error");
};
@@ -2774,7 +2803,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
static int getBreakZeroOpcode(int oldOpcode) {
switch(oldOpcode) {
- ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALZ);
+ case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALZ_i32;
default:
assert(0 && "internal error");
};
@@ -2783,6 +2812,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
static int getBranchNzeroOpcode(int oldOpcode) {
switch(oldOpcode) {
+ case AMDGPU::JUMP: return AMDGPU::IF_LOGICALNZ_i32;
ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ);
case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ;
default:
@@ -2793,6 +2823,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
static int getBranchZeroOpcode(int oldOpcode) {
switch(oldOpcode) {
+ case AMDGPU::JUMP: return AMDGPU::IF_LOGICALZ_i32;
ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ);
case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z;
default:
@@ -2804,7 +2835,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
static int getContinueNzeroOpcode(int oldOpcode)
{
switch(oldOpcode) {
- ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALNZ);
+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
default:
assert(0 && "internal error");
};
@@ -2813,7 +2844,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
static int getContinueZeroOpcode(int oldOpcode) {
switch(oldOpcode) {
- ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALZ);
+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
default:
assert(0 && "internal error");
};
@@ -2845,6 +2876,8 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
static bool isCondBranch(MachineInstr *instr) {
switch (instr->getOpcode()) {
+ case AMDGPU::JUMP:
+ return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
case AMDGPU::SI_IF_NZ:
case AMDGPU::SI_IF_Z:
@@ -2857,8 +2890,8 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
static bool isUncondBranch(MachineInstr *instr) {
switch (instr->getOpcode()) {
- case AMDGPU::BRANCH:
- break;
+ case AMDGPU::JUMP:
+ return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
default:
return false;
}
diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.td b/src/gallium/drivers/radeon/AMDILInstrInfo.td
index b2a0541..b683e49 100644
--- a/src/gallium/drivers/radeon/AMDILInstrInfo.td
+++ b/src/gallium/drivers/radeon/AMDILInstrInfo.td
@@ -217,7 +217,7 @@ include "AMDILIntrinsics.td"
// Custom Inserter for Branches and returns, this eventually will be a
// seperate pass
//===---------------------------------------------------------------------===//
-let isTerminator = 1 in {
+let isTerminator = 1, usesCustomInserter = 1 in {
def BRANCH : ILFormat<(outs), (ins brtarget:$target),
"; Pseudo unconditional branch instruction",
[(br bb:$target)]>;
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp
index 26f14fa..1f5f417 100644
--- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
+++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
@@ -210,6 +210,33 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
.addReg(t1, RegState::Implicit);
break;
}
+ case AMDGPU::BRANCH:
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ .addOperand(MI->getOperand(0))
+ .addReg(0);
+ break;
+ case AMDGPU::BRANCH_COND_f32:
+ MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
+ .addReg(AMDGPU::PREDICATE_BIT)
+ .addOperand(MI->getOperand(1))
+ .addImm(OPCODE_IS_ZERO);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ break;
+ case AMDGPU::BRANCH_COND_i32:
+ MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
+ .addReg(AMDGPU::PREDICATE_BIT)
+ .addOperand(MI->getOperand(1))
+ .addImm(OPCODE_IS_ZERO_INT);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ break;
}
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp
index c807d5c..4a396ef 100644
--- a/src/gallium/drivers/radeon/R600InstrInfo.cpp
+++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp
@@ -17,6 +17,7 @@
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "AMDILUtilityFunctions.h"
+#include "AMDGPUUtil.h"
#define GET_INSTRINFO_CTOR
#include "AMDGPUGenDFAPacketizer.inc"
@@ -94,6 +95,8 @@ unsigned R600InstrInfo::getIEQOpcode() const
bool R600InstrInfo::isMov(unsigned Opcode) const
{
+
+
switch(Opcode) {
default: return false;
case AMDGPU::MOV:
@@ -188,6 +191,199 @@ DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}
+static bool
+isPredicateSetter(unsigned opcode)
+{
+ switch (opcode) {
+ case AMDGPU::PRED_X:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static MachineInstr *
+findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I)
+{
+ while (I != MBB.begin()) {
+ --I;
+ MachineInstr *MI = I;
+ if (isPredicateSetter(MI->getOpcode()))
+ return MI;
+ }
+
+ return NULL;
+}
+
+bool
+R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const
+{
+ // Most of the following comes from the ARM implementation of AnalyzeBranch
+
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
+ return false;
+ }
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() ||
+ static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
+ if (LastOpc == AMDGPU::JUMP) {
+ if(!isPredicated(LastInst)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else {
+ MachineInstr *predSet = I;
+ while (!isPredicateSetter(predSet->getOpcode())) {
+ predSet = --I;
+ }
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(predSet->getOperand(1));
+ Cond.push_back(predSet->getOperand(2));
+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ return false;
+ }
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If the block ends with a B and a Bcc, handle it.
+ if (SecondLastOpc == AMDGPU::JUMP &&
+ isPredicated(SecondLastInst) &&
+ LastOpc == AMDGPU::JUMP &&
+ !isPredicated(LastInst)) {
+ MachineInstr *predSet = --I;
+ while (!isPredicateSetter(predSet->getOpcode())) {
+ predSet = --I;
+ }
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ FBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(predSet->getOperand(1));
+ Cond.push_back(predSet->getOperand(2));
+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
+ const MachineInstr *MI = op.getParent();
+
+ switch (MI->getDesc().OpInfo->RegClass) {
+ default: // FIXME: fallthrough??
+ case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
+ case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
+ };
+}
+
+unsigned
+R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const
+{
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ if (FBB == 0) {
+ if (Cond.empty()) {
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
+ return 1;
+ } else {
+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+ assert(PredSet && "No previous predicate !");
+ PredSet->getOperand(1).addTargetFlag(1<<4);
+ PredSet->getOperand(2).setImm(Cond[1].getImm());
+
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP))
+ .addMBB(TBB)
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ return 1;
+ }
+ } else {
+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+ assert(PredSet && "No previous predicate !");
+ PredSet->getOperand(1).addTargetFlag(1<<4);
+ PredSet->getOperand(2).setImm(Cond[1].getImm());
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP))
+ .addMBB(TBB)
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
+ return 2;
+ }
+}
+
+unsigned
+R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
+{
+
+ // Note : we leave PRED* instructions there.
+ // They may be needed when predicating instructions.
+
+ MachineBasicBlock::iterator I = MBB.end();
+
+ if (I == MBB.begin()) {
+ return 0;
+ }
+ --I;
+ switch (I->getOpcode()) {
+ default:
+ return 0;
+ case AMDGPU::JUMP:
+ if (isPredicated(I)) {
+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+ char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
+ predSet->getOperand(1).setTargetFlags(flag);
+ }
+ I->eraseFromParent();
+ break;
+ }
+ I = MBB.end();
+
+ if (I == MBB.begin()) {
+ return 1;
+ }
+ --I;
+ switch (I->getOpcode()) {
+ // FIXME: only one case??
+ default:
+ return 1;
+ case AMDGPU::JUMP:
+ if (isPredicated(I)) {
+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+ char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
+ predSet->getOperand(1).setTargetFlags(flag);
+ }
+ I->eraseFromParent();
+ break;
+ }
+ return 2;
+}
+
bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const
{
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h
index 9bdda7a..2819b0b 100644
--- a/src/gallium/drivers/radeon/R600InstrInfo.h
+++ b/src/gallium/drivers/radeon/R600InstrInfo.h
@@ -34,6 +34,8 @@ namespace llvm {
const R600RegisterInfo RI;
AMDGPUTargetMachine &TM;
+ int getBranchInstr(const MachineOperand &op) const;
+
public:
explicit R600InstrInfo(AMDGPUTargetMachine &tm);
@@ -62,6 +64,13 @@ namespace llvm {
DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const;
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
+
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
bool isPredicated(const MachineInstr *MI) const;
bool isPredicable(MachineInstr *MI) const;
commit 6db2e9fdb0a35e27e6fc86a1485918b78717a425
Author: Vincent Lejeune <vljn@ovi.com>
Date: Wed Aug 1 22:49:41 2012 +0200
radeon/llvm: Add a predicated JUMP instruction
Signed-off-by: Tom Stellard <thomas.stellard@amd.com>
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
index 6f2ab1f..1819742 100644
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -121,6 +121,15 @@ def PRED_X : AMDGPUInst <(outs R600_Predicate_Bit:$dst),
+let isTerminator = 1, isBranch = 1 in {
+def JUMP : InstR600 <0x10,
+ (outs),
+ (ins brtarget:$target, R600_Pred:$p),
+ "JUMP $target ($p)",
+ [], AnyALU
+ >;
+}
+
class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern,
InstrItinClass itin = VecALU> :
InstR600 <inst,
commit 8263408a91b6b3beb5af5de6bdc7e5d13197a268
Author: Vincent Lejeune <vljn@ovi.com>
Date: Wed Aug 1 22:49:40 2012 +0200
radeon/llvm: Support for predicate bit
Tom Stellard:
- A few changes to predicate register defs
Signed-off-by: Tom Stellard <thomas.stellard@amd.com>
diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
index 7232c0b..28952cf 100644
--- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
+++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
@@ -29,6 +29,12 @@
#define MO_FLAG_NEG (1 << 1)
#define MO_FLAG_ABS (1 << 2)
#define MO_FLAG_MASK (1 << 3)
+#define MO_FLAG_PUSH (1 << 4)
+
+#define OPCODE_IS_ZERO_INT 0x00000045
+#define OPCODE_IS_NOT_ZERO_INT 0x00000042
Reply to: