[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

mesa: Changes to 'upstream-experimental'



Rebased ref, commits from common ancestor:
commit 1597176f7090eea73f41b3114ae2a02a50ac7a12
Author: Vinson Lee <vlee@freedesktop.org>
Date:   Sun Aug 12 00:20:21 2012 -0700

    llvmpipe: Silence Coverity incorrect sizeof expression defect.
    
    Signed-off-by: Vinson Lee <vlee@freedesktop.org>
    Reviewed-by: José Fonseca <jfonseca@vmware.com>

diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c
index 1d5e50b..469a459 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -585,7 +585,7 @@ set_noalias(LLVMBuilderRef builder,
 	    int nr_args)
 {
    int i;
-   for(i = 0; i < Elements(arg_types); ++i)
+   for(i = 0; i < nr_args; ++i)
       if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
          LLVMAddAttribute(LLVMGetParam(function, i),
 			  LLVMNoAliasAttribute);

commit 3d6892c4793ae1fd1ef82d40e4af8004393b0480
Author: Vinson Lee <vlee@freedesktop.org>
Date:   Sun Aug 12 00:02:32 2012 -0700

    scons: Add option to enable floating-point textures.
    
    Signed-off-by: Vinson Lee <vlee@freedesktop.org>
    Reviewed-by: José Fonseca <jfonseca@vmware.com>

diff --git a/common.py b/common.py
index 6121a41..6ff9608 100644
--- a/common.py
+++ b/common.py
@@ -98,5 +98,6 @@ def AddOptions(opts):
 	opts.Add(BoolOption('debug', 'DEPRECATED: debug build', 'yes'))
 	opts.Add(BoolOption('profile', 'DEPRECATED: profile build', 'no'))
 	opts.Add(BoolOption('quiet', 'DEPRECATED: profile build', 'yes'))
+	opts.Add(BoolOption('texture_float', 'enable floating-point textures and renderbuffers', 'no'))
 	if host_platform == 'windows':
 		opts.Add(EnumOption('MSVS_VERSION', 'MS Visual C++ version', None, allowed_values=('7.1', '8.0', '9.0')))
diff --git a/scons/gallium.py b/scons/gallium.py
index f4776d8..8fc17b3 100755
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -298,6 +298,10 @@ def generate(env):
         cppdefines += ['BEOS_THREADS']
     if env['embedded']:
         cppdefines += ['PIPE_SUBSYSTEM_EMBEDDED']
+    if env['texture_float']:
+        print 'warning: Floating-point textures enabled.'
+        print 'warning: Please consult docs/patents.txt with your lawyer before building Mesa.'
+        cppdefines += ['TEXTURE_FLOAT_ENABLED']
     env.Append(CPPDEFINES = cppdefines)
 
     # C compiler options

commit 6a3ac03f2b80c80655d66b31c0218754f70156de
Author: Dave Airlie <airlied@redhat.com>
Date:   Fri Mar 12 16:09:23 2010 +1000

    glx/dri2: add dri2 prime support.
    
    This adds support for having libGL pick a different driver for prime support.
    
    DRI_PRIME env var is set to the value retrieved from the server randr
    provider calls, by the calling process. (generally DRI_PRIME=1 will be
    the right answer).
    
    Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/src/glx/dri2.c b/src/glx/dri2.c
index b1b5013..d6b99db 100644
--- a/src/glx/dri2.c
+++ b/src/glx/dri2.c
@@ -269,6 +269,7 @@ DRI2Connect(Display * dpy, XID window, char **driverName, char **deviceName)
    XExtDisplayInfo *info = DRI2FindDisplay(dpy);
    xDRI2ConnectReply rep;
    xDRI2ConnectReq *req;
+   char *prime;
 
    XextCheckExtension(dpy, info, dri2ExtensionName, False);
 
@@ -277,7 +278,19 @@ DRI2Connect(Display * dpy, XID window, char **driverName, char **deviceName)
    req->reqType = info->codes->major_opcode;
    req->dri2ReqType = X_DRI2Connect;
    req->window = window;
+
    req->driverType = DRI2DriverDRI;
+#ifdef DRI2DriverPrimeShift
+   prime = getenv("DRI_PRIME");
+   if (prime) {
+      uint32_t primeid;
+      errno = 0;
+      primeid = strtoul(prime, NULL, 0);
+      if (errno == 0)
+         req->driverType |= ((primeid & DRI2DriverPrimeMask) << DRI2DriverPrimeShift);
+   }
+#endif
+
    if (!_XReply(dpy, (xReply *) & rep, 0, xFalse)) {
       UnlockDisplay(dpy);
       SyncHandle();

commit 565a4e2a8625c79bde0eacf674a4f633151eeb0e
Author: Vincent Lejeune <vljn@ovi.com>
Date:   Wed Aug 1 22:49:44 2012 +0200

    radeon/llvm: Enable if-cvt
    
    Signed-off-by: Tom Stellard <thomas.stellard@amd.com>

diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
index 6b50128..b97c0fe 100644
--- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Support/raw_os_ostream.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/Scalar.h"
+#include <llvm/CodeGen/Passes.h>
 
 using namespace llvm;
 
@@ -148,6 +149,8 @@ bool AMDGPUPassConfig::addPostRegAlloc() {
 }
 
 bool AMDGPUPassConfig::addPreSched2() {
+
+  addPass(IfConverterID);
   return false;
 }
 

commit a614979286f8d329af318c1e9fb067e17cab4315
Author: Vincent Lejeune <vljn@ovi.com>
Date:   Wed Aug 1 22:49:43 2012 +0200

    radeon/llvm: Add callbacks needed by if-cvt
    
    Signed-off-by: Tom Stellard <thomas.stellard@amd.com>

diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp
index 4a396ef..7a8a58e 100644
--- a/src/gallium/drivers/radeon/R600InstrInfo.cpp
+++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp
@@ -17,7 +17,6 @@
 #include "R600RegisterInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "AMDILUtilityFunctions.h"
-#include "AMDGPUUtil.h"
 
 #define GET_INSTRINFO_CTOR
 #include "AMDGPUGenDFAPacketizer.inc"
@@ -391,7 +390,6 @@ R600InstrInfo::isPredicated(const MachineInstr *MI) const
   if (idx < 0)
     return false;
 
-  MI->dump();
   unsigned Reg = MI->getOperand(idx).getReg();
   switch (Reg) {
   default: return false;
@@ -407,3 +405,117 @@ R600InstrInfo::isPredicable(MachineInstr *MI) const
 {
   return AMDGPUInstrInfo::isPredicable(MI);
 }
+
+
+bool
+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+                                   unsigned NumCyles,
+                                   unsigned ExtraPredCycles,
+                                   const BranchProbability &Probability) const{
+  return true;
+}
+
+bool
+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
+                                   unsigned NumTCycles,
+                                   unsigned ExtraTCycles,
+                                   MachineBasicBlock &FMBB,
+                                   unsigned NumFCycles,
+                                   unsigned ExtraFCycles,
+                                   const BranchProbability &Probability) const
+{
+  return true;
+}
+
+bool
+R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+                                         unsigned NumCyles,
+                                         const BranchProbability &Probability)
+                                         const
+{
+  return true;
+}
+
+bool
+R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+                                         MachineBasicBlock &FMBB) const
+{
+  return false;
+}
+
+
+bool
+R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
+{
+  MachineOperand &MO = Cond[1];
+  switch (MO.getImm()) {
+  case OPCODE_IS_ZERO_INT:
+    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
+    break;
+  case OPCODE_IS_NOT_ZERO_INT:
+    MO.setImm(OPCODE_IS_ZERO_INT);
+    break;
+  case OPCODE_IS_ZERO:
+    MO.setImm(OPCODE_IS_NOT_ZERO);
+    break;
+  case OPCODE_IS_NOT_ZERO:
+    MO.setImm(OPCODE_IS_ZERO);
+    break;
+  default:
+    return true;
+  }
+
+  MachineOperand &MO2 = Cond[2];
+  switch (MO2.getReg()) {
+  case AMDGPU::PRED_SEL_ZERO:
+    MO2.setReg(AMDGPU::PRED_SEL_ONE);
+    break;
+  case AMDGPU::PRED_SEL_ONE:
+    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
+    break;
+  default:
+    return true;
+  }
+  return false;
+}
+
+bool
+R600InstrInfo::DefinesPredicate(MachineInstr *MI,
+                                std::vector<MachineOperand> &Pred) const
+{
+  return isPredicateSetter(MI->getOpcode());
+}
+
+
+bool
+R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+                       const SmallVectorImpl<MachineOperand> &Pred2) const
+{
+  return false;
+}
+
+
+bool
+R600InstrInfo::PredicateInstruction(MachineInstr *MI,
+                      const SmallVectorImpl<MachineOperand> &Pred) const
+{
+  int PIdx = MI->findFirstPredOperandIdx();
+
+  if (PIdx != -1) {
+    MachineOperand &PMO = MI->getOperand(PIdx);
+    PMO.setReg(Pred[2].getReg());
+    MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
+    return true;
+  }
+
+  return false;
+}
+
+int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+                                   const MachineInstr *MI,
+                                   unsigned *PredCost) const
+{
+  if (PredCost)
+    *PredCost = 2;
+  return 2;
+}
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h
index 2819b0b..6e184cc 100644
--- a/src/gallium/drivers/radeon/R600InstrInfo.h
+++ b/src/gallium/drivers/radeon/R600InstrInfo.h
@@ -64,6 +64,9 @@ namespace llvm {
 
   DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
                                            const ScheduleDAG *DAG) const;
+
+  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
   bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
                      SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
 
@@ -74,6 +77,40 @@ namespace llvm {
   bool isPredicated(const MachineInstr *MI) const;
 
   bool isPredicable(MachineInstr *MI) const;
+
+  bool
+   isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+                             const BranchProbability &Probability) const;
+
+  bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+                           unsigned ExtraPredCycles,
+                           const BranchProbability &Probability) const ;
+
+  bool
+   isProfitableToIfCvt(MachineBasicBlock &TMBB,
+                       unsigned NumTCycles, unsigned ExtraTCycles,
+                       MachineBasicBlock &FMBB,
+                       unsigned NumFCycles, unsigned ExtraFCycles,
+                       const BranchProbability &Probability) const;
+
+  bool DefinesPredicate(MachineInstr *MI,
+                                  std::vector<MachineOperand> &Pred) const;
+
+  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+                         const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+  bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+                                          MachineBasicBlock &FMBB) const;
+
+  bool PredicateInstruction(MachineInstr *MI,
+                        const SmallVectorImpl<MachineOperand> &Pred) const;
+
+  int getInstrLatency(const InstrItineraryData *ItinData,
+                              const MachineInstr *MI,
+                              unsigned *PredCost = 0) const;
+
+  virtual int getInstrLatency(const InstrItineraryData *ItinData,
+                              SDNode *Node) const { return 1;}
 };
 
 } // End llvm namespace

commit 0eca5fd919b0a31ea926b5f5072e5e56f7a55269
Author: Vincent Lejeune <vljn@ovi.com>
Date:   Wed Aug 1 22:49:42 2012 +0200

    radeon/llvm: Lower branch/branch_cond into predicated jump
    
    Signed-off-by: Tom Stellard <thomas.stellard@amd.com>

diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp
index 2af0367..03a647e 100644
--- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp
@@ -97,124 +97,6 @@ bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
   return false;
 }
 
-bool AMDGPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
-                                   MachineBasicBlock *&TBB,
-                                   MachineBasicBlock *&FBB,
-                                   SmallVectorImpl<MachineOperand> &Cond,
-                                   bool AllowModify) const {
-  bool retVal = true;
-  return retVal;
-  MachineBasicBlock::iterator iter = MBB.begin();
-  if (!getNextBranchInstr(iter, MBB)) {
-    retVal = false;
-  } else {
-    MachineInstr *firstBranch = iter;
-    if (!getNextBranchInstr(++iter, MBB)) {
-      if (firstBranch->getOpcode() == AMDGPU::BRANCH) {
-        TBB = firstBranch->getOperand(0).getMBB();
-        firstBranch->eraseFromParent();
-        retVal = false;
-      } else {
-        TBB = firstBranch->getOperand(0).getMBB();
-        FBB = *(++MBB.succ_begin());
-        if (FBB == TBB) {
-          FBB = *(MBB.succ_begin());
-        }
-        Cond.push_back(firstBranch->getOperand(1));
-        retVal = false;
-      }
-    } else {
-      MachineInstr *secondBranch = iter;
-      if (!getNextBranchInstr(++iter, MBB)) {
-        if (secondBranch->getOpcode() == AMDGPU::BRANCH) {
-          TBB = firstBranch->getOperand(0).getMBB();
-          Cond.push_back(firstBranch->getOperand(1));
-          FBB = secondBranch->getOperand(0).getMBB();
-          secondBranch->eraseFromParent();
-          retVal = false;
-        } else {
-          assert(0 && "Should not have two consecutive conditional branches");
-        }
-      } else {
-        MBB.getParent()->viewCFG();
-        assert(0 && "Should not have three branch instructions in"
-               " a single basic block");
-        retVal = false;
-      }
-    }
-  }
-  return retVal;
-}
-
-unsigned int AMDGPUInstrInfo::getBranchInstr(const MachineOperand &op) const {
-  const MachineInstr *MI = op.getParent();
-  
-  switch (MI->getDesc().OpInfo->RegClass) {
-  default: // FIXME: fallthrough??
-  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
-  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
-  };
-}
-
-unsigned int
-AMDGPUInstrInfo::InsertBranch(MachineBasicBlock &MBB,
-                             MachineBasicBlock *TBB,
-                             MachineBasicBlock *FBB,
-                             const SmallVectorImpl<MachineOperand> &Cond,
-                             DebugLoc DL) const
-{
-  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
-  for (unsigned int x = 0; x < Cond.size(); ++x) {
-    Cond[x].getParent()->dump();
-  }
-  if (FBB == 0) {
-    if (Cond.empty()) {
-      BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(TBB);
-    } else {
-      BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
-        .addMBB(TBB).addReg(Cond[0].getReg());
-    }
-    return 1;
-  } else {
-    BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
-      .addMBB(TBB).addReg(Cond[0].getReg());
-    BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(FBB);
-  }
-  assert(0 && "Inserting two branches not supported");
-  return 0;
-}
-
-unsigned int AMDGPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin()) {
-    return 0;
-  }
-  --I;
-  switch (I->getOpcode()) {
-  default:
-    return 0;
-    ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
-  case AMDGPU::BRANCH:
-    I->eraseFromParent();
-    break;
-  }
-  I = MBB.end();
-  
-  if (I == MBB.begin()) {
-    return 1;
-  }
-  --I;
-  switch (I->getOpcode()) {
-    // FIXME: only one case??
-  default:
-    return 1;
-    ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
-    I->eraseFromParent();
-    break;
-  }
-  return 2;
-}
-
 MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
   MachineBasicBlock::iterator tmp = MBB->end();
   if (!MBB->size()) {
diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
index 28952cf..31400a7 100644
--- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
+++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
@@ -49,8 +49,6 @@ private:
   TargetMachine &TM;
   bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
                           MachineBasicBlock &MBB) const;
-  unsigned int getBranchInstr(const MachineOperand &op) const;
-
 public:
   explicit AMDGPUInstrInfo(TargetMachine &tm);
 
@@ -77,18 +75,6 @@ public:
                         MachineBasicBlock::iterator &MBBI,
                         LiveVariables *LV) const;
 
-  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
-                     MachineBasicBlock *&FBB,
-                     SmallVectorImpl<MachineOperand> &Cond,
-                     bool AllowModify) const;
-
-  unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-
-  unsigned
-  InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
-               MachineBasicBlock *FBB,
-               const SmallVectorImpl<MachineOperand> &Cond,
-               DebugLoc DL) const;
 
   virtual void copyPhysReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, DebugLoc DL,
diff --git a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
index 95a75ac..b167d62 100644
--- a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
+++ b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
@@ -301,6 +301,7 @@ public:
   bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
 
 private:
+  void reversePredicateSetter(typename BlockT::iterator);
   void   orderBlocks();
   void   printOrderedBlocks(llvm::raw_ostream &OS);
   int patternMatch(BlockT *CurBlock);
@@ -1664,6 +1665,31 @@ void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
 } //mergeLooplandBlock
 
 template<class PassT>
+void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I)
+{
+  while (I--) {
+    if (I->getOpcode() == AMDGPU::PRED_X) {
+      switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
+      case OPCODE_IS_ZERO_INT:
+        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
+        return;
+      case OPCODE_IS_NOT_ZERO_INT:
+        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
+        return;
+      case OPCODE_IS_ZERO:
+        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
+        return;
+      case OPCODE_IS_NOT_ZERO:
+        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
+        return;
+      default:
+        assert(0 && "PRED_X Opcode invalid!");
+      }
+    }
+  }
+}
+
+template<class PassT>
 void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
                                                  BlockT *exitBlk,
                                                  BlockT *exitLandBlk,
@@ -1695,14 +1721,17 @@ void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
 
   if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
     //break_logical
-    int newOpcode =
-    (trueBranch == exitBlk) ? CFGTraits::getBreakNzeroOpcode(oldOpcode)
-                            : CFGTraits::getBreakZeroOpcode(oldOpcode);
+
+    if (trueBranch != exitBlk) {
+      reversePredicateSetter(branchInstrPos);
+    }
+    int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
     CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
   } else {
-    int newOpcode =
-    (trueBranch == exitBlk) ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
-                            : CFGTraits::getBranchZeroOpcode(oldOpcode);
+    if (trueBranch != exitBlk) {
+      reversePredicateSetter(branchInstr);
+    }
+    int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
     CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
     if (exitBlk != exitLandBlk) {
       //splice is insert-before ...
@@ -2765,7 +2794,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
 
   static int getBreakNzeroOpcode(int oldOpcode) {
     switch(oldOpcode) {
-      ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALNZ);
+      case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALNZ_i32;
     default:
       assert(0 && "internal error");
     };
@@ -2774,7 +2803,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
 
   static int getBreakZeroOpcode(int oldOpcode) {
     switch(oldOpcode) {
-      ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALZ);
+      case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALZ_i32;
     default:
       assert(0 && "internal error");
     };
@@ -2783,6 +2812,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
 
   static int getBranchNzeroOpcode(int oldOpcode) {
     switch(oldOpcode) {
+    case AMDGPU::JUMP: return AMDGPU::IF_LOGICALNZ_i32;
       ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ);
       case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ;
     default:
@@ -2793,6 +2823,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
 
   static int getBranchZeroOpcode(int oldOpcode) {
     switch(oldOpcode) {
+    case AMDGPU::JUMP: return AMDGPU::IF_LOGICALZ_i32;
       ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ);
       case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z;
     default:
@@ -2804,7 +2835,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
   static int getContinueNzeroOpcode(int oldOpcode)
   {
     switch(oldOpcode) {
-      ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALNZ);
+      case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
       default:
         assert(0 && "internal error");
     };
@@ -2813,7 +2844,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
 
   static int getContinueZeroOpcode(int oldOpcode) {
     switch(oldOpcode) {
-      ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALZ);
+      case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
     default:
       assert(0 && "internal error");
     };
@@ -2845,6 +2876,8 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
 
   static bool isCondBranch(MachineInstr *instr) {
     switch (instr->getOpcode()) {
+      case AMDGPU::JUMP:
+        return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
       ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
       case AMDGPU::SI_IF_NZ:
       case AMDGPU::SI_IF_Z:
@@ -2857,8 +2890,8 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
 
   static bool isUncondBranch(MachineInstr *instr) {
     switch (instr->getOpcode()) {
-    case AMDGPU::BRANCH:
-      break;
+    case AMDGPU::JUMP:
+      return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
     default:
       return false;
     }
diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.td b/src/gallium/drivers/radeon/AMDILInstrInfo.td
index b2a0541..b683e49 100644
--- a/src/gallium/drivers/radeon/AMDILInstrInfo.td
+++ b/src/gallium/drivers/radeon/AMDILInstrInfo.td
@@ -217,7 +217,7 @@ include "AMDILIntrinsics.td"
 // Custom Inserter for Branches and returns, this eventually will be a
 // seperate pass
 //===---------------------------------------------------------------------===//
-let isTerminator = 1 in {
+let isTerminator = 1, usesCustomInserter = 1 in {
   def BRANCH : ILFormat<(outs), (ins brtarget:$target),
       "; Pseudo unconditional branch instruction",
       [(br bb:$target)]>;
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp
index 26f14fa..1f5f417 100644
--- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
+++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
@@ -210,6 +210,33 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
               .addReg(t1, RegState::Implicit);
       break;
     }
+  case AMDGPU::BRANCH:
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+              .addOperand(MI->getOperand(0))
+              .addReg(0);
+      break;
+  case AMDGPU::BRANCH_COND_f32:
+    MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);
+
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
+            .addReg(AMDGPU::PREDICATE_BIT)
+            .addOperand(MI->getOperand(1))
+            .addImm(OPCODE_IS_ZERO);
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+            .addOperand(MI->getOperand(0))
+            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+    break;
+  case AMDGPU::BRANCH_COND_i32:
+    MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);
+
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
+            .addReg(AMDGPU::PREDICATE_BIT)
+            .addOperand(MI->getOperand(1))
+            .addImm(OPCODE_IS_ZERO_INT);
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+            .addOperand(MI->getOperand(0))
+            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+   break;
 
 
   }
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp
index c807d5c..4a396ef 100644
--- a/src/gallium/drivers/radeon/R600InstrInfo.cpp
+++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp
@@ -17,6 +17,7 @@
 #include "R600RegisterInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "AMDILUtilityFunctions.h"
+#include "AMDGPUUtil.h"
 
 #define GET_INSTRINFO_CTOR
 #include "AMDGPUGenDFAPacketizer.inc"
@@ -94,6 +95,8 @@ unsigned R600InstrInfo::getIEQOpcode() const
 
 bool R600InstrInfo::isMov(unsigned Opcode) const
 {
+
+
   switch(Opcode) {
   default: return false;
   case AMDGPU::MOV:
@@ -188,6 +191,199 @@ DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
   return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
 }
 
+static bool
+isPredicateSetter(unsigned opcode)
+{
+  switch (opcode) {
+  case AMDGPU::PRED_X:
+    return true;
+  default:
+    return false;
+  }
+}
+
+static MachineInstr *
+findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator I)
+{
+  while (I != MBB.begin()) {
+    --I;
+    MachineInstr *MI = I;
+    if (isPredicateSetter(MI->getOpcode()))
+      return MI;
+  }
+
+  return NULL;
+}
+
+bool
+R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+                             MachineBasicBlock *&TBB,
+                             MachineBasicBlock *&FBB,
+                             SmallVectorImpl<MachineOperand> &Cond,
+                             bool AllowModify) const
+{
+  // Most of the following comes from the ARM implementation of AnalyzeBranch
+
+  // If the block has no terminators, it just falls into the block after it.
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin())
+    return false;
+  --I;
+  while (I->isDebugValue()) {
+    if (I == MBB.begin())
+      return false;
+    --I;
+  }
+  if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
+    return false;
+  }
+
+  // Get the last instruction in the block.
+  MachineInstr *LastInst = I;
+
+  // If there is only one terminator instruction, process it.
+  unsigned LastOpc = LastInst->getOpcode();
+  if (I == MBB.begin() ||
+      static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
+    if (LastOpc == AMDGPU::JUMP) {
+      if(!isPredicated(LastInst)) {
+        TBB = LastInst->getOperand(0).getMBB();
+        return false;
+      } else {
+        MachineInstr *predSet = I;
+        while (!isPredicateSetter(predSet->getOpcode())) {
+          predSet = --I;
+        }
+        TBB = LastInst->getOperand(0).getMBB();
+        Cond.push_back(predSet->getOperand(1));
+        Cond.push_back(predSet->getOperand(2));
+        Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+        return false;
+      }
+    }
+    return true;  // Can't handle indirect branch.
+  }
+
+  // Get the instruction before it if it is a terminator.
+  MachineInstr *SecondLastInst = I;
+  unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+  // If the block ends with a B and a Bcc, handle it.
+  if (SecondLastOpc == AMDGPU::JUMP &&
+      isPredicated(SecondLastInst) &&
+      LastOpc == AMDGPU::JUMP &&
+      !isPredicated(LastInst)) {
+    MachineInstr *predSet = --I;
+    while (!isPredicateSetter(predSet->getOpcode())) {
+      predSet = --I;
+    }
+    TBB = SecondLastInst->getOperand(0).getMBB();
+    FBB = LastInst->getOperand(0).getMBB();
+    Cond.push_back(predSet->getOperand(1));
+    Cond.push_back(predSet->getOperand(2));
+    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+    return false;
+  }
+
+  // Otherwise, can't handle this.
+  return true;
+}
+
+int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
+  const MachineInstr *MI = op.getParent();
+
+  switch (MI->getDesc().OpInfo->RegClass) {
+  default: // FIXME: fallthrough??
+  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
+  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
+  };
+}
+
+unsigned
+R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
+                            MachineBasicBlock *TBB,
+                            MachineBasicBlock *FBB,
+                            const SmallVectorImpl<MachineOperand> &Cond,
+                            DebugLoc DL) const
+{
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+  if (FBB == 0) {
+    if (Cond.empty()) {
+      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
+      return 1;
+    } else {
+      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+      assert(PredSet && "No previous predicate !");
+      PredSet->getOperand(1).addTargetFlag(1<<4);
+      PredSet->getOperand(2).setImm(Cond[1].getImm());
+
+      BuildMI(&MBB, DL, get(AMDGPU::JUMP))
+             .addMBB(TBB)
+             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+      return 1;
+    }
+  } else {
+    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+    assert(PredSet && "No previous predicate !");
+    PredSet->getOperand(1).addTargetFlag(1<<4);
+    PredSet->getOperand(2).setImm(Cond[1].getImm());
+    BuildMI(&MBB, DL, get(AMDGPU::JUMP))
+            .addMBB(TBB)
+            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
+    return 2;
+  }
+}
+
+unsigned
+R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
+{
+
+  // Note : we leave PRED* instructions there.
+  // They may be needed when predicating instructions.
+
+  MachineBasicBlock::iterator I = MBB.end();
+
+  if (I == MBB.begin()) {
+    return 0;
+  }
+  --I;
+  switch (I->getOpcode()) {
+  default:
+    return 0;
+  case AMDGPU::JUMP:
+    if (isPredicated(I)) {
+      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+      char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
+      predSet->getOperand(1).setTargetFlags(flag);
+    }
+    I->eraseFromParent();
+    break;
+  }
+  I = MBB.end();
+
+  if (I == MBB.begin()) {
+    return 1;
+  }
+  --I;
+  switch (I->getOpcode()) {
+    // FIXME: only one case??
+  default:
+    return 1;
+  case AMDGPU::JUMP:
+    if (isPredicated(I)) {
+      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+      char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
+      predSet->getOperand(1).setTargetFlags(flag);
+    }
+    I->eraseFromParent();
+    break;
+  }
+  return 2;
+}
+
 bool
 R600InstrInfo::isPredicated(const MachineInstr *MI) const
 {
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h
index 9bdda7a..2819b0b 100644
--- a/src/gallium/drivers/radeon/R600InstrInfo.h
+++ b/src/gallium/drivers/radeon/R600InstrInfo.h
@@ -34,6 +34,8 @@ namespace llvm {
   const R600RegisterInfo RI;
   AMDGPUTargetMachine &TM;
 
+  int getBranchInstr(const MachineOperand &op) const;
+
   public:
   explicit R600InstrInfo(AMDGPUTargetMachine &tm);
 
@@ -62,6 +64,13 @@ namespace llvm {
 
   DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
                                            const ScheduleDAG *DAG) const;
+  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+                     SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
+
+  unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
+
+  unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
   bool isPredicated(const MachineInstr *MI) const;
 
   bool isPredicable(MachineInstr *MI) const;

commit 6db2e9fdb0a35e27e6fc86a1485918b78717a425
Author: Vincent Lejeune <vljn@ovi.com>
Date:   Wed Aug 1 22:49:41 2012 +0200

    radeon/llvm: Add a predicated JUMP instruction
    
    Signed-off-by: Tom Stellard <thomas.stellard@amd.com>

diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
index 6f2ab1f..1819742 100644
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -121,6 +121,15 @@ def PRED_X : AMDGPUInst <(outs R600_Predicate_Bit:$dst),
 
 
 
+let isTerminator = 1, isBranch = 1 in {
+def JUMP : InstR600 <0x10,
+          (outs),
+          (ins brtarget:$target, R600_Pred:$p),
+          "JUMP $target ($p)",
+          [], AnyALU
+  >;
+}
+
 class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern,
                       InstrItinClass itin = VecALU> :
   InstR600 <inst,

commit 8263408a91b6b3beb5af5de6bdc7e5d13197a268
Author: Vincent Lejeune <vljn@ovi.com>
Date:   Wed Aug 1 22:49:40 2012 +0200

    radeon/llvm: Support for predicate bit
    
    Tom Stellard:
      - A few changes to predicate register defs
    
    Signed-off-by: Tom Stellard <thomas.stellard@amd.com>

diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
index 7232c0b..28952cf 100644
--- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
+++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
@@ -29,6 +29,12 @@
 #define MO_FLAG_NEG   (1 << 1)
 #define MO_FLAG_ABS   (1 << 2)
 #define MO_FLAG_MASK  (1 << 3)
+#define MO_FLAG_PUSH  (1 << 4)
+
+#define OPCODE_IS_ZERO_INT 0x00000045
+#define OPCODE_IS_NOT_ZERO_INT 0x00000042


Reply to: