Bug#782868: GHC calling convention incorrect
Source: llvm-toolchain-3.5
Version: 1:3.5-10
Severity: normal
Tags: patch
LLVM implements a special calling convention which is used exclusively
by the Glasgow Haskell Compiler (GHC). The llvm-3.5.0 release had a bug
in the GHC calling convention for Arm resulting in Haskell programs
compiled for arm would segfault on being run. See the GHC bug tracker:
    https://ghc.haskell.org/trac/ghc/ticket/9920
    
Release 7.10 of GHC can *only* use the llvm-3.5, but version 3.5.0 has
this bug while 3.5.1 and later do not.
The patch in the above GHC trac ticket applied to the 3.5.0 tree fixes 
this. I have also taken the llvm-toolchain-3.5 debian sources, add the
above patch to the debian/patches/ directory (and updated the series
file) and built it. Unfortunately I can build this package to completion
because of an FTBFS bug (#782860) but the build proceeds far enough for
me to grab the file `build-llvm/Release/bin/llc` to test it and it works
fine.
I will attach the require patch to the ticket.
-- System Information:
Debian Release: 8.0
  APT prefers testing
  APT policy: (900, 'testing'), (800, 'unstable'), (500, 'testing-updates'), (500, 'experimental')
Architecture: amd64 (x86_64)
Foreign Architectures: i386, armhf, arm64
Kernel: Linux 3.16.0-4-amd64 (SMP w/4 CPU cores)
Locale: LANG=en_AU.UTF-8, LC_CTYPE=en_AU.UTF-8 (charmap=UTF-8) (ignored: LC_ALL set to en_AU.UTF-8)
Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)
Origin: https://github.com/llvm-mirror/llvm/commit/f7f88095a32d1ac5bc7778204fd9a37a9fb8082c
Author: Tim Northover <tnorthover@apple.com>
Date:   Mon Dec 1 17:46:39 2014 +0000
Subject: ARM: lower tail calls correctly when using GHC calling convention.
 Patch by Ben Gamari.
 This patch was pulled from the LLVM git mirror and tweaked slightly to apply
 cleanly to llvm version 3.5.
---
 lib/Target/ARM/ARMFrameLowering.cpp      | 91 +++++++++++++++++++-------------
 lib/Target/ARM/ARMFrameLowering.h        |  2 +
 test/CodeGen/ARM/ghc-tcreturn-lowered.ll | 21 ++++++++
 3 files changed, 76 insertions(+), 38 deletions(-)
 create mode 100644 test/CodeGen/ARM/ghc-tcreturn-lowered.ll
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index a67b360..e55f652 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -566,11 +566,59 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
     AFI->setShouldRestoreSPFromFP(true);
 }
 
+// Resolve TCReturn pseudo-instruction
+void ARMFrameLowering::fixTCReturn(MachineFunction &MF,
+                                   MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
+  unsigned RetOpcode = MBBI->getOpcode();
+  DebugLoc dl = MBBI->getDebugLoc();
+  const ARMBaseInstrInfo &TII =
+      *static_cast<const ARMBaseInstrInfo *>(MF.getTarget().getInstrInfo());
+
+  if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri))
+    return;
+
+  // Tail call return: adjust the stack pointer and jump to callee.
+  MBBI = MBB.getLastNonDebugInstr();
+  MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+  // Jump to label or value in register.
+  if (RetOpcode == ARM::TCRETURNdi) {
+    unsigned TCOpcode = STI.isThumb() ?
+             (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
+             ARM::TAILJMPd;
+    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+    if (JumpTarget.isGlobal())
+      MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+                           JumpTarget.getTargetFlags());
+    else {
+      assert(JumpTarget.isSymbol());
+      MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+                            JumpTarget.getTargetFlags());
+    }
+
+    // Add the default predicate in Thumb mode.
+    if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
+  } else if (RetOpcode == ARM::TCRETURNri) {
+    BuildMI(MBB, MBBI, dl,
+            TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
+      addReg(JumpTarget.getReg(), RegState::Kill);
+  }
+
+  MachineInstr *NewMI = std::prev(MBBI);
+  for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+    NewMI->addOperand(MBBI->getOperand(i));
+
+  // Delete the pseudo instruction TCRETURN.
+  MBB.erase(MBBI);
+  MBBI = NewMI;
+}
+
 void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                     MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
   assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
-  unsigned RetOpcode = MBBI->getOpcode();
   DebugLoc dl = MBBI->getDebugLoc();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -588,8 +636,10 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
 
   // All calls are tail calls in GHC calling conv, and functions have no
   // prologue/epilogue.
-  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+  if (MF.getFunction()->getCallingConv() == CallingConv::GHC) {
+    fixTCReturn(MF, MBB);
     return;
+  }
 
   if (!AFI->hasStackFrame()) {
     if (NumBytes - ArgRegsSaveSize != 0)
@@ -661,42 +711,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
     if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
   }
 
-  if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri) {
-    // Tail call return: adjust the stack pointer and jump to callee.
-    MBBI = MBB.getLastNonDebugInstr();
-    MachineOperand &JumpTarget = MBBI->getOperand(0);
-
-    // Jump to label or value in register.
-    if (RetOpcode == ARM::TCRETURNdi) {
-      unsigned TCOpcode = STI.isThumb() ?
-               (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
-               ARM::TAILJMPd;
-      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
-      if (JumpTarget.isGlobal())
-        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
-                             JumpTarget.getTargetFlags());
-      else {
-        assert(JumpTarget.isSymbol());
-        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
-                              JumpTarget.getTargetFlags());
-      }
-
-      // Add the default predicate in Thumb mode.
-      if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
-    } else if (RetOpcode == ARM::TCRETURNri) {
-      BuildMI(MBB, MBBI, dl,
-              TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
-        addReg(JumpTarget.getReg(), RegState::Kill);
-    }
-
-    MachineInstr *NewMI = std::prev(MBBI);
-    for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
-      NewMI->addOperand(MBBI->getOperand(i));
-
-    // Delete the pseudo instruction TCRETURN.
-    MBB.erase(MBBI);
-    MBBI = NewMI;
-  }
+  fixTCReturn(MF, MBB);
 
   if (ArgRegsSaveSize)
     emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 709afbc..74629c3 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -31,6 +31,8 @@ public:
   void emitPrologue(MachineFunction &MF) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
 
+  void fixTCReturn(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
   bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MI,
                                  const std::vector<CalleeSavedInfo> &CSI,
diff --git a/test/CodeGen/ARM/ghc-tcreturn-lowered.ll b/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
new file mode 100644
index 0000000..6d2564b
--- /dev/null
+++ b/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=thumbv7-eabi -o - %s | FileCheck %s
+
+declare cc 10 void @g()
+
+define cc 10 void @test_direct_tail() {
+; CHECK-LABEL: test_direct_tail:
+; CHECK: b g
+
+  tail call cc10 void @g()
+  ret void
+}
+
+@ind_func = global void()* zeroinitializer
+
+define cc 10 void @test_indirect_tail() {
+; CHECK-LABEL: test_indirect_tail:
+; CHECK: bx {{r[0-9]+}}
+  %func = load void()** @ind_func
+  tail call cc10 void()* %func()
+  ret void
+}
-- 
2.1.3
Reply to: