[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

GHC arm64 porting



Just a heads-up: after a few earlier attempts where I had to give up,
I'm having another go at bootstrapping GHC on arm64.  I know that's not
in Debian yet, but it's on debian-ports and in Ubuntu.  arm64 hardware
is still pretty rare and emulation is slow, but I happen to have
hardware access via work so I'm in a good position to attempt this.

The upstream bug is https://ghc.haskell.org/trac/ghc/ticket/7942; I've
been exchanging mail with Karel Gardas, who's involved in that bug trail,
and he's been very helpful in getting me going.  Cross-bootstrapping
7.6 proved to be unreasonably difficult, but cross-bootstrapping 7.8
with some arm64 patches appears to be manageable with only a few hacks.
At this point I have:

  ghc-aarch64/inplace/lib/bin/ghc-stage2: ELF 64-bit LSB  executable, ARM aarch64, version 1 (SYSV), dynamically linked (uses shared libs), for GNU/Linux 3.7.0, BuildID[sha1]=445217fc627ed34b7e4f7346c0eb4e45f3ebaa44, not stripped

I'm now attempting to use this to build 7.6 with arm64 patches.
Building 7.6 with 7.8 apparently isn't really supported upstream, but so
far I think I've managed to get past most of the problems I've
encountered.  It's taking a while.

I've attached the arm64-specific parts of the current patch I'm working
with, most of which are due to Karel Gardas.  Assuming that I actually
manage to build 7.6 (which is by no means a given), would it be
acceptable to apply something like this patch set to the Debian
packaging?

Thanks,

-- 
Colin Watson                                       [cjwatson@debian.org]
--- ghc-7.6.3.orig/aclocal.m4
+++ ghc-7.6.3/aclocal.m4
@@ -173,6 +173,9 @@ AC_DEFUN([FPTOOLS_SET_HASKELL_PLATFORM_V
             GET_ARM_ISA()
             test -z "[$]2" || eval "[$]2=\"ArchARM {armISA = \$ARM_ISA, armISAExt = \$ARM_ISA_EXT, armABI = \$ARM_ABI}\""
             ;;
+        arm64)
+            test -z "[$]2" || eval "[$]2=ArchARM64"
+            ;;
         alpha|mips|mipseb|mipsel|hppa|hppa1_1|ia64|m68k|rs6000|s390|s390x|sparc64|vax)
             test -z "[$]2" || eval "[$]2=ArchUnknown"
             ;;
@@ -1841,6 +1844,9 @@ case "$1" in
   arm*)
     $2="arm"
     ;;
+  aarch64*)
+    $2="arm64"
+    ;;
   hppa1.1*)
     $2="hppa1_1"
     ;;
--- ghc-7.6.3.orig/compiler/nativeGen/AsmCodeGen.lhs
+++ ghc-7.6.3/compiler/nativeGen/AsmCodeGen.lhs
@@ -203,6 +203,8 @@ nativeCodeGen dflags h us cmms
                      }
                  ArchARM _ _ _ ->
                      panic "nativeCodeGen: No NCG for ARM"
+                 ArchARM64 ->
+                     panic "nativeCodeGen: No NCG for ARM64"
                  ArchPPC_64 ->
                      panic "nativeCodeGen: No NCG for PPC 64"
                  ArchUnknown ->
--- ghc-7.6.3.orig/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
+++ ghc-7.6.3/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
@@ -113,6 +113,7 @@ trivColorable platform virtualRegSqueeze
                             ArchSPARC     -> 14
                             ArchPPC_64    -> panic "trivColorable ArchPPC_64"
                             ArchARM _ _ _ -> panic "trivColorable ArchARM"
+                            ArchARM64     -> panic "trivColorable ArchARM64"
                             ArchUnknown   -> panic "trivColorable ArchUnknown")
         , count2        <- accSqueeze (_ILIT(0)) cALLOCATABLE_REGS_INTEGER
                                 (virtualRegSqueeze RcInteger)
@@ -133,6 +134,7 @@ trivColorable platform virtualRegSqueeze
                             ArchSPARC     -> 22
                             ArchPPC_64    -> panic "trivColorable ArchPPC_64"
                             ArchARM _ _ _ -> panic "trivColorable ArchARM"
+                            ArchARM64     -> panic "trivColorable ArchARM64"
                             ArchUnknown   -> panic "trivColorable ArchUnknown")
         , count2        <- accSqueeze (_ILIT(0)) cALLOCATABLE_REGS_FLOAT
                                 (virtualRegSqueeze RcFloat)
@@ -153,6 +155,7 @@ trivColorable platform virtualRegSqueeze
                             ArchSPARC     -> 11
                             ArchPPC_64    -> panic "trivColorable ArchPPC_64"
                             ArchARM _ _ _ -> panic "trivColorable ArchARM"
+                            ArchARM64     -> panic "trivColorable ArchARM64"
                             ArchUnknown   -> panic "trivColorable ArchUnknown")
         , count2        <- accSqueeze (_ILIT(0)) cALLOCATABLE_REGS_DOUBLE
                                 (virtualRegSqueeze RcDouble)
@@ -173,6 +176,7 @@ trivColorable platform virtualRegSqueeze
                             ArchSPARC     -> 0
                             ArchPPC_64    -> panic "trivColorable ArchPPC_64"
                             ArchARM _ _ _ -> panic "trivColorable ArchARM"
+                            ArchARM64     -> panic "trivColorable ArchARM64"
                             ArchUnknown   -> panic "trivColorable ArchUnknown")
         , count2        <- accSqueeze (_ILIT(0)) cALLOCATABLE_REGS_SSE
                                 (virtualRegSqueeze RcDoubleSSE)
--- ghc-7.6.3.orig/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
+++ ghc-7.6.3/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
@@ -73,6 +73,7 @@ maxSpillSlots platform
                 ArchPPC       -> PPC.Instr.maxSpillSlots
                 ArchSPARC     -> SPARC.Instr.maxSpillSlots
                 ArchARM _ _ _ -> panic "maxSpillSlots ArchARM"
+                ArchARM64     -> panic "maxSpillSlots ArchARM64"
                 ArchPPC_64    -> panic "maxSpillSlots ArchPPC_64"
                 ArchUnknown   -> panic "maxSpillSlots ArchUnknown"
 
--- ghc-7.6.3.orig/compiler/nativeGen/RegAlloc/Linear/Main.hs
+++ ghc-7.6.3/compiler/nativeGen/RegAlloc/Linear/Main.hs
@@ -185,6 +185,7 @@ linearRegAlloc dflags first_id block_liv
       ArchSPARC     -> linearRegAlloc' platform (frInitFreeRegs :: SPARC.FreeRegs) first_id block_live sccs
       ArchPPC       -> linearRegAlloc' platform (frInitFreeRegs :: PPC.FreeRegs)   first_id block_live sccs
       ArchARM _ _ _ -> panic "linearRegAlloc ArchARM"
+      ArchARM64     -> panic "linearRegAlloc ArchARM64"
       ArchPPC_64    -> panic "linearRegAlloc ArchPPC_64"
       ArchUnknown   -> panic "linearRegAlloc ArchUnknown"
 
--- ghc-7.6.3.orig/compiler/nativeGen/TargetReg.hs
+++ ghc-7.6.3/compiler/nativeGen/TargetReg.hs
@@ -56,6 +56,7 @@ targetVirtualRegSqueeze platform
       ArchSPARC     -> SPARC.virtualRegSqueeze
       ArchPPC_64    -> panic "targetVirtualRegSqueeze ArchPPC_64"
       ArchARM _ _ _ -> panic "targetVirtualRegSqueeze ArchARM"
+      ArchARM64     -> panic "targetVirtualRegSqueeze ArchARM64"
       ArchUnknown   -> panic "targetVirtualRegSqueeze ArchUnknown"
 
 targetRealRegSqueeze :: Platform -> RegClass -> RealReg -> FastInt
@@ -67,6 +68,7 @@ targetRealRegSqueeze platform
       ArchSPARC     -> SPARC.realRegSqueeze
       ArchPPC_64    -> panic "targetRealRegSqueeze ArchPPC_64"
       ArchARM _ _ _ -> panic "targetRealRegSqueeze ArchARM"
+      ArchARM64     -> panic "targetRealRegSqueeze ArchARM64"
       ArchUnknown   -> panic "targetRealRegSqueeze ArchUnknown"
 
 targetClassOfRealReg :: Platform -> RealReg -> RegClass
@@ -78,6 +80,7 @@ targetClassOfRealReg platform
       ArchSPARC     -> SPARC.classOfRealReg
       ArchPPC_64    -> panic "targetClassOfRealReg ArchPPC_64"
       ArchARM _ _ _ -> panic "targetClassOfRealReg ArchARM"
+      ArchARM64     -> panic "targetClassOfRealReg ArchARM64"
       ArchUnknown   -> panic "targetClassOfRealReg ArchUnknown"
 
 -- TODO: This should look at targetPlatform too
@@ -93,6 +96,7 @@ targetMkVirtualReg platform
       ArchSPARC     -> SPARC.mkVirtualReg
       ArchPPC_64    -> panic "targetMkVirtualReg ArchPPC_64"
       ArchARM _ _ _ -> panic "targetMkVirtualReg ArchARM"
+      ArchARM64     -> panic "targetMkVirtualReg ArchARM64"
       ArchUnknown   -> panic "targetMkVirtualReg ArchUnknown"
 
 targetRegDotColor :: Platform -> RealReg -> SDoc
@@ -104,6 +108,7 @@ targetRegDotColor platform
       ArchSPARC     -> SPARC.regDotColor
       ArchPPC_64    -> panic "targetRegDotColor ArchPPC_64"
       ArchARM _ _ _ -> panic "targetRegDotColor ArchARM"
+      ArchARM64     -> panic "targetRegDotColor ArchARM64"
       ArchUnknown   -> panic "targetRegDotColor ArchUnknown"
 
 
--- ghc-7.6.3.orig/compiler/utils/Platform.hs
+++ ghc-7.6.3/compiler/utils/Platform.hs
@@ -45,6 +45,7 @@ data Arch
           , armISAExt :: [ArmISAExt]
           , armABI    :: ArmABI
           }
+        | ArchARM64
         deriving (Read, Show, Eq)
 
 
--- ghc-7.6.3.orig/includes/stg/MachRegs.h
+++ ghc-7.6.3/includes/stg/MachRegs.h
@@ -43,6 +43,7 @@
 #define powerpc_REGS  (powerpc_TARGET_ARCH || powerpc64_TARGET_ARCH || rs6000_TARGET_ARCH)
 #define sparc_REGS    sparc_TARGET_ARCH
 #define arm_REGS      arm_TARGET_ARCH
+#define arm64_REGS    arm64_TARGET_ARCH
 #define darwin_REGS   darwin_TARGET_OS
 #else
 #define i386_REGS     i386_HOST_ARCH
@@ -50,6 +51,7 @@
 #define powerpc_REGS  (powerpc_HOST_ARCH || powerpc64_HOST_ARCH || rs6000_HOST_ARCH)
 #define sparc_REGS    sparc_HOST_ARCH
 #define arm_REGS      arm_HOST_ARCH
+#define arm64_REGS    arm64_HOST_ARCH
 #define darwin_REGS   darwin_HOST_OS
 #endif
 
@@ -461,6 +463,63 @@
 
 #endif /* arm */
 
+/* -----------------------------------------------------------------------------
+   The ARMv8/AArch64 ABI register mapping
+
+   AArch64 provides 31 64-bit general-purpose registers
+   and 32 128-bit SIMD/floating-point registers.
+
+   General purpose registers (see Chapter 5.1.1 in ARM IHI 0055B)
+
+   Register | Special | Role in the procedure call standard
+   ---------+---------+------------------------------------
+     SP     |         | The Stack Pointer
+     r30    |  LR     | The Link Register
+     r29    |  FP     | The Frame Pointer
+   r19-r28  |         | Callee-saved registers
+     r18    |         | The Platform Register, if needed; 
+            |         | or temporary register
+     r17    |  IP1    | The second intra-procedure-call temporary register
+     r16    |  IP0    | The first intra-procedure-call scratch register
+    r9-r15  |         | Temporary registers
+     r8     |         | Indirect result location register
+    r0-r7   |         | Parameter/result registers
+
+
+   FPU/SIMD registers
+
+   s/d/q/v0-v7    Argument / result / scratch registers
+   s/d/q/v8-v15   callee-saved registers (must be preserved across subroutine calls,
+                  but only the bottom 64 bits of each need to be preserved)
+   s/d/q/v16-v31  temporary registers
+
+   ----------------------------------------------------------------------------- */
+
+#if arm64_REGS
+/* REG(x) stringifies the register name x and wraps it in an __asm__("x") clause. */
+#define REG(x) __asm__(#x)
+/* STG Base/Sp/Hp/R1-R6/SpLim: all mapped into r19-r28, listed as callee-saved above. */
+#define REG_Base        r19
+#define REG_Sp          r20
+#define REG_Hp          r21
+#define REG_R1          r22
+#define REG_R2          r23
+#define REG_R3          r24
+#define REG_R4          r25
+#define REG_R5          r26
+#define REG_R6          r27
+#define REG_SpLim       r28
+/* REG_F1-F4 use s8-s11, inside the v8-v15 range described above as callee-saved. */
+#define REG_F1          s8
+#define REG_F2          s9
+#define REG_F3          s10
+#define REG_F4          s11
+/* REG_D1-D2 use d12-d13, also inside the callee-saved v8-v15 range. */
+#define REG_D1          d12
+#define REG_D2          d13
+
+#endif /* arm64 */
+
 #endif /* NO_REGS */
 
 /* -----------------------------------------------------------------------------
--- ghc-7.6.3.orig/rts/StgCRun.c
+++ ghc-7.6.3/rts/StgCRun.c
@@ -725,4 +725,70 @@ StgRun(StgFunPtr f, StgRegTable *basereg
 }
 #endif
 
+#ifdef arm64_HOST_ARCH
+
+StgRegTable *
+StgRun(StgFunPtr f, StgRegTable *basereg) {
+    StgRegTable * r;
+    __asm__ volatile (
+        /*
+         * Save callee-saves registers on behalf of the STG code, always in
+         * 16-byte pairs so that sp stays 16-byte aligned.  Use the plain
+         * x16/x17/x29/x30 names; the ip0/ip1/fp/lr aliases are not portable.
+         */
+        "stp x29, x30, [sp, #-16]!\n\t"
+        "stp x16, x17, [sp, #-16]!\n\t"
+        "stp x19, x20, [sp, #-16]!\n\t"
+        "stp x21, x22, [sp, #-16]!\n\t"
+        "stp x23, x24, [sp, #-16]!\n\t"
+        "stp x25, x26, [sp, #-16]!\n\t"
+        "stp x27, x28, [sp, #-16]!\n\t"
+
+        /*
+         * Allocate some space for Stg machine's temporary storage.
+         * Note: RESERVED_C_STACK_BYTES has to be a round number here or
+         * the assembler can't assemble it.
+         */
+        "sub sp, sp, %3\n\t"
+        /*
+         * Set BaseReg
+         */
+        "mov x19, %2\n\t"
+        /*
+         * Jump to function argument (A64 spells the register branch "br").
+         */
+        "br %1\n\t"
+
+        ".globl " STG_RETURN "\n\t"
+        ".type " STG_RETURN ", %%function\n"
+        STG_RETURN ":\n\t"
+        /*
+         * Free the space we allocated
+         */
+        "add sp, sp, %3\n\t"
+        /*
+         * Return the new register table, taking it from Stg's R1 (ARM64's R22).
+         */
+        "mov %0, x22\n\t"
+        /*
+         * Restore callee-saves registers, in the reverse order of the saves.
+         */
+        "ldp x27, x28, [sp], #16\n\t"
+        "ldp x25, x26, [sp], #16\n\t"
+        "ldp x23, x24, [sp], #16\n\t"
+        "ldp x21, x22, [sp], #16\n\t"
+        "ldp x19, x20, [sp], #16\n\t"
+        "ldp x16, x17, [sp], #16\n\t"
+        "ldp x29, x30, [sp], #16\n\t"
+
+      : "=r" (r)
+      : "r" (f), "r" (basereg), "i" (RESERVED_C_STACK_BYTES)
+        : "%x19", "%x20", "%x21", "%x22", "%x23", "%x24", "%x25", "%x26", "%x27", "%x28",
+          "%x16", "%x17", "%x30"
+    );
+    return r;
+}
+
+#endif
+
 #endif /* !USE_MINIINTERPRETER */

Reply to: