[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: [fpc-devel] freepascal on armhf again





Maybe this will help you: http://wiki.freepascal.org/FPC_New_Features_2.6.0#Support_for_VFPv2_and_VFPv3

Thus additionally defining -Cparmv6 might allow the build to be completed.
It didn't.

I did find the issue, though: it seems Free Pascal wasn't passing an FPU type parameter to the assembler in any case except softfloat.

I fixed that, but in the process discovered that Free Pascal's vfpv3 support is the "d32" version of vfpv3 (yes, ARM floating point is a mess), so I decided to try to add a vfpv3_d16 "fpu type". However, when I did so, the compiler started outputting rfs and wfs instructions when compiling the system unit, which the assembler didn't like. I eventually tracked that down to inline assembler in the RTL. Maybe we should add an FPU_VFP define that is defined for all VFP variants, so we don't have to mess with the ifdefs in the RTL every time we want to add a variant of VFP (AFAICT there are a lot of VFP variants).

In the process of working on that I discovered another problem: according to a comment in the source, Free Pascal's register allocation does not currently understand the concept of two single-precision registers that overlap with one double-precision register. This will make correctly implementing the "eabi vfp hardfloat" calling convention difficult, but I'm going to get passing doubles working before I worry about the complications involved in passing singles (and, even worse, passing a mixture of singles and doubles).

I've attached the diff of what I've done so far.

P.S. (mostly as a note to self) I'm currently using the following commands to build:

make compiler_cycle 'OPT=-dFPC_ARMEL' 2>&1 | tee ../buildlog
cp compiler/ppcarm /
make compiler_cycle PP=/ppcarm 'OPT=-dFPC_ARMEL -CfVFPV3_D16 -CpARMV7' 2>&1 | tee ../buildlog

Next step will be to modify the code in cpupara.pas to actually start passing values in vfp registers.
Index: rtl/arm/setjump.inc
===================================================================
--- rtl/arm/setjump.inc	(revision 20467)
+++ rtl/arm/setjump.inc	(working copy)
@@ -16,7 +16,7 @@
 
 function fpc_setjmp(var S : jmp_buf) : longint;assembler;[Public, alias : 'FPC_SETJMP'];nostackframe; compilerproc;
   asm
-    {$if defined(FPUVFPV2) or defined(FPUVFPV3)}
+    {$if defined(FPUVFPV2) or defined(FPUVFPV3) or defined(FPUVFPV3_D16)}
     {$if defined(CPUARMV3) or defined(CPUARMV4) or defined(CPUARMV5)}
     fstmiax r0!, {d8-d15}
     {$else}
@@ -46,7 +46,7 @@
     movs    r0, r1
     it eq
     moveq   r0, #1
-    {$if defined(FPUVFPV2) or defined(FPUVFPV3)}
+    {$if defined(FPUVFPV2) or defined(FPUVFPV3) or defined(FPUVFPV3_D16)}
     fldmiad ip!, {d8-d15}
     {$endif}
     ldmia   ip,{v1-v6, sl, fp}
@@ -57,7 +57,7 @@
     mov     ip, r0
     movs    r0, r1
     moveq   r0, #1
-    {$if defined(FPUVFPV2) or defined(FPUVFPV3)}
+    {$if defined(FPUVFPV2) or defined(FPUVFPV3) or defined(FPUVFPV3_D16)}
     {$if defined(CPUARMV3) or defined(CPUARMV4) or defined(CPUARMV5)}
     fldmiax ip!, {d8-d15}
     {$else}
Index: rtl/arm/math.inc
===================================================================
--- rtl/arm/math.inc	(revision 20467)
+++ rtl/arm/math.inc	(working copy)
@@ -14,7 +14,7 @@
 
  **********************************************************************}
 
-{$if defined(FPUFPA) or defined(FPUFPA10) or defined(FPUFPA11) or defined(FPUVFPV2) or defined(FPUVFPV3)}
+{$if defined(FPUFPA) or defined(FPUFPA10) or defined(FPUFPA11) or defined(FPUVFPV2) or defined(FPUVFPV3) or defined(FPUVFPV3_D16)}
     {$define FPC_SYSTEM_HAS_ABS}
     function fpc_abs_real(d : ValReal) : ValReal;compilerproc;
     begin
Index: rtl/arm/mathu.inc
===================================================================
--- rtl/arm/mathu.inc	(revision 20467)
+++ rtl/arm/mathu.inc	(working copy)
@@ -177,7 +177,7 @@
 begin
 end;
 
-{$elseif defined(darwin) or defined(FPUVFPV2) or defined(FPUVFPV3)}
+{$elseif defined(darwin) or defined(FPUVFPV2) or defined(FPUVFPV3) or defined(FPUVFPV3_d16)}
 
 const
   _VFP_ENABLE_IM  =  1 shl 8;         { invalid operation      }
Index: rtl/arm/arm.inc
===================================================================
--- rtl/arm/arm.inc	(revision 20467)
+++ rtl/arm/arm.inc	(working copy)
@@ -30,7 +30,7 @@
 {$if not(defined(wince)) and not(defined(gba)) and not(defined(nds)) and not(defined(FPUSOFT)) and not(defined(FPULIBGCC))}
 
 {$define FPC_SYSTEM_HAS_SYSINITFPU}
-{$if not defined(darwin) and not defined(FPUVFPV2) and not defined(FPUVFPV3)}
+{$if not defined(darwin) and not defined(FPUVFPV2) and not defined(FPUVFPV3) and not defined(FPUVFPV3_D16)}
 Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
 begin
   { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
Index: rtl/arm/setjumph.inc
===================================================================
--- rtl/arm/setjumph.inc	(revision 20467)
+++ rtl/arm/setjumph.inc	(working copy)
@@ -16,7 +16,7 @@
 
 type
    jmp_buf = packed record
-{$if defined(FPUVFPV2) or defined(FPUVFPV3)}
+{$if defined(FPUVFPV2) or defined(FPUVFPV3) or defined(FPUVFPV3_D16)}
       d8,d9,d10,d11,d12,d13,d14,d15: double;
 {$endif}
       v1,v2,v3,v4,v5,v6,sl,fp,sp,pc : dword;
Index: compiler/systems.pas
===================================================================
--- compiler/systems.pas	(revision 20467)
+++ compiler/systems.pas	(working copy)
@@ -84,7 +84,7 @@
           id          : tasm;
           idtxt       : string[12];
           asmbin      : string[8];
-          asmcmd      : string[50];
+          asmcmd      : string[100];
           supported_targets : set of tsystem;
           flags        : set of tasmflags;
           labelprefix : string[3];
Index: compiler/arm/narminl.pas
===================================================================
--- compiler/arm/narminl.pas	(revision 20467)
+++ compiler/arm/narminl.pas	(working copy)
@@ -89,7 +89,8 @@
                end;
             end;
           fpu_vfpv2,
-          fpu_vfpv3:
+          fpu_vfpv3,
+          fpu_vfpv3_d16:
             begin
               location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
               location_copy(location,left.location);
@@ -118,7 +119,8 @@
               fpu_fpa11:
                 expectloc:=LOC_FPUREGISTER;
               fpu_vfpv2,
-              fpu_vfpv3:
+              fpu_vfpv3,
+              fpu_vfpv3_d16:
                 expectloc:=LOC_MMREGISTER;
               else
                 internalerror(2009112401);
@@ -140,7 +142,8 @@
               fpu_fpa11:
                 expectloc:=LOC_FPUREGISTER;
               fpu_vfpv2,
-              fpu_vfpv3:
+              fpu_vfpv3,
+              fpu_vfpv3_d16:
                 expectloc:=LOC_MMREGISTER;
               else
                 internalerror(2009112402);
@@ -162,7 +165,8 @@
               fpu_fpa11:
                 expectloc:=LOC_FPUREGISTER;
               fpu_vfpv2,
-              fpu_vfpv3:
+              fpu_vfpv3,
+              fpu_vfpv3_d16:
                 expectloc:=LOC_MMREGISTER;
               else
                 internalerror(2009112403);
@@ -213,7 +217,8 @@
           fpu_fpa11:
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_ABS,location.register,left.location.register),get_fpu_postfix(resultdef)));
           fpu_vfpv2,
-          fpu_vfpv3:
+          fpu_vfpv3,
+          fpu_vfpv3_d16:
             begin
               if singleprec then
                 op:=A_FABSS
@@ -239,7 +244,8 @@
           fpu_fpa11:
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_MUF,location.register,left.location.register,left.location.register),get_fpu_postfix(resultdef)));
           fpu_vfpv2,
-          fpu_vfpv3:
+          fpu_vfpv3,
+          fpu_vfpv3_d16:
             begin
               if singleprec then
                 op:=A_FMULS
@@ -265,7 +271,8 @@
           fpu_fpa11:
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_SQT,location.register,left.location.register),get_fpu_postfix(resultdef)));
           fpu_vfpv2,
-          fpu_vfpv3:
+          fpu_vfpv3,
+          fpu_vfpv3_d16:
             begin
               if singleprec then
                 op:=A_FSQRTS
Index: compiler/arm/cgcpu.pas
===================================================================
--- compiler/arm/cgcpu.pas	(revision 20467)
+++ compiler/arm/cgcpu.pas	(working copy)
@@ -226,7 +226,7 @@
           non-overlapping subregs per register, so we can only use
           half the single precision registers for now (as sub registers of the
           double precision ones). }
-        if current_settings.fputype=fpu_vfpv3 then
+        if (current_settings.fputype=fpu_vfpv3) then
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
                RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
@@ -1438,7 +1438,8 @@
                       end;
                 end;
               fpu_vfpv2,
-              fpu_vfpv3:
+              fpu_vfpv3,
+              fpu_vfpv3_d16:
                 begin;
                   mmregs:=rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
                 end;
@@ -1509,7 +1510,7 @@
              begin
                reference_reset(ref,4);
                if (tg.direction*tarmprocinfo(current_procinfo).floatregstart>=1023) or
-                  (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3]) then
+                  (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
                  begin
                    if not is_shifter_const(tarmprocinfo(current_procinfo).floatregstart,shift) then
                      begin
@@ -1537,7 +1538,8 @@
                        lastfloatreg-firstfloatreg+1,ref));
                    end;
                  fpu_vfpv2,
-                 fpu_vfpv3:
+                 fpu_vfpv3,
+                 fpu_vfpv3_d16:
                    begin
                      ref.index:=ref.base;
                      ref.base:=NR_NO;
@@ -1591,7 +1593,8 @@
                       end;
                 end;
               fpu_vfpv2,
-              fpu_vfpv3:
+              fpu_vfpv3,
+              fpu_vfpv3_d16:
                 begin;
                   { restore vfp registers? }
                   mmregs:=rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
@@ -1603,7 +1606,7 @@
               begin
                 reference_reset(ref,4);
                 if (tg.direction*tarmprocinfo(current_procinfo).floatregstart>=1023) or
-                   (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3]) then
+                   (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
                   begin
                     if not is_shifter_const(tarmprocinfo(current_procinfo).floatregstart,shift) then
                       begin
@@ -1630,7 +1633,8 @@
                         lastfloatreg-firstfloatreg+1,ref));
                     end;
                   fpu_vfpv2,
-                  fpu_vfpv3:
+                  fpu_vfpv3,
+                  fpu_vfpv3_d16:
                     begin
                       ref.index:=ref.base;
                       ref.base:=NR_NO;
Index: compiler/arm/narmcnv.pas
===================================================================
--- compiler/arm/narmcnv.pas	(revision 20467)
+++ compiler/arm/narmcnv.pas	(working copy)
@@ -116,7 +116,8 @@
               fpu_fpa11:
                 expectloc:=LOC_FPUREGISTER;
               fpu_vfpv2,
-              fpu_vfpv3:
+              fpu_vfpv3,
+              fpu_vfpv3_d16:
                 expectloc:=LOC_MMREGISTER;
               else
                 internalerror(2009112702);
@@ -195,7 +196,8 @@
               end;
             end;
           fpu_vfpv2,
-          fpu_vfpv3:
+          fpu_vfpv3,
+          fpu_vfpv3_d16:
             begin
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               signed:=left.location.size=OS_S32;
Index: compiler/arm/narmcal.pas
===================================================================
--- compiler/arm/narmcal.pas	(revision 20467)
+++ compiler/arm/narmcal.pas	(working copy)
@@ -47,7 +47,7 @@
     begin
       if (realresdef.typ=floatdef) and
          ((cs_fp_emulation in current_settings.moduleswitches) or
-          (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3])) then
+          (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16])) then
         begin
           { keep the fpu values in integer registers for now, the code
             generator will move them to memory or an mmregister when necessary
Index: compiler/arm/agarmgas.pas
===================================================================
--- compiler/arm/agarmgas.pas	(revision 20467)
+++ compiler/arm/agarmgas.pas	(working copy)
@@ -80,7 +80,10 @@
         result:=inherited MakeCmdLine;
         if (current_settings.fputype = fpu_soft) then
           result:='-mfpu=softvfp '+result;
-
+        if (current_settings.fputype = fpu_vfpv3) then
+          result:='-mfpu=vfpv3 '+result;
+        if (current_settings.fputype = fpu_vfpv3_d16) then
+          result:='-mfpu=vfpv3-d16 '+result;
         if current_settings.cputype = cpu_armv7m then
           result:='-march=armv7m -mthumb -mthumb-interwork '+result;
       end;
@@ -300,7 +303,11 @@
 
             idtxt  : 'AS';
             asmbin : 'as';
-            asmcmd : '-o $OBJ $ASM';
+            {$ifdef FPC_ARMHF}
+              asmcmd : '-march=armv7-a -mfloat-abi=hard -meabi=5 -o $OBJ $ASM';
+            {$else}
+              asmcmd : '-o $OBJ $ASM';
+            {$endif}
             supported_targets : [system_arm_linux,system_arm_wince,system_arm_gba,system_arm_palmos,system_arm_nds,system_arm_embedded,system_arm_symbian];
             flags : [af_allowdirect,af_needar,af_smartlink_sections];
             labelprefix : '.L';
Index: compiler/arm/narmmat.pas
===================================================================
--- compiler/arm/narmmat.pas	(revision 20467)
+++ compiler/arm/narmmat.pas	(working copy)
@@ -331,7 +331,8 @@
                 cgsize2fpuoppostfix[def_cgsize(resultdef)]));
             end;
           fpu_vfpv2,
-          fpu_vfpv3:
+          fpu_vfpv3,
+          fpu_vfpv3_d16:
             begin
               location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
               location:=left.location;
Index: compiler/arm/cpuinfo.pas
===================================================================
--- compiler/arm/cpuinfo.pas	(revision 20467)
+++ compiler/arm/cpuinfo.pas	(working copy)
@@ -55,7 +55,8 @@
       fpu_fpa10,
       fpu_fpa11,
       fpu_vfpv2,
-      fpu_vfpv3
+      fpu_vfpv3,
+      fpu_vfpv3_d16
      );
 
    tcontrollertype =
@@ -195,14 +196,15 @@
      'ARMV7M'
    );
 
-   fputypestr : array[tfputype] of string[6] = ('',
+   fputypestr : array[tfputype] of string[9] = ('',
      'SOFT',
      'LIBGCC',
      'FPA',
      'FPA10',
      'FPA11',
      'VFPV2',
-     'VFPV3'
+     'VFPV3',
+     'VFPV3_D16'
    );
 
 
@@ -1013,7 +1015,7 @@
         )
     );
 
-   vfp_scalar = [fpu_vfpv2,fpu_vfpv3];
+   vfp_scalar = [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16];
 
    { Supported optimizations, only used for information }
    supported_optimizerswitches = genericlevel1optimizerswitches+
Index: compiler/arm/narmadd.pas
===================================================================
--- compiler/arm/narmadd.pas	(revision 20467)
+++ compiler/arm/narmadd.pas	(working copy)
@@ -164,7 +164,8 @@
                  cgsize2fpuoppostfix[def_cgsize(resultdef)]));
             end;
           fpu_vfpv2,
-          fpu_vfpv3:
+          fpu_vfpv3,
+          fpu_vfpv3_d16:
             begin
               { force mmreg as location, left right doesn't matter
                 as both will be in a fpureg }
@@ -248,7 +249,8 @@
                    cgsize2fpuoppostfix[def_cgsize(resultdef)]));
             end;
           fpu_vfpv2,
-          fpu_vfpv3:
+          fpu_vfpv3,
+          fpu_vfpv3_d16:
             begin
               location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
               location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
Index: compiler/arm/cpupi.pas
===================================================================
--- compiler/arm/cpupi.pas	(revision 20467)
+++ compiler/arm/cpupi.pas	(working copy)
@@ -106,7 +106,8 @@
                 floatsavesize:=(lastfloatreg-firstfloatreg+1)*12;
             end;
           fpu_vfpv2,
-          fpu_vfpv3:
+          fpu_vfpv3,
+          fpu_vfpv3_d16:
             begin
               floatsavesize:=0;
               regs:=cg.rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
Index: compiler/arm/cpupara.pas
===================================================================
--- compiler/arm/cpupara.pas	(revision 20467)
+++ compiler/arm/cpupara.pas	(working copy)
@@ -121,7 +121,7 @@
             floatdef:
               if (calloption in [pocall_cdecl,pocall_cppdecl,pocall_softfloat]) or
                  (cs_fp_emulation in current_settings.moduleswitches) or
-                 (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3]) then
+                 (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
                 { the ARM eabi also allows passing VFP values via VFP registers,
                   but at least neither Mac OS X nor Linux seems to do that }
                 getparaloc:=LOC_REGISTER
@@ -501,7 +501,7 @@
           begin
             if (p.proccalloption in [pocall_softfloat]) or
                (cs_fp_emulation in current_settings.moduleswitches) or
-               (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3]) then
+               (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
               begin
                 case retcgsize of
                   OS_64,

Reply to: