[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#647288: marked as done (libffi: Simplify PowerPC assembly and avoid CPU-specific string instructions)



Your message dated Sat, 07 Apr 2012 13:25:02 +0200
with message-id <4F80240E.7060006@debian.org>
and subject line Fixed in 3.0.11~rc3
has caused the Debian Bug report #647288,
regarding libffi: Simplify PowerPC assembly and avoid CPU-specific string instructions
to be marked as done.

This means that you claim that the problem has been dealt with.
If this is not the case it is now your responsibility to reopen the
Bug report if necessary, and/or fix the problem forthwith.

(NB: If you are a system administrator and have no idea what this
message is talking about, this may indicate a serious mail system
misconfiguration somewhere. Please contact owner@bugs.debian.org
immediately.)


-- 
647288: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=647288
Debian Bug Tracking System
Contact owner@bugs.debian.org with problems
--- Begin Message ---
Source: libffi
Version: 3.0.10-3
Severity: wishlist
Tags: upstream patch

After upgrading to a new version of GNU ld for PowerPC e500, I started
seeing build errors on e500 systems again.  It turns out that the
PowerPC "string instructions" are unimplemented on PPC440 and most other
embedded cores, and also cause unexpectedly high instruction latencies
and pipeline stalls even on POWER processors.

This historically worked in the past because the unknown string opcodes
are trapped on PPC440 and similar systems and emulated in the kernel,
though that is obviously very inefficient and undesirable.

Since the struct-copy code doesn't really need to be implemented in
assembly, this patch ensures that there is always enough space to store
both r3 and r4 and then uses C to extract the 1-8 byte "small struct"
into the user-provided memory.

Even with all the big new comments, it's still removes 11 lines of code,
and the ASM is much simpler and easier to understand now.

Please consider applying.

Cheers,
Kyle Moffett

--
Curious about my work on the Debian powerpcspe port?
I'm keeping a blog here: http://pureperl.blogspot.com/

-- System Information:
Debian Release: 6.0.2
  APT prefers stable
  APT policy: (1010, 'stable'), (700, 'testing'), (600, 'unstable'), (500, 'stable-updates'), (500, 'experimental')
Architecture: amd64 (x86_64)

Kernel: Linux 2.6.32-5-amd64 (SMP w/4 CPU cores)
Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/bash
diff -ru libffi-3.0.10/src/powerpc/ffi.c libffi-3.0.10.new/src/powerpc/ffi.c
--- libffi-3.0.10/src/powerpc/ffi.c	2011-11-01 11:22:38.000000000 -0400
+++ libffi-3.0.10.new/src/powerpc/ffi.c	2011-11-01 11:22:27.000000000 -0400
@@ -46,11 +46,6 @@
   FLAG_RETURNS_64BITS   = 1 << (31-28),
 
   FLAG_RETURNS_128BITS  = 1 << (31-27), /* cr6  */
-  FLAG_SYSV_SMST_R4     = 1 << (31-26), /* use r4 for FFI_SYSV 8 byte
-					   structs.  */
-  FLAG_SYSV_SMST_R3     = 1 << (31-25), /* use r3 for FFI_SYSV 4 byte
-					   structs.  */
-  /* Bits (31-24) through (31-19) store shift value for SMST */
 
   FLAG_ARG_NEEDS_COPY   = 1 << (31- 7),
 #ifndef __NO_FPRS__
@@ -688,37 +683,22 @@
       break;
 
     case FFI_TYPE_STRUCT:
-      if (cif->abi == FFI_SYSV)
-	{
-	  /* The final SYSV ABI says that structures smaller or equal 8 bytes
-	     are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
-	     in memory.  */
+	/*
+	 * The final SYSV ABI says that structures smaller or equal 8 bytes
+	 * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
+	 * in memory.
+	 *
+	 * NOTE: The assembly code can safely assume that it just needs to
+	 *       store both r3 and r4 into a 8-byte word-aligned buffer, as
+	 *       we allocate a temporary buffer in ffi_call() if this flag is
+	 *       set.
+	 */
+	if (cif->abi == FFI_SYSV && size <= 8)
+		flags |= FLAG_RETURNS_SMST;
 
-	  /* Treat structs with size <= 8 bytes.  */
-	  if (size <= 8)
-	    {
-	      flags |= FLAG_RETURNS_SMST;
-	      /* These structs are returned in r3. We pack the type and the
-		 precalculated shift value (needed in the sysv.S) into flags.
-		 The same applies for the structs returned in r3/r4.  */
-	      if (size <= 4)
-		{
-		  flags |= FLAG_SYSV_SMST_R3;
-		  flags |= 8 * (4 - size) << 8;
-		  break;
-		}
-	      /* These structs are returned in r3 and r4. See above.   */
-	      if  (size <= 8)
-		{
-		  flags |= FLAG_SYSV_SMST_R3 | FLAG_SYSV_SMST_R4;
-		  flags |= 8 * (8 - size) << 8;
-		  break;
-		}
-	    }
-	}
-      intarg_count++;
-      flags |= FLAG_RETVAL_REFERENCE;
-      /* Fall through.  */
+	intarg_count++;
+	flags |= FLAG_RETVAL_REFERENCE;
+	/* Fall through.  */
     case FFI_TYPE_VOID:
       flags |= FLAG_RETURNS_NOTHING;
       break;
@@ -932,21 +912,30 @@
 void
 ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
 {
+  /*
+   * The final SYSV ABI says that structures smaller or equal 8 bytes
+   * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
+   * in memory.
+   *
+   * Just to keep things simple for the assembly code, we will always
+   * bounce-buffer struct return values less than or equal to 8 bytes.
+   * This allows the ASM to handle SYSV small structures by directly
+   * writing r3 and r4 to memory without worrying about struct size.
+   */
+  unsigned int smst_buffer[2];
   extended_cif ecif;
 
   ecif.cif = cif;
   ecif.avalue = avalue;
 
-  /* If the return value is a struct and we don't have a return	*/
-  /* value address then we need to make one		        */
-
-  if ((rvalue == NULL) && (cif->rtype->type == FFI_TYPE_STRUCT))
-    {
-      ecif.rvalue = alloca(cif->rtype->size);
-    }
-  else
-    ecif.rvalue = rvalue;
-
+  /* Ensure that we have a valid struct return value */
+  ecif.rvalue = rvalue;
+  if (cif->rtype->type == FFI_TYPE_STRUCT) {
+	if (cif->rtype->size <= 8)
+		ecif.rvalue = smst_buffer;
+	else if (!rvalue)
+		ecif.rvalue = alloca(cif->rtype->size);
+  }
 
   switch (cif->abi)
     {
@@ -968,6 +957,10 @@
       FFI_ASSERT (0);
       break;
     }
+
+  /* Check for a bounce-buffered return value */
+  if (rvalue && ecif.rvalue == smst_buffer)
+	memcpy(rvalue, smst_buffer, cif->rtype->size);
 }
 
 
diff -ru libffi-3.0.10/src/powerpc/sysv.S libffi-3.0.10.new/src/powerpc/sysv.S
--- libffi-3.0.10/src/powerpc/sysv.S	2011-11-01 11:22:38.000000000 -0400
+++ libffi-3.0.10.new/src/powerpc/sysv.S	2011-11-01 11:19:36.000000000 -0400
@@ -142,19 +142,14 @@
 #endif
 
 L(small_struct_return_value):
-	extrwi	%r6,%r31,2,19         /* number of bytes padding = shift/8 */
-	mtcrf	0x02,%r31	      /* copy flags to cr[24:27] (cr6) */
-	extrwi	%r5,%r31,5,19         /* r5 <- number of bits of padding */
-	subfic  %r6,%r6,4             /* r6 <- number of useful bytes in r3 */
-	bf-	25,L(done_return_value) /* struct in r3 ? if not, done. */
-/* smst_one_register: */
-	slw	%r3,%r3,%r5           /* Left-justify value in r3 */
-	mtxer	%r6                   /* move byte count to XER ... */
-	stswx	%r3,0,%r30            /* ... and store that many bytes */
-	bf+	26,L(done_return_value)  /* struct in r3:r4 ? */
-	add	%r6,%r6,%r30          /* adjust pointer */
-	stswi	%r4,%r6,4             /* store last four bytes */
-	b	L(done_return_value)
+	/*
+	 * The C code always allocates a properly-aligned 8-byte bounce
+	 * buffer to make this assembly code very simple.  Just write out
+	 * r3 and r4 to the buffer to allow the C code to handle the rest.
+	 */
+	stw %r3, 0(%r30)
+	stw %r4, 4(%r30)
+	b L(done_return_value)
 
 .LFE1:
 END(ffi_call_SYSV)

--- End Message ---
--- Begin Message ---
Version: 3.0.11~rc3-1


--- End Message ---

Reply to: