[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: Call for help: altivec-enabled vlc



Duh....

Forgot to actually attach the patch. Damn...

On 26 Aug, this message to linuxppc-dev@lists.linuxppc.org echoed through cyberspace:
> I've sat down a bit ;-) and came up with an Altivec-optimised IDCT
> implementation in vlc (well, I integrated Motorola's Altivec IDCT).
> 
> This is in fact the same code that already exists in vlc for MacOS X,
> but it uses the Motorola-published assembler code (you can find it on
> their site).

Cheers

Michel

-------------------------------------------------------------------------
Michel Lanners                 |  " Read Philosophy.  Study Art.
23, Rue Paul Henkes            |    Ask Questions.  Make Mistakes.
L-1710 Luxembourg              |
email   mlan@cpu.lu            |
http://www.cpu.lu/~mlan        |                     Learn Always. "
diff -uNr vlc-0.2.82/Makefile vlc-0.2.82-altivec/Makefile
--- vlc-0.2.82/Makefile	Tue Aug  7 12:55:49 2001
+++ vlc-0.2.82-altivec/Makefile	Sun Aug 26 10:04:20 2001
@@ -18,7 +18,7 @@
 #
 # All possible plugin objects
 #
-PLUGINS_TARGETS := alsa/alsa beos/beos darwin/darwin directx/directx dsp/dsp dummy/dummy dummy/null dvd/dvd esd/esd fb/fb ggi/ggi glide/glide gtk/gnome gtk/gtk downmix/downmix downmix/downmixsse downmix/downmix3dn idct/idct idct/idctclassic idct/idctmmx idct/idctmmxext imdct/imdct imdct/imdct3dn imdct/imdctsse kde/kde macosx/macosx mga/mga motion/motion motion/motionmmx motion/motionmmxext mpeg/es mpeg/ps mpeg/ts qt/qt sdl/sdl text/ncurses text/rc x11/x11 x11/xvideo yuv/yuv yuv/yuvmmx
+PLUGINS_TARGETS := alsa/alsa beos/beos darwin/darwin directx/directx dsp/dsp dummy/dummy dummy/null dvd/dvd esd/esd fb/fb ggi/ggi glide/glide gtk/gnome gtk/gtk downmix/downmix downmix/downmixsse downmix/downmix3dn idct/idct idct/idctclassic idct/idctmmx idct/idctmmxext idct/idctaltivec imdct/imdct imdct/imdct3dn imdct/imdctsse kde/kde macosx/macosx mga/mga motion/motion motion/motionmmx motion/motionmmxext mpeg/es mpeg/ps mpeg/ts qt/qt sdl/sdl text/ncurses text/rc x11/x11 x11/xvideo yuv/yuv yuv/yuvmmx
 
 #
 # C Objects
diff -uNr vlc-0.2.82/Makefile.opts.in vlc-0.2.82-altivec/Makefile.opts.in
--- vlc-0.2.82/Makefile.opts.in	Tue Aug  7 12:55:49 2001
+++ vlc-0.2.82-altivec/Makefile.opts.in	Sun Aug 26 15:12:53 2001
@@ -45,7 +45,7 @@
 # Build environment
 # 
 CC = @CC@
-CFLAGS = @CFLAGS@
+CFLAGS = -Wa,-m7400 @CFLAGS@
 SHELL = @SHELL@
 RANLIB = @RANLIB@
 WINDRES = @WINDRES@
diff -uNr vlc-0.2.82/configure vlc-0.2.82-altivec/configure
--- vlc-0.2.82/configure	Tue Aug  7 12:55:49 2001
+++ vlc-0.2.82-altivec/configure	Sun Aug 26 10:06:10 2001
@@ -3675,7 +3675,8 @@
   enableval="$enable_altivec"
    if test x$enableval = xyes; then ARCH="${ARCH} altivec"
     BUILTINS="${BUILTINS} idctaltivec"
-    LIB_IDCTALTIVEC="-framework vecLib"
+#    LIB_IDCTALTIVEC="-framework vecLib"
+    LIB_IDCTALTIVEC=""
   fi 
 fi
 
diff -uNr vlc-0.2.82/plugins/idct/idctaltivec.c vlc-0.2.82-altivec/plugins/idct/idctaltivec.c
--- vlc-0.2.82/plugins/idct/idctaltivec.c	Tue Aug  7 12:55:49 2001
+++ vlc-0.2.82-altivec/plugins/idct/idctaltivec.c	Sun Aug 26 16:18:39 2001
@@ -47,7 +47,9 @@
 #include "vdec_block.h"
 #include "vdec_idct.h"
 
-#include "idctaltivec.h"
+//#include "idctaltivec.h"
+//extern void IDCT(short *input, short *output);
+#include "idctaltivecasm.h"
 
 #include "modules_export.h"
 
@@ -115,7 +117,8 @@
     }
 
     /* The Altivec iDCT is deactivated until it really works */
-    return( 0 /* 200 */ );
+    //return( 0 /* 200 */ );
+    return( 200 );
 }
 
 /*****************************************************************************
@@ -130,6 +133,8 @@
  *****************************************************************************/
 void _M( vdec_IDCT )( void * p_idct_data, dctelem_t * p_block, int i_idontcare )
 {
+//	fprintf(stderr, "p_block alignment: 0x%p\n", p_block);
     IDCT( p_block, p_block );
+//     fprintf(stderr, "p_block alignment: 0x%p\n", p_block);
 }
 
diff -uNr vlc-0.2.82/plugins/idct/idctaltivecasm.h vlc-0.2.82-altivec/plugins/idct/idctaltivecasm.h
--- vlc-0.2.82/plugins/idct/idctaltivecasm.h	Thu Jan  1 01:00:00 1970
+++ vlc-0.2.82-altivec/plugins/idct/idctaltivecasm.h	Sun Aug 26 16:18:54 2001
@@ -0,0 +1,190 @@
+// IDCT ASM function from Motorola
+
+/***************************************************************
+ *
+ * Copyright:   (c) Copyright Motorola Inc. 1998
+ *
+ * Date:        April 17, 1998
+ *
+ * Function:    IDCT
+ *
+ * Description: Scaled Chen (III) algorithm for IDCT
+ *              Arithmetic is 16-bit fixed point.
+ *
+ * Inputs:      input - Pointer to input data (short), which
+ *                      must be between -2048 to +2047.
+ *                      It is assumed that the allocated array
+ *                      has been 128-bit aligned and contains
+ *                      8x8 short elements.
+ *
+ * Outputs:     output - Pointer to output area for the transformed
+ *                       data. The output values are between -255
+ *                       and 255. It is assumed that a 128-bit
+ *                       aligned 8x8 array of short has been
+ *                       pre-allocated.
+ *
+ * Return:      None
+ *
+ ***************************************************************/
+
+signed short SpecialConstants[8] __attribute__ ((aligned (16))) = {
+			23170, 13573, 6518, 21895, -23170, -21895, 0, 0 };
+
+signed short PreScale[64] __attribute__ ((aligned (16))) = {
+			4095, 5681, 5351, 4816, 4095, 4816, 5351, 5681,
+			5681, 7880, 7422, 6680, 5681, 6680, 7422, 7880,
+			5351, 7422, 6992, 6292, 5351, 6292, 6992, 7422,
+			4816, 6680, 6292, 5663, 4816, 5663, 6292, 6680,
+			4095, 5681, 5351, 4816, 4095, 4816, 5351, 5681,
+			4816, 6680, 6292, 5663, 4816, 5663, 6292, 6680,
+			5351, 7422, 6992, 6292, 5351, 6292, 6992, 7422,
+			5681, 7880, 7422, 6680, 5681, 6680, 7422, 7880 };
+
+static __inline__ void IDCT(short *input, short *output) {
+
+
+	void * sc;
+	void * ps;
+
+//	fprintf(stderr, "Addresses: %p, %p, %p, %p\n", SpecialConstants,
+//			&SpecialConstants[0], PreScale, &PreScale[0]);
+	asm ( "
+		lwz     12,0(%5)
+		addi    11,0,16
+		lwz     10,0(%4)
+		mr      %0,10
+		mr      %1,12
+		addi    9,0,32
+		lvx     0,%3,11
+		addi    7,0,48
+		lvx     1,12,11
+		vspltisw        2,0
+		lvx     3,0,10
+		addi    10,0,80
+		lvx     4,%3,9
+		vmhraddshs      5,0,1,2
+		lvx     6,12,9
+		addi    6,0,112
+		lvx     7,%3,7
+		vsplth  8,3,0x2
+		lvx     9,12,7
+		vmhraddshs      10,4,6,2
+		lvx     11,%3,10
+		vsplth  12,3,0x1
+		lvx     13,12,10
+		vsplth  14,3,0x3
+		lvx     15,%3,6
+		vmhraddshs      16,7,9,2
+		lvx     17,0,%3
+		vsplth  18,3,0x5
+		lvx     19,12,6
+		addi    5,0,64
+		lvx     1,0,12
+		vmhraddshs      0,11,13,2
+		addi    8,0,96
+		lvx     6,%3,5
+		lvx     4,12,5
+		vmhraddshs      9,15,19,2
+		vmhraddshs      7,17,1,2
+		lvx     13,%3,8
+		lvx     11,12,8
+		vmhraddshs      19,6,4,2
+		vmhraddshs      15,8,5,2
+		vsplth  1,3,0x0
+		vmhraddshs      17,13,11,2
+		vsplth  4,3,0x4
+		vmhraddshs      6,8,9,5
+		vmhraddshs      11,14,0,16
+		vmhraddshs      13,18,16,0
+		vmhraddshs      3,12,10,2
+		vsubshs 5,15,9
+		vsubshs 0,7,19
+		vsubshs 16,3,17
+		vmhraddshs      15,12,17,10
+		vsubshs 9,5,13
+		vsubshs 3,6,11
+		vaddshs 17,7,19
+		vaddshs 10,0,16
+		vsubshs 19,0,16
+		vsubshs 7,3,9
+		vaddshs 16,3,9
+		vaddshs 0,5,13
+		vmhraddshs      3,1,16,10
+		vaddshs 9,6,11
+		vmhraddshs      5,4,16,10
+		vaddshs 13,17,15
+		vmhraddshs      11,1,7,19
+		vsubshs 6,17,15
+		vmhraddshs      16,4,7,19
+		vaddshs 10,13,9
+		vmrghh  17,11,5
+		vsubshs 15,13,9
+		vmrglh  7,11,5
+		vaddshs 19,6,0
+		vmrghh  13,3,16
+		vsubshs 9,6,0
+		vmrghh  11,19,15
+		vmrghh  5,10,9
+		vmrglh  6,10,9
+		vmrglh  0,3,16
+		vmrglh  9,19,15
+		vmrghh  10,5,17
+		vmrghh  16,13,11
+		vmrglh  3,5,17
+		vmrghh  19,6,7
+		vmrglh  15,6,7
+		vmrglh  5,13,11
+		vmrghh  17,0,9
+		vmrglh  6,0,9
+		vmrglh  7,10,16
+		vmrghh  11,3,5
+		vmhraddshs      13,8,7,2
+		vmrglh  9,3,5
+		vmhraddshs      0,12,11,2
+		vmrglh  5,19,17
+		vmrglh  3,15,6
+		vmhraddshs      2,14,5,9
+		vmhraddshs      14,8,3,7
+		vmrghh  7,10,16
+		vmhraddshs      8,18,9,5
+		vmrghh  16,19,17
+		vmrghh  10,15,6
+		vsubshs 5,13,3
+		vsubshs 9,7,16
+		vsubshs 18,0,10
+		vmhraddshs      17,12,10,11
+		vsubshs 19,5,8
+		vsubshs 6,14,2
+		vaddshs 15,7,16
+		vaddshs 3,9,18
+		vsubshs 13,9,18
+		vsubshs 0,6,19
+		vaddshs 10,6,19
+		vaddshs 11,5,8
+		vmhraddshs	12,1,10,3
+		vaddshs		16,14,2
+		vmhraddshs	7,4,10,3
+		vaddshs		18,15,17
+		vmhraddshs	9,1,0,13
+		vsubshs		6,15,17
+		vmhraddshs	19,4,0,13
+		vaddshs		5,18,16
+		stvx		5,0,%2
+		vsubshs		8,18,16
+		stvx		12,%2,11
+		vaddshs		14,6,11
+		stvx		9,%2,9
+		vsubshs		2,6,11
+		stvx	14,%2,7
+		stvx	2,%2,5
+		stvx	19,%2,10
+		stvx	7,%2,8
+		stvx	8,%2,6
+		"
+		: "=r" (sc), "=r" (ps)
+		: "r" (output), "r" (input), "r" (SpecialConstants), "r" (PreScale)
+		: "cc", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" );
+	/* End asm */
+
+//	fprintf(stderr, "Addresses now: %p, %p\n", sc, ps);
+}
diff -uNr vlc-0.2.82/src/interface/main.c vlc-0.2.82-altivec/src/interface/main.c
--- vlc-0.2.82/src/interface/main.c	Tue Aug  7 12:55:48 2001
+++ vlc-0.2.82-altivec/src/interface/main.c	Sun Aug 26 13:26:16 2001
@@ -1031,6 +1031,7 @@
 {
     volatile int i_capabilities = CPU_CAPABILITY_NONE;
 
+        i_capabilities |= CPU_CAPABILITY_ALTIVEC;
 #if defined( SYS_BEOS )
     i_capabilities |= CPU_CAPABILITY_486
                       | CPU_CAPABILITY_586

Reply to: