Bug#203835: [powerpc] gcc: ICE due to unrecognizable insn when -O is specified
Package: gcc-3.3
Version: 1:3.3.1-0rc2
Priority: serious
Compiling fftw3 3.0.1 on powerpc has failed since gcc was updated to version -0rc2
(essentially the same fftw3 package compiled with -0rc1).
To make the bug easier to reproduce, I created a preprocessed .i file from the
offending source file. Compiling it with optimization (the file compiles when
optimization is turned off) results in:
treacy@voltaire:~/fftw3-3.0.1/dft/simd/codelets$ gcc -O -maltivec -mabi=altivec -c n1fv_9.i
n1fv_9.c: In function `n1fv_9':
n1fv_9.c:128: error: unrecognizable insn:
(insn 2983 1654 1655 2 (nil) (set (reg:V4SF 77 v0)
(const_vector:V4SF [
(const_double:SF 9.3969261646270751953125e-1 [0x0.f08fb2p+0])
(const_double:SF 9.3969261646270751953125e-1 [0x0.f08fb2p+0])
(const_double:SF 9.3969261646270751953125e-1 [0x0.f08fb2p+0])
(const_double:SF 9.3969261646270751953125e-1 [0x0.f08fb2p+0])
])) -1 (nil)
(nil))
n1fv_9.c:128: internal compiler error: in extract_insn, at recog.c:2175
The file n1fv_9.i is attached.
--
James (Jay) Treacy
treacy@debian.org
# 1 "n1fv_9.c"
# 1 "<built-in>"
# 1 "<command line>"
# 1 "n1fv_9.c"
# 24 "n1fv_9.c"
# 1 "../../../dft/codelet-dft.h" 1
# 31 "../../../dft/codelet-dft.h"
# 1 "../../../kernel/ifftw.h" 1
# 27 "../../../kernel/ifftw.h"
# 1 "../../../config.h" 1
# 28 "../../../kernel/ifftw.h" 2
# 1 "/usr/include/stdlib.h" 1 3 4
# 25 "/usr/include/stdlib.h" 3 4
# 1 "/usr/include/features.h" 1 3 4
# 291 "/usr/include/features.h" 3 4
# 1 "/usr/include/sys/cdefs.h" 1 3 4
# 292 "/usr/include/features.h" 2 3 4
# 314 "/usr/include/features.h" 3 4
# 1 "/usr/include/gnu/stubs.h" 1 3 4
# 315 "/usr/include/features.h" 2 3 4
# 26 "/usr/include/stdlib.h" 2 3 4
# 1 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stddef.h" 1 3 4
# 213 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stddef.h" 3 4
typedef unsigned int size_t;
# 325 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stddef.h" 3 4
typedef long int wchar_t;
# 34 "/usr/include/stdlib.h" 2 3 4
# 93 "/usr/include/stdlib.h" 3 4
/* Return type of div(), declared later in this file: two int fields
   named quot and rem. */
typedef struct
{
int quot;
int rem;
} div_t;
/* Return type of ldiv(), declared later in this file: two long int
   fields named quot and rem. */
typedef struct
{
long int quot;
long int rem;
} ldiv_t;
# 137 "/usr/include/stdlib.h" 3 4
extern size_t __ctype_get_mb_cur_max (void) ;
extern double atof (__const char *__nptr) __attribute__ ((__pure__));
extern int atoi (__const char *__nptr) __attribute__ ((__pure__));
extern long int atol (__const char *__nptr) __attribute__ ((__pure__));
__extension__ extern long long int atoll (__const char *__nptr)
__attribute__ ((__pure__));
extern double strtod (__const char *__restrict __nptr,
char **__restrict __endptr) ;
# 174 "/usr/include/stdlib.h" 3 4
extern long int strtol (__const char *__restrict __nptr,
char **__restrict __endptr, int __base) ;
extern unsigned long int strtoul (__const char *__restrict __nptr,
char **__restrict __endptr, int __base)
;
__extension__
extern long long int strtoq (__const char *__restrict __nptr,
char **__restrict __endptr, int __base) ;
__extension__
extern unsigned long long int strtouq (__const char *__restrict __nptr,
char **__restrict __endptr, int __base)
;
__extension__
extern long long int strtoll (__const char *__restrict __nptr,
char **__restrict __endptr, int __base) ;
__extension__
extern unsigned long long int strtoull (__const char *__restrict __nptr,
char **__restrict __endptr, int __base)
;
# 264 "/usr/include/stdlib.h" 3 4
extern double __strtod_internal (__const char *__restrict __nptr,
char **__restrict __endptr, int __group)
;
extern float __strtof_internal (__const char *__restrict __nptr,
char **__restrict __endptr, int __group)
;
extern long double __strtold_internal (__const char *__restrict __nptr,
char **__restrict __endptr,
int __group) ;
extern long int __strtol_internal (__const char *__restrict __nptr,
char **__restrict __endptr,
int __base, int __group) ;
extern unsigned long int __strtoul_internal (__const char *__restrict __nptr,
char **__restrict __endptr,
int __base, int __group) ;
__extension__
extern long long int __strtoll_internal (__const char *__restrict __nptr,
char **__restrict __endptr,
int __base, int __group) ;
__extension__
extern unsigned long long int __strtoull_internal (__const char *
__restrict __nptr,
char **__restrict __endptr,
int __base, int __group)
;
/* Inline definition of strtod: hands both arguments to
   __strtod_internal and fixes that function's __group argument at 0. */
extern __inline double
strtod (__const char *__restrict __nptr, char **__restrict __endptr)
{
  const int group = 0;
  return __strtod_internal (__nptr, __endptr, group);
}
/* Inline definition of strtol: forwards string, end pointer and base to
   __strtol_internal, with the __group argument fixed at 0. */
extern __inline long int
strtol (__const char *__restrict __nptr, char **__restrict __endptr,
        int __base)
{
  const int group = 0;
  return __strtol_internal (__nptr, __endptr, __base, group);
}
/* Inline definition of strtoul: forwards string, end pointer and base
   to __strtoul_internal, with the __group argument fixed at 0. */
extern __inline unsigned long int
strtoul (__const char *__restrict __nptr, char **__restrict __endptr,
         int __base)
{
  const int group = 0;
  return __strtoul_internal (__nptr, __endptr, __base, group);
}
# 343 "/usr/include/stdlib.h" 3 4
/* Inline definition of strtoq: same forwarding as strtoll, i.e. a call
   to __strtoll_internal with the __group argument fixed at 0. */
__extension__ extern __inline long long int
strtoq (__const char *__restrict __nptr, char **__restrict __endptr,
        int __base)
{
  const int group = 0;
  return __strtoll_internal (__nptr, __endptr, __base, group);
}
/* Inline definition of strtouq: same forwarding as strtoull, i.e. a
   call to __strtoull_internal with the __group argument fixed at 0. */
__extension__ extern __inline unsigned long long int
strtouq (__const char *__restrict __nptr, char **__restrict __endptr,
         int __base)
{
  const int group = 0;
  return __strtoull_internal (__nptr, __endptr, __base, group);
}
/* Inline definition of strtoll: forwards string, end pointer and base
   to __strtoll_internal, with the __group argument fixed at 0. */
__extension__ extern __inline long long int
strtoll (__const char *__restrict __nptr, char **__restrict __endptr,
         int __base)
{
  const int group = 0;
  return __strtoll_internal (__nptr, __endptr, __base, group);
}
/* Inline definition of strtoull: forwards string, end pointer and base
   to __strtoull_internal, with the __group argument fixed at 0. */
__extension__ extern __inline unsigned long long int
strtoull (__const char * __restrict __nptr, char **__restrict __endptr,
          int __base)
{
  const int group = 0;
  return __strtoull_internal (__nptr, __endptr, __base, group);
}
/* Inline definition of atof: calls strtod on the string with a null
   end pointer. */
extern __inline double
atof (__const char *__nptr)
{
  char **const no_endptr = (char **) ((void *)0);
  return strtod (__nptr, no_endptr);
}
/* Inline definition of atoi: parses with strtol in base 10 (null end
   pointer) and converts the long result to int. */
extern __inline int
atoi (__const char *__nptr)
{
  const long int parsed = strtol (__nptr, (char **) ((void *)0), 10);
  return (int) parsed;
}
/* Inline definition of atol: calls strtol in base 10 with a null end
   pointer. */
extern __inline long int
atol (__const char *__nptr)
{
  char **const no_endptr = (char **) ((void *)0);
  return strtol (__nptr, no_endptr, 10);
}
/* Inline definition of atoll: calls strtoll in base 10 with a null end
   pointer. */
__extension__ extern __inline long long int
atoll (__const char *__nptr)
{
  char **const no_endptr = (char **) ((void *)0);
  return strtoll (__nptr, no_endptr, 10);
}
# 408 "/usr/include/stdlib.h" 3 4
extern char *l64a (long int __n) ;
extern long int a64l (__const char *__s) __attribute__ ((__pure__));
# 1 "/usr/include/sys/types.h" 1 3 4
# 29 "/usr/include/sys/types.h" 3 4
# 1 "/usr/include/bits/types.h" 1 3 4
# 28 "/usr/include/bits/types.h" 3 4
# 1 "/usr/include/bits/wordsize.h" 1 3 4
# 29 "/usr/include/bits/types.h" 2 3 4
# 1 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stddef.h" 1 3 4
# 32 "/usr/include/bits/types.h" 2 3 4
typedef unsigned char __u_char;
typedef unsigned short int __u_short;
typedef unsigned int __u_int;
typedef unsigned long int __u_long;
typedef signed char __int8_t;
typedef unsigned char __uint8_t;
typedef signed short int __int16_t;
typedef unsigned short int __uint16_t;
typedef signed int __int32_t;
typedef unsigned int __uint32_t;
__extension__ typedef signed long long int __int64_t;
__extension__ typedef unsigned long long int __uint64_t;
__extension__ typedef long long int __quad_t;
__extension__ typedef unsigned long long int __u_quad_t;
# 128 "/usr/include/bits/types.h" 3 4
# 1 "/usr/include/bits/typesizes.h" 1 3 4
# 129 "/usr/include/bits/types.h" 2 3 4
typedef unsigned long long int __dev_t;
typedef unsigned int __uid_t;
typedef unsigned int __gid_t;
typedef unsigned long int __ino_t;
typedef unsigned long long int __ino64_t;
typedef unsigned int __mode_t;
typedef unsigned int __nlink_t;
typedef long int __off_t;
typedef long long int __off64_t;
typedef int __pid_t;
typedef struct { int __val[2]; } __fsid_t;
typedef long int __clock_t;
typedef unsigned long int __rlim_t;
typedef unsigned long long int __rlim64_t;
typedef unsigned int __id_t;
typedef long int __time_t;
typedef unsigned int __useconds_t;
typedef long int __suseconds_t;
typedef int __daddr_t;
typedef long int __swblk_t;
typedef int __key_t;
typedef int __clockid_t;
typedef int __timer_t;
typedef long int __blksize_t;
typedef long int __blkcnt_t;
typedef long long int __blkcnt64_t;
typedef unsigned long int __fsblkcnt_t;
typedef unsigned long long int __fsblkcnt64_t;
typedef unsigned long int __fsfilcnt_t;
typedef unsigned long long int __fsfilcnt64_t;
typedef int __ssize_t;
typedef __off64_t __loff_t;
typedef __quad_t *__qaddr_t;
typedef char *__caddr_t;
typedef int __intptr_t;
typedef unsigned int __socklen_t;
# 32 "/usr/include/sys/types.h" 2 3 4
typedef __u_char u_char;
typedef __u_short u_short;
typedef __u_int u_int;
typedef __u_long u_long;
typedef __quad_t quad_t;
typedef __u_quad_t u_quad_t;
typedef __fsid_t fsid_t;
typedef __loff_t loff_t;
typedef __ino_t ino_t;
# 62 "/usr/include/sys/types.h" 3 4
typedef __dev_t dev_t;
typedef __gid_t gid_t;
typedef __mode_t mode_t;
typedef __nlink_t nlink_t;
typedef __uid_t uid_t;
typedef __off_t off_t;
# 100 "/usr/include/sys/types.h" 3 4
typedef __pid_t pid_t;
typedef __id_t id_t;
typedef __ssize_t ssize_t;
typedef __daddr_t daddr_t;
typedef __caddr_t caddr_t;
typedef __key_t key_t;
# 133 "/usr/include/sys/types.h" 3 4
# 1 "/usr/include/time.h" 1 3 4
# 74 "/usr/include/time.h" 3 4
typedef __time_t time_t;
# 92 "/usr/include/time.h" 3 4
typedef __clockid_t clockid_t;
# 104 "/usr/include/time.h" 3 4
typedef __timer_t timer_t;
# 134 "/usr/include/sys/types.h" 2 3 4
# 147 "/usr/include/sys/types.h" 3 4
# 1 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stddef.h" 1 3 4
# 148 "/usr/include/sys/types.h" 2 3 4
typedef unsigned long int ulong;
typedef unsigned short int ushort;
typedef unsigned int uint;
# 191 "/usr/include/sys/types.h" 3 4
typedef int int8_t __attribute__ ((__mode__ (__QI__)));
typedef int int16_t __attribute__ ((__mode__ (__HI__)));
typedef int int32_t __attribute__ ((__mode__ (__SI__)));
typedef int int64_t __attribute__ ((__mode__ (__DI__)));
typedef unsigned int u_int8_t __attribute__ ((__mode__ (__QI__)));
typedef unsigned int u_int16_t __attribute__ ((__mode__ (__HI__)));
typedef unsigned int u_int32_t __attribute__ ((__mode__ (__SI__)));
typedef unsigned int u_int64_t __attribute__ ((__mode__ (__DI__)));
typedef int register_t __attribute__ ((__mode__ (__word__)));
# 213 "/usr/include/sys/types.h" 3 4
# 1 "/usr/include/endian.h" 1 3 4
# 37 "/usr/include/endian.h" 3 4
# 1 "/usr/include/bits/endian.h" 1 3 4
# 38 "/usr/include/endian.h" 2 3 4
# 214 "/usr/include/sys/types.h" 2 3 4
# 1 "/usr/include/sys/select.h" 1 3 4
# 31 "/usr/include/sys/select.h" 3 4
# 1 "/usr/include/bits/select.h" 1 3 4
# 32 "/usr/include/sys/select.h" 2 3 4
# 1 "/usr/include/bits/sigset.h" 1 3 4
# 23 "/usr/include/bits/sigset.h" 3 4
typedef int __sig_atomic_t;
/* Array of unsigned long words; the element count is 1024 divided by
   the number of bits per word (with 8-bit bytes), so the array holds
   1024 bits in total.  Aliased as sigset_t further down. */
typedef struct
{
unsigned long int __val[(1024 / (8 * sizeof (unsigned long int)))];
} __sigset_t;
# 35 "/usr/include/sys/select.h" 2 3 4
typedef __sigset_t sigset_t;
# 1 "/usr/include/time.h" 1 3 4
# 116 "/usr/include/time.h" 3 4
/* Time value in two parts: tv_sec (a __time_t) and tv_nsec (a long
   int); going by the field names, seconds and nanoseconds. */
struct timespec
{
__time_t tv_sec;
long int tv_nsec;
};
# 45 "/usr/include/sys/select.h" 2 3 4
# 1 "/usr/include/bits/time.h" 1 3 4
# 67 "/usr/include/bits/time.h" 3 4
/* Time value in two parts: tv_sec (a __time_t) and tv_usec (a
   __suseconds_t).  select(), declared below, takes a pointer to one as
   its __timeout parameter. */
struct timeval
{
__time_t tv_sec;
__suseconds_t tv_usec;
};
# 47 "/usr/include/sys/select.h" 2 3 4
typedef __suseconds_t suseconds_t;
typedef long int __fd_mask;
# 67 "/usr/include/sys/select.h" 3 4
/* Array of __fd_mask words sized so that it holds 1024 bits in total
   (with 8-bit bytes).  select(), declared below, takes three pointers
   to this type. */
typedef struct
{
__fd_mask __fds_bits[1024 / (8 * sizeof (__fd_mask))];
} fd_set;
typedef __fd_mask fd_mask;
# 99 "/usr/include/sys/select.h" 3 4
extern int select (int __nfds, fd_set *__restrict __readfds,
fd_set *__restrict __writefds,
fd_set *__restrict __exceptfds,
struct timeval *__restrict __timeout) ;
# 122 "/usr/include/sys/select.h" 3 4
# 217 "/usr/include/sys/types.h" 2 3 4
# 1 "/usr/include/sys/sysmacros.h" 1 3 4
# 220 "/usr/include/sys/types.h" 2 3 4
# 231 "/usr/include/sys/types.h" 3 4
typedef __blkcnt_t blkcnt_t;
typedef __fsblkcnt_t fsblkcnt_t;
typedef __fsfilcnt_t fsfilcnt_t;
# 266 "/usr/include/sys/types.h" 3 4
# 1 "/usr/include/bits/pthreadtypes.h" 1 3 4
# 23 "/usr/include/bits/pthreadtypes.h" 3 4
# 1 "/usr/include/bits/sched.h" 1 3 4
# 83 "/usr/include/bits/sched.h" 3 4
/* Holds a single int, __sched_priority.  Embedded in pthread_attr_t as
   the __schedparam member. */
struct __sched_param
{
int __sched_priority;
};
# 24 "/usr/include/bits/pthreadtypes.h" 2 3 4
typedef int __atomic_lock_t;
/* Lock record with a long __status and an __atomic_lock_t (an int
   typedef) __spinlock.  Embedded in pthread_cond_t and
   pthread_mutex_t below. */
struct _pthread_fastlock
{
long int __status;
__atomic_lock_t __spinlock;
};
typedef struct _pthread_descr_struct *_pthread_descr;
/* Thread attribute object: ints __detachstate, __schedpolicy,
   __inheritsched, __scope and __stackaddr_set, an embedded struct
   __sched_param, the size_t values __guardsize and __stacksize, and the
   pointer __stackaddr. */
typedef struct __pthread_attr_s
{
int __detachstate;
int __schedpolicy;
struct __sched_param __schedparam;
int __inheritsched;
int __scope;
size_t __guardsize;
int __stackaddr_set;
void *__stackaddr;
size_t __stacksize;
} pthread_attr_t;
/* Condition variable object: an embedded _pthread_fastlock (__c_lock)
   and a _pthread_descr pointer (__c_waiting). */
typedef struct
{
struct _pthread_fastlock __c_lock;
_pthread_descr __c_waiting;
} pthread_cond_t;
/* Condition attribute object; its only member is the int __dummy. */
typedef struct
{
int __dummy;
} pthread_condattr_t;
typedef unsigned int pthread_key_t;
/* Mutex object: ints __m_reserved, __m_count and __m_kind, a
   _pthread_descr pointer __m_owner, and an embedded _pthread_fastlock
   (__m_lock). */
typedef struct
{
int __m_reserved;
int __m_count;
_pthread_descr __m_owner;
int __m_kind;
struct _pthread_fastlock __m_lock;
} pthread_mutex_t;
/* Mutex attribute object; its only member is the int __mutexkind. */
typedef struct
{
int __mutexkind;
} pthread_mutexattr_t;
typedef int pthread_once_t;
# 142 "/usr/include/bits/pthreadtypes.h" 3 4
typedef unsigned long int pthread_t;
# 267 "/usr/include/sys/types.h" 2 3 4
# 415 "/usr/include/stdlib.h" 2 3 4
extern long int random (void) ;
extern void srandom (unsigned int __seed) ;
extern char *initstate (unsigned int __seed, char *__statebuf,
size_t __statelen) ;
extern char *setstate (char *__statebuf) ;
/* Caller-supplied state for random_r, srandom_r, initstate_r and
   setstate_r (declared right after this struct): four int32_t pointers
   (fptr, rptr, state, end_ptr) and three ints (rand_type, rand_deg,
   rand_sep). */
struct random_data
{
int32_t *fptr;
int32_t *rptr;
int32_t *state;
int rand_type;
int rand_deg;
int rand_sep;
int32_t *end_ptr;
};
extern int random_r (struct random_data *__restrict __buf,
int32_t *__restrict __result) ;
extern int srandom_r (unsigned int __seed, struct random_data *__buf) ;
extern int initstate_r (unsigned int __seed, char *__restrict __statebuf,
size_t __statelen,
struct random_data *__restrict __buf) ;
extern int setstate_r (char *__restrict __statebuf,
struct random_data *__restrict __buf) ;
extern int rand (void) ;
extern void srand (unsigned int __seed) ;
extern int rand_r (unsigned int *__seed) ;
extern double drand48 (void) ;
extern double erand48 (unsigned short int __xsubi[3]) ;
extern long int lrand48 (void) ;
extern long int nrand48 (unsigned short int __xsubi[3]) ;
extern long int mrand48 (void) ;
extern long int jrand48 (unsigned short int __xsubi[3]) ;
extern void srand48 (long int __seedval) ;
extern unsigned short int *seed48 (unsigned short int __seed16v[3]) ;
extern void lcong48 (unsigned short int __param[7]) ;
/* Caller-supplied state for the *48_r functions declared right after
   this struct: two arrays of three unsigned shorts (__x, __old_x), the
   unsigned shorts __c and __init, and the unsigned long long __a. */
struct drand48_data
{
unsigned short int __x[3];
unsigned short int __old_x[3];
unsigned short int __c;
unsigned short int __init;
unsigned long long int __a;
};
extern int drand48_r (struct drand48_data *__restrict __buffer,
double *__restrict __result) ;
extern int erand48_r (unsigned short int __xsubi[3],
struct drand48_data *__restrict __buffer,
double *__restrict __result) ;
extern int lrand48_r (struct drand48_data *__restrict __buffer,
long int *__restrict __result) ;
extern int nrand48_r (unsigned short int __xsubi[3],
struct drand48_data *__restrict __buffer,
long int *__restrict __result) ;
extern int mrand48_r (struct drand48_data *__restrict __buffer,
long int *__restrict __result) ;
extern int jrand48_r (unsigned short int __xsubi[3],
struct drand48_data *__restrict __buffer,
long int *__restrict __result) ;
extern int srand48_r (long int __seedval, struct drand48_data *__buffer)
;
extern int seed48_r (unsigned short int __seed16v[3],
struct drand48_data *__buffer) ;
extern int lcong48_r (unsigned short int __param[7],
struct drand48_data *__buffer) ;
extern void *malloc (size_t __size) __attribute__ ((__malloc__));
extern void *calloc (size_t __nmemb, size_t __size)
__attribute__ ((__malloc__));
extern void *realloc (void *__ptr, size_t __size) __attribute__ ((__malloc__));
extern void free (void *__ptr) ;
extern void cfree (void *__ptr) ;
# 1 "/usr/include/alloca.h" 1 3 4
# 25 "/usr/include/alloca.h" 3 4
# 1 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stddef.h" 1 3 4
# 26 "/usr/include/alloca.h" 2 3 4
extern void *alloca (size_t __size) ;
# 577 "/usr/include/stdlib.h" 2 3 4
extern void *valloc (size_t __size) __attribute__ ((__malloc__));
# 590 "/usr/include/stdlib.h" 3 4
extern void abort (void) __attribute__ ((__noreturn__));
extern int atexit (void (*__func) (void)) ;
extern int on_exit (void (*__func) (int __status, void *__arg), void *__arg)
;
extern void exit (int __status) __attribute__ ((__noreturn__));
# 622 "/usr/include/stdlib.h" 3 4
extern char *getenv (__const char *__name) ;
extern char *__secure_getenv (__const char *__name) ;
extern int putenv (char *__string) ;
extern int setenv (__const char *__name, __const char *__value, int __replace)
;
extern int unsetenv (__const char *__name) ;
extern int clearenv (void) ;
# 661 "/usr/include/stdlib.h" 3 4
extern char *mktemp (char *__template) ;
extern int mkstemp (char *__template) ;
# 688 "/usr/include/stdlib.h" 3 4
extern char *mkdtemp (char *__template) ;
extern int system (__const char *__command) ;
# 712 "/usr/include/stdlib.h" 3 4
extern char *realpath (__const char *__restrict __name,
char *__restrict __resolved) ;
typedef int (*__compar_fn_t) (__const void *, __const void *);
extern void *bsearch (__const void *__key, __const void *__base,
size_t __nmemb, size_t __size, __compar_fn_t __compar);
extern void qsort (void *__base, size_t __nmemb, size_t __size,
__compar_fn_t __compar);
extern int abs (int __x) __attribute__ ((__const__));
extern long int labs (long int __x) __attribute__ ((__const__));
extern div_t div (int __numer, int __denom)
__attribute__ ((__const__));
extern ldiv_t ldiv (long int __numer, long int __denom)
__attribute__ ((__const__));
# 776 "/usr/include/stdlib.h" 3 4
extern char *ecvt (double __value, int __ndigit, int *__restrict __decpt,
int *__restrict __sign) ;
extern char *fcvt (double __value, int __ndigit, int *__restrict __decpt,
int *__restrict __sign) ;
extern char *gcvt (double __value, int __ndigit, char *__buf) ;
extern char *qecvt (long double __value, int __ndigit,
int *__restrict __decpt, int *__restrict __sign) ;
extern char *qfcvt (long double __value, int __ndigit,
int *__restrict __decpt, int *__restrict __sign) ;
extern char *qgcvt (long double __value, int __ndigit, char *__buf) ;
extern int ecvt_r (double __value, int __ndigit, int *__restrict __decpt,
int *__restrict __sign, char *__restrict __buf,
size_t __len) ;
extern int fcvt_r (double __value, int __ndigit, int *__restrict __decpt,
int *__restrict __sign, char *__restrict __buf,
size_t __len) ;
extern int qecvt_r (long double __value, int __ndigit,
int *__restrict __decpt, int *__restrict __sign,
char *__restrict __buf, size_t __len) ;
extern int qfcvt_r (long double __value, int __ndigit,
int *__restrict __decpt, int *__restrict __sign,
char *__restrict __buf, size_t __len) ;
extern int mblen (__const char *__s, size_t __n) ;
extern int mbtowc (wchar_t *__restrict __pwc,
__const char *__restrict __s, size_t __n) ;
extern int wctomb (char *__s, wchar_t __wchar) ;
extern size_t mbstowcs (wchar_t *__restrict __pwcs,
__const char *__restrict __s, size_t __n) ;
extern size_t wcstombs (char *__restrict __s,
__const wchar_t *__restrict __pwcs, size_t __n)
;
extern int rpmatch (__const char *__response) ;
# 908 "/usr/include/stdlib.h" 3 4
extern int getloadavg (double __loadavg[], int __nelem) ;
# 30 "../../../kernel/ifftw.h" 2
# 1 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stdarg.h" 1 3 4
# 43 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stdarg.h" 3 4
typedef __builtin_va_list __gnuc_va_list;
# 105 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stdarg.h" 3 4
typedef __gnuc_va_list va_list;
# 31 "../../../kernel/ifftw.h" 2
# 1 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stddef.h" 1 3 4
# 151 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stddef.h" 3 4
typedef int ptrdiff_t;
# 32 "../../../kernel/ifftw.h" 2
# 1 "/usr/include/stdint.h" 1 3 4
# 27 "/usr/include/stdint.h" 3 4
# 1 "/usr/include/bits/wchar.h" 1 3 4
# 28 "/usr/include/stdint.h" 2 3 4
# 1 "/usr/include/bits/wordsize.h" 1 3 4
# 29 "/usr/include/stdint.h" 2 3 4
# 49 "/usr/include/stdint.h" 3 4
typedef unsigned char uint8_t;
typedef unsigned short int uint16_t;
typedef unsigned int uint32_t;
__extension__
typedef unsigned long long int uint64_t;
typedef signed char int_least8_t;
typedef short int int_least16_t;
typedef int int_least32_t;
__extension__
typedef long long int int_least64_t;
typedef unsigned char uint_least8_t;
typedef unsigned short int uint_least16_t;
typedef unsigned int uint_least32_t;
__extension__
typedef unsigned long long int uint_least64_t;
typedef signed char int_fast8_t;
typedef int int_fast16_t;
typedef int int_fast32_t;
__extension__
typedef long long int int_fast64_t;
typedef unsigned char uint_fast8_t;
typedef unsigned int uint_fast16_t;
typedef unsigned int uint_fast32_t;
__extension__
typedef unsigned long long int uint_fast64_t;
# 126 "/usr/include/stdint.h" 3 4
typedef int intptr_t;
typedef unsigned int uintptr_t;
# 138 "/usr/include/stdint.h" 3 4
__extension__
typedef long long int intmax_t;
__extension__
typedef unsigned long long int uintmax_t;
# 39 "../../../kernel/ifftw.h" 2
# 1 "/usr/include/inttypes.h" 1 3 4
# 35 "/usr/include/inttypes.h" 3 4
typedef long int __gwchar_t;
# 274 "/usr/include/inttypes.h" 3 4
# 288 "/usr/include/inttypes.h" 3 4
/* Return type of imaxdiv(), declared just below: two long long int
   fields named quot and rem. */
typedef struct
{
long long int quot;
long long int rem;
} imaxdiv_t;
extern intmax_t imaxabs (intmax_t __n) __attribute__ ((__const__));
extern imaxdiv_t imaxdiv (intmax_t __numer, intmax_t __denom)
__attribute__ ((__const__));
extern intmax_t strtoimax (__const char *__restrict __nptr,
char **__restrict __endptr, int __base) ;
extern uintmax_t strtoumax (__const char *__restrict __nptr,
char ** __restrict __endptr, int __base) ;
extern intmax_t wcstoimax (__const __gwchar_t *__restrict __nptr,
__gwchar_t **__restrict __endptr, int __base)
;
extern uintmax_t wcstoumax (__const __gwchar_t *__restrict __nptr,
__gwchar_t ** __restrict __endptr, int __base)
;
# 396 "/usr/include/inttypes.h" 3 4
/* Inline definition of strtoimax: forwards string, end pointer and base
   to __strtoll_internal, with the __group argument fixed at 0. */
extern __inline intmax_t
strtoimax (__const char *__restrict nptr, char **__restrict endptr,
           int base)
{
  const int group = 0;
  return __strtoll_internal (nptr, endptr, base, group);
}
# 414 "/usr/include/inttypes.h" 3 4
/* Inline definition of strtoumax: forwards string, end pointer and base
   to __strtoull_internal, with the __group argument fixed at 0. */
extern __inline uintmax_t
strtoumax (__const char *__restrict nptr, char **__restrict endptr,
           int base)
{
  const int group = 0;
  return __strtoull_internal (nptr, endptr, base, group);
}
__extension__
extern long long int __wcstoll_internal (__const __gwchar_t *
__restrict __nptr,
__gwchar_t **__restrict __endptr,
int __base, int __group) ;
/* Inline definition of wcstoimax: forwards wide string, end pointer and
   base to __wcstoll_internal, with the __group argument fixed at 0. */
extern __inline intmax_t
wcstoimax (__const __gwchar_t *__restrict nptr, __gwchar_t **__restrict endptr,
           int base)
{
  const int group = 0;
  return __wcstoll_internal (nptr, endptr, base, group);
}
__extension__
extern unsigned long long int __wcstoull_internal (__const __gwchar_t *
__restrict __nptr,
__gwchar_t **
__restrict __endptr,
int __base,
int __group) ;
/* Inline definition of wcstoumax: forwards wide string, end pointer and
   base to __wcstoull_internal, with the __group argument fixed at 0. */
extern __inline uintmax_t
wcstoumax (__const __gwchar_t *__restrict nptr, __gwchar_t **__restrict endptr,
           int base)
{
  const int group = 0;
  return __wcstoull_internal (nptr, endptr, base, group);
}
# 43 "../../../kernel/ifftw.h" 2
typedef float R;
# 80 "../../../kernel/ifftw.h"
typedef struct problem_s problem;
typedef struct plan_s plan;
typedef struct solver_s solver;
typedef struct planner_s planner;
typedef struct printer_s printer;
typedef struct scanner_s scanner;
# 157 "../../../kernel/ifftw.h"
extern void fftwf_assertion_failed(const char *s, int line, const char *file);
# 171 "../../../kernel/ifftw.h"
extern void fftwf_debug(const char *format, ...);
/* Tag values numbered consecutively from EVERYTHING (0) to OTHER (11).
   MALLOC_WHAT_LAST comes last and therefore equals 12, the number of
   tags before it.
   NOTE(review): the names suggest categories of allocations; the code
   that uses these tags is not visible here -- confirm. */
enum malloc_tag {
EVERYTHING,
PLANS,
SOLVERS,
PROBLEMS,
BUFFERS,
HASHT,
TENSORS,
PLANNERS,
SLVDESCS,
TWIDDLES,
STRIDES,
OTHER,
MALLOC_WHAT_LAST
};
extern void fftwf_ifree(void *ptr);
extern void fftwf_ifree0(void *ptr);
# 207 "../../../kernel/ifftw.h"
extern void *fftwf_malloc_plain(size_t sz);
# 231 "../../../kernel/ifftw.h"
/* Four double-valued counters.  The fftwf_ops_* functions declared
   right after this type read and write opcnt values, and plan_s,
   kdft_desc_s and ct_desc_s each embed one as their `ops' member.
   NOTE(review): presumably add/mul/fma/other are counts of arithmetic
   operations by kind -- confirm against the fftwf_ops_* definitions. */
typedef struct {
double add;
double mul;
double fma;
double other;
} opcnt;
void fftwf_ops_zero(opcnt *dst);
void fftwf_ops_other(int o, opcnt *dst);
void fftwf_ops_cpy(const opcnt *src, opcnt *dst);
void fftwf_ops_add(const opcnt *a, const opcnt *b, opcnt *dst);
void fftwf_ops_add2(const opcnt *a, opcnt *dst);
void fftwf_ops_madd(int m, const opcnt *a, const opcnt *b, opcnt *dst);
void fftwf_ops_madd2(int m, const opcnt *a, opcnt *dst);
int fftwf_imax(int a, int b);
int fftwf_imin(int a, int b);
int fftwf_iabs(int a);
typedef unsigned long md5uint;
typedef md5uint md5sig[4];
/* State object passed to the fftwf_md5* functions declared below.
   s is an md5sig (four md5uint, i.e. unsigned long, words), c is a
   64-byte buffer and l is an unsigned value.
   NOTE(review): presumably s is the running digest, c the pending input
   block and l a length or fill count -- confirm against the md5 code. */
typedef struct {
md5sig s;
unsigned char c[64];
unsigned l;
} md5;
void fftwf_md5begin(md5 *p);
void fftwf_md5putb(md5 *p, const void *d_, int len);
void fftwf_md5puts(md5 *p, const char *s);
void fftwf_md5putc(md5 *p, unsigned char c);
void fftwf_md5int(md5 *p, int i);
void fftwf_md5unsigned(md5 *p, unsigned i);
void fftwf_md5ptrdiff(md5 *p, ptrdiff_t d);
void fftwf_md5end(md5 *p);
/* Element type of a tensor's dims array: three ints n, is and os.
   NOTE(review): presumably an extent plus input and output strides,
   mirroring the (n, is, os) parameters of fftwf_mktensor_1d -- confirm. */
typedef struct {
int n;
int is;
int os;
} iodim;
/* An int rnk followed by an iodim array declared with one element.
   NOTE(review): dims[1] looks like the pre-C99 trailing-array idiom,
   with the real number of entries given by rnk; the allocation lives in
   fftwf_mktensor, which is only declared here -- confirm. */
typedef struct {
int rnk;
iodim dims[1];
} tensor;
# 322 "../../../kernel/ifftw.h"
typedef enum { INPLACE_IS, INPLACE_OS } inplace_kind;
tensor *fftwf_mktensor(int rnk);
tensor *fftwf_mktensor_0d(void);
tensor *fftwf_mktensor_1d(int n, int is, int os);
tensor *fftwf_mktensor_2d(int n0, int is0, int os0,
int n1, int is1, int os1);
int fftwf_tensor_sz(const tensor *sz);
void fftwf_tensor_md5(md5 *p, const tensor *t);
int fftwf_tensor_max_index(const tensor *sz);
int fftwf_tensor_min_istride(const tensor *sz);
int fftwf_tensor_min_ostride(const tensor *sz);
int fftwf_tensor_min_stride(const tensor *sz);
int fftwf_tensor_inplace_strides(const tensor *sz);
int fftwf_tensor_inplace_strides2(const tensor *a, const tensor *b);
tensor *fftwf_tensor_copy(const tensor *sz);
int fftwf_tensor_kosherp(const tensor *x);
tensor *fftwf_tensor_copy_inplace(const tensor *sz, inplace_kind k);
tensor *fftwf_tensor_copy_except(const tensor *sz, int except_dim);
tensor *fftwf_tensor_copy_sub(const tensor *sz, int start_dim, int rnk);
tensor *fftwf_tensor_compress(const tensor *sz);
tensor *fftwf_tensor_compress_contiguous(const tensor *sz);
tensor *fftwf_tensor_append(const tensor *a, const tensor *b);
void fftwf_tensor_split(const tensor *sz, tensor **a, int a_rnk, tensor **b);
int fftwf_tensor_tornk1(const tensor *t, int *n, int *is, int *os);
void fftwf_tensor_destroy(tensor *sz);
void fftwf_tensor_destroy2(tensor *a, tensor *b);
void fftwf_tensor_destroy4(tensor *a, tensor *b, tensor *c, tensor *d);
void fftwf_tensor_print(const tensor *sz, printer *p);
int fftwf_dimcmp(const iodim *a, const iodim *b);
/* Table of function pointers for a problem: hash (also takes an
   md5 *), zero, print (also takes a printer *) and destroy.  struct
   problem_s holds a pointer to one of these tables. */
typedef struct {
void (*hash) (const problem *ego, md5 *p);
void (*zero) (const problem *ego);
void (*print) (problem *ego, printer *p);
void (*destroy) (problem *ego);
} problem_adt;
/* As declared here, a problem consists solely of a pointer to its
   constant problem_adt function table. */
struct problem_s {
const problem_adt *adt;
};
problem *fftwf_mkproblem(size_t sz, const problem_adt *adt);
void fftwf_problem_destroy(problem *ego);
/* Printer object: four function pointers (variadic print, va_list-based
   vprint, single-character putchr, cleanup) followed by the ints indent
   and indent_incr.  fftwf_mkprinter, declared below, takes putchr and
   cleanup callbacks of these same types. */
struct printer_s {
void (*print)(printer *p, const char *format, ...);
void (*vprint)(printer *p, const char *format, va_list ap);
void (*putchr)(printer *p, char c);
void (*cleanup)(printer *p);
int indent;
int indent_incr;
};
printer *fftwf_mkprinter(size_t size,
void (*putchr)(printer *p, char c),
void (*cleanup)(printer *p));
void fftwf_printer_destroy(printer *p);
/* Scanner object: function pointers scan (variadic), vscan (va_list)
   and getchr, plus the int ungotc.  fftwf_mkscanner, declared below,
   takes a getchr callback of the same type.
   NOTE(review): ungotc presumably holds a pushed-back character --
   confirm against the scanner implementation. */
struct scanner_s {
int (*scan)(scanner *sc, const char *format, ...);
int (*vscan)(scanner *sc, const char *format, va_list ap);
int (*getchr)(scanner *sc);
int ungotc;
};
scanner *fftwf_mkscanner(size_t size, int (*getchr)(scanner *sc));
void fftwf_scanner_destroy(scanner *sc);
/* Table of function pointers for a plan: solve (takes the plan and a
   problem), awake (takes an int flag), print (takes a printer *) and
   destroy.  struct plan_s holds a pointer to one of these tables. */
typedef struct {
void (*solve)(const plan *ego, const problem *p);
void (*awake)(plan *ego, int flag);
void (*print)(const plan *ego, printer *p);
void (*destroy)(plan *ego);
} plan_adt;
/* Plan header: pointer to the plan_adt function table, the int
   awake_refcnt, an embedded opcnt (ops) and the double pcost.
   NOTE(review): awake_refcnt presumably pairs with fftwf_plan_awake and
   pcost is presumably a cost estimate -- confirm. */
struct plan_s {
const plan_adt *adt;
int awake_refcnt;
opcnt ops;
double pcost;
};
plan *fftwf_mkplan(size_t size, const plan_adt *adt);
void fftwf_plan_destroy_internal(plan *ego);
void fftwf_plan_awake(plan *ego, int flag);
void fftwf_plan_null_destroy(plan *ego);
/* Function table for a solver; its single entry, mkplan, takes the
   solver, a problem and a planner and returns a plan pointer. */
typedef struct {
plan *(*mkplan)(const solver *ego, const problem *p, planner *plnr);
} solver_adt;
/* Solver header: pointer to the solver_adt function table plus the int
   refcnt.
   NOTE(review): refcnt is presumably maintained by fftwf_solver_use /
   fftwf_solver_destroy, which are only declared here -- confirm. */
struct solver_s {
const solver_adt *adt;
int refcnt;
};
solver *fftwf_mksolver(size_t size, const solver_adt *adt);
void fftwf_solver_use(solver *ego);
void fftwf_solver_destroy(solver *ego);
void fftwf_solver_register(planner *plnr, solver *s);
/* Solver descriptor: a solver pointer, the string reg_nam, the unsigned
   nam_hash and the int reg_id.  struct planner_s keeps a pointer to
   these in its slvdescs member.
   NOTE(review): nam_hash is presumably a hash of reg_nam -- confirm. */
typedef struct slvdesc_s {
solver *slv;
const char *reg_nam;
unsigned nam_hash;
int reg_id;
} slvdesc;
typedef struct solution_s solution;
/* Four single-bit values (0x1, 0x2, 0x4, 0x8) that can be OR-ed
   together.
   NOTE(review): presumably the bits stored in planner_s.problem_flags
   -- confirm against the planner code. */
enum {
DESTROY_INPUT = 0x1,
NO_SIMD = 0x2,
CONSERVE_MEMORY = 0x4,
NO_DHT_R2HC = 0x8
};
/* Single-bit flags from 0x1 (BELIEVE_PCOST) up to 0x1000 (ESTIMATE),
   then BLESSING (0x4000) and H_VALID (0x8000); 0x2000 is not assigned.
   IMPATIENCE_FLAGS evaluates to 0x1FFF, i.e. ESTIMATE together with
   every lower bit, and NONIMPATIENCE_FLAGS is an alias for BLESSING.
   NOTE(review): presumably the bits stored in planner_s.planner_flags
   -- confirm against the planner code. */
enum {
BELIEVE_PCOST = 0x1,
DFT_R2HC_ICKY = 0x2,
NONTHREADED_ICKY = 0x4,
NO_BUFFERING = 0x8,
NO_EXHAUSTIVE = 0x10,
NO_INDIRECT_OP = 0x20,
NO_LARGE_GENERIC = 0x40,
NO_RANK_SPLITS = 0x80,
NO_VRANK_SPLITS = 0x100,
NO_VRECURSE = 0x200,
NO_UGLY = 0x400,
NO_SEARCH = 0x800,
ESTIMATE = 0x1000,
IMPATIENCE_FLAGS = (ESTIMATE | (ESTIMATE - 1)),
BLESSING = 0x4000,
H_VALID = 0x8000,
NONIMPATIENCE_FLAGS = BLESSING
};
# 507 "../../../kernel/ifftw.h"
typedef enum { FORGET_ACCURSED, FORGET_EVERYTHING } amnesia;
/* Table of function pointers for a planner: register_solver, mkplan
   (returns a plan pointer for a problem), forget (takes an amnesia
   value), exprt (takes a printer *) and imprt (takes a scanner *,
   returns int).  struct planner_s holds a pointer to one of these. */
typedef struct {
void (*register_solver)(planner *ego, solver *s);
plan *(*mkplan)(planner *ego, problem *p);
void (*forget)(planner *ego, amnesia a);
void (*exprt)(planner *ego, printer *p);
int (*imprt)(planner *ego, scanner *sc);
} planner_adt;
/* Planner state.  In declaration order: the planner_adt function table;
   a hook callback taking (plan *, const problem *, int optimalp); the
   slvdesc pointer with nslvdesc and slvdescsiz; cur_reg_nam and
   cur_reg_id; the solutions pointer with hashsiz and nelem; nthr;
   problem_flags and planner_flags; then nplan, pcost, epcost, nprob and
   a group of int members (lookup*, insert*, nrehash).
   NOTE(review): the trailing ints look like statistics counters and
   solutions like a hash table of hashsiz slots -- confirm against the
   planner implementation. */
struct planner_s {
const planner_adt *adt;
void (*hook)(plan *pln, const problem *p, int optimalp);
slvdesc *slvdescs;
unsigned nslvdesc, slvdescsiz;
const char *cur_reg_nam;
int cur_reg_id;
solution *solutions;
unsigned hashsiz, nelem;
int nthr;
unsigned problem_flags;
unsigned short planner_flags;
int nplan;
double pcost, epcost;
int nprob;
int lookup, succ_lookup, lookup_iter;
int insert, insert_iter, insert_unknown;
int nrehash;
};
planner *fftwf_mkplanner(void);
void fftwf_planner_destroy(planner *ego);
# 575 "../../../kernel/ifftw.h"
plan *fftwf_mkplan_d(planner *ego, problem *p);
# 593 "../../../kernel/ifftw.h"
typedef int stride;
# 607 "../../../kernel/ifftw.h"
/* One solvtab entry: a function pointer reg that takes a planner * and
   a string reg_nam.  solvtab is an array of these entries with
   unspecified length; fftwf_solvtab_exec, declared next, takes one
   together with a planner. */
struct solvtab_s { void (*reg)(planner *); const char *reg_nam; };
typedef struct solvtab_s solvtab[];
void fftwf_solvtab_exec(const solvtab tbl, planner *p);
int fftwf_pickdim(int which_dim, const int *buddies, int nbuddies,
const tensor *sz, int oop, int *dp);
/* Six constants with the consecutive values 0 through 5.
   NOTE(review): presumably the opcodes stored in tw_instr.op --
   confirm against the twiddle code. */
enum { TW_COS = 0, TW_SIN = 1, TW_TAN = 2, TW_NEXT = 3,
TW_FULL = 4, TW_GENERIC = 5 };
/* Twiddle instruction record: two unsigned chars (op, v) and a short
   (i).  Pointers to const tw_instr are taken by fftwf_mktwiddle,
   fftwf_twiddle_length and fftwf_twiddle_awake and stored in twid and
   ct_desc_s.
   NOTE(review): op presumably holds one of the TW_* constants --
   confirm. */
typedef struct {
unsigned char op;
unsigned char v;
short i;
} tw_instr;
/* Twiddle record: a pointer W to R (float) values, the ints n, r and m,
   the int refcnt, a pointer to const tw_instr, and cdr, a pointer to
   another twid.  fftwf_mktwiddle, declared below, takes instr, n, r and
   m parameters with matching names.
   NOTE(review): cdr presumably chains the records into a list --
   confirm. */
typedef struct twid_s {
R *W;
int n, r, m;
int refcnt;
const tw_instr *instr;
struct twid_s *cdr;
} twid;
void fftwf_mktwiddle(twid **pp, const tw_instr *instr, int n, int r, int m);
void fftwf_twiddle_destroy(twid **pp);
int fftwf_twiddle_length(int r, const tw_instr *p);
void fftwf_twiddle_awake(int flg, twid **pp,
const tw_instr *instr, int n, int r, int m);
typedef double trigreal;
extern trigreal fftwf_cos2pi(int, int);
extern trigreal fftwf_sin2pi(int, int);
extern trigreal fftwf_tan2pi(int, int);
extern trigreal fftwf_sincos(trigreal m, trigreal n, int sinp);
# 676 "../../../kernel/ifftw.h"
int fftwf_power_mod(int n, int m, int p);
int fftwf_find_generator(int p);
int fftwf_first_divisor(int n);
int fftwf_is_prime(int n);
int fftwf_next_prime(int n);
typedef struct rader_tls rader_tl;
void fftwf_rader_tl_insert(int k1, int k2, int k3, R *W, rader_tl **tl);
R *fftwf_rader_tl_find(int k1, int k2, int k3, rader_tl *t);
void fftwf_rader_tl_delete(R *W, rader_tl **tl);
void fftwf_transpose(R *A, int n, int m, int d, int N, R *buf);
void fftwf_transpose_slow(R *a, int nx, int ny, int N,
char *move, int move_size, R *buf);
int fftwf_transposable(const iodim *a, const iodim *b,
int vl, int s, R *ri, R *ii);
void fftwf_transpose_dims(const iodim *a, const iodim *b,
int *n, int *m, int *d, int *nd, int *md);
int fftwf_transpose_simplep(const iodim *a, const iodim *b, int vl, int s,
R *ri, R *ii);
int fftwf_transpose_slowp(const iodim *a, const iodim *b, int N);
void fftwf_null_awake(plan *ego, int awake);
int fftwf_square(int x);
double fftwf_measure_execution_time(plan *pln, const problem *p);
int fftwf_alignment_of(R *p);
unsigned fftwf_hash(const char *s);
int fftwf_compute_nbuf(int n, int vl, int nbuf, int maxbufsz);
int fftwf_ct_uglyp(int min_n, int n, int r);
R *fftwf_taint(R *p, int s);
R *fftwf_join_taint(R *p1, R *p2);
# 744 "../../../kernel/ifftw.h"
typedef R E;
# 763 "../../../kernel/ifftw.h"
/*
 * Multiply-add: returns a * b + c.
 * The product and the sum are kept in separate statements, as in the
 * original, so each intermediate is stored in an E before further use.
 */
static __inline__ E FMA(E a, E b, E c)
{
    E product = a * b;
    E sum = product + c;
    return sum;
}
/*
 * Multiply-subtract: returns a * b - c.
 * The product and the difference are computed in separate statements.
 */
static __inline__ E FMS(E a, E b, E c)
{
    E product = a * b;
    E difference = product - c;
    return difference;
}
/*
 * Negated multiply-add: returns -(a * b + c).
 * The product is formed first, then the negated sum.
 */
static __inline__ E FNMA(E a, E b, E c)
{
    E product = a * b;
    E negated = - (product + c);
    return negated;
}
/*
 * Negated multiply-subtract: returns -(a * b - c).
 * The product is formed first, then the negated difference.
 */
static __inline__ E FNMS(E a, E b, E c)
{
    E product = a * b;
    E negated = - (product - c);
    return negated;
}
# 32 "../../../dft/codelet-dft.h" 2
/* Forward declaration; the body is given by struct kdft_desc_s. */
typedef struct kdft_desc_s kdft_desc;
/*
 * Genus record referenced from a kdft descriptor: a callback plus an
 * integer vl.
 */
typedef struct {
/*
 * Callback receiving the descriptor, the four data pointers, the
 * strides/counts and the planner. Returns int; presumably nonzero when
 * the codelet may be used for these arguments -- confirm.
 */
int (*okp)(
const kdft_desc *desc,
const R *ri, const R *ii, const R *ro, const R *io,
int is, int os, int vl, int ivs, int ovs,
const planner *plnr);
int vl; /* presumably the vector length handled per call -- confirm */
} kdft_genus;
/*
 * Descriptor passed to fftwf_kdft_register together with a kdft codelet.
 * The n1fv_9 descriptor in this file sets sz = 9, nam = "n1fv_9" and the
 * four trailing stride fields to 0.
 */
struct kdft_desc_s {
int sz; /* transform size (9 for n1fv_9) */
const char *nam; /* codelet name string */
opcnt ops; /* operation counts; opcnt is declared outside this chunk */
const kdft_genus *genus; /* genus this codelet belongs to */
int is;
int os;
int ivs;
int ovs;
};
/*
 * Codelet function pointer type: two const input pointers, two output
 * pointers, input/output strides, and three integer counts/strides.
 */
typedef void (*kdft) (const R *ri, const R *ii, R *ro, R *io,
stride is, stride os, int vl, int ivs, int ovs);
/* Registers a kdft codelet and its descriptor with a planner. */
void fftwf_kdft_register(planner *p, kdft codelet, const kdft_desc *desc);
/* Forward declaration; the body is given by struct ct_desc_s. */
typedef struct ct_desc_s ct_desc;
/*
 * Genus record referenced from a ct descriptor: a callback plus an
 * integer vl.
 */
typedef struct {
/*
 * Callback receiving the descriptor, two data pointers, integer
 * stride/count arguments and the planner. Returns int; presumably an
 * applicability test -- confirm.
 */
int (*okp)(
const struct ct_desc_s *desc,
const R *rio, const R *iio, int ios, int vs, int m, int dist,
const planner *plnr);
int vl;
} ct_genus;
/*
 * Descriptor passed to the fftwf_kdft_dit/difsq/dif register functions
 * together with a codelet.
 */
struct ct_desc_s {
int radix;
const char *nam; /* codelet name string */
const tw_instr *tw; /* twiddle instruction list */
opcnt ops; /* operation counts; opcnt is declared outside this chunk */
const ct_genus *genus; /* genus this codelet belongs to */
int s1;
int s2;
int dist;
};
/*
 * Twiddle codelet function pointer types and their registration
 * functions. Each codelet takes in/out arrays plus a const R *W and
 * returns a const R *.
 */
typedef const R *(*kdft_dit) (R *rioarray, R *iioarray, const R *W,
stride ios, int m, int dist);
void fftwf_kdft_dit_register(planner *p, kdft_dit codelet, const ct_desc *desc);
typedef const R *(*kdft_difsq) (R *rioarray, R *iioarray,
const R *W, stride is, stride vs,
int m, int dist);
void fftwf_kdft_difsq_register(planner *p, kdft_difsq codelet,
const ct_desc *desc);
typedef const R *(*kdft_dif) (R *rioarray, R *iioarray, const R *W,
stride ios, int m, int dist);
void fftwf_kdft_dif_register(planner *p, kdft_dif codelet, const ct_desc *desc);
/* Solver tables defined in other translation units. */
extern const solvtab fftwf_solvtab_dft_standard;
extern const solvtab fftwf_solvtab_dft_inplace;
extern const solvtab fftwf_solvtab_dft_simd;
# 25 "n1fv_9.c" 2
# 40 "n1fv_9.c"
# 1 "../../../dft/simd/n1f.h" 1
# 22 "../../../dft/simd/n1f.h"
# 1 "../../../simd/simd.h" 1
# 30 "../../../simd/simd.h"
# 1 "../../../simd/simd-altivec.h" 1
# 41 "../../../simd/simd-altivec.h"
/*
 * Float-vector wrapper around __builtin_altivec_vperm_4si.
 * a1 and a2 are reinterpreted as signed-int vectors and the selector a3
 * as a signed-char vector; the builtin's result is cast back to a float
 * vector.
 */
static inline __attribute__((vector_size(16))) float
vec_perm (__attribute__((vector_size(16))) float a1,
          __attribute__((vector_size(16))) float a2,
          __attribute__((vector_size(16))) unsigned char a3)
{
    __attribute__((vector_size(16))) signed int first =
        (__attribute__((vector_size(16))) signed int) a1;
    __attribute__((vector_size(16))) signed int second =
        (__attribute__((vector_size(16))) signed int) a2;
    __attribute__((vector_size(16))) signed char selector =
        (__attribute__((vector_size(16))) signed char) a3;

    return (__attribute__((vector_size(16))) float)
        __builtin_altivec_vperm_4si (first, second, selector);
}
/*
 * Float-vector wrapper around __builtin_altivec_vsel_4si.
 * All three operands (including the unsigned-int mask a3) are
 * reinterpreted as signed-int vectors; the result is cast back to a
 * float vector.
 */
static inline __attribute__((vector_size(16))) float
vec_sel (__attribute__((vector_size(16))) float a1,
         __attribute__((vector_size(16))) float a2,
         __attribute__((vector_size(16))) unsigned int a3)
{
    __attribute__((vector_size(16))) signed int first =
        (__attribute__((vector_size(16))) signed int) a1;
    __attribute__((vector_size(16))) signed int second =
        (__attribute__((vector_size(16))) signed int) a2;
    __attribute__((vector_size(16))) signed int mask =
        (__attribute__((vector_size(16))) signed int) a3;

    return (__attribute__((vector_size(16))) float)
        __builtin_altivec_vsel_4si (first, second, mask);
}
/*
 * Float-vector wrapper around __builtin_altivec_lvx: passes the offset
 * a1 and the address a2 (as void *) and casts the loaded vector to a
 * float vector.
 */
static inline __attribute__((vector_size(16))) float
vec_ld (int a1, float *a2)
{
    void *address = (void *) a2;

    return (__attribute__((vector_size(16))) float)
        __builtin_altivec_lvx (a1, address);
}
/*
 * Wrapper around __builtin_altivec_stvewx: a1 is reinterpreted as a
 * signed-int vector and passed with offset a2 and address a3.
 */
static inline void
vec_ste (__attribute__((vector_size(16))) float a1, int a2, void *a3)
{
    __attribute__((vector_size(16))) signed int value =
        (__attribute__((vector_size(16))) signed int) a1;
    void *address = (void *) a3;

    __builtin_altivec_stvewx (value, a2, address);
}
/*
 * Wrapper around __builtin_altivec_stvx: a1 is reinterpreted as a
 * signed-int vector and passed with offset a2 and address a3.
 */
static inline void
vec_st (__attribute__((vector_size(16))) float a1, int a2, void *a3)
{
    __attribute__((vector_size(16))) signed int value =
        (__attribute__((vector_size(16))) signed int) a1;
    void *address = (void *) a3;

    __builtin_altivec_stvx (value, a2, address);
}
/*
 * Float-vector wrapper around __builtin_altivec_vmrghw: both operands
 * are reinterpreted as signed-int vectors and the result is cast back
 * to a float vector.
 */
static inline __attribute__((vector_size(16))) float
vec_mergeh (__attribute__((vector_size(16))) float a1,
            __attribute__((vector_size(16))) float a2)
{
    __attribute__((vector_size(16))) signed int first =
        (__attribute__((vector_size(16))) signed int) a1;
    __attribute__((vector_size(16))) signed int second =
        (__attribute__((vector_size(16))) signed int) a2;

    return (__attribute__((vector_size(16))) float)
        __builtin_altivec_vmrghw (first, second);
}
static inline __attribute__((vector_size(16))) float
vec_mergel (__attribute__((vector_size(16))) float a1, __attribute__((vector_size(16))) float a2)
{
__attribute__((vector_size(16))) float ret;
__asm__("vmrglw %0, %1, %2" : "=v"(ret) : "v"(a1), "v"(a2));
return ret;
}
/*
 * Float-vector wrapper around __builtin_altivec_vxor: both operands are
 * reinterpreted as signed-int vectors and the result is cast back to a
 * float vector.
 */
static inline __attribute__((vector_size(16))) float
vec_xor (__attribute__((vector_size(16))) float a1,
         __attribute__((vector_size(16))) float a2)
{
    __attribute__((vector_size(16))) signed int first =
        (__attribute__((vector_size(16))) signed int) a1;
    __attribute__((vector_size(16))) signed int second =
        (__attribute__((vector_size(16))) signed int) a2;

    return (__attribute__((vector_size(16))) float)
        __builtin_altivec_vxor (first, second);
}
# 103 "../../../simd/simd-altivec.h"
/* V: 16-byte vector of float, the SIMD value type used by the helpers in this file. */
typedef __attribute__((vector_size(16))) float V;
/*
 * Vector multiply implemented as a multiply-add (vmaddfp) of a and b
 * with an addend whose four lanes are all -0.0.
 * NOTE(review): negative zero is presumably chosen so the addition does
 * not change the sign of a zero product -- confirm.
 */
static inline V VMUL(V a, V b)
{
    const V negative_zero =
        (__attribute__((vector_size(16))) float){-0.0, -0.0, -0.0, -0.0};

    return __builtin_altivec_vmaddfp(a, b, negative_zero);
}
/* Returns VMUL(a, b) minus c, using the vsubfp builtin for the subtraction. */
static inline V VFMS(V a, V b, V c)
{
    V product = VMUL(a, b);

    return __builtin_altivec_vsubfp(product, c);
}
/* Select mask passed to vec_sel by LD; its value is defined in another file. */
extern const __attribute__((vector_size(16))) unsigned int fftwf_altivec_ld_selmsk;
/*
 * Loads one vector from x with vec_ld at offset 0.
 * ivs and aligned_like are accepted but intentionally unused.
 */
static inline V LDA(const R *x, int ivs, const R *aligned_like)
{
    R *source = (R *)x;

    (void)ivs;
    (void)aligned_like;
    return vec_ld(0, source);
}
static inline V LD(const R *x, int ivs, const R *aligned_like)
{
int fivs = 4 * ivs;
__attribute__((vector_size(16))) unsigned char ml = ((__attribute__((vector_size(16))) unsigned char) __builtin_altivec_lvsr ((fivs + 8), ((R *)aligned_like)));
__attribute__((vector_size(16))) unsigned char mh = ((__attribute__((vector_size(16))) unsigned char) __builtin_altivec_lvsl ((0), ((R *)aligned_like)));
__attribute__((vector_size(16))) unsigned char msk =
(__attribute__((vector_size(16))) unsigned char)vec_sel((V)mh, (V)ml, fftwf_altivec_ld_selmsk);
return vec_perm(vec_ld(0, (R *)x), vec_ld(fivs, (R *)x), msk);
}
/*
 * Permutes v with the lvsr(0, aligned_like) mask, then issues two
 * vec_ste stores of the permuted vector to x at offsets 0 and 4.
 */
static inline void STH(R *x, V v, const R *aligned_like)
{
    __attribute__((vector_size(16))) unsigned char rotate_mask =
        ((__attribute__((vector_size(16))) unsigned char)
             __builtin_altivec_lvsr ((0), ((R *)aligned_like)));
    V rotated = vec_perm(v, v, rotate_mask);

    vec_ste(rotated, 0, x);
    vec_ste(rotated, 4, x);
}
/*
 * Permutes v with the lvsr(4 * ovs + 8, aligned_like) mask, then issues
 * two vec_ste stores of the permuted vector to x at offsets 4 * ovs and
 * 4 * ovs + 4.
 */
static inline void STL(R *x, V v, int ovs, const R *aligned_like)
{
    int byte_stride = 4 * ovs;
    __attribute__((vector_size(16))) unsigned char rotate_mask =
        ((__attribute__((vector_size(16))) unsigned char)
             __builtin_altivec_lvsr ((byte_stride + 8), ((R *)aligned_like)));
    V rotated = vec_perm(v, v, rotate_mask);

    vec_ste(rotated, byte_stride, x);
    vec_ste(rotated, 4 + byte_stride, x);
}
/*
 * Stores the whole vector v to x with vec_st at offset 0.
 * ovs and aligned_like exist only so the signature matches ST; they are
 * explicitly discarded (as LDA does for its unused parameters) so that
 * unused-parameter warnings stay quiet.
 */
static inline void STA(R *x, V v, int ovs, const R *aligned_like)
{
    (void)ovs;
    (void)aligned_like;
    vec_st(v, 0, x);
}
/*
 * Stores v in two parts: STH writes at x (offsets 0 and 4), then STL
 * writes at the offsets derived from ovs (4 * ovs and 4 * ovs + 4).
 */
static inline void ST(R *x, V v, int ovs, const R *aligned_like)
{
    STH(x, v, aligned_like);
    STL(x, v, ovs, aligned_like);
}
/* Permute pattern used by FLIP_RI; its value is defined in another file. */
extern const __attribute__((vector_size(16))) unsigned int fftwf_altivec_flipri_perm;
/*
 * Permutes x with itself using fftwf_altivec_flipri_perm reinterpreted
 * as a byte vector.
 * NOTE(review): presumably swaps the real and imaginary lanes; the
 * pattern's value is not visible here -- confirm.
 */
static inline V FLIP_RI(V x)
{
    const __attribute__((vector_size(16))) unsigned char pattern =
        (const __attribute__((vector_size(16))) unsigned char)fftwf_altivec_flipri_perm;

    return vec_perm(x, x, pattern);
}
/*
 * Constants defined in another file: chsr_msk is XORed into a vector by
 * CHS_R; chsr_sgn is the multiplier used by VFMAI and VFNMSI.
 */
extern const __attribute__((vector_size(16))) float fftwf_altivec_chsr_msk;
extern const __attribute__((vector_size(16))) float fftwf_altivec_chsr_sgn;
/*
 * XORs x with fftwf_altivec_chsr_msk.
 * NOTE(review): presumably flips the sign bit of selected lanes; the
 * mask value is not visible here -- confirm.
 */
static inline V CHS_R(V x)
{
    const V mask = fftwf_altivec_chsr_msk;

    return vec_xor(x, mask);
}
/*
 * Applies FLIP_RI to x and then CHS_R to the result.
 * NOTE(review): presumably multiplication of packed complex values by
 * i -- confirm against the constants used by FLIP_RI and CHS_R.
 */
static inline V VBYI(V x)
{
    V flipped = FLIP_RI(x);

    return CHS_R(flipped);
}
/*
 * Returns vmaddfp(FLIP_RI(b), fftwf_altivec_chsr_sgn, c), i.e. the
 * flipped b times the sign constant, plus c.
 */
static inline V VFMAI(V b, V c)
{
    V flipped = FLIP_RI(b);

    return __builtin_altivec_vmaddfp(flipped, fftwf_altivec_chsr_sgn, c);
}
/*
 * Returns vnmsubfp(FLIP_RI(b), fftwf_altivec_chsr_sgn, c): the same
 * operands as VFMAI, fed to the negative multiply-subtract builtin.
 */
static inline V VFNMSI(V b, V c)
{
    V flipped = FLIP_RI(b);

    return __builtin_altivec_vnmsubfp(flipped, fftwf_altivec_chsr_sgn, c);
}
/*
 * Combines sr with the twiddle vector read from t.
 * The twiddle vector is merged with itself by vec_mergeh and vec_mergel;
 * the result is vmaddfp(mergel, VBYI(sr), VMUL(mergeh, sr)).
 * t is read through a const V pointer, so it must be suitably aligned
 * for a vector load.
 */
static inline V BYTW(const R *t, V sr)
{
    V rotated = VBYI(sr);
    V twiddle = *(const V *)t;
    V merged_high = vec_mergeh(twiddle, twiddle);
    V merged_low = vec_mergel(twiddle, twiddle);
    V high_product = VMUL(merged_high, sr);

    return __builtin_altivec_vmaddfp(merged_low, rotated, high_product);
}
/*
 * Same operand preparation as BYTW, but the final step uses the negative
 * multiply-subtract builtin:
 * vnmsubfp(mergel, VBYI(sr), VMUL(mergeh, sr)).
 * t is read through a const V pointer, so it must be suitably aligned
 * for a vector load.
 */
static inline V BYTWJ(const R *t, V sr)
{
    V rotated = VBYI(sr);
    V twiddle = *(const V *)t;
    V merged_high = vec_mergeh(twiddle, twiddle);
    V merged_low = vec_mergel(twiddle, twiddle);
    V high_product = VMUL(merged_high, sr);

    return __builtin_altivec_vnmsubfp(merged_low, rotated, high_product);
}
# 31 "../../../simd/simd.h" 2
# 23 "../../../dft/simd/n1f.h" 2
/* Genus referenced by the n1fv_9 descriptor; defined in another file. */
extern const kdft_genus fftwf_dft_n1fsimd_genus;
# 41 "n1fv_9.c" 2
/*
 * n1fv_9: SIMD codelet body with the kdft signature.
 *
 * Each loop iteration loads nine vectors from xi at element offsets
 * is*0 .. is*8, combines them with the AltiVec add/sub/multiply-add
 * builtins and the KP* constants, and stores nine vectors to xo at
 * element offsets os*0 .. os*8.
 *
 * The loop starts at i = v and subtracts 2 per iteration until i <= 0,
 * advancing xi by 2*ivs and xo by 2*ovs each time.
 *
 * Only ri and ro are read (as xi and xo); ii and io are not referenced
 * in this body.
 *
 * The aligned_like argument of every LD/ST is the index-0 element for
 * even indices and the index-1 element for odd indices.
 *
 * NOTE(review): presumably a forward complex DFT of size 9 on
 * interleaved data, per the name and the descriptor registered with
 * it -- confirm against the generator.
 */
static void n1fv_9(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs)
{
/* Each KP constant holds one literal replicated in all four lanes. */
const V KP342020143 = (__attribute__((vector_size(16))) float){+0.342020143325668733044099614682259580763083368, +0.342020143325668733044099614682259580763083368, +0.342020143325668733044099614682259580763083368, +0.342020143325668733044099614682259580763083368};
const V KP813797681 = (__attribute__((vector_size(16))) float){+0.813797681349373692844693217248393223289101568, +0.813797681349373692844693217248393223289101568, +0.813797681349373692844693217248393223289101568, +0.813797681349373692844693217248393223289101568};
const V KP939692620 = (__attribute__((vector_size(16))) float){+0.939692620785908384054109277324731469936208134, +0.939692620785908384054109277324731469936208134, +0.939692620785908384054109277324731469936208134, +0.939692620785908384054109277324731469936208134};
const V KP296198132 = (__attribute__((vector_size(16))) float){+0.296198132726023843175338011893050938967728390, +0.296198132726023843175338011893050938967728390, +0.296198132726023843175338011893050938967728390, +0.296198132726023843175338011893050938967728390};
const V KP642787609 = (__attribute__((vector_size(16))) float){+0.642787609686539326322643409907263432907559884, +0.642787609686539326322643409907263432907559884, +0.642787609686539326322643409907263432907559884, +0.642787609686539326322643409907263432907559884};
const V KP663413948 = (__attribute__((vector_size(16))) float){+0.663413948168938396205421319635891297216863310, +0.663413948168938396205421319635891297216863310, +0.663413948168938396205421319635891297216863310, +0.663413948168938396205421319635891297216863310};
const V KP556670399 = (__attribute__((vector_size(16))) float){+0.556670399226419366452912952047023132968291906, +0.556670399226419366452912952047023132968291906, +0.556670399226419366452912952047023132968291906, +0.556670399226419366452912952047023132968291906};
const V KP766044443 = (__attribute__((vector_size(16))) float){+0.766044443118978035202392650555416673935832457, +0.766044443118978035202392650555416673935832457, +0.766044443118978035202392650555416673935832457, +0.766044443118978035202392650555416673935832457};
const V KP984807753 = (__attribute__((vector_size(16))) float){+0.984807753012208059366743024589523013670643252, +0.984807753012208059366743024589523013670643252, +0.984807753012208059366743024589523013670643252, +0.984807753012208059366743024589523013670643252};
const V KP150383733 = (__attribute__((vector_size(16))) float){+0.150383733180435296639271897612501926072238258, +0.150383733180435296639271897612501926072238258, +0.150383733180435296639271897612501926072238258, +0.150383733180435296639271897612501926072238258};
const V KP852868531 = (__attribute__((vector_size(16))) float){+0.852868531952443209628250963940074071936020296, +0.852868531952443209628250963940074071936020296, +0.852868531952443209628250963940074071936020296, +0.852868531952443209628250963940074071936020296};
const V KP173648177 = (__attribute__((vector_size(16))) float){+0.173648177666930348851716626769314796000375677, +0.173648177666930348851716626769314796000375677, +0.173648177666930348851716626769314796000375677, +0.173648177666930348851716626769314796000375677};
const V KP500000000 = (__attribute__((vector_size(16))) float){+0.500000000000000000000000000000000000000000000, +0.500000000000000000000000000000000000000000000, +0.500000000000000000000000000000000000000000000, +0.500000000000000000000000000000000000000000000};
const V KP866025403 = (__attribute__((vector_size(16))) float){+0.866025403784438646763723170752936183471402627, +0.866025403784438646763723170752936183471402627, +0.866025403784438646763723170752936183471402627, +0.866025403784438646763723170752936183471402627};
int i;
const R *xi;
R *xo;
xi = ri;
xo = ro;
;
/* Two units of v are consumed per iteration. */
for (i = v; i > 0; i = i - 2, xi = xi + (2 * ivs), xo = xo + (2 * ovs)) {
V T5, Ts, Tj, To, Tf, Tn, Tp, Tu, Tl, Ta, Tk, Tm, Tt;
{
/* Inputs 0, 3 and 6. */
V T1, T2, T3, T4;
T1 = LD(&(xi[0]), ivs, &(xi[0]));
T2 = LD(&(xi[(is * 3)]), ivs, &(xi[(is * 1)]));
T3 = LD(&(xi[(is * 6)]), ivs, &(xi[0]));
T4 = __builtin_altivec_vaddfp(T2, T3);
T5 = __builtin_altivec_vaddfp(T1, T4);
Ts = VMUL(KP866025403, __builtin_altivec_vsubfp(T3, T2));
Tj = __builtin_altivec_vnmsubfp(KP500000000, T4, T1);
}
{
/* Inputs 2, 5 and 8. */
V Tb, Te, Tc, Td;
Tb = LD(&(xi[(is * 2)]), ivs, &(xi[0]));
Tc = LD(&(xi[(is * 5)]), ivs, &(xi[(is * 1)]));
Td = LD(&(xi[(is * 8)]), ivs, &(xi[0]));
Te = __builtin_altivec_vaddfp(Tc, Td);
To = __builtin_altivec_vsubfp(Td, Tc);
Tf = __builtin_altivec_vaddfp(Tb, Te);
Tn = __builtin_altivec_vnmsubfp(KP500000000, Te, Tb);
Tp = __builtin_altivec_vmaddfp(KP173648177, Tn, VMUL(KP852868531, To));
Tu = __builtin_altivec_vnmsubfp(KP984807753, Tn, VMUL(KP150383733, To));
}
{
/* Inputs 1, 4 and 7. */
V T6, T9, T7, T8;
T6 = LD(&(xi[(is * 1)]), ivs, &(xi[(is * 1)]));
T7 = LD(&(xi[(is * 4)]), ivs, &(xi[0]));
T8 = LD(&(xi[(is * 7)]), ivs, &(xi[(is * 1)]));
T9 = __builtin_altivec_vaddfp(T7, T8);
Tl = __builtin_altivec_vsubfp(T8, T7);
Ta = __builtin_altivec_vaddfp(T6, T9);
Tk = __builtin_altivec_vnmsubfp(KP500000000, T9, T6);
Tm = __builtin_altivec_vmaddfp(KP766044443, Tk, VMUL(KP556670399, Tl));
Tt = __builtin_altivec_vnmsubfp(KP642787609, Tk, VMUL(KP663413948, Tl));
}
{
/* Outputs 0, 3, 6, then 7 and 2. */
V Ti, Tg, Th, Tz, TA;
Ti = VBYI(VMUL(KP866025403, __builtin_altivec_vsubfp(Tf, Ta)));
Tg = __builtin_altivec_vaddfp(Ta, Tf);
Th = __builtin_altivec_vnmsubfp(KP500000000, Tg, T5);
ST(&(xo[0]), __builtin_altivec_vaddfp(T5, Tg), ovs, &(xo[0]));
ST(&(xo[(os * 3)]), __builtin_altivec_vaddfp(Th, Ti), ovs, &(xo[(os * 1)]));
ST(&(xo[(os * 6)]), __builtin_altivec_vsubfp(Th, Ti), ovs, &(xo[0]));
Tz = __builtin_altivec_vmaddfp(KP173648177, Tk, __builtin_altivec_vnmsubfp(KP296198132, To, __builtin_altivec_vnmsubfp(KP939692620, Tn, __builtin_altivec_vnmsubfp(KP852868531, Tl, Tj))));
TA = VBYI(__builtin_altivec_vsubfp(__builtin_altivec_vnmsubfp(KP342020143, Tn, __builtin_altivec_vnmsubfp(KP150383733, Tl, __builtin_altivec_vnmsubfp(KP984807753, Tk, VMUL(KP813797681, To)))), Ts));
ST(&(xo[(os * 7)]), __builtin_altivec_vsubfp(Tz, TA), ovs, &(xo[(os * 1)]));
ST(&(xo[(os * 2)]), __builtin_altivec_vaddfp(Tz, TA), ovs, &(xo[0]));
{
/* Outputs 8, 4, 1 and 5. */
V Tr, Tx, Tw, Ty, Tq, Tv;
Tq = __builtin_altivec_vaddfp(Tm, Tp);
Tr = __builtin_altivec_vaddfp(Tj, Tq);
Tx = __builtin_altivec_vmaddfp(KP866025403, __builtin_altivec_vsubfp(Tt, Tu), __builtin_altivec_vnmsubfp(KP500000000, Tq, Tj));
Tv = __builtin_altivec_vaddfp(Tt, Tu);
Tw = VBYI(__builtin_altivec_vaddfp(Ts, Tv));
Ty = VBYI(__builtin_altivec_vaddfp(Ts, __builtin_altivec_vnmsubfp(KP500000000, Tv, VMUL(KP866025403, __builtin_altivec_vsubfp(Tp, Tm)))));
ST(&(xo[(os * 8)]), __builtin_altivec_vsubfp(Tr, Tw), ovs, &(xo[0]));
ST(&(xo[(os * 4)]), __builtin_altivec_vaddfp(Tx, Ty), ovs, &(xo[0]));
ST(&(xo[(os * 1)]), __builtin_altivec_vaddfp(Tw, Tr), ovs, &(xo[(os * 1)]));
ST(&(xo[(os * 5)]), __builtin_altivec_vsubfp(Tx, Ty), ovs, &(xo[(os * 1)]));
}
}
}
;
}
/*
 * Descriptor for n1fv_9, in struct kdft_desc_s field order: sz = 9,
 * nam = "n1fv_9", ops = {30, 10, 16, 0}, genus =
 * &fftwf_dft_n1fsimd_genus, and is/os/ivs/ovs all 0.
 */
static const kdft_desc desc = { 9, "n1fv_9", {30, 10, 16, 0}, &fftwf_dft_n1fsimd_genus, 0, 0, 0, 0 };
/*
 * Public entry point for this codelet: registers n1fv_9 and its
 * descriptor with planner p via fftwf_kdft_register.
 */
void fftwf_codelet_n1fv_9 (planner *p)
{
    fftwf_kdft_register (p, n1fv_9, &desc);
}
Reply to: