[patch] SSE on amd64, other config hax

Nathan (Jasper) Myers ncm at codesourcery.com
Wed Dec 21 05:04:38 UTC 2005


The patch below makes FFTW3 use SSE/SSE2 on x86-64, and cleans up some
config-file and minor build details.  OK?

Nathan Myers
ncm

Index: ChangeLog
===================================================================
RCS file: /home/cvs/Repository/vpp/ChangeLog,v
retrieving revision 1.344
diff -u -p -r1.344 ChangeLog
--- ChangeLog	21 Dec 2005 04:04:36 -0000	1.344
+++ ChangeLog	21 Dec 2005 05:00:45 -0000
@@ -1,3 +1,11 @@
+2005-12-20  Nathan Myers  <ncm at codesourcery.com>
+
+	* configure.ac, vendor/fftw/simd/sse.c, vendor/fftw/simd/sse2.c:
+	  enable using SSE/SSE2 on x86-64.
+	* vendor/GNUmakefile.inc.in: improve build status reports. 
+	* configure.ac, GNUmakefile.in: rearrange -I, -L so compiler will
+	  find internal includes & libs first, installed second.
+
 2005-12-20  Stefan Seefeld  <stefan at codesourcery.com>
 
 	* synopsis.py.in: Additional code not yet part of the last (0.8) release.
Index: GNUmakefile.in
===================================================================
RCS file: /home/cvs/Repository/vpp/GNUmakefile.in,v
retrieving revision 1.32
diff -u -p -r1.32 GNUmakefile.in
--- GNUmakefile.in	20 Dec 2005 17:10:34 -0000	1.32
+++ GNUmakefile.in	21 Dec 2005 05:00:45 -0000
@@ -58,13 +58,13 @@ CXXINCLUDES := -I src -I $(srcdir)/src
 # C++ macro definitions.
 CXXDEFS :=
 # C++ preprocessor flags.
-CXXCPPFLAGS := $(CXXINCLUDES) $(CXXDEFS) @CPPFLAGS@ @INT_CPPFLAGS@
+CXXCPPFLAGS := $(CXXINCLUDES) $(CXXDEFS) @INT_CPPFLAGS@ @CPPFLAGS@
 # C++ compilation flags.
 CXXFLAGS := $(CXXCPPFLAGS) @CXXFLAGS@
 # The extension for executable programs.
 EXEEXT := @EXEEXT@
 # Linker flags.
-LDFLAGS := @LDFLAGS@ @INT_LDFLAGS@
+LDFLAGS := @INT_LDFLAGS@ @LDFLAGS@
 # Libraries to link to.
 LIBS := @LIBS@
 # The extension for object files.
Index: configure.ac
===================================================================
RCS file: /home/cvs/Repository/vpp/configure.ac,v
retrieving revision 1.60
diff -u -p -r1.60 configure.ac
--- configure.ac	20 Dec 2005 18:50:29 -0000	1.60
+++ configure.ac	21 Dec 2005 05:00:45 -0000
@@ -456,11 +456,9 @@ if test "$build_fftw3" != "no"; then
     fftw3_d_simd=
     fftw3_l_simd=
     case "$host_cpu" in
-      (ia32|i686)        fftw3_f_simd="--enable-sse"
+      (ia32|i686|x86_64) fftw3_f_simd="--enable-sse"
 	                 fftw3_d_simd="--enable-sse2" 
 	                 ;;
-      (x86_64)           fftw3_d_simd=""
-	                 ;;
       (ppc*)             fftw3_f_simd="--enable-altivec" ;;
     esac
     AC_MSG_NOTICE([fftw3 config options: $fftw3_opts $fftw3_simd.])
@@ -523,11 +521,11 @@ if test "$build_fftw3" != "no"; then
   # fail).  Instead we add them to LATE_LIBS, which gets added to
   # LIBS just before AC_OUTPUT.
 
-  LATE_LIBS="$LATE_LIBS $FFTW3_LIBS"
-  INT_CPPFLAGS="$INT_CPPFLAGS -I$curdir/vendor/fftw/include"
-  INT_LDFLAGS="$INT_LDFLAGS -L$curdir/vendor/fftw/lib"
-  CPPFLAGS="$CPPFLAGS -I$includedir/fftw3"
-  LDFLAGS="$LDFLAGS -L$libdir/fftw3"
+  LATE_LIBS="$FFTW3_LIBS $LATE_LIBS"
+  INT_CPPFLAGS="-I$curdir/vendor/fftw/include $INT_CPPFLAGS"
+  INT_LDFLAGS="-L$curdir/vendor/fftw/lib $INT_LDFLAGS"
+  CPPFLAGS="-I$includedir/fftw3 $CPPFLAGS"
+  LDFLAGS="-L$libdir/fftw3 $LDFLAGS"
 fi
 
 if test "$enable_fftw2" != "no" ; then
@@ -1083,12 +1081,12 @@ if test "$with_lapack" != "no"; then
 	# fail).  Instead we add them to LATE_LIBS, which gets added to
 	# LIBS just before AC_OUTPUT.
 
-        LATE_LIBS="$LATE_LIBS -lcsl_lapack -lcsl_cblas -lcsl_f77blas -lcsl_atlas $use_g2c"
+        LATE_LIBS="-lcsl_lapack -lcsl_cblas -lcsl_f77blas -lcsl_atlas $use_g2c $LATE_LIBS"
 
-	INT_CPPFLAGS="$INT_CPPFLAGS -I$my_abs_top_srcdir/vendor/atlas/include"
-	INT_LDFLAGS="$INT_LDFLAGS -L$curdir/vendor/atlas/lib"
-        CPPFLAGS="$keep_CPPFLAGS -I$includedir/atlas"
-        LDFLAGS="$keep_LDFLAGS -L$libdir/atlas"
+	INT_CPPFLAGS="-I$my_abs_top_srcdir/vendor/atlas/include $INT_CPPFLAGS"
+	INT_LDFLAGS="-L$curdir/vendor/atlas/lib $INT_LDFLAGS"
+        CPPFLAGS="-I$includedir/atlas $keep_CPPFLAGS"
+        LDFLAGS="-L$libdir/atlas $keep_LDFLAGS"
         LIBS="$keep_LIBS"
         lapack_use_ilaenv=0
         cblas_style="1"	# use cblas.h
@@ -1231,7 +1229,7 @@ AC_CHECK_PROGS(XEP, xep)
 AC_PROG_INSTALL
 
 # "Late" variables
-LIBS="$LIBS $LATE_LIBS"
+LIBS="$LATE_LIBS $LIBS"
 AC_SUBST(INT_LDFLAGS)
 AC_SUBST(INT_CPPFLAGS)
 
Index: vendor/GNUmakefile.inc.in
===================================================================
RCS file: /home/cvs/Repository/vpp/vendor/GNUmakefile.inc.in,v
retrieving revision 1.3
diff -u -p -r1.3 GNUmakefile.inc.in
--- vendor/GNUmakefile.inc.in	20 Dec 2005 18:50:30 -0000	1.3
+++ vendor/GNUmakefile.inc.in	21 Dec 2005 05:00:45 -0000
@@ -42,11 +42,11 @@ all:: $(vendor_LIBS)
 libs:: $(vendor_LIBS)
 
 $(vendor_ATLAS_LIBS):
-	@echo "Building ATLAS (atlas.build.log)"
+	@echo "Building ATLAS (see atlas.build.log)"
 	@$(MAKE) -C vendor/atlas build > atlas.build.log 2>&1
 
 $(vendor_REF_LAPACK):
-	@echo "Building LAPACK (lapack.build.log)"
+	@echo "Building LAPACK (see lapack.build.log)"
 	@$(MAKE) -C vendor/lapack/SRC all > lapack.build.log 2>&1
 
 $(vendor_USE_LAPACK): $(vendor_PRE_LAPACK) $(vendor_REF_LAPACK)
@@ -57,11 +57,11 @@ $(vendor_USE_LAPACK): $(vendor_PRE_LAPAC
 	rm -rf vendor/atlas/lib/tmp
 
 clean::
-	@echo "Cleaning ATLAS (atlas.clean.log)"
+	@echo "Cleaning ATLAS (see atlas.clean.log)"
 	@$(MAKE) -C vendor/atlas clean > atlas.clean.log 2>&1
 
 install::
-	@echo "Installing ATLAS (atlas.install.log)"
+	@echo "Installing ATLAS (see atlas.install.log)"
 	# @$(MAKE) -C vendor/atlas installinstall > atlas.install.log 2>&1
 	$(INSTALL) -d $(libdir)/atlas
 	$(INSTALL_DATA) vendor/atlas/lib/libcsl_atlas.a   $(libdir)/atlas
@@ -84,28 +84,28 @@ all:: $(vendor_FFTW_LIBS)
 libs:: $(vendor_FFTW_LIBS)
 
 vendor/fftw3f/libfftw3f-csl.a:
-	@echo "Building FFTW float (fftw-f.build.log)"
+	@echo "Building FFTW float (see fftw-f.build.log)"
 	@$(MAKE) -C vendor/fftw3f > fftw-f.build.log 2>&1
 	mv vendor/fftw3f/.libs/libfftw3f.a vendor/fftw3f/libfftw3f-csl.a
 
 vendor/fftw3/libfftw3-csl.a:
-	@echo "Building FFTW double (fftw-d.build.log)"
+	@echo "Building FFTW double (see fftw-d.build.log)"
 	@$(MAKE) -C vendor/fftw3 > fftw-d.build.log 2>&1
 	mv vendor/fftw3/.libs/libfftw3.a vendor/fftw3/libfftw3-csl.a
 
 vendor/fftw3l/libfftw3l-csl.a:
-	@echo "Building FFTW double (fftw-l.build.log)"
+	@echo "Building FFTW long double (see fftw-l.build.log)"
 	@$(MAKE) -C vendor/fftw3l > fftw-l.build.log 2>&1
 	mv vendor/fftw3l/.libs/libfftw3l.a vendor/fftw3l/libfftw3l-csl.a
 
 clean::
-	@echo "Cleaning FFTW (fftw.clean.log)"
+	@echo "Cleaning FFTW (see fftw.clean.log)"
 	@$(MAKE) -C vendor/fftw3f clean  > fftw.clean.log 2>&1
 	@$(MAKE) -C vendor/fftw3  clean >> fftw.clean.log 2>&1
 	@$(MAKE) -C vendor/fftw3l clean >> fftw.clean.log 2>&1
 
 install::
-	@echo "Installing FFTW (fftw.install.log)"
+	@echo "Installing FFTW (see fftw.install.log)"
 	$(INSTALL) -d $(libdir)/fftw3
 	$(INSTALL_DATA) vendor/fftw3f/libfftw3f-csl.a  $(libdir)/fftw3
 	$(INSTALL_DATA) vendor/fftw3/libfftw3-csl.a    $(libdir)/fftw3
Index: vendor/fftw/simd/sse.c
===================================================================
RCS file: /home/cvs/Repository/fftw/simd/sse.c,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 sse.c
--- vendor/fftw/simd/sse.c	1 Dec 2005 10:33:03 -0000	1.1.1.1
+++ vendor/fftw/simd/sse.c	21 Dec 2005 05:00:46 -0000
@@ -40,6 +40,13 @@ static inline int cpuid_edx(int op)
           pop ebx
      }
      return ret;
+#elif defined(__x86_64__)
+     int rax, rcx, rdx;
+
+     __asm__("pushq %%rbx\n\tcpuid\n\tpopq %%rbx"
+	     : "=a" (rax), "=c" (rcx), "=d" (rdx)
+	     : "a" (op));
+     return rdx;
 #else
      int eax, ecx, edx;
 
Index: vendor/fftw/simd/sse2.c
===================================================================
RCS file: /home/cvs/Repository/fftw/simd/sse2.c,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 sse2.c
--- vendor/fftw/simd/sse2.c	1 Dec 2005 10:33:03 -0000	1.1.1.1
+++ vendor/fftw/simd/sse2.c	21 Dec 2005 05:00:46 -0000
@@ -40,6 +40,13 @@ static inline int cpuid_edx(int op)
           pop ebx
      }
      return ret;
+#elif defined(__x86_64__)
+     int rax, rcx, rdx;
+
+     __asm__("pushq %%rbx\n\tcpuid\n\tpopq %%rbx"
+	     : "=a" (rax), "=c" (rcx), "=d" (rdx)
+	     : "a" (op));
+     return rdx;
 #else
      int eax, ecx, edx;
 



More information about the vsipl++ mailing list