[PATCH] Fir<> IPP cleanup

Wed Oct 26 00:05:27 UTC 2005

I have checked in the patch below.

Fir<> now uses IPP for the types and modes that IPP supports, and native
C++ code otherwise.  Before, if IPP was turned on, Fir<> would only support
the types IPP supports -- e.g., not long double or int.  The patch also
avoids exposing user code to Intel-header definitions.

Nathan Myers
ncm

Index: ChangeLog
===================================================================
RCS file: /home/cvs/Repository/vpp/ChangeLog,v
retrieving revision 1.296
diff -u -p -r1.296 ChangeLog

--- ChangeLog	24 Oct 2005 13:25:30 -0000	1.296
+++ ChangeLog	25 Oct 2005 23:50:18 -0000
@@ -1,3 +1,10 @@
+2005-10-25  Nathan Myers  <ncm at codesourcery.com>
+
+	* src/vsip/impl/ipp.cpp, src/vsip/impl/signal-fir.hpp:
+	  Use native C++ FIR code for all types and modes not supported 
+	  by IPP FIR.  Confine Intel ipp*.h includes to ipp.cpp where
+	  users' code will not be exposed to them.
+
 2005-10-24  Nathan Myers  <ncm at codesourcery.com>
 
 	* configure.ac: fix help for "--enable-profile-timer".
Index: src/vsip/impl/ipp.cpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/impl/ipp.cpp,v
retrieving revision 1.5
diff -u -p -r1.5 ipp.cpp
--- src/vsip/impl/ipp.cpp	21 Sep 2005 09:38:59 -0000	1.5
+++ src/vsip/impl/ipp.cpp	25 Oct 2005 23:50:18 -0000
@@ -10,7 +10,13 @@
   Included Files
 ***********************************************************************/
 
-#include "ipp.hpp"
+#include <vsip/impl/acconfig.hpp>
+
+#if defined(VSIP_IMPL_HAVE_IPP)
+
+#include <vsip/math.hpp>
+#include <vsip/signal.hpp>
+#include <vsip/impl/ipp.hpp>
 #include <ipps.h>
 
 /***********************************************************************
@@ -192,7 +198,74 @@ void conv(double* coeff, length_type coe
   ippsConv_64f(coeff, coeff_size, in, in_size, out);
 }
 
+//
+// FIR support
+//
+
+template
+<
+  typename T, typename IppT,
+  IppStatus (&ippFirF)(IppT const*,IppT*,int,IppT const*,int,IppT*,int*),
+  IppStatus (&ippFirDecF)(
+    IppT const*,IppT*,int,IppT const*,int,int,int,int,int,IppT*)
+>
+struct Ipp_fir_base
+{
+  typedef Ipp_fir_base  base_type;
+
+  inline static void
+  run(
+    T const* xin, T* xout, vsip::length_type outsize,
+    T const* xkernel, vsip::length_type ksize,  
+    T* xstate, vsip::length_type* xstate_ix, vsip::length_type dec)
+  {
+    IppT const* const  in = reinterpret_cast<IppT const*>(xin);
+    IppT* const  out = reinterpret_cast<IppT*>(xout);
+    IppT const* const  kernel = reinterpret_cast<IppT const*>(xkernel);
+    IppT* const  state = reinterpret_cast<IppT*>(xstate);
+    int state_ix = *xstate_ix;
+    IppStatus stat = (dec == 1) ?
+      ippFirF(in, out, outsize, kernel, ksize, state, &state_ix) :
+      ippFirDecF(in, out, outsize, kernel, ksize, 1, 0, dec, 0, state);
+    assert(stat == ippStsNoErr);
+    *xstate_ix = state_ix;
+  }
+};
+
+template <typename T> struct Ipp_fir;
+
+template<> struct Ipp_fir<float> : Ipp_fir_base<
+  float,Ipp32f,ippsFIR_Direct_32f,ippsFIRMR_Direct_32f> { };
+
+template<> struct Ipp_fir<double> : Ipp_fir_base<
+  double,Ipp64f,ippsFIR_Direct_64f,ippsFIRMR_Direct_64f> {};
+
+template<> struct Ipp_fir<std::complex<float> > : Ipp_fir_base<
+  std::complex<float>,Ipp32fc,ippsFIR_Direct_32fc,ippsFIRMR_Direct_32fc> {};
+
+template<> struct Ipp_fir<std::complex<double> > : Ipp_fir_base<
+  std::complex<double>,Ipp64fc,ippsFIR_Direct_64fc,ippsFIRMR_Direct_64fc> {};
+
+template <typename T>
+void
+Ipp_fir_driver<T>::run_fir(
+      T const* xin, T* xout, vsip::length_type outsize,
+      T const* xkernel, vsip::length_type ksize,  
+      T* xstate, vsip::length_type* xstate_ix, vsip::length_type dec)
+{
+  Ipp_fir<T>::run(
+    xin, xout, outsize, xkernel, ksize, xstate, xstate_ix, dec);   
+}
+
+// instantiate the specialized IPP FIR drivers here, along with what they use.
+
+template struct Ipp_fir_driver<float>;
+template struct Ipp_fir_driver<double>;
+template struct Ipp_fir_driver<std::complex<float> >;
+template struct Ipp_fir_driver<std::complex<double> >;
+
 } // namespace vsip::impl::ipp
 } // namespace vsip::impl
 } // namespace vsip
 
+#endif /* VSIP_IMPL_HAVE_IPP */
Index: src/vsip/impl/signal-fir.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/impl/signal-fir.hpp,v
retrieving revision 1.4
diff -u -p -r1.4 signal-fir.hpp
--- src/vsip/impl/signal-fir.hpp	24 Oct 2005 13:25:30 -0000	1.4
+++ src/vsip/impl/signal-fir.hpp	25 Oct 2005 23:50:18 -0000
@@ -19,13 +19,6 @@
 #include <vsip/impl/global_map.hpp>
 #include <vsip/impl/profile.hpp>
 
-#if VSIP_IMPL_HAVE_IPP
-#include <vsip/impl/ipp.hpp>
-#include <ipps.h>
-#endif
-
-#include <new>
-
 namespace vsip
 {
 
@@ -48,49 +41,54 @@ struct Fir_aligned
     block_type;
 };
 
-#if VSIP_IMPL_HAVE_IPP
-
-template
-<
-  typename T, typename IppT,
-  IppStatus (&ippFirF)(IppT const*,IppT*,int,IppT const*,int,IppT*,int*),
-  IppStatus (&ippFirDecF)(
-    IppT const*,IppT*,int,IppT const*,int,int,int,int,int,IppT*)
->
-struct Ipp_fir_driver_base
+template <typename T>
+struct Fir_driver
 {
+  static const bool  reverse_kernel = true;
+  static const bool  use_native = true;
+  static const bool  mismatch_ok = true;
+
+  // code that calls this should be elided by the optimizer.
   static void
   run_fir(
-    T const* xin, T* xout, vsip::length_type outsize,
-    T const* xkernel, vsip::length_type ksize,  
-    T* xstate, vsip::length_type* xstate_ix, vsip::length_type dec)
-  {
-    IppT const* const  in = reinterpret_cast<IppT const*>(xin);
-    IppT* const  out = reinterpret_cast<IppT*>(xout);
-    IppT const* const  kernel = reinterpret_cast<IppT const*>(xkernel);
-    IppT* const  state = reinterpret_cast<IppT*>(xstate);
-    int state_ix = *xstate_ix;
-    IppStatus stat = (dec == 1) ?
-      ippFirF(in, out, outsize, kernel, ksize, state, &state_ix) :
-      ippFirDecF(in, out, outsize, kernel, ksize, 1, 0, dec, 0, state);
-    assert(stat == ippStsNoErr);
-    *xstate_ix = state_ix;
-  }
+      T const* xin, T* xout, vsip::length_type outsize,
+      T const* xkernel, vsip::length_type ksize,  
+      T* xstate, vsip::length_type* xstate_ix, vsip::length_type dec)
+    { assert(false); }
 };
 
-template<typename T> struct Ipp_fir_driver;
+#if VSIP_IMPL_HAVE_IPP
 
-template < > struct Ipp_fir_driver<float> : Ipp_fir_driver_base<
-  float,Ipp32f,ippsFIR_Direct_32f,ippsFIRMR_Direct_32f> { };
+namespace ipp
+{
+
+template <typename T>
+struct Ipp_fir_driver
+{
+  static const bool  reverse_kernel = false;
+  static const bool  use_native = false;
+  static const bool  mismatch_ok = false;
+
+  // same API as in Fir_driver, but implemented in src/vsip/impl/ipp.cpp
+  static void
+  run_fir(
+      T const* xin, T* xout, vsip::length_type outsize,
+      T const* xkernel, vsip::length_type ksize,  
+      T* xstate, vsip::length_type* xstate_ix, vsip::length_type dec);
+};
 
-template<> struct Ipp_fir_driver<double> : Ipp_fir_driver_base<
-  double,Ipp64f,ippsFIR_Direct_64f,ippsFIRMR_Direct_64f> {};
+} // namespace vsip::impl::ipp
 
-template<> struct Ipp_fir_driver<std::complex<float> > : Ipp_fir_driver_base<
-  std::complex<float>,Ipp32fc,ippsFIR_Direct_32fc,ippsFIRMR_Direct_32fc> {};
+// use IPP specialization for certain T:
 
-template<> struct Ipp_fir_driver<std::complex<double> > : Ipp_fir_driver_base<
-  std::complex<double>,Ipp64fc,ippsFIR_Direct_64fc,ippsFIRMR_Direct_64fc> {};
+template<> struct Fir_driver<float>
+  : ipp::Ipp_fir_driver<float> {};
+template<> struct Fir_driver<double>
+  : ipp::Ipp_fir_driver<double> {};
+template<> struct Fir_driver<std::complex<float> >
+  : ipp::Ipp_fir_driver<std::complex<float> > {};
+template<> struct Fir_driver<std::complex<double> >
+  : ipp::Ipp_fir_driver<std::complex<double> > {};
 
 #endif // VSIP_IMPL_HAVE_IPP
 
@@ -147,18 +145,10 @@ public:
     // must be after asserts because of division
     this->output_size_ = (input_size + decimation - 1) / decimation;
 
-#if VSIP_IMPL_HAVE_IPP
     // use IPP only if decimation is a factor of input size.
-    if (this->output_size_ * decimation == this->input_size_)
-    {
-      // IPP doesn't want it reversed.
-      this->kernel_(vsip::Domain<1>(kernel.size())) = kernel;
-      if (symV != vsip::nonsym)
-	this->kernel_(vsip::Domain<1>(
-	   this->kernel_.size() - 1, -1, kernel.size())) = kernel;
-    }
-    else
-#endif
+    if (impl::Fir_driver<T>::reverse_kernel || 
+        (!impl::Fir_driver<T>::mismatch_ok && 
+          this->output_size_ * decimation != this->input_size_))
     {
       // mirror the kernel
       unsigned const ksz = kernel.size();
@@ -167,6 +157,14 @@ public:
       if (symV != vsip::nonsym)
 	this->kernel_(vsip::Domain<1>(ksz)) = kernel;
     }
+    else
+    {
+      // e.g. IPP doesn't want it reversed.
+      this->kernel_(vsip::Domain<1>(kernel.size())) = kernel;
+      if (symV != vsip::nonsym)
+	this->kernel_(vsip::Domain<1>(
+	   this->kernel_.size() - 1, -1, kernel.size())) = kernel;
+    }
   }
 
   // FIXME: spec says this should be nothrow, but it has to allocate
@@ -240,7 +238,9 @@ public:
 #if VSIP_IMPL_HAVE_IPP
     
     // use IPP only if decimation is a factor of input size.
-    if (this->input_size_ == this->output_size_ * dec)
+    if (!impl::Fir_driver<T>::use_native &&
+        (impl::Fir_driver<T>::mismatch_ok ||
+          this->input_size_ == this->output_size_ * dec))
     {
       typedef impl::Layout<1,vsip::tuple<0,1,2>,
 	  impl::Stride_unit,impl::Cmplx_inter_fmt>  layout_type;
@@ -254,7 +254,7 @@ public:
       impl::Ext_data<block_type,layout_type>  raw_state(this->state_.block());
       oix = (this->input_size_ - skip + dec - 1) / dec;
       
-      impl::Ipp_fir_driver<T>::run_fir(raw_in.data(), raw_out.data(), oix,
+      impl::Fir_driver<T>::run_fir(raw_in.data(), raw_out.data(), oix,
 	raw_kernel.data(), m + 1, raw_state.data(), &this->state_saved_, dec);
 
       if (useOldState != state_save)
@@ -304,7 +304,7 @@ public:
 
   void reset()  VSIP_NOTHROW
     { this->state_saved_ = this->skip_ = 0;
-      this->state_ = T(0.0); }
+      this->state_ = T(0); }
 
 public: