[PATCH] Use IPP for Fir<>
Nathan (Jasper) Myers
ncm at codesourcery.com
Thu Oct 13 10:44:41 UTC 2005
I have checked in the patch below.
It makes vsip::Fir<> use IPP's FIR support where possible. In practice,
that means whenever block size and decimation are not relatively prime.
(IPP produces bad output when they are. The IPP API seems to make it
impossible, so it amounts to an IPP documentation bug.) Fir<> uses
the native C++ implementation for such cases. They are probably rare
in real programs.
The spec says the copy constructor Fir(Fir const&) is supposed to
be VSIP_NOTHROW, but it seems to me that to implement it safely, it
needs to do allocation. I declared it VSIP_THROW((std::bad_alloc)).
The no-macro method used here to adapt to IPP's version of overloading
is similar to that in fft-core.hpp, and seems practical for general
use.
Nathan Myers
ncm
Index: ChangeLog
===================================================================
RCS file: /home/cvs/Repository/vpp/ChangeLog,v
retrieving revision 1.292
retrieving revision 1.293
diff -u -p -r1.292 -r1.293
--- ChangeLog 12 Oct 2005 13:40:17 -0000 1.292
+++ ChangeLog 13 Oct 2005 10:23:34 -0000 1.293
@@ -1,3 +1,8 @@
+2005-10-13 Nathan Myers <ncm at codesourcery.com>
+
+ * src/vsip/impl/signal-fir.hpp: use IPP FIR support where available.
+ * tests/fir.cpp: forgive FFT noise on big samples.
+
2005-10-12 Jules Bergmann <jules at codesourcery.com>
* configure.ac (--with-atlas-prefix, --with-atlas-libdir): New
Index: src/vsip/impl/signal-fir.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/impl/signal-fir.hpp,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -p -r1.2 -r1.3
--- src/vsip/impl/signal-fir.hpp 10 Oct 2005 06:33:40 -0000 1.2
+++ src/vsip/impl/signal-fir.hpp 13 Oct 2005 10:23:34 -0000 1.3
@@ -19,6 +19,11 @@
#include <vsip/impl/global_map.hpp>
#include <vsip/impl/profile.hpp>
+#if VSIP_IMPL_HAVE_IPP
+#include <vsip/impl/ipp.hpp>
+#include <ipps.h>
+#endif
+
#include <new>
namespace vsip
@@ -43,6 +48,52 @@ struct Fir_aligned
block_type;
};
+#if VSIP_IMPL_HAVE_IPP
+
+template
+<
+ typename T, typename IppT,
+ IppStatus (&ippFirF)(IppT const*,IppT*,int,IppT const*,int,IppT*,int*),
+ IppStatus (&ippFirDecF)(
+ IppT const*,IppT*,int,IppT const*,int,int,int,int,int,IppT*)
+>
+struct Ipp_fir_driver_base
+{
+ static void
+ run_fir(
+ T const* xin, T* xout, vsip::length_type outsize,
+ T const* xkernel, vsip::length_type ksize,
+ T* xstate, vsip::length_type* xstate_ix, vsip::length_type dec)
+ {
+ IppT const* const in = reinterpret_cast<IppT const*>(xin);
+ IppT* const out = reinterpret_cast<IppT*>(xout);
+ IppT const* const kernel = reinterpret_cast<IppT const*>(xkernel);
+ IppT* const state = reinterpret_cast<IppT*>(xstate);
+ int state_ix = *xstate_ix;
+ IppStatus stat = (dec == 1) ?
+ ippFirF(in, out, outsize, kernel, ksize, state, &state_ix) :
+ ippFirDecF(in, out, outsize, kernel, ksize, 1, 0, dec, 0, state);
+ assert(stat == ippStsNoErr);
+ *xstate_ix = state_ix;
+ }
+};
+
+template<typename T> struct Ipp_fir_driver;
+
+template < > struct Ipp_fir_driver<float> : Ipp_fir_driver_base<
+ float,Ipp32f,ippsFIR_Direct_32f,ippsFIRMR_Direct_32f> { };
+
+template<> struct Ipp_fir_driver<double> : Ipp_fir_driver_base<
+ double,Ipp64f,ippsFIR_Direct_64f,ippsFIRMR_Direct_64f> {};
+
+template<> struct Ipp_fir_driver<std::complex<float> > : Ipp_fir_driver_base<
+ std::complex<float>,Ipp32fc,ippsFIR_Direct_32fc,ippsFIRMR_Direct_32fc> {};
+
+template<> struct Ipp_fir_driver<std::complex<double> > : Ipp_fir_driver_base<
+ std::complex<double>,Ipp64fc,ippsFIR_Direct_64fc,ippsFIRMR_Direct_64fc> {};
+
+#endif // VSIP_IMPL_HAVE_IPP
+
} // namespace impl
///////////////////////////////////////////////////////////////////
@@ -63,22 +114,27 @@ class Fir
public:
static const vsip::symmetry_type symmetry = symV;
static const vsip::obj_state continuous_filter = useOldState;
-
+ typedef typename impl::Fir_aligned<T>::block_type block_type;
+
template <typename BlockT>
Fir(
vsip::const_Vector<T,BlockT> kernel,
vsip::length_type input_size,
vsip::length_type decimation = 1)
VSIP_THROW((std::bad_alloc))
- : input_size_(input_size),
- order_(kernel.size() * (1 + (symV != vsip::nonsym)) -
- (symV == vsip::sym_even_len_odd) - 1),
- decimation_(decimation),
- skip_(0),
- state_saved_(0),
- op_calls_(0),
- state_(this->order_, T(0)),
- kernel_(this->order_ + 1)
+ : input_size_(input_size)
+ , order_(kernel.size() * (1 + (symV != vsip::nonsym)) -
+ (symV == vsip::sym_even_len_odd) - 1)
+ , decimation_(decimation)
+ , skip_(0)
+ , op_calls_(0)
+ , kernel_(this->order_ + 1)
+ , state_(2 * (this->order_ + 1), T(0)) // IPP wants 2x.
+ , state_saved_(0)
+#if VSIP_IMPL_HAVE_IPP
+ , temp_in_(this->input_size_)
+ , temp_out_(this->input_size_)
+#endif
{
assert(input_size > 0);
assert(this->order_ + 1 > 1); // counter unsigned wraparound
@@ -89,14 +145,58 @@ public:
// must be after asserts because of division
this->output_size_ = (input_size + decimation - 1) / decimation;
- // mirror the kernel
- unsigned const ksz = kernel.size();
- this->kernel_(vsip::Domain<1>(this->order_, -1, ksz)) = kernel;
- // and maybe unmirror a copy, too
- if (symV != vsip::nonsym)
- this->kernel_(vsip::Domain<1>(ksz)) = kernel;
+#if VSIP_IMPL_HAVE_IPP
+ // use IPP only if decimation is a factor of input size.
+ if (this->output_size_ * decimation == this->input_size_)
+ {
+ // IPP doesn't want it reversed.
+ this->kernel_(vsip::Domain<1>(kernel.size())) = kernel;
+ if (symV != vsip::nonsym)
+ this->kernel_(vsip::Domain<1>(
+ this->kernel_.size() - 1, -1, kernel.size())) = kernel;
+ }
+ else
+#endif
+ {
+ // mirror the kernel
+ unsigned const ksz = kernel.size();
+ this->kernel_(vsip::Domain<1>(this->order_, -1, ksz)) = kernel;
+ // and maybe unmirror a copy, too
+ if (symV != vsip::nonsym)
+ this->kernel_(vsip::Domain<1>(ksz)) = kernel;
+ }
+ }
+
+ // FIXME: spec says this should be nothrow, but it has to allocate
+ Fir(Fir const& fir) VSIP_THROW((std::bad_alloc))
+ : input_size_(fir.input_size_)
+ , order_(fir.order_)
+ , decimation_(fir.decimation_)
+ , skip_(fir.skip_)
+ , op_calls_(0)
+ , kernel_(fir.kernel_)
+ , state_(fir.state_(vsip::Domain<1>(fir.state_.size()))) // actual copy
+ , state_saved_(fir.state_saved_)
+#if VSIP_IMPL_HAVE_IPP
+ , temp_in_(this->input_size_) // allocate
+ , temp_out_(this->input_size_) // allocate
+#endif
+ { }
+
+ Fir& operator=(Fir const& fir) VSIP_NOTHROW
+ {
+ assert(this->input_size_ == fir.input_size_);
+ assert(this->order_ == fir.order_);
+ assert(this->decimation_ = fir.decimation_);
+ this->skip_ = fir.skip_;
+ this->op_calls_ = 0;
+ this->kernel_ = fir.kernel_;
+ this->state_ = fir.state_;
+ this->state_saved_ = fir.state_saved_;
}
+ ~Fir() VSIP_NOTHROW {}
+
vsip::length_type kernel_size() const VSIP_NOTHROW
{ return this->order_ + 1; }
vsip::length_type filter_order() const VSIP_NOTHROW
@@ -119,6 +219,7 @@ public:
vsip::Vector<T, Block1> out)
VSIP_NOTHROW
{
+ ++ this->op_calls_;
assert(data.size() == this->input_size_);
assert(out.size() == this->output_size_);
@@ -131,52 +232,78 @@ public:
const len_type saved = this->state_saved_;
len_type oix = 0;
len_type i = 0;
- for (; i < m - skip; ++oix, i += dec)
- {
- // Conceptually this comes second, but it's more convenient
- // to put it here. So, read the second statement below first.
- T sum = vsip::dot(
- this->kernel_(d_type(m - skip - i, 1, i + skip + 1)),
- data(d_type(i + skip + 1)));
-
- if (useOldState == vsip::state_save && i < saved)
- {
- sum += vsip::dot(
- this->kernel_(d_type(saved - i)),
- this->state_(d_type(i, 1, saved - i)));
- }
- out.put(oix, sum);
- }
-
- const len_type isz = this->input_size_;
- len_type start = i - (m - skip);
- for ( ; start + m < isz; ++oix, start += dec)
+#if VSIP_IMPL_HAVE_IPP
+
+ // use IPP only if decimation is a factor of input size.
+ if (this->input_size_ == this->output_size_ * dec)
{
- T sum = vsip::dot(this->kernel_, data(d_type(start, 1, m + 1)));
- out.put(oix, sum);
+ typedef impl::Layout<1,vsip::tuple<0,1,2>,
+ impl::Stride_unit,impl::Cmplx_inter_fmt> layout_type;
+ impl::Ext_data<Block0,layout_type> raw_in(
+ data.block(), impl::SYNC_IN,
+ impl::Ext_data<block_type>(this->temp_in_.block()).data());
+ impl::Ext_data<Block1,layout_type> raw_out(
+ out.block(), impl::SYNC_OUT,
+ impl::Ext_data<block_type>(this->temp_out_.block()).data());
+ impl::Ext_data<block_type,layout_type> raw_kernel(this->kernel_.block());
+ impl::Ext_data<block_type,layout_type> raw_state(this->state_.block());
+ oix = (this->input_size_ - skip + dec - 1) / dec;
+
+ impl::Ipp_fir_driver<T>::run_fir(raw_in.data(), raw_out.data(), oix,
+ raw_kernel.data(), m + 1, raw_state.data(), &this->state_saved_, dec);
+
+ if (useOldState != state_save)
+ this->reset();
}
+ else
+
+#endif
- if (useOldState == state_save)
{
- this->skip_ = start + m - isz;
- const len_type new_save = isz - start;
- this->state_saved_ = new_save;
- this->state_(d_type(new_save)) = data(d_type(start, 1, new_save));
+ for (; i < m - skip; ++oix, i += dec)
+ {
+ // Conceptually this comes second, but it's more convenient
+ // to put it here. So, read the second statement below first.
+ T sum = vsip::dot(
+ this->kernel_(d_type(m - skip - i, 1, i + skip + 1)),
+ data(d_type(i + skip + 1)));
+
+ if (useOldState == vsip::state_save && i < saved)
+ {
+ sum += vsip::dot(
+ this->kernel_(d_type(saved - i)),
+ this->state_(d_type(i, 1, saved - i)));
+ }
+
+ out.put(oix, sum);
+ }
+
+ const len_type isz = this->input_size_;
+ len_type start = i - (m - skip);
+ for ( ; start + m < isz; ++oix, start += dec)
+ {
+ T sum = vsip::dot(this->kernel_, data(d_type(start, 1, m + 1)));
+ out.put(oix, sum);
+ }
+
+ if (useOldState == state_save)
+ {
+ this->skip_ = start + m - isz;
+ const len_type new_save = isz - start;
+ this->state_saved_ = new_save;
+ this->state_(d_type(new_save)) = data(d_type(start, 1, new_save));
+ }
}
- ++ this->op_calls_;
return oix;
}
void reset() VSIP_NOTHROW
{ this->state_saved_ = this->skip_ = 0; }
- ~Fir()
- { }
-
public:
- float impl_performance(char* what) const
+ float impl_performance(char* what) const VSIP_NOTHROW
{
if (!strcmp(what, "mflops"))
{
@@ -199,11 +326,14 @@ private:
vsip::length_type order_; // M in the spec
vsip::length_type decimation_;
vsip::length_type skip_; // how much of next input to skip
- vsip::length_type state_saved_; // number of elements saved
unsigned long op_calls_;
- vsip::Vector<T,typename impl::Fir_aligned<T>::block_type> state_;
vsip::Vector<T,typename impl::Fir_aligned<T>::block_type> kernel_;
-
+ vsip::Vector<T,typename impl::Fir_aligned<T>::block_type> state_;
+ vsip::length_type state_saved_; // number of elements saved
+#if VSIP_IMPL_HAVE_IPP
+ vsip::Vector<T,typename impl::Fir_aligned<T>::block_type> temp_in_;
+ vsip::Vector<T,typename impl::Fir_aligned<T>::block_type> temp_out_;
+#endif
impl::profile::Acc_timer timer_;
};
Index: tests/fir.cpp
===================================================================
RCS file: /home/cvs/Repository/vpp/tests/fir.cpp,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -p -r1.2 -r1.3
--- tests/fir.cpp 10 Oct 2005 06:33:40 -0000 1.2
+++ tests/fir.cpp 13 Oct 2005 10:23:34 -0000 1.3
@@ -28,6 +28,39 @@
Definitions
***********************************************************************/
+
+template <typename T, typename BlockT>
+double
+error_db(
+ vsip::const_Vector<T,BlockT> v1,
+ vsip::const_Vector<T,BlockT> v2)
+{
+ double refmax = 0.0;
+ double maxsum = -250;
+ double sum;
+
+ vsip::Index<1> idx;
+
+ refmax = vsip::maxval(vsip::magsq(v1), idx);
+
+ for (vsip::index_type i=0; i<v1.size(); ++i)
+ {
+ double val = vsip::magsq(v1.get(i) - v2.get(i));
+
+ if (val < 1.e-20)
+ sum = -201.;
+ else
+ sum = 10.0 * log10(val/(2.0*refmax));
+
+ if (sum > maxsum)
+ maxsum = sum;
+ }
+
+ return maxsum;
+}
+
+
+
template <typename T, vsip::symmetry_type sym>
void
test_fir(
@@ -70,7 +103,7 @@ test_fir(
assert(fir2.symmetry == sym);
assert(fir1a.continuous_filter == vsip::state_save);
assert(fir2.continuous_filter == vsip::state_no_save);
-
+
const vsip::length_type order = (sym == vsip::nonsym) ? M :
(sym == vsip::sym_even_len_even) ? 2 * M : (2 * M) - 1;
assert(fir1a.kernel_size() == order);
@@ -90,6 +123,8 @@ test_fir(
output1(vsip::Domain<1>(got, 1, (N + D - 1) / D)));
}
+ // vsip::Vector<T> o1(output1.size(), T(0));
+ // o1 = convout(vsip::Domain<1>(output1.size())) - output1;
vsip::length_type got1b = 0;
vsip::length_type got2 = 0;
@@ -104,11 +139,26 @@ test_fir(
vsip::Vector<T> reference(convout(vsip::Domain<1>(got)));
vsip::Vector<T> result(output1(vsip::Domain<1>(got)));
+
assert(outsize - got <= 1);
- assert(vsip::alltrue(result == reference));
+ if (got > 256)
+ {
+ double error = error_db(result, reference);
+ assert(error < -100);
+ }
+ else
+ assert(view_equal(result, reference));
+
assert(got1b == got2);
- assert(vsip::alltrue(
- output2(vsip::Domain<1>(got1b)) == output3(vsip::Domain<1>(got1b))));
+ if (got > 256)
+ {
+ double error = error_db(output2(vsip::Domain<1>(got1b)),
+ output3(vsip::Domain<1>(got1b)));
+ assert(error < -100);
+ }
+ else
+ assert(view_equal(output2(vsip::Domain<1>(got1b)),
+ output3(vsip::Domain<1>(got1b))));
}
int
@@ -116,22 +166,18 @@ main()
{
vsip::vsipl init;
-
test_fir<float,vsip::nonsym>(1,2,3);
-
- test_fir<float,vsip::nonsym>(3,23,31);
-
test_fir<float,vsip::nonsym>(1,3,5);
test_fir<float,vsip::nonsym>(1,3,9);
test_fir<float,vsip::nonsym>(1,4,8);
test_fir<float,vsip::nonsym>(1,23,31);
- test_fir<float,vsip::nonsym>(1,32,256);
+ test_fir<float,vsip::nonsym>(1,32,1024);
test_fir<float,vsip::nonsym>(2,3,5);
test_fir<float,vsip::nonsym>(2,3,9);
test_fir<float,vsip::nonsym>(2,4,8);
test_fir<float,vsip::nonsym>(2,23,31);
- test_fir<float,vsip::nonsym>(2,32,256);
+ test_fir<float,vsip::nonsym>(2,32,1024);
test_fir<double,vsip::nonsym>(2,3,5);
test_fir<double,vsip::nonsym>(2,3,9);
@@ -143,7 +189,7 @@ main()
test_fir<std::complex<float>,vsip::nonsym>(2,3,9);
test_fir<std::complex<float>,vsip::nonsym>(2,4,8);
test_fir<std::complex<float>,vsip::nonsym>(2,23,31);
- test_fir<std::complex<float>,vsip::nonsym>(2,32,256);
+ test_fir<std::complex<float>,vsip::nonsym>(2,32,1024);
test_fir<std::complex<double>,vsip::nonsym>(2,3,5);
test_fir<std::complex<double>,vsip::nonsym>(2,3,9);
@@ -155,13 +201,13 @@ main()
test_fir<float,vsip::nonsym>(3,4,21);
test_fir<float,vsip::nonsym>(3,9,27);
test_fir<float,vsip::nonsym>(3,23,31);
- test_fir<float,vsip::nonsym>(3,32,256);
+ test_fir<float,vsip::nonsym>(3,32,1024);
test_fir<float,vsip::nonsym>(4,5,13);
test_fir<float,vsip::nonsym>(4,7,31);
test_fir<float,vsip::nonsym>(4,8,32);
test_fir<float,vsip::nonsym>(4,23,31);
- test_fir<float,vsip::nonsym>(4,32,256);
+ test_fir<float,vsip::nonsym>(4,32,1024);
test_fir<float,vsip::sym_even_len_even>(1,1,3);
test_fir<float,vsip::sym_even_len_even>(1,2,3);
@@ -169,52 +215,38 @@ main()
test_fir<float,vsip::sym_even_len_even>(1,3,9);
test_fir<float,vsip::sym_even_len_even>(1,4,8);
test_fir<float,vsip::sym_even_len_even>(1,23,57);
-#if 0
- // FIXME: this exposes a bug in vsip::Convolution.
- test_fir<float,vsip::sym_even_len_even>(1,32,256);
-#endif
+ test_fir<float,vsip::sym_even_len_even>(1,32,1024);
test_fir<float,vsip::sym_even_len_even>(2,2,3);
test_fir<float,vsip::sym_even_len_even>(2,3,5);
test_fir<float,vsip::sym_even_len_even>(2,3,9);
test_fir<float,vsip::sym_even_len_even>(2,4,8);
test_fir<float,vsip::sym_even_len_even>(2,23,57);
-#if 0
- // FIXME: likewise
- test_fir<float,vsip::sym_even_len_even>(2,32,256);
-#endif
+ test_fir<float,vsip::sym_even_len_even>(2,32,1024);
test_fir<float,vsip::sym_even_len_even>(3,3,5);
test_fir<float,vsip::sym_even_len_even>(3,4,8);
test_fir<float,vsip::sym_even_len_even>(3,23,57);
-#if 0
- test_fir<float,vsip::sym_even_len_even>(3,32,256);
-#endif
+ test_fir<float,vsip::sym_even_len_even>(3,32,1024);
test_fir<float,vsip::sym_even_len_odd>(1,2,3);
test_fir<float,vsip::sym_even_len_odd>(1,3,5);
test_fir<float,vsip::sym_even_len_odd>(1,3,9);
test_fir<float,vsip::sym_even_len_odd>(1,4,9);
test_fir<float,vsip::sym_even_len_odd>(1,23,57);
-#if 0
- test_fir<float,vsip::sym_even_len_odd>(1,32,256);
-#endif
+ test_fir<float,vsip::sym_even_len_odd>(1,32,1024);
test_fir<float,vsip::sym_even_len_odd>(2,2,3);
test_fir<float,vsip::sym_even_len_odd>(2,3,5);
test_fir<float,vsip::sym_even_len_odd>(2,3,9);
test_fir<float,vsip::sym_even_len_odd>(2,4,10);
test_fir<float,vsip::sym_even_len_odd>(2,23,57);
-#if 0
- test_fir<float,vsip::sym_even_len_odd>(2,32,256);
-#endif
+ test_fir<float,vsip::sym_even_len_odd>(2,32,1024);
test_fir<float,vsip::sym_even_len_odd>(3,3,5);
test_fir<float,vsip::sym_even_len_odd>(3,4,9);
test_fir<float,vsip::sym_even_len_odd>(3,23,55);
-#if 0
- test_fir<float,vsip::sym_even_len_odd>(3,32,256);
-#endif
+ test_fir<float,vsip::sym_even_len_odd>(3,32,1024);
return 0;
}
More information about the vsipl++
mailing list