[PATCH] fix IPP 2D FFT, complete FFT tests
Nathan (Jasper) Myers
ncm at codesourcery.com
Thu Sep 29 02:12:58 UTC 2005
I have checked in the patch below.
It adds (nearly) exhaustive testing on Fft features, and fixes
failures in IPP FFT support the testing reveals. It also adds
tests for real->complex and complex -> real Fftm.
Don't be surprised when fft.cpp takes one or two minutes to compile,
now, and spends most of that time producing 40MB of assembly code.
Nathan Myers
ncm
Index: ChangeLog
===================================================================
RCS file: /home/cvs/Repository/vpp/ChangeLog,v
retrieving revision 1.280
retrieving revision 1.281
diff -u -p -r1.280 -r1.281
--- ChangeLog 28 Sep 2005 19:07:26 -0000 1.280
+++ ChangeLog 29 Sep 2005 02:01:09 -0000 1.281
@@ -1,3 +1,17 @@
+2005-09-28 Nathan Myers <ncm at codesourcery.com>
+
+ * src/vsip/fft-core.hpp: Make IPP FFT work for 2D FFT.
+ Make unimplemented IPP driver functions report failure.
+ * src/vsip/signal-fft.hpp: Initialize scale member early enough
+ for IPP create_plan use.
+ * tests/fftm.cpp: Enable tests for complex->real, real->complex.
+ * tests/fft.cpp: Add comprehensive testing:
+ (2D, 3D) x ((cx->cx fwd, inv), ((re->cx, cx->re) x (all axes)))
+ x (Dense/row-major, Dense/column-major, Fast_block)
+ x (single,double) x (in-place, by_reference, by_value)
+ x (unscaled, arbitrary-scaled, scaled by N)
+ Tested with gcc-3.4/em64t/IPP and gcc-4.0.1/x86/FFTW3.
+
2005-09-28 Jules Bergmann <jules at codesourcery.com>
* src/vsip/impl/block-traits.hpp (View_block_storage):
Index: src/vsip/impl/fft-core.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/impl/fft-core.hpp,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -p -r1.17 -r1.18
--- src/vsip/impl/fft-core.hpp 28 Sep 2005 00:34:11 -0000 1.17
+++ src/vsip/impl/fft-core.hpp 29 Sep 2005 02:01:09 -0000 1.18
@@ -997,17 +997,19 @@ struct Ipp_DFT_base
}
static void
- forward2(void* plan, void const* in, void* out, void* buffer, bool f)
+ forward2(
+ void* plan, void const* in, unsigned in_row_step,
+ void* out, unsigned out_row_step, void* buffer, bool f)
VSIP_NOTHROW
{
IppStatus result = (f ?
(*forwardFFun2)(
- reinterpret_cast<T const*>(in), sizeof(T),
- reinterpret_cast<T*>(out), sizeof(T),
+ reinterpret_cast<T const*>(in), in_row_step,
+ reinterpret_cast<T*>(out), out_row_step,
reinterpret_cast<planFT*>(plan), reinterpret_cast<Ipp8u*>(buffer)) :
(*forwardDFun2)(
- reinterpret_cast<T const*>(in), sizeof(T),
- reinterpret_cast<T*>(out), sizeof(T),
+ reinterpret_cast<T const*>(in), in_row_step,
+ reinterpret_cast<T*>(out), out_row_step,
reinterpret_cast<planDT*>(plan), reinterpret_cast<Ipp8u*>(buffer)));
assert(result == ippStsNoErr);
}
@@ -1029,17 +1031,19 @@ struct Ipp_DFT_base
}
static void
- inverse2(void* plan, void const* in, void* out, void* buffer, bool f)
+ inverse2(
+ void* plan, void const* in, unsigned in_row_step,
+ void* out, unsigned out_row_step, void* buffer, bool f)
VSIP_NOTHROW
{
IppStatus result = (f ?
(*inverseFFun2)(
- reinterpret_cast<T const*>(in), sizeof(T),
- reinterpret_cast<T*>(out), sizeof(T),
+ reinterpret_cast<T const*>(in), in_row_step,
+ reinterpret_cast<T*>(out), out_row_step,
reinterpret_cast<planFT*>(plan), reinterpret_cast<Ipp8u*>(buffer)) :
(*inverseDFun2)(
- reinterpret_cast<T const*>(in), sizeof(T),
- reinterpret_cast<T*>(out), sizeof(T),
+ reinterpret_cast<T const*>(in), in_row_step,
+ reinterpret_cast<T*>(out), out_row_step,
reinterpret_cast<planDT*>(plan), reinterpret_cast<Ipp8u*>(buffer)));
assert(result == ippStsNoErr);
}
@@ -1049,21 +1053,21 @@ struct Ipp_DFT_base
// template Ipp_DFT_base<>.
template <typename P> inline IppStatus dum(P**, int, int, IppHintAlgorithm)
- { return ippStsNoErr; }
+ { return ippStsErr; }
template <typename P> inline IppStatus dum(P**, int, int, int, IppHintAlgorithm)
- { return ippStsNoErr; }
+ { return ippStsErr; }
template <typename P> inline IppStatus dum(P**, IppiSize, int, IppHintAlgorithm)
- { return ippStsNoErr; }
+ { return ippStsErr; }
template <typename P> inline IppStatus dum(P*)
- { return ippStsNoErr; }
+ { return ippStsErr; }
template <typename P> inline IppStatus dum(P const*, int*)
- { return ippStsNoErr; }
+ { return ippStsErr; }
template <typename P, typename T> inline IppStatus dum(
T const*, T*, P const*, Ipp8u*)
- { return ippStsNoErr; }
+ { return ippStsErr; }
template <typename P, typename T> inline IppStatus dum(
T const*, int, T*, int, P const*, Ipp8u*)
- { return ippStsNoErr; }
+ { return ippStsErr; }
// Specializations of Ipp_DFT create the mappings from argument
@@ -1255,10 +1259,15 @@ create_ipp_plan(
typedef typename Time_domain<inT,outT>::type time_domain_type;
typedef Ipp_DFT< (Dim-doFFTM),time_domain_type> fft_type;
- self.plan_from_to_ = ((Dim - doFFTM == 1) ?
- fft_type::create_plan(sizex, flags, self.use_fft_) :
- fft_type::create_plan2(sizex, sizey, flags, self.use_fft_));
-
+ if (Dim - doFFTM == 1)
+ self.plan_from_to_ = fft_type::create_plan(sizex, flags, self.use_fft_);
+ else
+ {
+ self.plan_from_to_ =
+ fft_type::create_plan2(sizex, sizey, flags, self.use_fft_);
+ self.row_step_ = sizeof(outT) * dom[0].size();
+ }
+
self.p_buffer_ = impl::alloc_align(
16, fft_type::bufsize(self.plan_from_to_, self.use_fft_));
if (self.p_buffer_ == 0)
@@ -1373,11 +1382,11 @@ from_to(
// IPP doesn't implement 2D double FFT. Spec allows that.
#if ! defined(VSIP_IMPL_DOUBLE)
if (self.is_forward_)
- Ipp_DFT<2,std::complex<SCALAR_TYPE> >::forward(
- self.plan_from_to_, in, out, self.p_buffer_, self.use_fft_) ;
+ Ipp_DFT<2,std::complex<SCALAR_TYPE> >::forward2(self.plan_from_to_,
+ in, self.row_step_, out, self.row_step_, self.p_buffer_, self.use_fft_) ;
else
- Ipp_DFT<2,std::complex<SCALAR_TYPE> >::inverse(
- self.plan_from_to_, in, out, self.p_buffer_, self.use_fft_);
+ Ipp_DFT<2,std::complex<SCALAR_TYPE> >::inverse2(self.plan_from_to_,
+ in, self.row_step_, out, self.row_step_, self.p_buffer_, self.use_fft_);
if (self.doing_scaling_)
self.scale_ = 1.0;
@@ -1421,8 +1430,8 @@ from_to(
VSIP_IMPL_THROW(impl::unimplemented(
"IPP FFT-2D real->complex not implemented"));
#if 0
- Ipp_DFT<1,SCALAR_TYPE>::forward2(
- self.plan_from_to_, in, out, self.p_buffer_, self.use_fft_) ;
+ Ipp_DFT<1,SCALAR_TYPE>::forward2(self.plan_from_to_,
+ in, self.row_step_, out, self.row_step_, self.p_buffer_, self.use_fft_) ;
// unpack in place
if (self.doing_scaling_)
self.scale_ = 1.0;
@@ -1463,8 +1472,8 @@ from_to(
#if 0
// pack in place; maybe this must happen in
// fft_by_ref, where _in_, just copied into, is writeable.
- Ipp_DFT<1,SCALAR_TYPE>::inverse2(
- self.plan_from_to_, in, out, self.p_buffer_, self.use_fft_) ;
+ Ipp_DFT<1,SCALAR_TYPE>::inverse2(self.plan_from_to_,
+ in, self.row_step_, out, self.row_step_, self.p_buffer_, self.use_fft_) ;
if (self.doing_scaling_)
self.scale_ = 1.0;
#endif
Index: src/vsip/impl/signal-fft.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/impl/signal-fft.hpp,v
retrieving revision 1.27
retrieving revision 1.28
diff -u -p -r1.27 -r1.28
--- src/vsip/impl/signal-fft.hpp 28 Sep 2005 00:34:11 -0000 1.27
+++ src/vsip/impl/signal-fft.hpp 29 Sep 2005 02:01:10 -0000 1.28
@@ -80,6 +80,7 @@ struct Fft_core : impl::Ref_count<impl::
bool doing_scaling_; // scaling is performed in the driver.
bool is_forward_;
void* p_buffer_; // temporary storage not allocated in the plan
+ unsigned row_step_; // length in bytes of 2D row.
# endif
#endif
@@ -328,6 +329,7 @@ protected:
, in_temp_(this->input_size_)
, out_temp_(this->output_size_)
{
+ core_->scale_ = scale; // IPP needs this.
impl::Ext_data<in_block_type> raw_in(this->in_temp_);
impl::Ext_data<out_block_type> raw_out(this->out_temp_);
this->core_->create_plan(
Index: tests/fftm.cpp
===================================================================
RCS file: /home/cvs/Repository/vpp/tests/fftm.cpp,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -p -r1.7 -r1.8
--- tests/fftm.cpp 28 Sep 2005 04:32:55 -0000 1.7
+++ tests/fftm.cpp 29 Sep 2005 02:01:10 -0000 1.8
@@ -107,6 +107,32 @@ void dft_y(
}
+template <typename T,
+ typename Block1,
+ typename Block2>
+void dft_y_real(
+ vsip::Matrix<T, Block1> in,
+ vsip::Matrix<vsip::complex<T>, Block2> out)
+{
+ length_type const xsize = in.size(1);
+ length_type const ysize = in.size(0);
+ assert(in.size(0)/2 + 1 == out.size(0));
+ assert(in.size(1) == out.size(1));
+ typedef long double AccT;
+
+ AccT const phi = -2.0 * M_PI/ysize;
+
+ for (index_type v=0; v < xsize; ++v)
+ for (index_type w=0; w < ysize / 2 + 1; ++w)
+ {
+ vsip::complex<AccT> sum = vsip::complex<AccT>();
+ for (index_type k=0; k<ysize; ++k)
+ sum += vsip::complex<AccT>(in(k,v)) * sin_cos<AccT>(phi*k*w);
+ out(w,v) = vsip::complex<T>(sum);
+ }
+}
+
+
// Error metric between two vectors.
template <typename T1,
@@ -412,64 +438,47 @@ test_by_val_y(length_type N)
}
-#if 0
/// Test r->c and c->r by-value Fft.
template <typename T>
void
-test_real(const int set, const length_type N)
+test_real(const length_type N)
{
- typedef Fftm<T, std::complex<T>, col, 0, by_value, 1, alg_space>
+ typedef Fftm<T, std::complex<T>, col, fft_fwd, by_value, 1, alg_space>
f_fftm_type;
- typedef Fftm<std::complex<T>, T, col, 0, by_value, 1, alg_space>
+ typedef Fftm<std::complex<T>, T, col, fft_inv, by_value, 1, alg_space>
i_fftm_type;
const length_type N2 = N/2 + 1;
- f_fftm_type f_fftm(Domain<1>(N), 1.0);
- i_fftm_type i_fftm(Domain<1>(N), 1.0/(N));
+ f_fftm_type f_fftm(Domain<2>(Domain<1>(N),Domain<1>(5)), 1.0);
+ i_fftm_type i_fftm(Domain<2>(Domain<1>(N),Domain<1>(5)), 1.0/N);
- assert(f_fftm.input_size().size() == N);
- assert(f_fftm.output_size().size() == N2);
+ assert(f_fftm.input_size().size() == 5*N);
+ assert(f_fftm.output_size().size() == 5*N2);
- assert(i_fftm.input_size().size() == N2);
- assert(i_fftm.output_size().size() == N);
+ assert(i_fftm.input_size().size() == 5*N2);
+ assert(i_fftm.output_size().size() == 5*N);
assert(f_fftm.scale() == 1.0); // can represent exactly
assert(i_fftm.scale() > 1.0/(N + 1) && i_fftm.scale() < 1.0/(N - 1));
assert(f_fftm.forward() == true);
assert(i_fftm.forward() == false);
- Matrix<T> in(N, T());
- Matrix<std::complex<T> > out(N2);
- Matrix<std::complex<T> > ref(N2);
- Matrix<T> inv(N);
- Matrix<T> inv2(N);
+ Matrix<T> in(N, 5, T());
+ Matrix<std::complex<T> > out(N2, 5);
+ Matrix<std::complex<T> > ref(N2, 5);
+ Matrix<T> inv(N, 5);
- setup_data(set, in, 3.0);
+ setup_data_y(in);
+ dft_y_real(in, ref);
out = f_fftm(in);
-
- if (set == 1)
- {
- setup_data(3, ref, 3.0);
- assert(error_db(ref, out) < -100);
- }
- if (set == 3)
- {
- setup_data(1, ref, 3.0 * N);
- assert(error_db(ref, out) < -100);
- }
-
- ref = out;
inv = i_fftm(out);
+ assert(error_db(ref, out) < -100);
assert(error_db(inv, in) < -100);
-
- // make sure out has not been scribbled in during the conversion.
- assert(error_db(ref,out) < -100);
}
-#endif
int
@@ -494,12 +503,10 @@ main()
test_by_val_y<complex<float> >(18);
test_by_val_y<complex<float> >(256);
-# if 0
// Tests for test r->c, c->r.
test_real<float>(128);
test_real<float>(242);
test_real<float>(16);
-# endif
#endif
#if defined(VSIP_IMPL_FFT_USE_DOUBLE)
@@ -519,12 +526,10 @@ main()
test_by_val_y<complex<double> >(18);
test_by_val_y<complex<double> >(256);
-# if 0
// Tests for test r->c, c->r.
test_real<double>(128);
test_real<double>(242);
test_real<double>(16);
-# endif
#endif
return 0;
Index: tests/fft.cpp
===================================================================
RCS file: /home/cvs/Repository/vpp/tests/fft.cpp,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -p -r1.7 -r1.8
--- tests/fft.cpp 28 Sep 2005 04:32:54 -0000 1.7
+++ tests/fft.cpp 29 Sep 2005 02:01:10 -0000 1.8
@@ -17,6 +17,7 @@
#include <vsip/support.hpp>
#include <vsip/signal.hpp>
#include <vsip/math.hpp>
+#include <vsip/random.hpp>
#include "test.hpp"
#include "output.hpp"
@@ -134,6 +135,52 @@ error_db(
return maxsum;
}
+// Error metric between two Matrices.
+
+template <typename T1,
+ typename T2,
+ typename Block1,
+ typename Block2>
+double
+error_db(
+ const_Matrix<T1, Block1> v1,
+ const_Matrix<T2, Block2> v2)
+{
+ double maxsum = -250;
+ for (unsigned i = 0; i < v1.size(0); ++i)
+ {
+ double sum = error_db(v1.row(i), v2.row(i));
+ if (sum > maxsum)
+ maxsum = sum;
+ }
+ return maxsum;
+}
+
+
+
+// Error metric between two Tensors.
+
+template <typename T1,
+ typename T2,
+ typename Block1,
+ typename Block2>
+double
+error_db(
+ const_Tensor<T1, Block1> v1,
+ const_Tensor<T2, Block2> v2)
+{
+ double maxsum = -250;
+ for (unsigned i = 0; i < v1.size(0); ++i)
+ {
+ vsip::Domain<1> y(v1.size(1));
+ vsip::Domain<1> x(v1.size(2));
+ double sum = error_db(v1(i,y,x), v2(i,y,x));
+ if (sum > maxsum)
+ maxsum = sum;
+ }
+ return maxsum;
+}
+
// Setup input data for Fft.
@@ -307,12 +354,573 @@ test_real(const int set, const length_ty
assert(error_db(ref,out) < -100);
}
+/////////////////////////////////////////////////////////////////////
+//
+// Comprehensive 2D, 3D test
+//
+
+// Elt: unsigned -> element type
+
+template <typename T, bool realV> struct Elt;
+template <typename T> struct Elt<T,true>
+{
+ typedef T in_type;
+ typedef std::complex<T> out_type;
+};
+template <typename T> struct Elt<T,false>
+{
+ typedef std::complex<T> in_type;
+ typedef std::complex<T> out_type;
+};
+
+template <unsigned Dim, typename T, unsigned L> struct Arg;
+
+template <unsigned Dim, typename T>
+struct Arg<Dim,T,0>
+{
+ typedef typename vsip::impl::View_of_dim<Dim,T,
+ vsip::Dense<Dim,T,typename vsip::impl::Row_major<Dim>::type> >::type type;
+};
+
+template <unsigned Dim, typename T>
+struct Arg<Dim,T,1>
+{
+ typedef typename vsip::impl::View_of_dim<Dim,T,
+ vsip::Dense<Dim,T,typename vsip::impl::Col_major<Dim>::type> >::type type;
+};
+
+template <unsigned Dim, typename T>
+struct Arg<Dim,T,2>
+{
+ typedef typename vsip::impl::View_of_dim<Dim,T,
+ vsip::impl::Fast_block<Dim,T,
+ vsip::impl::Layout<Dim,
+ typename vsip::impl::Row_major<Dim>::type,
+ vsip::impl::Stride_unit_dense
+ > > >::type type;
+};
+
+inline unsigned
+adjust_size(unsigned size, bool is_short, bool is_short_dim, bool no_odds)
+{
+ // no odd sizes along axis for real->complex
+ if ((size & 1) && no_odds && is_short_dim)
+ ++size;
+ return (is_short && is_short_dim) ? size / 2 + 1 : size;
+}
+
+template <unsigned Dim> vsip::Domain<Dim> make_dom(unsigned*, bool, int, bool);
+template <> vsip::Domain<2> make_dom<2>(
+ unsigned* d, bool is_short, int sd, bool no_odds)
+{
+ return vsip::Domain<2>(
+ vsip::Domain<1>(adjust_size(d[1], is_short, sd == 0, no_odds)),
+ vsip::Domain<1>(adjust_size(d[2], is_short, sd == 1, no_odds)));
+}
+template <> vsip::Domain<3> make_dom<3>(
+ unsigned* d, bool is_short, int sd, bool no_odds)
+{
+ return vsip::Domain<3>(
+ vsip::Domain<1>(adjust_size(d[0], is_short, sd == 0, no_odds)),
+ vsip::Domain<1>(adjust_size(d[1], is_short, sd == 1, no_odds)),
+ vsip::Domain<1>(adjust_size(d[2], is_short, sd == 2, no_odds)));
+}
+
+template <typename T, typename BlockT>
+vsip::Domain<2>
+domain_of(vsip::Matrix<T,BlockT> const& src)
+{
+ return vsip::Domain<2>(vsip::Domain<1>(src.size(0)),
+ vsip::Domain<1>(src.size(1)));
+}
+
+
+template <typename T, typename BlockT>
+vsip::Domain<3>
+domain_of(vsip::Tensor<T,BlockT> const& src)
+{
+ return vsip::Domain<2>(vsip::Domain<1>(src.size(0)),
+ vsip::Domain<1>(src.size(1)),
+ vsip::Domain<1>(src.size(2)));
+}
+
+//
+
+template <typename T, typename BlockT>
+vsip::Matrix<T,BlockT>
+force_copy_init(vsip::Matrix<T,BlockT> const& src)
+{
+ vsip::Matrix<T,BlockT> tmp(src.size(0), src.size(1));
+ tmp = src;
+ return tmp;
+}
+
+template <typename T, typename BlockT>
+vsip::Tensor<T,BlockT>
+force_copy_init(vsip::Tensor<T,BlockT> const& src)
+{
+ vsip::Tensor<T,BlockT> tmp(src.size(0), src.size(1), src.size(2));
+ tmp = src;
+ return tmp;
+}
+
+//
+
+template <typename T> void set_values(T& v1, T& v2)
+{ v1 = T(10); v2 = T(20); }
+
+template <typename T> void set_values(std::complex<T>& z1, std::complex<T>& z2)
+{
+ z1 = std::complex<T>(T(10), T(10));
+ z2 = std::complex<T>(T(20), T(20));
+}
+
+#if 1
+
+// 2D
+
+template <typename BlockT, typename T>
+void fill_random(
+ vsip::Matrix<T,BlockT> in, vsip::Rand<T>& rander)
+{
+ in = (rander.randu(in.size(0), in.size(1)) * 20.0) - 10.0;
+}
+
+template <typename BlockT, typename T>
+void fill_random(
+ vsip::Matrix<std::complex<T>,BlockT> in,
+ vsip::Rand<std::complex<T> >& rander)
+{
+ in = rander.randu(in.size(0), in.size(1)) * std::complex<T>(20.0) -
+ std::complex<T>(10.0, 10.0);
+}
+
+// 3D
+
+template <typename BlockT, typename T>
+void fill_random(
+ vsip::Tensor<T,BlockT>& in, vsip::Rand<T>& rander)
+{
+ vsip::Domain<2> sub(vsip::Domain<1>(in.size(1)),
+ vsip::Domain<1>(in.size(2)));
+ for (unsigned i = in.size(0); i-- > 0;)
+ fill_random(in(i, vsip::Domain<1>(in.size(1)),
+ vsip::Domain<1>(in.size(2))), rander);
+}
+
+#else
+// debug -- keep this.
+
+// 2D
+
+template <typename BlockT, typename T>
+void fill_random(
+ vsip::Matrix<T,BlockT> in, vsip::Rand<T>& rander)
+{
+ in = T(0);
+ in.block().put(0, 0, T(1.0));
+}
+
+// 3D
+
+template <typename BlockT, typename T>
+void fill_random(
+ vsip::Tensor<T,BlockT>& in, vsip::Rand<T>& rander)
+{
+ in = T(0);
+ in.block().put(0, 0, 0, T(1.0));
+}
+
+#endif
+
+//////
+
+// 2D, cc
+
+template <typename T, typename inBlock, typename outBlock>
+void
+compute_ref(
+ vsip::Matrix<std::complex<T>,inBlock> const& in,
+ vsip::Domain<2> const& in_dom,
+ vsip::Matrix<std::complex<T>,outBlock>& ref,
+ vsip::Domain<2> const& out_dom,
+ int (& /* dum */)[1])
+{
+ vsip::Fftm<std::complex<T>,std::complex<T>,0,
+ vsip::fft_fwd,vsip::by_reference,1> fftm_across(in_dom, 1.0);
+ fftm_across(in, ref);
+
+ vsip::Fftm<std::complex<T>,std::complex<T>,1,
+ vsip::fft_fwd,vsip::by_reference,1> fftm_down(out_dom, 1.0);
+ fftm_down(ref);
+}
+
+// 2D, rc
+
+template <typename T, typename inBlock, typename outBlock>
+void
+compute_ref(
+ vsip::Matrix<T,inBlock> const& in,
+ vsip::Domain<2> const& in_dom,
+ vsip::Matrix<std::complex<T>,outBlock>& ref,
+ vsip::Domain<2> const& out_dom,
+ int (& /* dum */)[1])
+{
+ vsip::Fftm<T,std::complex<T>,1,
+ vsip::fft_fwd,vsip::by_reference,1> fftm_across(in_dom, 1.0);
+ fftm_across(in, ref);
+
+ typedef std::complex<T> CT;
+ vsip::Fftm<CT,CT,0,
+ vsip::fft_fwd,vsip::by_reference,1> fftm_down(out_dom, 1.0);
+ fftm_down(ref);
+}
+
+// 2D, rc
+
+template <typename T, typename inBlock, typename outBlock>
+void
+compute_ref(
+ vsip::Matrix<T,inBlock> const& in,
+ vsip::Domain<2> const& in_dom,
+ vsip::Matrix<std::complex<T>,outBlock>& ref,
+ vsip::Domain<2> const& out_dom,
+ int (& /* dum */)[2])
+{
+ vsip::Fftm<T,std::complex<T>,0,
+ vsip::fft_fwd,vsip::by_reference,1> fftm_across(in_dom, 1.0);
+ fftm_across(in, ref);
+
+ typedef std::complex<T> CT;
+ vsip::Fftm<CT,CT,1,
+ vsip::fft_fwd,vsip::by_reference,1> fftm_down(out_dom, 1.0);
+ fftm_down(ref);
+}
+
+// 3D, cc
+
+template <typename T, typename inBlock, typename outBlock>
+void
+compute_ref(
+ vsip::Tensor<std::complex<T>,inBlock> const& in,
+ vsip::Domain<3> const& in_dom,
+ vsip::Tensor<std::complex<T>,outBlock>& ref,
+ vsip::Domain<3> const& out_dom,
+ int (& /* dum */)[1])
+{
+ typedef std::complex<T> CT;
+
+ vsip::Fft<vsip::const_Matrix,CT,CT,vsip::fft_fwd,vsip::by_reference,1> fft_across(
+ vsip::Domain<2>(in_dom[1], in_dom[2]), 1.0);
+ for (unsigned i = in_dom[0].size(); i-- > 0; )
+ fft_across(in(i, in_dom[1], in_dom[2]),
+ ref(i, out_dom[1], out_dom[2]));
+
+ // note: axis ---v--- here is reverse of notation used otherwise.
+ vsip::Fftm<CT,CT,1,vsip::fft_fwd,vsip::by_reference,1> fftm_down(
+ vsip::Domain<2>(in_dom[0], in_dom[1]), 1.0);
+ for (unsigned k = in_dom[2].size(); k-- > 0; )
+ fftm_down(ref(out_dom[0], out_dom[1], k));
+}
+
+// 3D, rc, shorten bottom-top
+
+template <typename T, typename inBlock, typename outBlock>
+void
+compute_ref(
+ vsip::Tensor<T,inBlock> const& in,
+ vsip::Domain<3> const& in_dom,
+ vsip::Tensor<std::complex<T>,outBlock>& ref,
+ vsip::Domain<3> const& out_dom,
+ int (& /* dum */)[1])
+{
+ typedef std::complex<T> CT;
+
+ // first, planes left-right, squeeze top-bottom
+ vsip::Fft<vsip::const_Matrix,T,CT,0,vsip::by_reference,1> fft_across(
+ vsip::Domain<2>(in_dom[0], in_dom[1]), 1.0);
+ for (unsigned k = in_dom[2].size(); k-- > 0; )
+ fft_across(in(in_dom[0], in_dom[1], k),
+ ref(out_dom[0], out_dom[1], k));
+
+ // planes top-bottom, running left-right
+ // note: axis ---v--- here is reverse of notation used otherwise.
+ vsip::Fftm<CT,CT,0,vsip::fft_fwd,vsip::by_reference,1> fftm_down(
+ vsip::Domain<2>(in_dom[1], in_dom[2]), 1.0);
+ for (unsigned i = out_dom[0].size(); i-- > 0; )
+ fftm_down(ref(i, out_dom[1], out_dom[2]));
+}
+
+// 3D, rc, shorten front->back
+
+template <typename T, typename inBlock, typename outBlock>
+void
+compute_ref(
+ vsip::Tensor<T,inBlock> const& in,
+ vsip::Domain<3> const& in_dom,
+ vsip::Tensor<std::complex<T>,outBlock>& ref,
+ vsip::Domain<3> const& out_dom,
+ int (& /* dum */)[2])
+{
+ typedef std::complex<T> CT;
+
+ // planes top-bottom, squeeze front-back
+ vsip::Fft<vsip::const_Matrix,T,CT,0,vsip::by_reference,1> fft_across(
+ vsip::Domain<2>(in_dom[1], in_dom[2]), 1.0);
+ for (unsigned i = in_dom[0].size(); i-- > 0; )
+ fft_across(in(i, in_dom[1], in_dom[2]),
+ ref(i, out_dom[1], out_dom[2]));
+
+ // planes front-back, running bottom-top
+ // note: axis ---v--- here is reverse of notation used otherwise.
+ vsip::Fftm<CT,CT,1,vsip::fft_fwd,vsip::by_reference,1> fftm_down(
+ vsip::Domain<2>(in_dom[0], in_dom[2]), 1.0);
+ for (unsigned j = out_dom[1].size(); j-- > 0; )
+ fftm_down(ref(out_dom[0], j, out_dom[2]));
+}
+
+// 3D, rc, shorten left-right
+
+template <typename T, typename inBlock, typename outBlock>
+void
+compute_ref(
+ vsip::Tensor<T,inBlock> const& in,
+ vsip::Domain<3> const& in_dom,
+ vsip::Tensor<std::complex<T>,outBlock>& ref,
+ vsip::Domain<3> const& out_dom,
+ int (& /* dum */)[3])
+{
+ typedef std::complex<T> CT;
+
+ // planes top-bottom, squeeze left-right
+ vsip::Fft<vsip::const_Matrix,T,CT,1,vsip::by_reference,1> fft_across(
+ vsip::Domain<2>(in_dom[1], in_dom[2]), 1.0);
+ for (unsigned i = in_dom[0].size(); i-- > 0; )
+ fft_across(in(i, in_dom[1], in_dom[2]),
+ ref(i, out_dom[1], out_dom[2]));
+
+ // planes left-right, running bottom-top
+ // note: axis ---v--- here is reverse of notation used otherwise.
+ vsip::Fftm<CT,CT,1,vsip::fft_fwd,vsip::by_reference,1> fftm_down(
+ vsip::Domain<2>(in_dom[0], in_dom[1]), 1.0);
+ for (unsigned k = out_dom[2].size(); k-- > 0; )
+ fftm_down(ref(out_dom[0], out_dom[1], k));
+}
+
+template <unsigned Dim, typename T1, typename T2,
+ int sD, vsip::return_mechanism_type How>
+struct Test_fft;
+
+template <typename T1, typename T2, int sD, vsip::return_mechanism_type How>
+struct Test_fft<2,T1,T2,sD,How>
+{ typedef vsip::Fft<vsip::const_Matrix,T1,T2,sD,How,1,vsip::alg_time> type; };
+
+template <typename T1, typename T2, int sD, vsip::return_mechanism_type How>
+struct Test_fft<3,T1,T2,sD,How>
+{ typedef vsip::Fft<vsip::const_Tensor,T1,T2,sD,How,1,vsip::alg_time> type; };
+
+// check_in_place
+//
+
+// there is no in-place for real->complex
+
+template <template <typename,typename> class ViewT1,
+ template <typename,typename> class ViewT2,
+ template <typename,typename> class ViewT3,
+ typename T, typename Block1, typename Block2, int sDf, int sDi>
+void
+check_in_place(
+ vsip::Fft<ViewT1,T,std::complex<T>,sDf,vsip::by_reference,1,vsip::alg_time>&,
+ vsip::Fft<ViewT1,std::complex<T>,T,sDi,vsip::by_reference,1,vsip::alg_time>&,
+ ViewT2<T,Block1>&, ViewT3<std::complex<T>,Block2>&, double)
+{ }
+
+template <template <typename,typename> class ViewT1,
+ template <typename,typename> class ViewT2,
+ template <typename,typename> class ViewT3,
+ typename T, typename Block1, typename Block2>
+void
+check_in_place(
+ vsip::Fft<ViewT1,T,T,vsip::fft_fwd,vsip::by_reference,1,vsip::alg_time>& fwd,
+ vsip::Fft<ViewT1,T,T,vsip::fft_inv,vsip::by_reference,1,vsip::alg_time>& inv,
+ ViewT2<T,Block1> const& in,
+ ViewT3<T,Block2> const& ref,
+ double scalei)
+{
+ typename vsip::impl::View_of_dim<Block1::dim,T,Block1>::type inout(
+ force_copy_init(in));
+
+ fwd(inout);
+ assert(error_db(inout, ref) < -100);
+
+ inv(inout);
+ inout *= T(scalei);
+ assert(error_db(inout, in) < -100);
+}
+
+// when testing matrices, will use latter two values
+
+unsigned sizes[][3] =
+{
+ { 2, 2, 2 },
+ { 8, 8, 8 },
+ { 1, 1, 1 },
+ { 2, 2, 1 },
+ { 2, 8, 128 },
+ { 3, 5, 7 },
+ { 2, 24, 48 },
+ { 24, 1, 5 },
+};
+
+// the generic test
+
+template <unsigned inL, unsigned outL, typename F, bool isReal,
+ unsigned Dim, int sD>
+void
+test_fft()
+{
+ typedef typename Elt<F,isReal>::in_type in_elt_type;
+ typedef typename Elt<F,false>::out_type out_elt_type;
+
+ static const int sdf = (sD < 0) ? vsip::fft_fwd : sD;
+ static const int sdi = (sD < 0) ? vsip::fft_inv : sD;
+ typedef typename Test_fft<Dim,in_elt_type,out_elt_type,
+ sdf,vsip::by_reference>::type fwd_by_ref_type;
+ typedef typename Test_fft<Dim,in_elt_type,out_elt_type,
+ sdf,vsip::by_value>::type fwd_by_value_type;
+ typedef typename Test_fft<Dim,out_elt_type,in_elt_type,
+ sdi,vsip::by_reference>::type inv_by_ref_type;
+ typedef typename Test_fft<Dim,out_elt_type,in_elt_type,
+ sdi,vsip::by_value>::type inv_by_value_type;
+
+ typedef typename Arg<Dim,in_elt_type,inL>::type in_type;
+ typedef typename Arg<Dim,out_elt_type,outL>::type out_type;
+
+ for (unsigned i = 0; i < sizeof(sizes)/(sizeof(*sizes)*3); ++i)
+ {
+ vsip::Rand<in_elt_type> rander(
+ sizes[i][0] * sizes[i][1] * sizes[i][2] * Dim * (sD+5) * (isReal+1));
+
+ Domain<Dim> in_dom(make_dom<Dim>(sizes[i], false, sD, isReal));
+ Domain<Dim> out_dom(make_dom<Dim>(sizes[i], isReal, sD, isReal));
+
+ typedef typename in_type::block_type in_block_type;
+ typedef typename out_type::block_type out_block_type;
+
+ in_block_type in_block(in_dom);
+ in_type in(in_block);
+ fill_random(in, rander);
+ in_type in_copy(force_copy_init(in));
+
+ out_block_type ref1_block(out_dom);
+ out_type ref1(ref1_block);
+ int dum[(sD < 0) ? 1 : sD + 1];
+ compute_ref(in, in_dom, ref1, out_dom, dum);
+
+ out_type ref4(force_copy_init(ref1));
+ ref4 *= out_elt_type(0.25);
+
+ out_type refN(force_copy_init(ref1));
+ refN /= out_elt_type(in_dom.size());
+
+ assert(error_db(in, in_copy) < -200); // not clobbered
+
+ { fwd_by_ref_type fft_ref1(in_dom, 1.0);
+ out_block_type out_block(out_dom);
+ out_type out(out_block);
+ out_type other = fft_ref1(in, out);
+ assert(&out.block() == &other.block());
+ assert(error_db(in, in_copy) < -200); // not clobbered
+ assert(error_db(out, ref1) < -100);
+
+ inv_by_ref_type inv_refN(in_dom, 1.0/in_dom.size());
+ in_block_type in2_block(in_dom);
+ in_type in2(in2_block);
+ inv_refN(out, in2);
+ assert(error_db(out, ref1) < -100); // not clobbered
+ assert(error_db(in2, in) < -100);
+
+ check_in_place(fft_ref1, inv_refN, in, ref1, 1.0);
+ }
+ { fwd_by_ref_type fft_ref4(in_dom, 0.25);
+ out_block_type out_block(out_dom);
+ out_type out(out_block);
+ out_type other = fft_ref4(in, out);
+ assert(&out.block() == &other.block());
+ assert(error_db(in, in_copy) < -200); // not clobbered
+ assert(error_db(out, ref4) < -100);
+
+ inv_by_ref_type inv_ref8(in_dom, .125);
+ in_block_type in2_block(in_dom);
+ in_type in2(in2_block);
+ inv_ref8(out, in2);
+ assert(error_db(out, ref4) < -100); // not clobbered
+ in2 /= in_elt_type(in_dom.size() / 32.0);
+ assert(error_db(in2, in) < -100);
+
+ check_in_place(fft_ref4, inv_ref8, in, ref4, 32.0/in_dom.size());
+ }
+ { fwd_by_ref_type fft_refN(in_dom, 1.0/in_dom.size());
+ out_block_type out_block(out_dom);
+ out_type out(out_block);
+ out_type other = fft_refN(in, out);
+ assert(&out.block() == &other.block());
+ assert(error_db(in, in_copy) < -200); // not clobbered
+ assert(error_db(out, refN) < -100);
+
+ inv_by_ref_type inv_ref1(in_dom, 1.0);
+ in_block_type in2_block(in_dom);
+ in_type in2(in2_block);
+ inv_ref1(out, in2);
+ assert(error_db(out, refN) < -100); // not clobbered
+ assert(error_db(in2, in) < -100);
+
+ check_in_place(fft_refN, inv_ref1, in, refN, 1.0);
+ }
+
+
+ { fwd_by_value_type fwd_val1(in_dom, 1.0);
+ out_type out(fwd_val1(in));
+ assert(error_db(in, in_copy) < -200); // not clobbered
+ assert(error_db(out, ref1) < -100);
+
+ inv_by_value_type inv_valN(in_dom, 1.0/in_dom.size());
+ in_type in2(inv_valN(out));
+ assert(error_db(out, ref1) < -100); // not clobbered
+ assert(error_db(in2, in) < -100);
+ }
+ { fwd_by_value_type fwd_val4(in_dom, 0.25);
+ out_type out(fwd_val4(in));
+ assert(error_db(in, in_copy) < -200); // not clobbered
+ assert(error_db(out, ref4) < -100);
+
+ inv_by_value_type inv_val8(in_dom, 0.125);
+ in_type in2(inv_val8(out));
+ assert(error_db(out, ref4) < -100); // not clobbered
+ in2 /= in_elt_type(in_dom.size() / 32.0);
+ assert(error_db(in2, in) < -100);
+ }
+ { fwd_by_value_type fwd_valN(in_dom, 1.0/in_dom.size());
+ out_type out(fwd_valN(in));
+ assert(error_db(in, in_copy) < -200); // not clobbered
+ assert(error_db(out, refN) < -100);
+
+ inv_by_value_type inv_val1(in_dom, 1.0);
+ in_type in2(inv_val1(out));
+ assert(error_db(out, refN) < -100); // not clobbered
+ assert(error_db(in2, in) < -100);
+ }
+ }
+};
int
main()
{
vsipl init;
+//
+// First check 1D
+//
#if defined(VSIP_IMPL_FFT_USE_FLOAT)
test_by_ref<complex<float> >(2, 64);
@@ -329,7 +937,7 @@ main()
test_real<float>(2, 242);
test_real<float>(3, 16);
-#endif
+#endif
#if defined(VSIP_IMPL_FFT_USE_DOUBLE)
@@ -347,6 +955,126 @@ main()
test_real<double>(2, 242);
test_real<double>(3, 16);
+#endif
+
+
+//
+// check 2D, 3D
+//
+
+#if defined(VSIP_IMPL_FFT_USE_FLOAT)
+
+ test_fft<0,0,float,false,2,vsip::fft_fwd>();
+
+#if ! defined(VSIP_IMPL_IPP_FFT)
+ test_fft<0,0,float,false,3,vsip::fft_fwd>();
+
+ test_fft<0,0,float,true,2,1>();
+ test_fft<0,0,float,true,2,0>();
+
+ test_fft<0,0,float,true,3,2>();
+ test_fft<0,0,float,true,3,1>();
+ test_fft<0,0,float,true,3,0>();
+#endif /* VSIP_IMPL_IPP_FFT */
+
+#endif
+
+#if defined(VSIP_IMPL_FFT_USE_DOUBLE)
+
+#if ! defined(VSIP_IMPL_IPP_FFT)
+ test_fft<0,0,double,false,2,vsip::fft_fwd>();
+ test_fft<0,0,double,false,3,vsip::fft_fwd>();
+
+ test_fft<0,0,double,true,2,1>();
+ test_fft<0,0,double,true,2,0>();
+
+ test_fft<0,0,double,true,3,2>();
+ test_fft<0,0,double,true,3,1>();
+ test_fft<0,0,double,true,3,0>();
+#endif /* VSIP_IMPL_IPP_FFT */
+
+#endif
+
+//
+// check with different block types
+//
+
+#if defined(VSIP_IMPL_FFT_USE_FLOAT)
+# define SCALAR float
+#elif defined(VSIP_IMPL_FFT_USE_FLOAT)
+# define SCALAR double
+#endif
+
+#if defined(SCALAR)
+
+ test_fft<0,1,SCALAR,false,2,vsip::fft_fwd>();
+ test_fft<0,2,SCALAR,false,2,vsip::fft_fwd>();
+ test_fft<1,0,SCALAR,false,2,vsip::fft_fwd>();
+ test_fft<1,1,SCALAR,false,2,vsip::fft_fwd>();
+ test_fft<1,2,SCALAR,false,2,vsip::fft_fwd>();
+ test_fft<2,0,SCALAR,false,2,vsip::fft_fwd>();
+ test_fft<2,1,SCALAR,false,2,vsip::fft_fwd>();
+ test_fft<2,2,SCALAR,false,2,vsip::fft_fwd>();
+
+#if ! defined(VSIP_IMPL_IPP_FFT)
+ test_fft<0,1,SCALAR,true,2,1>();
+ test_fft<0,1,SCALAR,true,2,0>();
+ test_fft<0,2,SCALAR,true,2,1>();
+ test_fft<0,2,SCALAR,true,2,0>();
+
+ test_fft<1,0,SCALAR,true,2,1>();
+ test_fft<1,0,SCALAR,true,2,0>();
+ test_fft<1,1,SCALAR,true,2,1>();
+ test_fft<1,1,SCALAR,true,2,0>();
+ test_fft<1,2,SCALAR,true,2,1>();
+ test_fft<1,2,SCALAR,true,2,0>();
+
+ test_fft<2,0,SCALAR,true,2,1>();
+ test_fft<2,0,SCALAR,true,2,0>();
+ test_fft<2,1,SCALAR,true,2,1>();
+ test_fft<2,1,SCALAR,true,2,0>();
+ test_fft<2,2,SCALAR,true,2,1>();
+ test_fft<2,2,SCALAR,true,2,0>();
+
+
+ test_fft<0,1,SCALAR,false,3,vsip::fft_fwd>();
+ test_fft<0,2,SCALAR,false,3,vsip::fft_fwd>();
+ test_fft<1,0,SCALAR,false,3,vsip::fft_fwd>();
+ test_fft<1,1,SCALAR,false,3,vsip::fft_fwd>();
+ test_fft<1,2,SCALAR,false,3,vsip::fft_fwd>();
+ test_fft<2,0,SCALAR,false,3,vsip::fft_fwd>();
+ test_fft<2,1,SCALAR,false,3,vsip::fft_fwd>();
+ test_fft<2,2,SCALAR,false,3,vsip::fft_fwd>();
+
+ test_fft<0,1,SCALAR,true,3,2>();
+ test_fft<0,1,SCALAR,true,3,1>();
+ test_fft<0,1,SCALAR,true,3,0>();
+ test_fft<0,2,SCALAR,true,3,2>();
+ test_fft<0,2,SCALAR,true,3,1>();
+ test_fft<0,2,SCALAR,true,3,0>();
+
+ test_fft<1,0,SCALAR,true,3,2>();
+ test_fft<1,0,SCALAR,true,3,1>();
+ test_fft<1,0,SCALAR,true,3,0>();
+ test_fft<1,1,SCALAR,true,3,2>();
+ test_fft<1,1,SCALAR,true,3,1>();
+ test_fft<1,1,SCALAR,true,3,0>();
+ test_fft<1,2,SCALAR,true,3,2>();
+ test_fft<1,2,SCALAR,true,3,1>();
+ test_fft<1,2,SCALAR,true,3,0>();
+
+ test_fft<2,0,SCALAR,true,3,2>();
+ test_fft<2,0,SCALAR,true,3,1>();
+ test_fft<2,0,SCALAR,true,3,0>();
+ test_fft<2,1,SCALAR,true,3,2>();
+ test_fft<2,1,SCALAR,true,3,1>();
+ test_fft<2,1,SCALAR,true,3,0>();
+ test_fft<2,2,SCALAR,true,3,2>();
+ test_fft<2,2,SCALAR,true,3,1>();
+ test_fft<2,2,SCALAR,true,3,0>();
+
+#endif /* VSIP_IMPL_IPP_FFT */
+
#endif
return 0;
More information about the vsipl++
mailing list