[vsipl++] [patch] support for non-contiguous rows or columns with Cell FFTM

Mon Mar 12 22:37:26 UTC 2007

Don McCoy wrote:

> Index: src/vsip/opt/cbe/ppu/fft.cpp
> ===================================================================
> --- src/vsip/opt/cbe/ppu/fft.cpp	(revision 165340)
> +++ src/vsip/opt/cbe/ppu/fft.cpp	(working copy)
> @@ -53,18 +53,16 @@
>    fft(std::complex<T> const* in, std::complex<T>* out, 
>      length_type length, T scale, int exponent)
>    {
> -    // Note: the twiddle factors require only 1/4 the memory of the input and 
> -    // output arrays.
>      Fft_params fftp;
>      fftp.direction = (exponent == -1 ? fwd_fft : inv_fft);
>      fftp.elements = length;
>      fftp.scale = scale;
>      fftp.ea_twiddle_factors = 
>        reinterpret_cast<unsigned long long>(twiddle_factors_.get());
> -    fftp.ea_input_buffer    = 0;
> -    fftp.ea_output_buffer   = 0;
> -    fftp.in_blk_stride      = 0;
> -    fftp.out_blk_stride     = 0;
> +    fftp.ea_input_buffer    = reinterpret_cast<unsigned long long>(in);
> +    fftp.ea_output_buffer   = reinterpret_cast<unsigned long long>(out);
> +    fftp.in_blk_stride      = 1;  // not applicable in the single FFT case
> +    fftp.out_blk_stride     = 1;
>  
>      Task_manager *mgr = Task_manager::instance();
>      // The stack size is determined by accounting for the *worst case*
> @@ -76,11 +74,9 @@
>         sizeof(Fft_params),
>         sizeof(complex<T>)*length*2, 

Could you please add a comment explaining this factor '2' ? It isn't obvious...

>         sizeof(complex<T>)*length,
> -       false);
> -    Workblock block = task.create_block();
> +       true);
> +    Workblock block = task.create_multi_block(1);
>      block.set_parameters(fftp);
> -    block.add_input(in, length);
> -    block.add_output(out, length);
>      task.enqueue(block);
>      task.sync();
>    }

[...]

> Index: tests/fft_be.cpp
> ===================================================================
> --- tests/fft_be.cpp	(revision 165340)
> +++ tests/fft_be.cpp	(working copy)

[...]

> @@ -152,24 +166,33 @@
>    static Domain<D> out_dom(Domain<D> const &dom) { return dom;}
>  };
>  
> -template <typename T>
> +template <typename T,
> +          typename OrderT>
>  const_Vector<T, impl::Generator_expr_block<1, impl::Ramp_generator<T> > const>
>  ramp(Domain<1> const &dom) 
>  { return vsip::ramp(T(0.), T(1.), dom.length() * dom.stride());}
>  
> -template <typename T>
> -Matrix<T>
> +template <typename T,
> +          typename OrderT>
> +Matrix<T, Dense<2, T, OrderT> >
>  ramp(Domain<2> const &dom) 
>  {
> +  typedef OrderT order_type;
> +  typedef Dense<2, T, order_type> block_type;
>    length_type rows = dom[0].length() * dom[0].stride();
>    length_type cols = dom[1].length() * dom[1].stride();
> -  Matrix<T> m(rows, cols);
> -  for (size_t r = 0; r != rows; ++r)
> -    m.row(r) = ramp(T(r), T(1.), m.size(1));
> +  Matrix<T, block_type> m(rows, cols);
> +  if (impl::Type_equal<row2_type, order_type>::value)
> +    for (size_t r = 0; r != rows; ++r)
> +      m.row(r) = ramp(T(r), T(1.), m.size(1));
> +  else
> +    for (size_t c = 0; c != cols; ++c)
> +      m.col(c) = ramp(T(c), T(1.), m.size(0));
>    return m;
>  }

While I like the addition of the dimension-ordering parameter, I think
the conditional initialization here is a bit misleading: The value of
matrix(x, y) should be the same, no matter its dimension-ordering.

> -template <typename T>
> +template <typename T,
> +          typename OrderT>
>  Tensor<T>
>  ramp(Domain<3> const &dom) 
>  {

[...]

> @@ -222,7 +246,7 @@
>    typedef typename rfft_type<T, F, 1, A>::I I;
>    static typename impl::View_of_dim<D, I, Dense<D, I> >::type
>    create(Domain<D> const &dom) 
> -  { return ramp<I>(rfft_type<T, F, 1, A>::in_dom(dom));}
> +    { return ramp<I, row1_type>(rfft_type<T, F, 1, A>::in_dom(dom));}
>  };

I think with the above in place we should go all the way and push the order parameter
up to the highest level, so all tests get run twice, once for row-major and once for
col-major. That gives maximum coverage.

>  // Real inverse 2D FFT.
> @@ -238,7 +262,7 @@
>      length_type rows2 = rows/2+1;
>      length_type cols2 = cols/2+1;
>  
> -    Matrix<I> input = ramp<I>(rfft_type<T, F, 1, A>::in_dom(dom));
> +    Matrix<I> input = ramp<I, row1_type>(rfft_type<T, F, 1, A>::in_dom(dom));
>      if (rfft_type<T, F, 1, A>::axis == 0)
>      {
>        // Necessary symmetry:
> @@ -330,8 +354,8 @@
>    typedef impl::Fast_block<D, CT, layout_type> block_type;
>    typedef typename impl::View_of_dim<D, CT, block_type>::type View;
>  
> -  View data = ramp<T>(dom);
> -  View ref = ramp<T>(dom);
> +  View data = ramp<T, row1_type>(dom);
> +  View ref = ramp<T, row1_type>(dom);
>  
>    typename View::subview_type sub_data = data(dom);
>  
> @@ -357,9 +381,10 @@
>  {
>    typedef typename T::I I;
>    typedef typename T::O O;
> -  typedef typename impl::Layout<2, row1_type,
> +  typedef typename T::order_type order_type;
> +  typedef typename impl::Layout<2, order_type,
>      impl::Stride_unit_dense, typename T::i_format> i_layout_type;
> -  typedef typename impl::Layout<2, row1_type,
> +  typedef typename impl::Layout<2, order_type,
>      impl::Stride_unit_dense, typename T::o_format> o_layout_type;
>    return_mechanism_type const r = by_reference;
>  
> @@ -371,7 +396,7 @@
>    Domain<2> in_dom = T::in_dom(dom);
>    Domain<2> out_dom = T::out_dom(dom);
>  
> -  Iview input = input_creator<T, 2>::create(dom);
> +  Iview input = input_creator<T, 2, order_type>::create(dom);
>    typename Iview::subview_type sub_input = input(in_dom);
>  
>    Oview output = empty<O>(out_dom);
> @@ -408,8 +433,8 @@
>    typedef impl::Fast_block<2, CT, layout_type> block_type;
>    typedef Matrix<CT, block_type> View;
>  
> -  View data = ramp<T>(dom);
> -  View ref = ramp<T>(dom);
> +  View data = ramp<T, row1_type>(dom);
> +  View ref = ramp<T, row1_type>(dom);
>  
>    typename View::subview_type sub_data = data(dom);
>  
> @@ -498,6 +523,13 @@
>    fft_in_place<T, F, 1, cvsip>(Domain<1>(0, 2, 8));
>  #endif
>  
> +#if VSIP_IMPL_CBE_SDK
> +  std::cout << "testing fwd in_place cbe...";
> +  fft_in_place<T, F, -1, cbe>(Domain<1>(32));
> +  std::cout << "testing inv in_place cbe...";
> +  fft_in_place<T, F, 1, cbe>(Domain<1>(32));
> +#endif
> +
>  #if VSIP_IMPL_FFTW3
>    std::cout << "testing c->c fwd by_ref fftw...";
>    fft_by_ref<cfft_type<T, F, -1>, fftw>(Domain<1>(16));
> @@ -558,7 +590,14 @@
>    fft_by_ref<rfft_type<T, F, 1, 0>, cvsip>(Domain<1>(0, 2, 8));
>  #endif
>  
> +#if VSIP_IMPL_CBE_SDK
> +  std::cout << "testing c->c fwd by_ref cbe...";
> +  fft_by_ref<cfft_type<T, F, -1>, cbe>(Domain<1>(32));
> +  std::cout << "testing c->c inv by_ref cbe...";
> +  fft_by_ref<cfft_type<T, F, 1>, cbe>(Domain<1>(32));
>  #endif
> +
> +#endif
>  }
>  
>  template <typename T, typename F>
> @@ -902,6 +941,23 @@
>    fftm_in_place<T, F, 1, 1, cvsip>(Domain<2>(8, 16));
>  #endif
>  
> +#if VSIP_IMPL_CBE_SDK
> +// Note: column-wise FFTs need to be performed on
> +// col-major data in this case.  These are commented
> +// out until fftm_in_place is changed to be like
> +// fftm_by_ref, where the cfft_type<> template allows
> +// the dimension order to be specified.

That's OK, though I believe we should fix that as soon as possible,
such that fft_be.cpp remains as much backend-agnostic as possible,
i.e. no backend-specific tests creep in.

(I can complete that if you are busy finishing other bits.)

> +
> +//  std::cout << "testing fwd on cols in_place cbe...";
> +//  fftm_in_place<T, F, -1, 0, cbe>(Domain<2>(64, 32));
> +  std::cout << "testing fwd on rows in_place cbe...";
> +  fftm_in_place<T, F, -1, 1, cbe>(Domain<2>(32, 64));
> +//  std::cout << "testing inv on cols in_place cbe...";
> +//  fftm_in_place<T, F, 1, 0, cbe>(Domain<2>(64, 32));
> +  std::cout << "testing inv on rows in_place cbe...";
> +  fftm_in_place<T, F, 1, 1, cbe>(Domain<2>(32, 64));
> +#endif
> +
>  #if VSIP_IMPL_FFTW3
>    std::cout << "testing c->c fwd 0 by_ref fftw...";
>    fftm_by_ref<cfft_type<T, F, -1, 0>, fftw>(Domain<2>(8, 16));
> @@ -978,7 +1034,24 @@
>    fftm_by_ref<rfft_type<T, F, 1, 1>, cvsip> (Domain<2>(4, 16));
>  #endif
>  
> +#if VSIP_IMPL_CBE_SDK
> +  std::cout << "testing c->c fwd on cols by_ref cbe...";
> +  fftm_by_ref<cfft_type<T, F, -1, 0, col2_type>, cbe>(Domain<2>(32, 64));
> +  fftm_by_ref<cfft_type<T, F, -1, 0, col2_type>, cbe>(Domain<2>(Domain<1>(32), Domain<1>(0, 2, 32)));
> +  std::cout << "testing c->c fwd on rows by_ref cbe...";
> +  fftm_by_ref<cfft_type<T, F, -1, 1, row2_type>, cbe>(Domain<2>(32, 64));
> +  fftm_by_ref<cfft_type<T, F, -1, 1, row2_type>, cbe>(Domain<2>(Domain<1>(0, 2, 32), Domain<1>(64)));
> +  std::cout << "testing c->c inv 0 by_ref cbe...";
> +  fftm_by_ref<cfft_type<T, F, 1, 0, col2_type>, cbe>(Domain<2>(32, 64));
> +  fftm_by_ref<cfft_type<T, F, 1, 0, col2_type>, cbe>(Domain<2>(Domain<1>(32), Domain<1>(0, 2, 32)));
> +  std::cout << "testing c->c inv 1 by_ref cbe...";
> +  fftm_by_ref<cfft_type<T, F, 1, 1, row2_type>, cbe>(Domain<2>(32, 64));
> +  fftm_by_ref<cfft_type<T, F, 1, 1, row2_type>, cbe>(Domain<2>(Domain<1>(0, 2, 32), Domain<1>(64)));
>  #endif
> +
> +
> +
> +#endif
>  }
>  
>  int main(int argc, char **argv)

Same here.

Thanks,
		Stefan

-- 
Stefan Seefeld
CodeSourcery
stefan at codesourcery.com
(650) 331-3385 x718