[vsipl++] Matlab IO Patch

Thu May 25 14:06:46 UTC 2006

Assem,

Can you take a look at generalizing the functions and structures to deal 
with arbitrary dimensions, rather than having separate instances for 
each?  Things like:

  - common struct for matlab header (instead of matrix, tensor, etc).

  - common write routine

I'm not sure how vector fits in, since it looks like matlab treats it as 
a special case of matrx.

				-- Jules

Assem Salama wrote:

> +
> +  struct matrix
> +  {
> +    data_element header;
> +    data_element array_flags_header;
> +    char array_flags[8];
> +    data_element dim_header;
> +    int32_t dim1;
> +    int32_t dim2;
> +    data_element array_name_header;
> +  };
> +
> +  struct tensor
> +  {
> +    data_element header;
> +    data_element array_flags_header;
> +    char array_flags[8];
> +    data_element dim_header;
> +    int32_t dim1;
> +    int32_t dim2;
> +    int32_t dim3;
> +    int32_t pad;
> +    data_element array_name_header;
> +  };

To generalize this, how about something like:

template <typename Dim>
struct matlab_header
{
     data_element header;
     data_element array_flags_header;
     char array_flags[8];
     data_element dim_header;
     int32_t dim[Dim];
     int32_t pad[Dim%2];
     data_element array_name_header;
}

> +
> +  // some structures to helps determine if a type is single precision
> +  template <typename T>
> +  struct Is_single
> +  { static bool const value = false; };
> +
> +  template <>
> +  struct Is_single<float>
> +  { static bool const value = true; };
> +
> +  template <>
> +  struct Is_single<std::complex<float> >
> +  { static bool const value = true; };

If Is_single<complex<T> >::value is always the same as 
Is_single<T>::value, then the following is preferrable since it avoids 
the duplicated entries for 'float' and 'complex<float>':

template <typename T>
struct Is_single<std::complex<T> >
   : Is_single<T>
{};

However, judging from how Is_single is used (to determine an enum to 
indicate the value type of elements in a view), we need something more 
general to deal with types other than float and double (i.e. we will 
want to read/write views of int, short, etc).  A traits class that 
converts a C++ type into a matlab enum would work well for this:

// General case.
template <typename T>
struct Matlab_type_traits;

// Complex types reduce to same value as scalar_type.
template <typename T>
struct Matlab_type_traits<std::complex<T> >
   : Matlab_type_traits<T>
{};

template <>
struct Matlab_type_traits<float>
{
   static int const data_type  = muSINGLE;
   static int const class_type = mxSINGLE_CLASS;
}

... double

... int

... etc

For int, we need some support from configure.ac to determine whether int 
is 32 bits (miINT32) or 64 bits (miINT64).

> +
> +  struct header
> +  {
> +    char description[116];
> +    char subsyt_data[8];
> +    char version[2];
> +    char endian[2];
> +  };
> +
> +  // constants for matlab binary format
> +
> +  // data types
> +  const int miINT8           = 1;

Coding standard point: we prefer to put the type before the 'const' 
(i.e. 'int const' instead of 'const int').  For simple types like 'int' 
they are equivalent, but for pointer and reference types, the location 
of the const changes the meaning, i.e. 'const int*' == 'int const*' != 
'int* const'.

> +  const int miUINT8          = 2;

We should generalize this function to work with arbitrary dimension 
views (i.e. vectors, matrices, and tensors).

> +// operator to write tensor to matlab file
> +template <typename T,
> +          typename Block0>
> +inline
> +std::ostream&
> +operator<<(
> +  std::ostream&                                               o,
> +  Matlab_bin_formatter<vsip::const_Tensor<T,Block0> >const&   mbf)
> +{
> +  typedef typename vsip::impl::Scalar_of<T>::type scalar_type;
> +  matlab::data_element temp_data_el;
> +  int    num_points = mbf.v.size(0)*mbf.v.size(1)*mbf.v.size(2);
> +  int    sz;
> +  matlab::tensor m_tensor;
> +
> +  memset(&m_tensor,0,sizeof(m_tensor));
> +
> +  // matrix data type
> +  m_tensor.header.type = matlab::miMATRIX;
> +  m_tensor.header.size = 1; // TEMP
> +
> +  // array flags
> +  m_tensor.array_flags_header.type = matlab::miUINT32;
> +  m_tensor.array_flags_header.size = 8;
> +  if(vsip::impl::Is_complex<T>::value) 
> +    m_tensor.array_flags[1] |= 0x8; // Complex
> +  if(matlab::Is_single<T>::value)
> +    m_tensor.array_flags[0] = matlab::mxSINGLE_CLASS; // single precision
> +  else
> +    m_tensor.array_flags[0] = matlab::mxDOUBLE_CLASS; // double precision
> +  
> +  // dimension sizes
> +  m_tensor.dim_header.type = matlab::miINT32;
> +  m_tensor.dim_header.size = 12;
> +  m_tensor.dim1 = mbf.v.size(0);
> +  m_tensor.dim2 = mbf.v.size(1);
> +  m_tensor.dim3 = mbf.v.size(2);
> +
> +  // array name
> +  m_tensor.array_name_header.type = matlab::miINT8;
> +  m_tensor.array_name_header.size = mbf.view_name.length();
> +
> +
> +  // calculate size
> +  sz = sizeof(m_tensor)-8;
> +  sz += mbf.view_name.length();
> +  sz += (8-mbf.view_name.length())&0x7;
> +  sz += 8; // 8 bytes of header for real data
> +  if(vsip::impl::Is_complex<T>::value) sz += 8; // 8 more for complex data
> +  sz += num_points*sizeof(T);
> +  m_tensor.header.size = sz;
> +
> +  o.write(reinterpret_cast<char*>(&m_tensor),sizeof(m_tensor));
> +
> +  // write array name
> +  o.write(mbf.view_name.c_str(),mbf.view_name.length());
> +  // pad

Can you be more specific about the padding requirements?  I.e.

	// Pad this array to 8-byte boundary.

> +  { 
> +    char c=0;
> +    for(int i=0;i < ((8-mbf.view_name.length())&0x7);i++) o.write(&c,1);
> +  }
> +
> +  // write real data
> +  if(matlab::Is_single<T>::value)
> +    temp_data_el.type = matlab::miSINGLE;
> +  else
> +    temp_data_el.type = matlab::miDOUBLE;
> +
> +  temp_data_el.size = sizeof(scalar_type)*num_points;
> +  o.write(reinterpret_cast<char*>(&temp_data_el),sizeof(temp_data_el));
> +
> +  {
> +    scalar_type real_data;
> +

Instead of explicitly handling each dimension, you could use the 
Index/Extent bits here.  The performance is slightly worse, but it 
shouldn't be noticeable on top of performing IO an element at a time.

To improve performance, we should check if data is in right format 
(dense, col-major for non-complex; dense, col-major, split for complex) 
for using Ext_data.  If it is, we can write the data with one or two 
large writes.

> +    // Matlab wants data in col major format
> +    for(vsip::length_type i=0;i<mbf.v.size(2);i++) {
> +      for(vsip::length_type j=0;j<mbf.v.size(1);j++) {
> +        for(vsip::length_type k=0;k<mbf.v.size(0);k++) {
> +          real_data = vsip::impl::fn::impl_real(mbf.v.get(k,j,i));
> +          o.write(reinterpret_cast<char*>(&real_data),sizeof(real_data));
> +	}
> +      }
> +    }
> +  }
> +
> +  if(!vsip::impl::Is_complex<T>::value) return o; // we are done here
> +
> +  // write imaginary data
> +  if(matlab::Is_single<T>::value)
> +    temp_data_el.type = matlab::miSINGLE;
> +  else
> +    temp_data_el.type = matlab::miDOUBLE;
> +
> +  temp_data_el.size = sizeof(scalar_type)*num_points;
> +  o.write(reinterpret_cast<char*>(&temp_data_el),sizeof(temp_data_el));
> +
> +  {
> +    scalar_type imag_data;
> +
> +    // Matlab wants data in col major format
> +    for(vsip::length_type i=0;i<mbf.v.size(2);i++) {
> +      for(vsip::length_type j=0;j<mbf.v.size(1);j++) {
> +        for(vsip::length_type k=0;k<mbf.v.size(0);k++) {
> +          imag_data = vsip::impl::fn::impl_imag(mbf.v.get(k,j,i));
> +          o.write(reinterpret_cast<char*>(&imag_data),sizeof(imag_data));
> +	}
> +      }
> +    }
> +  }
> +
> +  return o;
> +}
> +

> +
> +// operator to write vector to matlab file
> +template <typename T,
> +          typename Block0>
> +inline
> +std::ostream&
> +operator<<(
> +  std::ostream&                                               o,
> +  Matlab_bin_formatter<vsip::const_Vector<T,Block0> > const&  mbf)
> +{

This function will go away as we merge the write functions together 
(although it looks like handling vectors will require some special case 
logic).

> +  // A vector is treated like a mx1 matrix
> +  vsip::Matrix<T> m(1,mbf.v_.size(0));
> +  m.row(0) = mbf.v_;
> +  return o << Matlab_bin_formatter<vsip::Matrix<T> >(m,mbf.view_name_);
> +}
> +

-- 
Jules Bergmann
CodeSourcery
jules at codesourcery.com
(650) 331-3385 x705