pooma performance

Tijskens, Bert Bert.Tijskens at agr.kuleuven.ac.be
Tue Dec 4 07:10:39 UTC 2001


Hi,
looking for support for POOMA++ 2.3.0 I found this e-mail address
somewhere on the internet.
I wrote the following benchmark program, which computes something of the
form y = a*x + b, where y and x are (DynamicArrays of) vectors and a and b
are (DynamicArrays of) scalars. I was surprised and disappointed to
find out that the simple C version of this loop is 4-5 times faster than
the POOMA version. I suppose I must have overlooked something. Can you
help? The program is at the bottom of this message.

the tests were run on a PC using the Intel C++ compiler

many thanks in advance,
bert



Dr. Engelbert TIJSKENS

Laboratory for Agro-Machinery and -Processing
Department of Agro-Engineering and -Economy
KULeuven

Kasteelpark Arenberg 30
B-3001 LEUVEN
BELGIUM

tel: ++(32) 16 32 8557
fax: ++(32) 16 32 8590
e-mail: engelbert.tijskens at agr.kuleuven.ac.be



Here's the program

#include "Pooma/Particles.h"
#include "Pooma/DynamicArrays.h"
#include "Tiny/Vector.h"
#include "Utilities/Inform.h"
#include <iostream>
#include <stdlib.h>
#include <Timer/Timer.h>

#if POOMA_CHEETAH
	typedef MultiPatch< DynamicTag, Remote<Dynamic> >
AttributeEngineTag_t;
#else
	typedef MultiPatch< DynamicTag,        Dynamic  >
AttributeEngineTag_t;
#endif

template <class Layout_t>
struct PC_UniformLayout_traits
{
	typedef AttributeEngineTag_t AttributeEngineTag_t;
	typedef Layout_t ParticleLayout_t;
};


// The particle traits class for this application: PC_UniformLayout_traits
// instantiated with UniformLayout as the particle layout type.
typedef PC_UniformLayout_traits<UniformLayout> PC_UniformLayout_t;

// Compile-time parameters of the benchmark.
static const int nsd     = 3;		// Dimensionality of this problem
static const int NumPart = 10000;	// Number of particles in simulation
static const int nLoops  = 100;		// Number of loops

// Particles subclass holding the operands of the benchmark y = a*x + b
// twice: once as POOMA attributes (x, y, a, b) and once as plain C arrays
// (x_, y_, a_, b_) used by the reference loop.
class PC : public Particles<PC_UniformLayout_t>
{
public:
 // Typedefs
	typedef Particles<PC_UniformLayout_t>   Base_t;
	typedef Base_t::AttributeEngineTag_t    AttributeEngineTag_t;
	typedef Base_t::ParticleLayout_t        ParticleLayout_t;
	typedef double                          AxisType_t;
	typedef Vector<nsd,AxisType_t>          PointType_t;

 // Constructor: pass the layout to the base class and register the
 // four attributes.
	PC(const ParticleLayout_t &pl) : Particles<PC_UniformLayout_t>(pl)
	{
		addAttribute(y);
		addAttribute(x);
		addAttribute(a);
		addAttribute(b);
	}

 // POOMA attributes (as public members): x and y hold one vector per
 // particle, a and b one scalar per particle.
	DynamicArray<PointType_t,AttributeEngineTag_t>  x,y;
	DynamicArray<AxisType_t ,AttributeEngineTag_t>  a,b;

 // Plain-array counterparts of the attributes above.
 // NOTE(review): these arrays are embedded in the object, i.e.
 // NumPart*(2*nsd + 2) doubles in total, so a PC object with automatic
 // storage places all of them on the stack.
	double x_[NumPart][nsd], y_[NumPart][nsd];
	double a_[NumPart], b_[NumPart];
};


// Number of patches to distribute particles across.
// Typically one would use one patch per processor.
const int numPatches = 1;


// Main simulation routine
int main(int argc, char *argv[])
{
 // Initialize POOMA and output stream
	Pooma::initialize(argc,argv);
	Inform out(argv[0]);
=09
	out << "Begin Bounce example code" << std::endl;
	out << "-------------------------" << std::endl;

 // Create a particle layout object for our use
	PC_UniformLayout_t::ParticleLayout_t particleLayout(numPatches);

 // Create the Particles subclass object
	PC pc(particleLayout);

 // Create some particles, recompute the global domain, and initialize
 // the attributes randomly.
	pc.globalCreate(NumPart);

	srand(12345U);
	typedef PC::AxisType_t Coordinate_t;
	Coordinate_t recranmax =3D
1.0/static_cast<Coordinate_t>(RAND_MAX);
	for (int i =3D 0; i < NumPart; ++i)
	{
		for (int d =3D 0; d < nsd; ++d) {
			pc.x(i)(d) =3D rand() * recranmax;
			pc.x_[i][d] =3D pc.x(i)(d);
		}
		pc.a_[i] =3D pc.a(i) =3D rand() * recranmax;
		pc.b_[i] =3D pc.b(i) =3D rand() * recranmax;
	}

 // reference using ordinary arrays : y =3D ax+b
	Timer t_array("ordinary arrays",cout); // starts a timer
 	for (int it=3D1; it <=3D nLoops; ++it)
	{
		for (int i =3D 0; i < NumPart; ++i)
			for (int d =3D 0; d < nsd; ++d)
				pc.y_[i][d] =3D pc.a_[i]*pc.x_[i][d] +
pc.b_[i];
	}
	t_array.stop();
	t_array.print();

 // using pooma attributes: y =3D ax+b
	Timer t_PoomaAttributes("pooma attributes",cout); // starts a
timer
 	for (int it=3D1; it <=3D nLoops; ++it)
	{
		pc.y =3D pc.a*pc.x + pc.b;
	}
	t_PoomaAttributes.stop();
	t_PoomaAttributes.print();

 // Shut down POOMA and exit
	Pooma::finalize();
	return 0;
}





More information about the pooma-dev mailing list