[RFA] better igc up-to-date-ness tracking, optimized igc communication
Richard Guenther
rguenth at tat.physik.uni-tuebingen.de
Fri Jul 23 12:10:40 UTC 2004
As a proof of concept, the attached patch implements more accurate tracking
of which igc cells are up to date, in order to reduce communication. It also
implements new GuardSend/GuardReceive iterates for the POOMA_MPI case which
are able to merge consecutive igc updates by using a global std::map for
bookkeeping (yay!).
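To make the bookkeeping concrete before the patch: a consecutive igc update
for the same (peer context, patch data object) pair is merged into the
already pending iterate instead of creating a new message. A minimal sketch
with stand-in types (the real code is GuardSend::send / GuardReceive::receive
in SendReceive.h below; widths stand in for the actual Interval domains):

  #include <map>
  #include <utility>

  // Iterate stands in for the real Pooma send/receive iterate.
  struct Iterate
  {
    explicit Iterate(int width) : width_m(width) {}
    void addDomain(int width)     // merge a later request into this pending iterate
    {
      if (width > width_m)
        width_m = width;
    }
    int width_m;
  };

  typedef std::pair<int, const void*> Key_t;   // (peer context, patch DataObject*)
  std::map<Key_t, Iterate*> pendingIterates_g;

  void requestGuardUpdate(int context, const void *patch, int width)
  {
    Iterate *&it = pendingIterates_g[Key_t(context, patch)];
    if (!it)
      it = new Iterate(width);    // first update for this (context, patch) pair
    else
      it->addDomain(width);       // consecutive update: merged, no extra message
  }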
A new test, Array/tests/array_test30.cpp, tries to cover all possible cases
of igc updates; it passes in serial mode and in MPI mode with 1 process and
with #processes == #patches. For mixed remote/local updates there still
seems to be a problem, which is why this is an RFA and not a patch
submission.
Please have a look at it, tell me how to avoid the global std::map (or
whether it is ok as is), test it with your favorite application, etc. I'll
be away on holidays for a few weeks now.
Richard.
2004Jul23 Richard Guenther <richard.guenther at uni-tuebingen.de>
* src/Engine/MultiPatchEngine.h: abstract the dirty flag
and record partial up-to-date-ness via a GuardLayers object.
(setDirty): for POOMA_MPI mode clear the global hash.
* src/Engine/MultiPatchEngine.cpp: handle the abstract dirty
flag.
(simpleAssign): add optimized mode using the new GuardSend
and GuardReceive iterates for POOMA_MPI.
(fillGuardsHandler): honour partial up-to-date information,
produce partial update domains.
* src/Layout/GuardLayers.h: abstract from the data-type,
add operator>=.
* src/Threads/IterateSchedulers/SerialAsync.h
(waitForSomeRequests): fix bug for non-blocking operation.
(runSomething): always complete finished message iterates first.
* src/Tulip/Messaging.cmpl.cpp: define the global GuardSend/GuardReceive
iterate maps.
* src/Tulip/SendReceive.h: add GuardSend and GuardReceive
iterates that are able to merge with consecutive iterates
using a global std::map.
* src/Array/tests/array_test30.cpp: new.
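The essence of the new up-to-date tracking in fillGuardsHandler: for each
fill-list face the recorded clean guard width is compared against the width
the expression actually needs, and only the difference is transferred. A
simplified sketch with plain ints (the patch does this with GuardLayers<Dim>
and by shrinking the Interval<Dim> update domains):

  // cleanWidth corresponds to DirtyFlag::upToDate(face), neededWidth to the
  // requested GuardLayers entry for that face.
  int guardCellsToTransfer(int cleanWidth, int neededWidth)
  {
    if (cleanWidth >= neededWidth)
      return 0;                          // face already sufficiently up to date
    return neededWidth - cleanWidth;     // only the still-dirty outer cells move
  }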
-------------- next part --------------
===== src/Engine/MultiPatchEngine.h 1.12 vs edited =====
--- 1.12/r2/src/Engine/MultiPatchEngine.h 2004-01-29 12:01:35 +01:00
+++ edited/src/Engine/MultiPatchEngine.h 2004-07-23 13:34:34 +02:00
@@ -659,27 +659,36 @@
inline void setDirty() const
{
- *pDirty_m = (1<<(Dim*2))-1;
+ pDirty_m->setDirty();
+
+#if POOMA_MPI
+ // We need to clear the GuardSend and GuardReceive iterate map for
+ // all DataObjects in this engine.
+ // Possibly instead of the O(n^2) looking loop we could loop over
+ // the guard fill list and identify all possible communication
+ // iterates that way. For later optimization.
+ typedef std::pair<int, Pooma::DataObject_t*> GIMKey_t;
+ typedef std::map<GIMKey_t, Pooma::Iterate_t*> GIMap_t;
+ extern GIMap_t guardSendIterateMap_g;
+ extern GIMap_t guardReceiveIterateMap_g;
+ for (int i=0; i<layout().sizeLocal(); ++i) {
+ for (int j=0; j<Pooma::contexts(); ++j) {
+ GIMKey_t key(j, localPatch(i).localEngine().dataObject());
+ guardSendIterateMap_g.erase(key);
+ guardReceiveIterateMap_g.erase(key);
+ }
+ }
+#endif
}
- inline void clearDirty(int face = -1) const
+ inline void clearDirty() const
{
- if (face == -1)
- *pDirty_m = 0;
- else {
- PAssert(face >= 0 && face <= Dim*2-1);
- *pDirty_m &= ~(1<<face);
- }
+ pDirty_m->clearDirty();
}
- inline bool isDirty(int face = -1) const
+ inline bool isDirty() const
{
- if (face == -1)
- return *pDirty_m != 0;
- else {
- PAssert(face >= 0 && face <= Dim*2-1);
- return *pDirty_m & (1<<face);
- }
+ return pDirty_m->isDirty();
}
//============================================================
@@ -879,6 +888,36 @@
};
+ /// Opaque type for the dirty flag
+
+ struct DirtyFlag
+ {
+ void setDirty()
+ {
+ dirty_m = true;
+ clean_m = GuardLayers<Dim>(0);
+ }
+ void clearDirty()
+ {
+ dirty_m = false;
+ clean_m = GuardLayers<Dim>(255);
+ }
+ bool isDirty()
+ {
+ return dirty_m;
+ }
+ int upToDate(int face) const
+ {
+ return face&1 ? clean_m.upper(face/2) : clean_m.lower(face/2);
+ }
+ typename GuardLayers<Dim>::Element_t& upToDate(int face)
+ {
+ return face&1 ? clean_m.upper(face/2) : clean_m.lower(face/2);
+ }
+ GuardLayers<Dim> clean_m;
+ bool dirty_m;
+ };
+
//===========================================================================
// Data
//===========================================================================
@@ -896,7 +935,7 @@
/// must share the same flag. We use the reference count in
/// data_m to decide whether to clean this up.
- int *pDirty_m;
+ DirtyFlag *pDirty_m;
};
@@ -1245,14 +1284,14 @@
baseEngine_m.setDirty();
}
- inline void clearDirty(int face=-1) const
+ inline void clearDirty() const
{
- baseEngine_m.clearDirty(face);
+ baseEngine_m.clearDirty();
}
- inline bool isDirty(int face=-1) const
+ inline bool isDirty() const
{
- return baseEngine_m.isDirty(face);
+ return baseEngine_m.isDirty();
}
//---------------------------------------------------------------------------
===== src/Engine/MultiPatchEngine.cpp 1.12 vs edited =====
--- 1.12/r2/src/Engine/MultiPatchEngine.cpp 2004-01-29 12:06:22 +01:00
+++ edited/src/Engine/MultiPatchEngine.cpp 2004-07-23 13:17:28 +02:00
@@ -79,12 +79,10 @@
Engine(const Layout_t &layout)
: layout_m(layout),
data_m(layout.sizeGlobal()),
- pDirty_m(new int)
+ pDirty_m(new DirtyFlag)
{
typedef typename Layout_t::Value_t Node_t;
- setDirty();
-
// check for correct match of PatchTag and the mapper used to make the
// layout.
// THIS IS A HACK! we test on the context of the first patch, and if it
@@ -135,6 +133,8 @@
// Attach ourself to the layout so we can receive messages.
layout_m.attach(*this);
+
+ setDirty();
}
@@ -250,7 +250,7 @@
{
if (data_m.isValid() && data_m.isShared()) {
data_m.makeOwnCopy();
- pDirty_m = new int(*pDirty_m);
+ pDirty_m = new DirtyFlag(*pDirty_m);
}
return *this;
@@ -264,88 +264,134 @@
//
//-----------------------------------------------------------------------------
+#if POOMA_MPI
+
/// Guard layer assign between non-remote engines, just use the
-/// ET mechanisms
+/// ET mechanisms on the optimized domain
template <int Dim, class T, class Tag>
static inline
void simpleAssign(const Array<Dim, T, Tag>& lhs,
const Array<Dim, T, Tag>& rhs,
- const Interval<Dim>& domain)
+ const Interval<Dim>&,
+ const Interval<Dim>& domain2,
+ bool optimize)
{
- lhs(domain) = rhs(domain);
+ lhs(domain2) = rhs(domain2);
}
/// Guard layer assign between remote engines, use Send/Receive directly
/// to avoid one extra copy of the data.
+/// Uses domain2 for local->local copies and domain for copies involving
+/// remote engines as Send/Receive requests get merged downway.
template <int Dim, class T, class Tag>
static inline
void simpleAssign(const Array<Dim, T, Remote<Tag> >& lhs,
const Array<Dim, T, Remote<Tag> >& rhs,
- const Interval<Dim>& domain)
+ const Interval<Dim>& domain,
+ const Interval<Dim>& domain2,
+ bool optimize)
{
- if (lhs.engine().owningContext() == rhs.engine().owningContext())
- lhs(domain) = rhs(domain);
- else {
- typedef typename NewEngine<Engine<Dim, T, Tag>, Interval<Dim> >::Type_t ViewEngine_t;
- if (lhs.engine().engineIsLocal())
- Receive<ViewEngine_t>::receive(ViewEngine_t(lhs.engine().localEngine(), domain),
- rhs.engine().owningContext());
- else if (rhs.engine().engineIsLocal())
- SendReceive::send(ViewEngine_t(rhs.engine().localEngine(), domain),
+ if (lhs.engine().owningContext() == rhs.engine().owningContext()) {
+ PAssert(lhs.engine().engineIsLocal() && rhs.engine().engineIsLocal());
+ Array<Dim, T, Tag> llhs, lrhs;
+ llhs.engine() = lhs.engine().localEngine();
+ lrhs.engine() = rhs.engine().localEngine();
+ llhs(domain2) = lrhs(domain2);
+ } else {
+ if (!optimize) {
+ typedef typename NewEngine<Engine<Dim, T, Tag>, Interval<Dim> >::Type_t ViewEngine_t;
+ if (lhs.engine().engineIsLocal())
+ Receive<ViewEngine_t>::receive(ViewEngine_t(lhs.engine().localEngine(), domain),
+ rhs.engine().owningContext());
+ else if (rhs.engine().engineIsLocal())
+ SendReceive::send(ViewEngine_t(rhs.engine().localEngine(), domain),
+ lhs.engine().owningContext());
+ } else {
+ if (lhs.engine().engineIsLocal())
+ GuardReceive::receive(lhs.engine().localEngine(), domain,
+ rhs.engine().owningContext());
+ else if (rhs.engine().engineIsLocal())
+ GuardSend::send(rhs.engine().localEngine(), domain,
lhs.engine().owningContext());
+ }
}
}
+#endif
+
template <int Dim, class T, class LayoutTag, class PatchTag>
void Engine<Dim, T, MultiPatch<LayoutTag,PatchTag> >::
fillGuardsHandler(const GuardLayers<Dim>& g, const WrappedInt<true> &) const
{
- if (!isDirty()) return;
+ if (!isDirty())
+ return;
- int updated = 0;
typename Layout_t::FillIterator_t p = layout_m.beginFillList();
+ GuardLayers<Dim> clean = pDirty_m->clean_m;
while (p != layout_m.endFillList())
{
- int src = p->ownedID_m;
- int dest = p->guardID_m;
-
- // Skip face, if not dirty.
+ // Check how much of the guards (needed) we need where (dim, upper)
- if (isDirty(p->face_m)) {
+ int dim = p->face_m/2;
+ bool upper = p->face_m&1;
+ int needed = upper ? g.upper(dim) : g.lower(dim);
- // Check, if the p->domain_m is a guard which matches the
- // needed guard g.
+ // Check against up-to-date status
- int d = p->face_m/2;
- int guardSizeNeeded = p->face_m & 1 ? g.upper(d) : g.lower(d);
- if (!(p->face_m != -1
- && guardSizeNeeded == 0)) {
+ if (pDirty_m->upToDate(p->face_m) < needed) {
- // Create patch arrays that see the entire patch:
+ // Create patch arrays that see the entire patch:
- Array<Dim, T, PatchTag> lhs(data()[dest]), rhs(data()[src]);
-
- // Now do assignment from the subdomains.
+ int src = p->ownedID_m;
+ int dest = p->guardID_m;
+ Array<Dim, T, PatchTag> lhs(data()[dest]), rhs(data()[src]);
+
+ // Compute subdomain we need to assign by
+ // - shrinking the domain according to the needed guards
+ // - shrinking the domain according to the already up-to-date guards
+
+ // needed
+ Interval<Dim> domain = p->domain_m;
+ if (upper)
+ domain[dim] = shrinkRight(p->domain_m[dim], p->domain_m[dim].size() - g.upper(dim));
+ else
+ domain[dim] = shrinkLeft(p->domain_m[dim], p->domain_m[dim].size() - g.lower(dim));
+
+ // needed minus already up-to-date
+ Interval<Dim> domain2 = domain;
+ if (upper)
+ domain2[dim] = shrinkLeft(domain[dim], pDirty_m->upToDate(p->face_m));
+ else
+ domain2[dim] = shrinkRight(domain[dim], pDirty_m->upToDate(p->face_m));
+
+ // Now do the assignment
#if POOMA_MPI
- simpleAssign(lhs, rhs, p->domain_m);
+ simpleAssign(lhs, rhs, domain, domain2, true);
#else
- lhs(p->domain_m) = rhs(p->domain_m);
+ lhs(domain2) = rhs(domain2);
#endif
- // Mark up-to-date.
- updated |= 1<<p->face_m;
-
- }
+ // Record guard up-to-date-ness change, but defer real update
+ // because it would confuse further processing of the fill list.
+ if (upper)
+ clean.upper(dim) = needed;
+ else
+ clean.lower(dim) = needed;
}
++p;
}
- *pDirty_m &= ~updated;
+ // Do the deferred update of the clean status.
+ pDirty_m->clean_m = clean;
+
+ // Check, if all internal guards are clean and update dirty flag accordingly.
+ if (pDirty_m->clean_m >= layout().internalGuards())
+ pDirty_m->dirty_m = false;
}
@@ -377,7 +423,7 @@
++p;
}
- setDirty();
+ clearDirty();
}
===== src/Layout/GuardLayers.h 1.5 vs edited =====
--- 1.5/r2/src/Layout/GuardLayers.h 2003-12-03 12:30:43 +01:00
+++ edited/src/Layout/GuardLayers.h 2004-07-22 17:27:26 +02:00
@@ -57,6 +57,7 @@
class GuardLayers
{
public:
+ typedef int Element_t;
//============================================================
// Constructors
@@ -136,12 +137,12 @@
// Mutators
//============================================================
- int &lower(int i)
+ Element_t &lower(int i)
{
PInsist(i<Dim&&i>=0," GuardLayers index out of range ");
return lower_m[i];
}
- int &upper(int i)
+ Element_t &upper(int i)
{
PInsist(i<Dim&&i>=0," GuardLayers index out of range ");
return upper_m[i];
@@ -162,6 +163,17 @@
return result;
}
+ bool operator>=(const GuardLayers<Dim> &gcs) const
+ {
+ for (int d = 0; d < Dim; ++d)
+ {
+ if (!(lower_m[d] >= gcs.lower_m[d]
+ && upper_m[d] >= gcs.upper_m[d]))
+ return false;
+ }
+ return true;
+ }
+
bool operator==(int gcw) const
{
bool result = true;
@@ -250,8 +262,8 @@
private:
- int lower_m[Dim];
- int upper_m[Dim];
+ Element_t lower_m[Dim];
+ Element_t upper_m[Dim];
};
template<int Dim>
===== src/Threads/IterateSchedulers/SerialAsync.h 1.13 vs edited =====
--- 1.13/r2/src/Threads/IterateSchedulers/SerialAsync.h 2004-07-15 16:55:47 +02:00
+++ edited/src/Threads/IterateSchedulers/SerialAsync.h 2004-07-21 15:09:35 +02:00
@@ -263,7 +263,7 @@
res = MPI_Testsome(last_used_request+1, requests_m,
&nr_finished, finished, statuses);
PAssert(res == MPI_SUCCESS || res == MPI_ERR_IN_STATUS);
- if (nr_finished == MPI_UNDEFINED)
+ if (nr_finished == MPI_UNDEFINED || nr_finished == 0)
return false;
// release finised requests
@@ -311,9 +311,13 @@
static bool runSomething(bool mayBlock = true)
{
// do work in this order to minimize communication latency:
+ // - process finished messages
// - issue all messages
// - do some regular work
// - wait for messages to complete
+
+ if (waitForSomeRequests(false))
+ return true;
RunnablePtr_t p = NULL;
if (!workQueueMessages_m.empty()) {
===== src/Tulip/Messaging.cmpl.cpp 1.5 vs edited =====
--- 1.5/r2/src/Tulip/Messaging.cmpl.cpp 2004-01-07 12:18:08 +01:00
+++ edited/src/Tulip/Messaging.cmpl.cpp 2004-07-21 10:05:49 +02:00
@@ -48,6 +48,12 @@
int RemoteProxyBase::tag_m = 0;
#endif
+#if POOMA_MPI
+typedef std::pair<int, Pooma::DataObject_t*> GIMKey_t;
+typedef std::map<GIMKey_t, Pooma::Iterate_t*> GIMap_t;
+GIMap_t guardSendIterateMap_g;
+GIMap_t guardReceiveIterateMap_g;
+#endif
//-----------------------------------------------------------------------------
// Tag generator creates a set of tags for global use in r2. There is a
===== src/Tulip/SendReceive.h 1.12 vs edited =====
--- 1.12/r2/src/Tulip/SendReceive.h 2004-01-07 12:18:11 +01:00
+++ edited/src/Tulip/SendReceive.h 2004-07-23 13:50:24 +02:00
@@ -491,6 +491,262 @@
};
+/// A map of <dest/source context, DataObject*> -> Iterate* relations.
+typedef std::pair<int, Pooma::DataObject_t*> GIMKey_t;
+typedef std::map<GIMKey_t, Pooma::Iterate_t*> GIMap_t;
+extern GIMap_t guardSendIterateMap_g;
+extern GIMap_t guardReceiveIterateMap_g;
+
+/**
+ * A SendIterate requests a read lock on a piece of data. When that read lock
+ * is granted, we call a cheetah matching handler to send the data to the
+ * appropriate context. We construct the SendIterate with a tag that is used
+ * to match the appropriate ReceiveIterate on the remote context.
+ */
+
+template<class View>
+class GuardSendIterate
+ : public Pooma::Iterate_t
+{
+public:
+ GuardSendIterate(const View &view, const Interval<View::dimensions> &domain,
+ int toContext, int tag)
+ : Pooma::Iterate_t(Pooma::scheduler()),
+ toContext_m(toContext),
+ tag_m(tag),
+ view_m(view),
+ domain_m(domain)
+ {
+ hintAffinity(engineFunctor(view_m,
+ DataObjectRequest<BlockAffinity>()));
+
+#if POOMA_REORDER_ITERATES
+ // Priority interface was added to r2 version of serial async so that
+ // message send iterates would run before any other iterates.
+ priority(-1);
+#endif
+
+ DataObjectRequest<WriteRequest> writeReq(*this);
+ DataObjectRequest<ReadRequest> readReq(writeReq);
+ engineFunctor(view_m, readReq);
+ }
+
+ virtual void run()
+ {
+ // at this point we no longer can accept merging with other
+ // GuardSendIterates.
+ guardSendIterateMap_g.erase(GIMKey_t(toContext_m, view_m.dataObject()));
+
+ typedef typename NewEngine<View, Interval<View::dimensions> >::Type_t View_t;
+ typedef Cheetah::Serialize<Cheetah::CHEETAH, View_t> Serialize_t;
+
+ // take the view
+ View_t view(view_m, domain_m);
+
+ // serialize and send buffer
+ int length = Serialize_t::size(view);
+ buffer_m = new char[length];
+ Serialize_t::pack(view, buffer_m);
+ MPI_Request *request = Smarts::SystemContext::getMPIRequest(this);
+ int res = MPI_Isend(buffer_m, length, MPI_CHAR, toContext_m, tag_m,
+ MPI_COMM_WORLD, request);
+ PAssert(res == MPI_SUCCESS);
+
+ // release locks
+ DataObjectRequest<WriteRelease> writeReq;
+ DataObjectRequest<ReadRelease> readReq(writeReq);
+ engineFunctor(view_m, readReq);
+ }
+
+ virtual ~GuardSendIterate()
+ {
+ // cleanup temporary objects.
+ delete[] buffer_m;
+ }
+
+ void addDomain(const Interval<View::dimensions>& domain)
+ {
+ if (contains(domain, domain_m))
+ domain_m = domain;
+ else
+ PAssert(contains(domain_m, domain));
+ }
+
+private:
+
+ // Context we're sending the data to.
+
+ int toContext_m;
+
+ // A tag used to match the sent data with the right receive.
+
+ int tag_m;
+
+ // Communication buffer.
+
+ char *buffer_m;
+
+ // The data we're sending, a local patch with the domain to send
+
+ View view_m;
+ Interval<View::dimensions> domain_m;
+};
+
+
+/**
+ * ReceiveIterate requests a write lock on a piece of data. When that lock
+ * is granted, we register the data with the cheetah matching handler which
+ * will fill the block when a message arrives. The write lock is released
+ * by the matching handler.
+ */
+
+template<class View>
+class GuardReceiveIterate
+ : public Pooma::Iterate_t
+{
+public:
+
+ typedef GuardReceiveIterate<View> This_t;
+
+ GuardReceiveIterate(const View &view, const Interval<View::dimensions> &domain,
+ int fromContext, int tag)
+ : Pooma::Iterate_t(Pooma::scheduler()),
+ fromContext_m(fromContext),
+ tag_m(tag), buffer_m(NULL),
+ view_m(view),
+ domain_m(domain)
+ {
+ hintAffinity(engineFunctor(view,
+ DataObjectRequest<BlockAffinity>()));
+
+#if POOMA_REORDER_ITERATES
+ // Priority interface was added to r2 version of serial async so that
+ // message receive iterates would run after any other iterates.
+ priority(-1);
+#endif
+
+ DataObjectRequest<WriteRequest> writeReq(*this);
+ engineFunctor(view, writeReq);
+
+ Pooma::addIncomingMessage();
+ }
+
+ virtual void run()
+ {
+ // at this point we can no longer accept merges with other
+ // GuardReceiveIterates.
+ guardReceiveIterateMap_g.erase(GIMKey_t(fromContext_m, view_m.dataObject()));
+
+ typedef typename NewEngine<View, Interval<View::dimensions> >::Type_t View_t;
+
+ // take the view - maybe can optimize this, because we need it only
+ // for size calculation
+ View_t view(view_m, domain_m);
+
+ int length = Cheetah::Serialize<Cheetah::CHEETAH, View_t>::size(view);
+ buffer_m = new char[length];
+ MPI_Request *request = Smarts::SystemContext::getMPIRequest(this);
+ int res = MPI_Irecv(buffer_m, length, MPI_CHAR, fromContext_m, tag_m,
+ MPI_COMM_WORLD, request);
+ PAssert(res == MPI_SUCCESS);
+ }
+
+ virtual ~GuardReceiveIterate()
+ {
+ typedef typename NewEngine<View, Interval<View::dimensions> >::Type_t View_t;
+ typedef Cheetah::Serialize<Cheetah::CHEETAH, View_t> Serialize_t;
+
+ // take the view
+ View_t view(view_m, domain_m);
+
+ // de-serialize into target view directly
+ Serialize_t::unpack(view, buffer_m);
+
+ // cleanup temporary objects
+ delete[] buffer_m;
+
+ // release locks
+ DataObjectRequest<WriteRelease> writeReq;
+ engineFunctor(view_m, writeReq);
+
+ Pooma::gotIncomingMessage();
+ }
+
+ void addDomain(const Interval<View::dimensions>& domain)
+ {
+ if (contains(domain, domain_m))
+ domain_m = domain;
+ else
+ PAssert(contains(domain_m, domain));
+ }
+
+private:
+
+ // Context we're sending the data to.
+
+ int fromContext_m;
+
+ // A tag used to match the sent data with the right send.
+
+ int tag_m;
+
+ // Communication buffer.
+
+ char *buffer_m;
+
+ // The place to put the data we're receiving, a local patch with
+ // the domain to receive to.
+
+ View view_m;
+ Interval<View::dimensions> domain_m;
+};
+
+/**
+ * SendReceive contains two static functions, send(view, context) and
+ * receive(view, context). These functions encapsulate generating matching
+ * tags for the send and receive and launching the iterates to perform the
+ * send and receive.
+ */
+
+struct GuardSend
+{
+ template<class View>
+ static
+ void send(const View &view, const Interval<View::dimensions> &domain,
+ int toContext)
+ {
+ PAssert(toContext >= 0 && toContext < Pooma::contexts());
+ Pooma::Iterate_t* &it = guardSendIterateMap_g[GIMKey_t(toContext, view.dataObject())];
+ if (!it) {
+ int tag = Pooma::sendTag(toContext);
+ it = new GuardSendIterate<View>(view, domain, toContext, tag);
+ Pooma::scheduler().handOff(it);
+ } else {
+ static_cast<GuardSendIterate<View>*>(it)->addDomain(domain);
+ }
+ }
+};
+
+struct GuardReceive
+{
+ template<class View>
+ static
+ void receive(const View &view, const Interval<View::dimensions> &domain,
+ int fromContext)
+ {
+ PAssert(fromContext >= 0 && fromContext < Pooma::contexts());
+ Pooma::Iterate_t* &it = guardReceiveIterateMap_g[GIMKey_t(fromContext, view.dataObject())];
+ if (!it) {
+ int tag = Pooma::receiveTag(fromContext);
+ it = new GuardReceiveIterate<View>(view, domain, fromContext, tag);
+ Pooma::scheduler().handOff(it);
+ } else {
+ static_cast<GuardReceiveIterate<View>*>(it)->addDomain(domain);
+ }
+ }
+};
+
+
#else // not POOMA_MESSAGING
--- /dev/null Tue May 18 17:20:27 2004
+++ src/Array/tests/array_test30.cpp Fri Jul 23 13:36:41 2004
@@ -0,0 +1,127 @@
+// -*- C++ -*-
+// ACL:license
+// ----------------------------------------------------------------------
+// This software and ancillary information (herein called "SOFTWARE")
+// called POOMA (Parallel Object-Oriented Methods and Applications) is
+// made available under the terms described here. The SOFTWARE has been
+// approved for release with associated LA-CC Number LA-CC-98-65.
+//
+// Unless otherwise indicated, this SOFTWARE has been authored by an
+// employee or employees of the University of California, operator of the
+// Los Alamos National Laboratory under Contract No. W-7405-ENG-36 with
+// the U.S. Department of Energy. The U.S. Government has rights to use,
+// reproduce, and distribute this SOFTWARE. The public may copy, distribute,
+// prepare derivative works and publicly display this SOFTWARE without
+// charge, provided that this Notice and any statement of authorship are
+// reproduced on all copies. Neither the Government nor the University
+// makes any warranty, express or implied, or assumes any liability or
+// responsibility for the use of this SOFTWARE.
+//
+// If SOFTWARE is modified to produce derivative works, such modified
+// SOFTWARE should be clearly marked, so as not to confuse it with the
+// version available from LANL.
+//
+// For more information about POOMA, send e-mail to pooma at acl.lanl.gov,
+// or visit the POOMA web page at http://www.acl.lanl.gov/pooma/.
+// ----------------------------------------------------------------------
+// ACL:license
+
+//-----------------------------------------------------------------------------
+// array_test30: verify correctness of igc updates
+//-----------------------------------------------------------------------------
+
+// Include files
+
+#include "Pooma/Arrays.h"
+#include "Utilities/Tester.h"
+#include <iostream>
+
+
+template <class A1, class A2>
+bool test(Pooma::Tester& tester,
+ const A1& a_mp, const A1& b_mp,
+ const A2& a_sp, const A2& b_sp,
+ const Loc<2>& delta1, const Loc<2>& delta2)
+{
+ static int sequence = 0;
+ Interval<2> I;
+
+ // initialize rhs arrays, ensure wrong igc values
+ // via sequence number.
+ I = b_sp.totalDomain();
+ b_sp(I) = sequence + iota(I).comp(0) + I[0].size()*iota(I).comp(1);
+ b_mp.engine().setGuards(0);
+ b_mp(I) = b_sp(I);
+
+ // do calculation both sp and mp
+ I = a_sp.physicalDomain();
+ a_sp(I) = b_sp(I+delta1) - b_sp(I+delta2);
+ a_mp(I) = b_mp(I+delta1) - b_mp(I+delta2);
+
+ // check the results are the same everywhere
+ bool res = all(a_sp(I) == a_mp(I));
+ tester.out() << "For deltas " << delta1
+ << " and " << delta2 << " ";
+ tester.check("result is", res);
+ if (!res) {
+ int n = b_mp.layout().sizeGlobal();
+ for (int i=0; i<n; ++i) {
+ Array<2, int, Remote<Brick> > b(b_mp.engine().globalPatch(i));
+ tester.out() << "Brick " << i << " " << intersect(b.domain(), b_mp.physicalDomain())
+ << " is\n" << b(intersect(b.totalDomain(), b_mp.physicalDomain()))
+ << std::endl;
+ }
+ tester.out() << "Aborting." << std::endl;
+ return false;
+ }
+
+ sequence++;
+
+ return true;
+}
+
+
+int main(int argc, char *argv[])
+{
+ // Initialize POOMA and output stream, using Tester class
+ Pooma::initialize(argc, argv);
+ Pooma::Tester tester(argc, argv);
+
+ Interval<2> domain(12, 12);
+ UniformGridLayout<2> layout_mp(domain, Loc<2>(3, 3),
+ GuardLayers<2>(2), DistributedTag());
+ DomainLayout<2> layout_sp(domain, GuardLayers<2>(2));
+
+ Array<2, int, MultiPatch<UniformTag, Remote<Brick> > >
+ a_mp(layout_mp), b_mp(layout_mp);
+ Array<2, int, Brick>
+ a_sp(layout_sp), b_sp(layout_sp);
+
+ // all 5^4 == 625 cases
+ for (int d1i = -2; d1i <= 2; ++d1i)
+ for (int d1j = -2; d1j <= 2; ++d1j)
+ for (int d2i = -2; d2i <= 2; ++d2i)
+ for (int d2j = -2; d2j <= 2; ++d2j)
+ if (!test(tester, a_mp, b_mp, a_sp, b_sp,
+ Loc<2>(d1i, d1j), Loc<2>(d2i, d2j)))
+ goto out;
+ out:
+
+ // Expected results are
+ // passes with 1, 2, 9
+ // fails with 3, 4, 5, 6, 7, 8
+ // Which hints at problems with mixed local->local / local->remote igc updates
+ // for diagonal igc cells.
+ tester.out() << "Best testing is done with all 1 to 9 processes" << std::endl;
+
+ int retval = tester.results("array_test30");
+ Pooma::finalize();
+ return retval;
+}
+
+// ACL:rcsinfo
+// ----------------------------------------------------------------------
+// $RCSfile: array_test29.cpp,v $ $Author: pooma $
+// $Revision: 1.1 $ $Date: 2004/07/20 18:41:00 $
+// ----------------------------------------------------------------------
+// ACL:rcsinfo