[PATCH] Speed up guard update.
Richard Guenther
rguenth at tat.physik.uni-tuebingen.de
Wed Jan 14 20:56:51 UTC 2004
Hi!
This is a refined (aka shorter) patch which unifies the tracking of
up-to-date faces and the special optimized copy for MPI.
Tested on serial ia32 Linux with gcc 3.4; no regressions.
Ok?
Richard.
2004Jan14 Richard Guenther <richard.guenther at uni-tuebingen.de>
* src/Engine/Intersector.h: track used guard faces.
src/Engine/MultiPatchEngine.h: track up-to-dateness per
face using a bitmask.
src/Engine/Stencil.h: track used guard faces.
src/Field/DiffOps/FieldStencil.h: track used guard faces.
src/Layout/GridLayout.cpp: record face of guard update.
src/Layout/LayoutBase.h: add face_m member to guard update
struct.
src/Layout/UniformGridLayout.cpp: record face of guard update.
src/Engine/MultiPatchEngine.cpp: during fillGuards(), update only
those faces that are needed and not already up-to-date. Do manual
Send/Receive of the inner guards domain for MPI.
--- cvs/r2/src/Engine/Intersector.h 2004-01-14 20:08:06.000000000 +0100
+++ pooma-mpi3/r2/src/Engine/Intersector.h 2004-01-14 20:13:32.000000000 +0100
@@ -129,7 +129,8 @@
}
template<class Engine, int Dim2>
- bool intersect(const Engine &engine, const GuardLayers<Dim2> &guard)
+ bool intersect(const Engine &engine, const GuardLayers<Dim2> &guard,
+ GuardLayers<Dim2> &usedGuards)
{
CTAssert(Engine::dimensions == Dim);
@@ -145,9 +146,7 @@
// If we've seen this ID before, we're done.
if (ids_m[i] == layout.ID())
- {
return false;
- }
// If we've seen the base ID before and the base domain is the same
// we're done.
@@ -157,10 +156,27 @@
{
shared(layout.ID(),ids_m[i]);
- // In this case we are using the guard cells unless this domain
- // is exactly the same as one we've seen before.
+ // was: return (!sameBaseDomain(i,layout.baseDomain()));
- return (!sameBaseDomain(i,layout.baseDomain()));
+ // We should be able to find out the actual shape of the
+ // used internal guards here, rather than just returning bool.
+ // Something like:
+
+ // But what to do if Dim2 > baseDims_m[i]?
+ if (baseDims_m[i] < Dim2)
+ return true;
+
+ bool used = false;
+ for (int j = 0; j < Dim2; j++)
+ {
+ usedGuards.lower(j) = std::max(0, baseDomains_m[i][j].first() - layout.baseDomain()[j].first());
+ if (usedGuards.lower(j) != 0)
+ used = true;
+ usedGuards.upper(j) = std::max(0, layout.baseDomain()[j].last() - baseDomains_m[i][j].last());
+ if (usedGuards.upper(j) != 0)
+ used = true;
+ }
+ return used;
}
}
@@ -437,9 +453,9 @@
template<class Engine, int Dim2>
inline
- bool intersect(const Engine &l, const GuardLayers<Dim2> &guard)
+ bool intersect(const Engine &l, const GuardLayers<Dim2> &guard, GuardLayers<Dim2> &usedGuards)
{
- return (data()->intersect(l,guard));
+ return (data()->intersect(l,guard,usedGuards));
}
private:
--- cvs/r2/src/Engine/MultiPatchEngine.h 2004-01-14 20:11:36.000000000 +0100
+++ pooma-mpi3/r2/src/Engine/MultiPatchEngine.h 2004-01-14 20:13:32.000000000 +0100
@@ -628,13 +628,18 @@
//---------------------------------------------------------------------------
/// Fill the internal guard cells.
- inline void fillGuards() const
+ inline void fillGuards(const GuardLayers<Dim>& g) const
{
- fillGuardsHandler(WrappedInt<Layout_t::supportsGuards>());
+ fillGuardsHandler(g, WrappedInt<Layout_t::supportsGuards>());
+ }
+
+ inline void fillGuards() const
+ {
+ fillGuards(layout().internalGuards());
}
- inline void fillGuardsHandler(const WrappedInt<false>&) const { };
- void fillGuardsHandler(const WrappedInt<true>&) const ;
+ inline void fillGuardsHandler(const GuardLayers<Dim>&, const WrappedInt<false>&) const { };
+ void fillGuardsHandler(const GuardLayers<Dim>&, const WrappedInt<true>&) const ;
//---------------------------------------------------------------------------
/// Set the internal guard cells to a particular value.
@@ -650,14 +655,31 @@
/// Set and get the dirty flag (fillGuards is a no-op unless the
/// dirty flag is true).
+ inline int dirty() const { return *pDirty_m; }
+
inline void setDirty() const
{
- *pDirty_m = true;
+ *pDirty_m = (1<<(Dim*2))-1;
+ }
+
+ inline void clearDirty(int face = -1) const
+ {
+ if (face == -1)
+ *pDirty_m = 0;
+ else {
+ PAssert(face >= 0 && face <= Dim*2-1);
+ *pDirty_m &= ~(1<<face);
+ }
}
- inline bool isDirty() const
+ inline bool isDirty(int face = -1) const
{
- return *pDirty_m;
+ if (face == -1)
+ return *pDirty_m != 0;
+ else {
+ PAssert(face >= 0 && face <= Dim*2-1);
+ return *pDirty_m & (1<<face);
+ }
}
//============================================================
@@ -874,7 +896,7 @@
/// must share the same flag. We use the reference count in
/// data_m to decide whether to clean this up.
- bool *pDirty_m;
+ int *pDirty_m;
};
@@ -1193,6 +1215,11 @@
baseEngine_m.fillGuards();
}
+ inline void fillGuards(const GuardLayers<Dim2>& g) const
+ {
+ baseEngine_m.fillGuards(g);
+ }
+
//---------------------------------------------------------------------------
/// Set the internal guard cells to a particular value (default zero)
@@ -1217,10 +1244,15 @@
{
baseEngine_m.setDirty();
}
+
+ inline void clearDirty(int face=-1) const
+ {
+ baseEngine_m.clearDirty(face);
+ }
- inline bool isDirty() const
+ inline bool isDirty(int face=-1) const
{
- return baseEngine_m.isDirty();
+ return baseEngine_m.isDirty(face);
}
//---------------------------------------------------------------------------
@@ -1694,12 +1726,13 @@
apply(const Engine<Dim,T,MultiPatch<LayoutTag,PatchTag> > &engine,
const ExpressionApply<IntersectorTag<Intersect> > &tag)
{
+ GuardLayers<Dim> usedGuards;
bool useGuards =
tag.tag().intersector_m.intersect(engine,
- engine.layout().internalGuards());
+ engine.layout().internalGuards(), usedGuards);
if (useGuards)
- engine.fillGuards();
+ engine.fillGuards(usedGuards);
return 0;
}
@@ -1725,13 +1758,14 @@
const ExpressionApply<IntersectorTag<Intersect> > &tag,
const WrappedInt<true> &)
{
+ GuardLayers<BD> usedGuards;
bool useGuards =
tag.tag().intersector_m.
intersect(engine,
- engine.layout().baseLayout().internalGuards());
+ engine.layout().baseLayout().internalGuards(), usedGuards);
if (useGuards)
- engine.fillGuards();
+ engine.fillGuards(usedGuards);
return 0;
}
@@ -1741,7 +1775,7 @@
const ExpressionApply<IntersectorTag<Intersect> > &tag,
const WrappedInt<false> &)
{
- tag.tag().intersector_m.intersect(engine, GuardLayers<Dim>());
+ tag.tag().intersector_m.intersect(engine);
return 0;
}
};
--- cvs/r2/src/Engine/Stencil.h 2004-01-14 20:08:07.000000000 +0100
+++ pooma-mpi3/r2/src/Engine/Stencil.h 2004-01-14 20:13:32.000000000 +0100
@@ -752,11 +752,14 @@
StencilIntersector(const This_t &model)
: domain_m(model.domain_m),
+ stencilExtent_m(model.stencilExtent_m),
intersector_m(model.intersector_m)
{ }
- StencilIntersector(const Interval<Dim> &domain, const Intersect &intersect)
+ StencilIntersector(const Interval<Dim> &domain, const Intersect &intersect,
+ const GuardLayers<Dim> &stencilExtent)
: domain_m(domain),
+ stencilExtent_m(stencilExtent),
intersector_m(intersect)
{ }
@@ -766,6 +769,7 @@
{
intersector_m = model.intersector_m;
domain_m = model.domain_m;
+ stencilExtent_m = model.stencilExtent_m;
}
return *this;
}
@@ -807,14 +811,19 @@
template<class Engine, int Dim2>
inline
- bool intersect(const Engine &engine, const GuardLayers<Dim2> &)
+ bool intersect(const Engine &engine, const GuardLayers<Dim2> &g,
+ GuardLayers<Dim> &usedGuards)
{
intersect(engine);
+ // FIXME: accumulate used guards from intersect above and
+ // stencil extent? I.e. allow Stencil<>(a(i-1)+a(i+1))?
+ usedGuards = stencilExtent_m;
return true;
}
private:
Interval<Dim> domain_m;
+ GuardLayers<Dim> stencilExtent_m;
Intersect intersector_m;
};
@@ -833,8 +842,14 @@
const ExpressionApply<IntersectorTag<Intersect> > &tag)
{
typedef StencilIntersector<D, Intersect> NewIntersector_t;
+ GuardLayers<D> stencilExtent;
+ for (int i=0; i<D; ++i) {
+ stencilExtent.lower(i) = engine.function().lowerExtent(i);
+ stencilExtent.upper(i) = engine.function().upperExtent(i);
+ }
NewIntersector_t newIntersector(engine.intersectDomain(),
- tag.tag().intersector_m);
+ tag.tag().intersector_m,
+ stencilExtent);
expressionApply(engine.expression(),
IntersectorTag<NewIntersector_t>(newIntersector));
--- cvs/r2/src/Field/DiffOps/FieldStencil.h 2004-01-14 20:08:09.000000000 +0100
+++ pooma-mpi3/r2/src/Field/DiffOps/FieldStencil.h 2004-01-14 20:13:32.000000000 +0100
@@ -614,11 +617,13 @@
// Constructors
FieldStencilIntersector(const This_t &model)
- : domain_m(model.domain_m), intersector_m(model.intersector_m)
+ : domain_m(model.domain_m), stencilExtent_m(model.stencilExtent_m),
+ intersector_m(model.intersector_m)
{ }
- FieldStencilIntersector(const Domain_t &dom, const Intersect &intersect)
- : domain_m(dom), intersector_m(intersect)
+ FieldStencilIntersector(const Domain_t &dom, const Intersect &intersect,
+ const GuardLayers<Dim> &stencilExtent)
+ : domain_m(dom), stencilExtent_m(stencilExtent), intersector_m(intersect)
{ }
This_t &operator=(const This_t &model)
@@ -626,6 +631,7 @@
if (this != &model)
{
domain_m = model.domain_m;
+ stencilExtent_m = model.stencilExtent_m;
intersector_m = model.intersector_m;
}
return *this;
@@ -662,9 +668,13 @@
}
template<class Engine, int Dim2>
- inline bool intersect(const Engine &engine, const GuardLayers<Dim2> &)
+ inline bool intersect(const Engine &engine, const GuardLayers<Dim2> &,
+ GuardLayers<Dim> &usedGuards)
{
intersect(engine);
+ // FIXME: accumulate used guards from intersect above and
+ // stencil extent? I.e. allow Stencil<>(a(i-1)+a(i+1))?
+ usedGuards = stencilExtent_m;
return true;
}
@@ -672,6 +682,7 @@
Interval<Dim> domain_m;
+ GuardLayers<Dim> stencilExtent_m;
Intersect intersector_m;
};
@@ -699,8 +710,14 @@
// cells results in an error in the multipatch inode view.)
typedef FieldStencilIntersector<Dim, Intersect> NewIntersector_t;
+ GuardLayers<Dim> stencilExtent;
+ for (int i=0; i<Dim; ++i) {
+ stencilExtent.lower(i) = engine.functor().lowerExtent(i);
+ stencilExtent.upper(i) = engine.functor().upperExtent(i);
+ }
NewIntersector_t newIntersector(engine.intersectDomain(),
- tag.tag().intersector_m);
+ tag.tag().intersector_m,
+ stencilExtent);
expressionApply(engine.field(),
IntersectorTag<NewIntersector_t>(newIntersector));
--- cvs/r2/src/Layout/GridLayout.cpp 2004-01-14 20:08:10.000000000 +0100
+++ pooma-mpi3/r2/src/Layout/GridLayout.cpp 2004-01-14 20:13:32.000000000 +0100
@@ -429,7 +436,7 @@
// Now, push IDs and source into cache...
- this->gcFillList_m.push_back(GCFillInfo_t(gcdom, sourceID, destID));
+ this->gcFillList_m.push_back(GCFillInfo_t(gcdom, sourceID, destID, d*2));
}
}
}
@@ -481,7 +488,7 @@
// Now, push IDs and source into cache...
- this->gcFillList_m.push_back(GCFillInfo_t(gcdom, sourceID, destID));
+ this->gcFillList_m.push_back(GCFillInfo_t(gcdom, sourceID, destID, d*2+1));
}
}
}
--- cvs/r2/src/Layout/LayoutBase.h 2004-01-14 20:08:12.000000000 +0100
+++ pooma-mpi3/r2/src/Layout/LayoutBase.h 2004-01-14 20:13:32.000000000 +0100
@@ -119,8 +121,8 @@
struct GCFillInfo
{
- GCFillInfo(const Domain_t &dom, int ownedID, int guardID)
- : domain_m(dom), ownedID_m(ownedID), guardID_m(guardID) { }
+ GCFillInfo(const Domain_t &dom, int ownedID, int guardID, int face=-1)
+ : domain_m(dom), ownedID_m(ownedID), guardID_m(guardID), face_m(face) { }
// Get a CW warning about this not having a default constructor
// when we instantiate the vector<GCFillInfo> below. This never
@@ -131,6 +133,7 @@
Domain_t domain_m; // guard layer domain
int ownedID_m; // node ID for which domain_m is owned
int guardID_m; // node ID for which domain_m is in the guards
+ int face_m; // destination face of the guard layer (or -1, if unknown)
Domain_t & domain() { return domain_m;}
int & ownedID() { return ownedID_m;}
--- cvs/r2/src/Layout/UniformGridLayout.cpp 2004-01-14 20:08:13.000000000 +0100
+++ pooma-mpi3/r2/src/Layout/UniformGridLayout.cpp 2004-01-14 20:13:32.000000000 +0100
@@ -279,7 +279,7 @@
//-----------------------------------------------------------------------------
//
// template <int Dim>
-// void UniformGridLayout<Dim>::calcGCFillList()
+// void UniformGridLayoutData<Dim>::calcGCFillList()
//
// Calculates the cached information needed by MultiPatch Engine to
// fill the guard cells.
@@ -370,7 +370,7 @@
this->all_m[sourceID]->context() == Pooma::context() ||
this->all_m[destID]->context() == Pooma::context()
)
- this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID));
+ this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID,d*2));
}
}
@@ -417,7 +417,7 @@
this->all_m[sourceID]->context() == Pooma::context() ||
this->all_m[destID]->context() == Pooma::context()
)
- this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID));
+ this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID,d*2+1));
}
}
}
--- cvs/r2/src/Engine/MultiPatchEngine.cpp 2004-01-14 20:11:34.000000000 +0100
+++ pooma-mpi3/r2/src/Engine/MultiPatchEngine.cpp 2004-01-14 20:23:23.000000000 +0100
@@ -34,6 +34,7 @@
#include "Engine/CompressedFraction.h"
#include "Array/Array.h"
#include "Tulip/ReduceOverContexts.h"
+#include "Tulip/SendReceive.h"
#include "Threads/PoomaCSem.h"
#include "Domain/IteratorPairDomain.h"
@@ -77,16 +78,18 @@
Engine(const Layout_t &layout)
: layout_m(layout),
data_m(layout.sizeGlobal()),
- pDirty_m(new bool(true))
+ pDirty_m(new int)
{
typedef typename Layout_t::Value_t Node_t;
+ setDirty();
+
// check for correct match of PatchTag and the mapper used to make the
// layout.
// THIS IS A HACK! we test on the context of the first patch, and if it
// is -1, we have a Layout made with the LocalMapper.
-#if POOMA_CHEETAH
+#if POOMA_MESSAGING
if( layout_m.nodeListGlobal().size() > 0)
{
@@ -247,7 +250,7 @@
PAssert(data_m.isValid());
if (data_m.isShared()) {
data_m.makeOwnCopy();
- pDirty_m = new bool(*pDirty_m);
+ pDirty_m = new int(*pDirty_m);
}
return *this;
@@ -261,45 +264,88 @@
//
//-----------------------------------------------------------------------------
+/// Guard layer assign between non-remote engines, just use the
+/// ET mechanisms
+
+template <int Dim, class T, class Tag>
+static inline
+void simpleAssign(const Array<Dim, T, Tag>& lhs,
+ const Array<Dim, T, Tag>& rhs,
+ const Interval<Dim>& domain)
+{
+ lhs(domain) = rhs(domain);
+}
+
+/// Guard layer assign between remote engines, use Send/Receive directly
+/// to avoid one extra copy of the data.
+
+template <int Dim, class T, class Tag>
+static inline
+void simpleAssign(const Array<Dim, T, Remote<Tag> >& lhs,
+ const Array<Dim, T, Remote<Tag> >& rhs,
+ const Interval<Dim>& domain)
+{
+ if (lhs.engine().owningContext() == rhs.engine().owningContext())
+ lhs(domain) = rhs(domain);
+ else {
+ typedef typename NewEngine<Engine<Dim, T, Tag>, Interval<Dim> >::Type_t ViewEngine_t;
+ if (lhs.engine().engineIsLocal())
+ Receive<ViewEngine_t>::receive(ViewEngine_t(lhs.engine().localEngine(), domain),
+ rhs.engine().owningContext());
+ else if (rhs.engine().engineIsLocal())
+ SendReceive::send(ViewEngine_t(rhs.engine().localEngine(), domain),
+ lhs.engine().owningContext());
+ }
+}
+
template <int Dim, class T, class LayoutTag, class PatchTag>
void Engine<Dim, T, MultiPatch<LayoutTag,PatchTag> >::
-fillGuardsHandler(const WrappedInt<true> &) const
+fillGuardsHandler(const GuardLayers<Dim>& g, const WrappedInt<true> &) const
{
if (!isDirty()) return;
-
-#if POOMA_PURIFY
-
- // This is here to remove spurious UMRs that result when un-initialized
- // guards are copied in the following loop. All of the unitialized data
- // is ultimately overwritten with good data, so I don't see why purify
- // calls these UMRs in stead of unitialized memory copies, but it does.
- // I don't do this in general since it would be slow and since T(0) is
- // not generally valid. This does mean that fillGuards() will fail
- // with purify for types that do not know what to do with T(0).
-
- setGuards(T(0));
-
-#endif
+ int updated = 0;
typename Layout_t::FillIterator_t p = layout_m.beginFillList();
-
+
while (p != layout_m.endFillList())
{
int src = p->ownedID_m;
int dest = p->guardID_m;
- // Create patch arrays that see the entire patch:
+ // Skip face, if not dirty.
+
+ if (isDirty(p->face_m)) {
+
+ // Check, if the p->domain_m is a guard which matches the
+ // needed guard g.
+
+ int d = p->face_m/2;
+ int guardSizeNeeded = p->face_m & 1 ? g.upper(d) : g.lower(d);
+ if (!(p->face_m != -1
+ && guardSizeNeeded == 0)) {
+
+ // Create patch arrays that see the entire patch:
- Array<Dim, T, PatchTag> lhs(data()[dest]), rhs(data()[src]);
+ Array<Dim, T, PatchTag> lhs(data()[dest]), rhs(data()[src]);
- // Now do assignment from the subdomains.
+ // Now do assignment from the subdomains.
+#if POOMA_MPI
+ simpleAssign(lhs, rhs, p->domain_m);
+#else
+ lhs(p->domain_m) = rhs(p->domain_m);
+#endif
+
+ // Mark up-to-date.
+ updated |= 1<<p->face_m;
+
+ }
+
+ }
- lhs(p->domain_m) = rhs(p->domain_m);
-
++p;
}
-
- *pDirty_m = false;
+
+ *pDirty_m &= ~updated;
}
@@ -331,7 +377,7 @@
++p;
}
- *pDirty_m = true;
+ setDirty();
}
@@ -366,7 +412,7 @@
++p;
}
- *pDirty_m = true;
+ setDirty();
}
More information about the pooma-dev
mailing list