[PATCH] Speed up guard update.
Jeffrey D. Oldham
oldham at codesourcery.com
Fri Jan 16 03:04:14 UTC 2004
Richard Guenther wrote:
> Hi!
>
> This is a refined (aka shorter) patch which unifies the tracking of
> up-to-date faces and the special optimized copy for MPI.
>
> Tested on serial ia32 linux with gcc3.4 with no regression.
>
> Ok?
Yes, assuming the user interface did not change. It looks like
GCFillInfo's interface changed but existing code will still run because
a parameter with a default argument was added.
> Richard.
>
>
> 2004Jan14 Richard Guenther <richard.guenther at uni-tuebingen.de>
>
> * src/Engine/Intersector.h: track used guard faces.
> src/Engine/MultiPatchEngine.h: track up-to-dateness per
> face using a bitmask.
> src/Engine/Stencil.h: track used guard faces.
> src/Field/DiffOps/FieldStencil.h: track used guard faces.
> src/Layout/GridLayout.cpp: record face of guard update.
> src/Layout/LayoutBase.h: add face_m member to guard update
> struct.
> src/Layout/UniformGridLayout.cpp: record face of guard update.
> src/Engine/MultiPatchEngine.cpp: update only not up-to-date
> and needed faces during fillGuards(). Do manual Send/Receive
> of the inner guards domain for MPI.
>
> --- cvs/r2/src/Engine/Intersector.h 2004-01-14 20:08:06.000000000 +0100
> +++ pooma-mpi3/r2/src/Engine/Intersector.h 2004-01-14 20:13:32.000000000 +0100
> @@ -129,7 +129,8 @@
> }
>
> template<class Engine, int Dim2>
> - bool intersect(const Engine &engine, const GuardLayers<Dim2> &guard)
> + bool intersect(const Engine &engine, const GuardLayers<Dim2> &guard,
> + GuardLayers<Dim2> &usedGuards)
> {
> CTAssert(Engine::dimensions == Dim);
>
> @@ -145,9 +146,7 @@
> // If we've seen this ID before, we're done.
>
> if (ids_m[i] == layout.ID())
> - {
> return false;
> - }
>
> // If we've seen the base ID before and the base domain is the same
> // we're done.
> @@ -157,10 +156,27 @@
> {
> shared(layout.ID(),ids_m[i]);
>
> - // In this case we are using the guard cells unless this domain
> - // is exactly the same as one we've seen before.
> + // was: return (!sameBaseDomain(i,layout.baseDomain()));
>
> - return (!sameBaseDomain(i,layout.baseDomain()));
> + // We should be able to find out the actual shape of the
> + // used internal guards here, rather than just returning bool.
> + // Something like:
> +
> + // But what do, if Dim2 > baseDims_m[i]!?
> + if (baseDims_m[i] < Dim2)
> + return true;
> +
> + bool used = false;
> + for (int j = 0; j < Dim2; j++)
> + {
> + usedGuards.lower(j) = std::max(0, baseDomains_m[i][j].first() - layout.baseDomain()[j].first());
> + if (usedGuards.lower(j) != 0)
> + used = true;
> + usedGuards.upper(j) = std::max(0, layout.baseDomain()[j].last() - baseDomains_m[i][j].last());
> + if (usedGuards.upper(j) != 0)
> + used = true;
> + }
> + return used;
> }
> }
>
> @@ -437,9 +453,9 @@
>
> template<class Engine, int Dim2>
> inline
> - bool intersect(const Engine &l, const GuardLayers<Dim2> &guard)
> + bool intersect(const Engine &l, const GuardLayers<Dim2> &guard, GuardLayers<Dim2> &usedGuards)
> {
> - return (data()->intersect(l,guard));
> + return (data()->intersect(l,guard,usedGuards));
> }
>
> private:
> --- cvs/r2/src/Engine/MultiPatchEngine.h 2004-01-14 20:11:36.000000000 +0100
> +++ pooma-mpi3/r2/src/Engine/MultiPatchEngine.h 2004-01-14 20:13:32.000000000 +0100
> @@ -628,13 +628,18 @@
> //---------------------------------------------------------------------------
> /// Fill the internal guard cells.
>
> - inline void fillGuards() const
> + inline void fillGuards(const GuardLayers<Dim>& g) const
> {
> - fillGuardsHandler(WrappedInt<Layout_t::supportsGuards>());
> + fillGuardsHandler(g, WrappedInt<Layout_t::supportsGuards>());
> + }
> +
> + inline void fillGuards() const
> + {
> + fillGuards(layout().internalGuards());
> }
>
> - inline void fillGuardsHandler(const WrappedInt<false>&) const { };
> - void fillGuardsHandler(const WrappedInt<true>&) const ;
> + inline void fillGuardsHandler(const GuardLayers<Dim>&, const WrappedInt<false>&) const { };
> + void fillGuardsHandler(const GuardLayers<Dim>&, const WrappedInt<true>&) const ;
>
> //---------------------------------------------------------------------------
> /// Set the internal guard cells to a particular value.
> @@ -650,14 +655,31 @@
> /// Set and get the dirty flag (fillGuards is a no-op unless the
> /// dirty flag is true).
>
> + inline int dirty() const { return *pDirty_m; }
> +
> inline void setDirty() const
> {
> - *pDirty_m = true;
> + *pDirty_m = (1<<(Dim*2))-1;
> + }
> +
> + inline void clearDirty(int face = -1) const
> + {
> + if (face == -1)
> + *pDirty_m = 0;
> + else {
> + PAssert(face >= 0 && face <= Dim*2-1);
> + *pDirty_m &= ~(1<<face);
> + }
> }
>
> - inline bool isDirty() const
> + inline bool isDirty(int face = -1) const
> {
> - return *pDirty_m;
> + if (face == -1)
> + return *pDirty_m != 0;
> + else {
> + PAssert(face >= 0 && face <= Dim*2-1);
> + return *pDirty_m & (1<<face);
> + }
> }
>
> //============================================================
> @@ -874,7 +896,7 @@
> /// must share the same flag. We use the reference count in
> /// data_m to decide whether to clean this up.
>
> - bool *pDirty_m;
> + int *pDirty_m;
> };
>
>
> @@ -1193,6 +1215,11 @@
> baseEngine_m.fillGuards();
> }
>
> + inline void fillGuards(const GuardLayers<Dim2>& g) const
> + {
> + baseEngine_m.fillGuards(g);
> + }
> +
> //---------------------------------------------------------------------------
> /// Set the internal guard cells to a particular value (default zero)
>
> @@ -1217,10 +1244,15 @@
> {
> baseEngine_m.setDirty();
> }
> +
> + inline void clearDirty(int face=-1) const
> + {
> + baseEngine_m.clearDirty(face);
> + }
>
> - inline bool isDirty() const
> + inline bool isDirty(int face=-1) const
> {
> - return baseEngine_m.isDirty();
> + return baseEngine_m.isDirty(face);
> }
>
> //---------------------------------------------------------------------------
> @@ -1694,12 +1726,13 @@
> apply(const Engine<Dim,T,MultiPatch<LayoutTag,PatchTag> > &engine,
> const ExpressionApply<IntersectorTag<Intersect> > &tag)
> {
> + GuardLayers<Dim> usedGuards;
> bool useGuards =
> tag.tag().intersector_m.intersect(engine,
> - engine.layout().internalGuards());
> + engine.layout().internalGuards(), usedGuards);
>
> if (useGuards)
> - engine.fillGuards();
> + engine.fillGuards(usedGuards);
>
> return 0;
> }
> @@ -1725,13 +1758,14 @@
> const ExpressionApply<IntersectorTag<Intersect> > &tag,
> const WrappedInt<true> &)
> {
> + GuardLayers<BD> usedGuards;
> bool useGuards =
> tag.tag().intersector_m.
> intersect(engine,
> - engine.layout().baseLayout().internalGuards());
> + engine.layout().baseLayout().internalGuards(), usedGuards);
>
> if (useGuards)
> - engine.fillGuards();
> + engine.fillGuards(usedGuards);
>
> return 0;
> }
> @@ -1741,7 +1775,7 @@
> const ExpressionApply<IntersectorTag<Intersect> > &tag,
> const WrappedInt<false> &)
> {
> - tag.tag().intersector_m.intersect(engine, GuardLayers<Dim>());
> + tag.tag().intersector_m.intersect(engine);
> return 0;
> }
> };
> --- cvs/r2/src/Engine/Stencil.h 2004-01-14 20:08:07.000000000 +0100
> +++ pooma-mpi3/r2/src/Engine/Stencil.h 2004-01-14 20:13:32.000000000 +0100
> @@ -752,11 +752,14 @@
>
> StencilIntersector(const This_t &model)
> : domain_m(model.domain_m),
> + stencilExtent_m(model.stencilExtent_m),
> intersector_m(model.intersector_m)
> { }
>
> - StencilIntersector(const Interval<Dim> &domain, const Intersect &intersect)
> + StencilIntersector(const Interval<Dim> &domain, const Intersect &intersect,
> + const GuardLayers<Dim> &stencilExtent)
> : domain_m(domain),
> + stencilExtent_m(stencilExtent),
> intersector_m(intersect)
> { }
>
> @@ -766,6 +769,7 @@
> {
> intersector_m = model.intersector_m;
> domain_m = model.domain_m;
> + stencilExtent_m = model.stencilExtent_m;
> }
> return *this;
> }
> @@ -807,14 +811,19 @@
>
> template<class Engine, int Dim2>
> inline
> - bool intersect(const Engine &engine, const GuardLayers<Dim2> &)
> + bool intersect(const Engine &engine, const GuardLayers<Dim2> &g,
> + GuardLayers<Dim> &usedGuards)
> {
> intersect(engine);
> + // FIXME: accumulate used guards from intersect above and
> + // stencil extent? I.e. allow Stencil<>(a(i-1)+a(i+1))?
> + usedGuards = stencilExtent_m;
> return true;
> }
>
> private:
> Interval<Dim> domain_m;
> + GuardLayers<Dim> stencilExtent_m;
> Intersect intersector_m;
> };
>
> @@ -833,8 +842,14 @@
> const ExpressionApply<IntersectorTag<Intersect> > &tag)
> {
> typedef StencilIntersector<D, Intersect> NewIntersector_t;
> + GuardLayers<D> stencilExtent;
> + for (int i=0; i<D; ++i) {
> + stencilExtent.lower(i) = engine.function().lowerExtent(i);
> + stencilExtent.upper(i) = engine.function().upperExtent(i);
> + }
> NewIntersector_t newIntersector(engine.intersectDomain(),
> - tag.tag().intersector_m);
> + tag.tag().intersector_m,
> + stencilExtent);
>
> expressionApply(engine.expression(),
> IntersectorTag<NewIntersector_t>(newIntersector));
> --- cvs/r2/src/Field/DiffOps/FieldStencil.h 2004-01-14 20:08:09.000000000 +0100
> +++ pooma-mpi3/r2/src/Field/DiffOps/FieldStencil.h 2004-01-14 20:13:32.000000000 +0100
> @@ -614,11 +617,13 @@
> // Constructors
>
> FieldStencilIntersector(const This_t &model)
> - : domain_m(model.domain_m), intersector_m(model.intersector_m)
> + : domain_m(model.domain_m), stencilExtent_m(model.stencilExtent_m),
> + intersector_m(model.intersector_m)
> { }
>
> - FieldStencilIntersector(const Domain_t &dom, const Intersect &intersect)
> - : domain_m(dom), intersector_m(intersect)
> + FieldStencilIntersector(const Domain_t &dom, const Intersect &intersect,
> + const GuardLayers<Dim> &stencilExtent)
> + : domain_m(dom), stencilExtent_m(stencilExtent), intersector_m(intersect)
> { }
>
> This_t &operator=(const This_t &model)
> @@ -626,6 +631,7 @@
> if (this != &model)
> {
> domain_m = model.domain_m;
> + stencilExtent_m = model.stencilExtent_m;
> intersector_m = model.intersector_m;
> }
> return *this;
> @@ -662,9 +668,13 @@
> }
>
> template<class Engine, int Dim2>
> - inline bool intersect(const Engine &engine, const GuardLayers<Dim2> &)
> + inline bool intersect(const Engine &engine, const GuardLayers<Dim2> &,
> + GuardLayers<Dim> &usedGuards)
> {
> intersect(engine);
> + // FIXME: accumulate used guards from intersect above and
> + // stencil extent? I.e. allow Stencil<>(a(i-1)+a(i+1))?
> + usedGuards = stencilExtent_m;
> return true;
> }
>
> @@ -672,6 +682,7 @@
>
>
> Interval<Dim> domain_m;
> + GuardLayers<Dim> stencilExtent_m;
> Intersect intersector_m;
> };
>
> @@ -699,8 +710,14 @@
> // cells results in an error in the multipatch inode view.)
>
> typedef FieldStencilIntersector<Dim, Intersect> NewIntersector_t;
> + GuardLayers<Dim> stencilExtent;
> + for (int i=0; i<Dim; ++i) {
> + stencilExtent.lower(i) = engine.functor().lowerExtent(i);
> + stencilExtent.upper(i) = engine.functor().upperExtent(i);
> + }
> NewIntersector_t newIntersector(engine.intersectDomain(),
> - tag.tag().intersector_m);
> + tag.tag().intersector_m,
> + stencilExtent);
>
> expressionApply(engine.field(),
> IntersectorTag<NewIntersector_t>(newIntersector));
> --- cvs/r2/src/Layout/GridLayout.cpp 2004-01-14 20:08:10.000000000 +0100
> +++ pooma-mpi3/r2/src/Layout/GridLayout.cpp 2004-01-14 20:13:32.000000000 +0100
> @@ -429,7 +436,7 @@
>
> // Now, push IDs and source into cache...
>
> - this->gcFillList_m.push_back(GCFillInfo_t(gcdom, sourceID, destID));
> + this->gcFillList_m.push_back(GCFillInfo_t(gcdom, sourceID, destID, d*2));
> }
> }
> }
> @@ -481,7 +488,7 @@
>
> // Now, push IDs and source into cache...
>
> - this->gcFillList_m.push_back(GCFillInfo_t(gcdom, sourceID, destID));
> + this->gcFillList_m.push_back(GCFillInfo_t(gcdom, sourceID, destID, d*2+1));
> }
> }
> }
> --- cvs/r2/src/Layout/LayoutBase.h 2004-01-14 20:08:12.000000000 +0100
> +++ pooma-mpi3/r2/src/Layout/LayoutBase.h 2004-01-14 20:13:32.000000000 +0100
> @@ -119,8 +121,8 @@
>
> struct GCFillInfo
> {
> - GCFillInfo(const Domain_t &dom, int ownedID, int guardID)
> - : domain_m(dom), ownedID_m(ownedID), guardID_m(guardID) { }
> + GCFillInfo(const Domain_t &dom, int ownedID, int guardID, int face=-1)
> + : domain_m(dom), ownedID_m(ownedID), guardID_m(guardID), face_m(face) { }
>
> // Get a CW warning about this not having a default constructor
> // when we instantiate the vector<GCFillInfo> below. This never
> @@ -131,6 +133,7 @@
> Domain_t domain_m; // guard layer domain
> int ownedID_m; // node ID for which domain_m is owned
> int guardID_m; // node ID for which domain_m is in the guards
> + int face_m; // destination face of the guard layer (or -1, if unknown)
>
> Domain_t & domain() { return domain_m;}
> int & ownedID() { return ownedID_m;}
> --- cvs/r2/src/Layout/UniformGridLayout.cpp 2004-01-14 20:08:13.000000000 +0100
> +++ pooma-mpi3/r2/src/Layout/UniformGridLayout.cpp 2004-01-14 20:13:32.000000000 +0100
> @@ -279,7 +279,7 @@
> //-----------------------------------------------------------------------------
> //
> // template <int Dim>
> -// void UniformGridLayout<Dim>::calcGCFillList()
> +// void UniformGridLayoutData<Dim>::calcGCFillList()
> //
> // Calculates the cached information needed by MultiPatch Engine to
> // fill the guard cells.
> @@ -370,7 +370,7 @@
> this->all_m[sourceID]->context() == Pooma::context() ||
> this->all_m[destID]->context() == Pooma::context()
> )
> - this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID));
> + this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID,d*2));
> }
> }
>
> @@ -417,7 +417,7 @@
> this->all_m[sourceID]->context() == Pooma::context() ||
> this->all_m[destID]->context() == Pooma::context()
> )
> - this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID));
> + this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID,d*2+1));
> }
> }
> }
> --- cvs/r2/src/Engine/MultiPatchEngine.cpp 2004-01-14 20:11:34.000000000 +0100
> +++ pooma-mpi3/r2/src/Engine/MultiPatchEngine.cpp 2004-01-14 20:23:23.000000000 +0100
> @@ -34,6 +34,7 @@
> #include "Engine/CompressedFraction.h"
> #include "Array/Array.h"
> #include "Tulip/ReduceOverContexts.h"
> +#include "Tulip/SendReceive.h"
> #include "Threads/PoomaCSem.h"
> #include "Domain/IteratorPairDomain.h"
>
> @@ -77,16 +78,18 @@
> Engine(const Layout_t &layout)
> : layout_m(layout),
> data_m(layout.sizeGlobal()),
> - pDirty_m(new bool(true))
> + pDirty_m(new int)
> {
> typedef typename Layout_t::Value_t Node_t;
>
> + setDirty();
> +
> // check for correct match of PatchTag and the mapper used to make the
> // layout.
> // THIS IS A HACK! we test on the context of the first patch, and if it
> // is -1, we have a Layout made with the LocalMapper.
>
> -#if POOMA_CHEETAH
> +#if POOMA_MESSAGING
>
> if( layout_m.nodeListGlobal().size() > 0)
> {
> @@ -247,7 +250,7 @@
> PAssert(data_m.isValid());
> if (data_m.isShared()) {
> data_m.makeOwnCopy();
> - pDirty_m = new bool(*pDirty_m);
> + pDirty_m = new int(*pDirty_m);
> }
>
> return *this;
> @@ -261,45 +264,88 @@
> //
> //-----------------------------------------------------------------------------
>
> +/// Guard layer assign between non-remote engines, just use the
> +/// ET mechanisms
> +
> +template <int Dim, class T, class Tag>
> +static inline
> +void simpleAssign(const Array<Dim, T, Tag>& lhs,
> + const Array<Dim, T, Tag>& rhs,
> + const Interval<Dim>& domain)
> +{
> + lhs(domain) = rhs(domain);
> +}
> +
> +/// Guard layer assign between remote engines, use Send/Receive directly
> +/// to avoid one extra copy of the data.
> +
> +template <int Dim, class T, class Tag>
> +static inline
> +void simpleAssign(const Array<Dim, T, Remote<Tag> >& lhs,
> + const Array<Dim, T, Remote<Tag> >& rhs,
> + const Interval<Dim>& domain)
> +{
> + if (lhs.engine().owningContext() == rhs.engine().owningContext())
> + lhs(domain) = rhs(domain);
> + else {
> + typedef typename NewEngine<Engine<Dim, T, Tag>, Interval<Dim> >::Type_t ViewEngine_t;
> + if (lhs.engine().engineIsLocal())
> + Receive<ViewEngine_t>::receive(ViewEngine_t(lhs.engine().localEngine(), domain),
> + rhs.engine().owningContext());
> + else if (rhs.engine().engineIsLocal())
> + SendReceive::send(ViewEngine_t(rhs.engine().localEngine(), domain),
> + lhs.engine().owningContext());
> + }
> +}
> +
> template <int Dim, class T, class LayoutTag, class PatchTag>
> void Engine<Dim, T, MultiPatch<LayoutTag,PatchTag> >::
> -fillGuardsHandler(const WrappedInt<true> &) const
> +fillGuardsHandler(const GuardLayers<Dim>& g, const WrappedInt<true> &) const
> {
> if (!isDirty()) return;
> -
> -#if POOMA_PURIFY
> -
> - // This is here to remove spurious UMRs that result when un-initialized
> - // guards are copied in the following loop. All of the unitialized data
> - // is ultimately overwritten with good data, so I don't see why purify
> - // calls these UMRs in stead of unitialized memory copies, but it does.
> - // I don't do this in general since it would be slow and since T(0) is
> - // not generally valid. This does mean that fillGuards() will fail
> - // with purify for types that do not know what to do with T(0).
> -
> - setGuards(T(0));
> -
> -#endif
>
> + int updated = 0;
> typename Layout_t::FillIterator_t p = layout_m.beginFillList();
> -
> +
> while (p != layout_m.endFillList())
> {
> int src = p->ownedID_m;
> int dest = p->guardID_m;
>
> - // Create patch arrays that see the entire patch:
> + // Skip face, if not dirty.
> +
> + if (isDirty(p->face_m)) {
> +
> + // Check, if the p->domain_m is a guard which matches the
> + // needed guard g.
> +
> + int d = p->face_m/2;
> + int guardSizeNeeded = p->face_m & 1 ? g.upper(d) : g.lower(d);
> + if (!(p->face_m != -1
> + && guardSizeNeeded == 0)) {
> +
> + // Create patch arrays that see the entire patch:
>
> - Array<Dim, T, PatchTag> lhs(data()[dest]), rhs(data()[src]);
> + Array<Dim, T, PatchTag> lhs(data()[dest]), rhs(data()[src]);
>
> - // Now do assignment from the subdomains.
> + // Now do assignment from the subdomains.
> +#if POOMA_MPI
> + simpleAssign(lhs, rhs, p->domain_m);
> +#else
> + lhs(p->domain_m) = rhs(p->domain_m);
> +#endif
> +
> + // Mark up-to-date.
> + updated |= 1<<p->face_m;
> +
> + }
> +
> + }
>
> - lhs(p->domain_m) = rhs(p->domain_m);
> -
> ++p;
> }
> -
> - *pDirty_m = false;
> +
> + *pDirty_m &= ~updated;
> }
>
>
> @@ -331,7 +377,7 @@
> ++p;
> }
>
> - *pDirty_m = true;
> + setDirty();
> }
>
>
> @@ -366,7 +412,7 @@
> ++p;
> }
>
> - *pDirty_m = true;
> + setDirty();
> }
>
>
--
Jeffrey D. Oldham
oldham at codesourcery.com
More information about the pooma-dev
mailing list