[PATCH] Speed up guard update.

Jeffrey D. Oldham oldham at codesourcery.com
Fri Jan 16 03:04:14 UTC 2004


Richard Guenther wrote:
> Hi!
> 
> This is a refined (aka shorter) patch which unifies the tracking of
> up-to-date faces and the special optimized copy for MPI.
> 
> Tested on serial ia32 linux with gcc3.4 with no regression.
> 
> Ok?

Yes, assuming the user interface did not change.  It looks like 
GCFillInfo's interface changed but existing code will still run because 
a parameter with a default argument was added.

> Richard.
> 
> 
> 2004Jan14  Richard Guenther <richard.guenther at uni-tuebingen.de>
> 
> 	* src/Engine/Intersector.h: track used guard faces.
> 	src/Engine/MultiPatchEngine.h: track up-to-dateness per
> 	face using a bitmask.
> 	src/Engine/Stencil.h: track used guard faces.
> 	src/Field/DiffOps/FieldStencil.h: track used guard faces.
> 	src/Layout/GridLayout.cpp: record face of guard update.
> 	src/Layout/LayoutBase.h: add face_m member to guard update
> 	struct.
> 	src/Layout/UniformGridLayout.cpp: record face of guard update.
> 	src/Engine/MultiPatchEngine.cpp: update only not up-to-date
> 	and needed faces during fillGuards(). Do manual Send/Receive
> 	of the inner guards domain for MPI.
> 
> --- cvs/r2/src/Engine/Intersector.h	2004-01-14 20:08:06.000000000 +0100
> +++ pooma-mpi3/r2/src/Engine/Intersector.h	2004-01-14 20:13:32.000000000 +0100
> @@ -129,7 +129,8 @@
>    }
> 
>    template<class Engine, int Dim2>
> -  bool intersect(const Engine &engine, const GuardLayers<Dim2> &guard)
> +  bool intersect(const Engine &engine, const GuardLayers<Dim2> &guard,
> +		 GuardLayers<Dim2> &usedGuards)
>    {
>      CTAssert(Engine::dimensions == Dim);
> 
> @@ -145,9 +146,7 @@
>        // If we've seen this ID before, we're done.
> 
>        if (ids_m[i] == layout.ID())
> -      {
>  	return false;
> -      }
> 
>        // If we've seen the base ID before and the base domain is the same
>        // we're done.
> @@ -157,10 +156,27 @@
>        {
>  	shared(layout.ID(),ids_m[i]);
> 
> -	// In this case we are using the guard cells unless this domain
> -	// is exactly the same as one we've seen before.
> +	// was: return (!sameBaseDomain(i,layout.baseDomain()));
> 
> -	return (!sameBaseDomain(i,layout.baseDomain()));
> +        // We should be able to find out the actual shape of the
> +	// used internal guards here, rather than just returning bool.
> +	// Something like:
> +
> +	// But what do, if Dim2 > baseDims_m[i]!?
> +	if (baseDims_m[i] < Dim2)
> +	  return true;
> +
> +	bool used = false;
> +	for (int j = 0; j < Dim2; j++)
> +	{
> +	  usedGuards.lower(j) = std::max(0, baseDomains_m[i][j].first() - layout.baseDomain()[j].first());
> +	  if (usedGuards.lower(j) != 0)
> +	    used = true;
> +	  usedGuards.upper(j) = std::max(0, layout.baseDomain()[j].last() - baseDomains_m[i][j].last());
> +	  if (usedGuards.upper(j) != 0)
> +	    used = true;
> +	}
> +	return used;
>        }
>      }
> 
> @@ -437,9 +453,9 @@
> 
>    template<class Engine, int Dim2>
>    inline
> -  bool intersect(const Engine &l, const GuardLayers<Dim2> &guard)
> +  bool intersect(const Engine &l, const GuardLayers<Dim2> &guard, GuardLayers<Dim2> &usedGuards)
>    {
> -    return (data()->intersect(l,guard));
> +    return (data()->intersect(l,guard,usedGuards));
>    }
> 
>  private:
> --- cvs/r2/src/Engine/MultiPatchEngine.h	2004-01-14 20:11:36.000000000 +0100
> +++ pooma-mpi3/r2/src/Engine/MultiPatchEngine.h	2004-01-14 20:13:32.000000000 +0100
> @@ -628,13 +628,18 @@
>    //---------------------------------------------------------------------------
>    /// Fill the internal guard cells.
> 
> -  inline void fillGuards() const
> +  inline void fillGuards(const GuardLayers<Dim>& g) const
>    {
> -    fillGuardsHandler(WrappedInt<Layout_t::supportsGuards>());
> +    fillGuardsHandler(g, WrappedInt<Layout_t::supportsGuards>());
> +  }
> +
> +  inline void fillGuards() const
> +  {
> +    fillGuards(layout().internalGuards());
>    }
> 
> -  inline void fillGuardsHandler(const WrappedInt<false>&) const { };
> -  void fillGuardsHandler(const WrappedInt<true>&) const ;
> +  inline void fillGuardsHandler(const GuardLayers<Dim>&, const WrappedInt<false>&) const { };
> +  void fillGuardsHandler(const GuardLayers<Dim>&, const WrappedInt<true>&) const ;
> 
>    //---------------------------------------------------------------------------
>    /// Set the internal guard cells to a particular value.
> @@ -650,14 +655,31 @@
>    /// Set and get the dirty flag (fillGuards is a no-op unless the
>    /// dirty flag is true).
> 
> +  inline int dirty() const { return *pDirty_m; }
> +
>    inline void setDirty() const
>    {
> -    *pDirty_m = true;
> +    *pDirty_m = (1<<(Dim*2))-1;
> +  }
> +
> +  inline void clearDirty(int face = -1) const
> +  {
> +    if (face == -1)
> +      *pDirty_m = 0;
> +    else {
> +      PAssert(face >= 0 && face <= Dim*2-1);
> +      *pDirty_m &= ~(1<<face);
> +    }
>    }
> 
> -  inline bool isDirty() const
> +  inline bool isDirty(int face = -1) const
>    {
> -    return *pDirty_m;
> +    if (face == -1)
> +      return *pDirty_m != 0;
> +    else {
> +      PAssert(face >= 0 && face <= Dim*2-1);
> +      return *pDirty_m & (1<<face);
> +    }
>    }
> 
>    //============================================================
> @@ -874,7 +896,7 @@
>    /// must share the same flag. We use the reference count in
>    /// data_m to decide whether to clean this up.
> 
> -  bool *pDirty_m;
> +  int *pDirty_m;
>  };
> 
> 
> @@ -1193,6 +1215,11 @@
>      baseEngine_m.fillGuards();
>    }
> 
> +  inline void fillGuards(const GuardLayers<Dim2>& g) const
> +  {
> +    baseEngine_m.fillGuards(g);
> +  }
> +
>    //---------------------------------------------------------------------------
>    /// Set the internal guard cells to a particular value (default zero)
> 
> @@ -1217,10 +1244,15 @@
>    {
>      baseEngine_m.setDirty();
>    }
> +
> +  inline void clearDirty(int face=-1) const
> +  {
> +    baseEngine_m.clearDirty(face);
> +  }
> 
> -  inline bool isDirty() const
> +  inline bool isDirty(int face=-1) const
>    {
> -    return baseEngine_m.isDirty();
> +    return baseEngine_m.isDirty(face);
>    }
> 
>    //---------------------------------------------------------------------------
> @@ -1694,12 +1726,13 @@
>    apply(const Engine<Dim,T,MultiPatch<LayoutTag,PatchTag> > &engine,
>  	const ExpressionApply<IntersectorTag<Intersect> > &tag)
>    {
> +    GuardLayers<Dim> usedGuards;
>      bool useGuards =
>        tag.tag().intersector_m.intersect(engine,
> -				  engine.layout().internalGuards());
> +				  engine.layout().internalGuards(), usedGuards);
> 
>      if (useGuards)
> -      engine.fillGuards();
> +      engine.fillGuards(usedGuards);
> 
>      return 0;
>    }
> @@ -1725,13 +1758,14 @@
>  	       const ExpressionApply<IntersectorTag<Intersect> > &tag,
>  	       const WrappedInt<true> &)
>    {
> +    GuardLayers<BD> usedGuards;
>      bool useGuards =
>        tag.tag().intersector_m.
>        intersect(engine,
> -		engine.layout().baseLayout().internalGuards());
> +		engine.layout().baseLayout().internalGuards(), usedGuards);
> 
>      if (useGuards)
> -      engine.fillGuards();
> +      engine.fillGuards(usedGuards);
> 
>      return 0;
>    }
> @@ -1741,7 +1775,7 @@
>  	       const ExpressionApply<IntersectorTag<Intersect> > &tag,
>  	       const WrappedInt<false> &)
>    {
> -    tag.tag().intersector_m.intersect(engine, GuardLayers<Dim>());
> +    tag.tag().intersector_m.intersect(engine);
>      return 0;
>    }
>  };
> --- cvs/r2/src/Engine/Stencil.h	2004-01-14 20:08:07.000000000 +0100
> +++ pooma-mpi3/r2/src/Engine/Stencil.h	2004-01-14 20:13:32.000000000 +0100
> @@ -752,11 +752,14 @@
> 
>    StencilIntersector(const This_t &model)
>      : domain_m(model.domain_m),
> +      stencilExtent_m(model.stencilExtent_m),
>        intersector_m(model.intersector_m)
>    { }
> 
> -  StencilIntersector(const Interval<Dim> &domain, const Intersect &intersect)
> +  StencilIntersector(const Interval<Dim> &domain, const Intersect &intersect,
> +		  const GuardLayers<Dim> &stencilExtent)
>      : domain_m(domain),
> +      stencilExtent_m(stencilExtent),
>        intersector_m(intersect)
>    { }
> 
> @@ -766,6 +769,7 @@
>      {
>        intersector_m = model.intersector_m;
>        domain_m = model.domain_m;
> +      stencilExtent_m = model.stencilExtent_m;
>      }
>      return *this;
>    }
> @@ -807,14 +811,19 @@
> 
>    template<class Engine, int Dim2>
>    inline
> -  bool intersect(const Engine &engine, const GuardLayers<Dim2> &)
> +  bool intersect(const Engine &engine, const GuardLayers<Dim2> &g,
> +		  GuardLayers<Dim> &usedGuards)
>    {
>      intersect(engine);
> +    // FIXME: accumulate used guards from intersect above and
> +    // stencil extent? I.e. allow  Stencil<>(a(i-1)+a(i+1))?
> +    usedGuards = stencilExtent_m;
>      return true;
>    }
> 
>  private:
>    Interval<Dim> domain_m;
> +  GuardLayers<Dim> stencilExtent_m;
>    Intersect     intersector_m;
>  };
> 
> @@ -833,8 +842,14 @@
>  	       const ExpressionApply<IntersectorTag<Intersect> > &tag)
>    {
>      typedef StencilIntersector<D, Intersect> NewIntersector_t;
> +    GuardLayers<D> stencilExtent;
> +    for (int i=0; i<D; ++i) {
> +      stencilExtent.lower(i) = engine.function().lowerExtent(i);
> +      stencilExtent.upper(i) = engine.function().upperExtent(i);
> +    }
>      NewIntersector_t newIntersector(engine.intersectDomain(),
> -				    tag.tag().intersector_m);
> +				    tag.tag().intersector_m,
> +				    stencilExtent);
> 
>      expressionApply(engine.expression(),
>  		    IntersectorTag<NewIntersector_t>(newIntersector));
> --- cvs/r2/src/Field/DiffOps/FieldStencil.h	2004-01-14 20:08:09.000000000 +0100
> +++ pooma-mpi3/r2/src/Field/DiffOps/FieldStencil.h	2004-01-14 20:13:32.000000000 +0100
> @@ -614,11 +617,13 @@
>    // Constructors
> 
>    FieldStencilIntersector(const This_t &model)
> -    : domain_m(model.domain_m), intersector_m(model.intersector_m)
> +    : domain_m(model.domain_m), stencilExtent_m(model.stencilExtent_m),
> +      intersector_m(model.intersector_m)
>    { }
> 
> -  FieldStencilIntersector(const Domain_t &dom, const Intersect &intersect)
> -    : domain_m(dom), intersector_m(intersect)
> +  FieldStencilIntersector(const Domain_t &dom, const Intersect &intersect,
> +		  const GuardLayers<Dim> &stencilExtent)
> +    : domain_m(dom), stencilExtent_m(stencilExtent), intersector_m(intersect)
>    { }
> 
>    This_t &operator=(const This_t &model)
> @@ -626,6 +631,7 @@
>      if (this != &model)
>      {
>        domain_m = model.domain_m;
> +      stencilExtent_m = model.stencilExtent_m;
>        intersector_m = model.intersector_m;
>      }
>      return *this;
> @@ -662,9 +668,13 @@
>    }
> 
>    template<class Engine, int Dim2>
> -  inline bool intersect(const Engine &engine, const GuardLayers<Dim2> &)
> +  inline bool intersect(const Engine &engine, const GuardLayers<Dim2> &,
> +		        GuardLayers<Dim> &usedGuards)
>    {
>      intersect(engine);
> +    // FIXME: accumulate used guards from intersect above and
> +    // stencil extent? I.e. allow  Stencil<>(a(i-1)+a(i+1))?
> +    usedGuards = stencilExtent_m;
>      return true;
>    }
> 
> @@ -672,6 +682,7 @@
> 
> 
>    Interval<Dim> domain_m;
> +  GuardLayers<Dim> stencilExtent_m;
>    Intersect     intersector_m;
>  };
> 
> @@ -699,8 +710,14 @@
>      // cells results in an error in the multipatch inode view.)
> 
>      typedef FieldStencilIntersector<Dim, Intersect> NewIntersector_t;
> +    GuardLayers<Dim> stencilExtent;
> +    for (int i=0; i<Dim; ++i) {
> +      stencilExtent.lower(i) = engine.functor().lowerExtent(i);
> +      stencilExtent.upper(i) = engine.functor().upperExtent(i);
> +    }
>      NewIntersector_t newIntersector(engine.intersectDomain(),
> -				    tag.tag().intersector_m);
> +				    tag.tag().intersector_m,
> +				    stencilExtent);
> 
>      expressionApply(engine.field(),
>  		    IntersectorTag<NewIntersector_t>(newIntersector));
> --- cvs/r2/src/Layout/GridLayout.cpp	2004-01-14 20:08:10.000000000 +0100
> +++ pooma-mpi3/r2/src/Layout/GridLayout.cpp	2004-01-14 20:13:32.000000000 +0100
> @@ -429,7 +436,7 @@
> 
>  		      // Now, push IDs and source into cache...
> 
> -		      this->gcFillList_m.push_back(GCFillInfo_t(gcdom, sourceID, destID));
> +		      this->gcFillList_m.push_back(GCFillInfo_t(gcdom, sourceID, destID, d*2));
>  		    }
>  		}
>  	    }
> @@ -481,7 +488,7 @@
> 
>  		      // Now, push IDs and source into cache...
> 
> -		      this->gcFillList_m.push_back(GCFillInfo_t(gcdom, sourceID, destID));
> +		      this->gcFillList_m.push_back(GCFillInfo_t(gcdom, sourceID, destID, d*2+1));
>  		    }
>  		}
>  	    }
> --- cvs/r2/src/Layout/LayoutBase.h	2004-01-14 20:08:12.000000000 +0100
> +++ pooma-mpi3/r2/src/Layout/LayoutBase.h	2004-01-14 20:13:32.000000000 +0100
> @@ -119,8 +121,8 @@
> 
>    struct GCFillInfo
>    {
> -    GCFillInfo(const Domain_t &dom, int ownedID, int guardID)
> -    : domain_m(dom), ownedID_m(ownedID), guardID_m(guardID) { }
> +    GCFillInfo(const Domain_t &dom, int ownedID, int guardID, int face=-1)
> +    : domain_m(dom), ownedID_m(ownedID), guardID_m(guardID), face_m(face) { }
> 
>      // Get a CW warning about this not having a default constructor
>      // when we instantiate the vector<GCFillInfo> below. This never
> @@ -131,6 +133,7 @@
>      Domain_t domain_m;    // guard layer domain
>      int      ownedID_m;   // node ID for which domain_m is owned
>      int      guardID_m;   // node ID for which domain_m is in the guards
> +    int      face_m;      // destination face of the guard layer (or -1, if unknown)
> 
>      Domain_t & domain() { return domain_m;}
>      int & ownedID() { return ownedID_m;}
> --- cvs/r2/src/Layout/UniformGridLayout.cpp	2004-01-14 20:08:13.000000000 +0100
> +++ pooma-mpi3/r2/src/Layout/UniformGridLayout.cpp	2004-01-14 20:13:32.000000000 +0100
> @@ -279,7 +279,7 @@
>  //-----------------------------------------------------------------------------
>  //
>  // template <int Dim>
> -// void UniformGridLayout<Dim>::calcGCFillList()
> +// void UniformGridLayoutData<Dim>::calcGCFillList()
>  //
>  // Calculates the cached information needed by MultiPatch Engine to
>  // fill the guard cells.
> @@ -370,7 +370,7 @@
>  		    this->all_m[sourceID]->context() == Pooma::context() ||
>   		    this->all_m[destID]->context() == Pooma::context()
>  		    )
> -                this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID));
> +                this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID,d*2));
>                }
>            }
> 
> @@ -417,7 +417,7 @@
>  		    this->all_m[sourceID]->context() == Pooma::context() ||
>   		    this->all_m[destID]->context() == Pooma::context()
>  		    )
> -		  this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID));
> +		  this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID,d*2+1));
>                }
>            }
>        }
> --- cvs/r2/src/Engine/MultiPatchEngine.cpp	2004-01-14 20:11:34.000000000 +0100
> +++ pooma-mpi3/r2/src/Engine/MultiPatchEngine.cpp	2004-01-14 20:23:23.000000000 +0100
> @@ -34,6 +34,7 @@
>  #include "Engine/CompressedFraction.h"
>  #include "Array/Array.h"
>  #include "Tulip/ReduceOverContexts.h"
> +#include "Tulip/SendReceive.h"
>  #include "Threads/PoomaCSem.h"
>  #include "Domain/IteratorPairDomain.h"
> 
> @@ -77,16 +78,18 @@
>  Engine(const Layout_t &layout)
>    : layout_m(layout),
>      data_m(layout.sizeGlobal()),
> -    pDirty_m(new bool(true))
> +    pDirty_m(new int)
>  {
>    typedef typename Layout_t::Value_t Node_t;
> 
> +  setDirty();
> +
>    // check for correct match of PatchTag and the mapper used to make the
>    // layout.
>    // THIS IS A HACK! we test on the context of the first patch, and if it
>    // is -1, we have a Layout made with the LocalMapper.
> 
> -#if POOMA_CHEETAH
> +#if POOMA_MESSAGING
> 
>    if( layout_m.nodeListGlobal().size() > 0)
>    {
> @@ -247,7 +250,7 @@
>    PAssert(data_m.isValid());
>    if (data_m.isShared()) {
>      data_m.makeOwnCopy();
> -    pDirty_m = new bool(*pDirty_m);
> +    pDirty_m = new int(*pDirty_m);
>    }
> 
>    return *this;
> @@ -261,45 +264,88 @@
>  //
>  //-----------------------------------------------------------------------------
> 
> +/// Guard layer assign between non-remote engines, just use the
> +/// ET mechanisms
> +
> +template <int Dim, class T, class Tag>
> +static inline
> +void simpleAssign(const Array<Dim, T, Tag>& lhs,
> +		  const Array<Dim, T, Tag>& rhs,
> +		  const Interval<Dim>& domain)
> +{
> +  lhs(domain) = rhs(domain);
> +}
> +
> +/// Guard layer assign between remote engines, use Send/Receive directly
> +/// to avoid one extra copy of the data.
> +
> +template <int Dim, class T, class Tag>
> +static inline
> +void simpleAssign(const Array<Dim, T, Remote<Tag> >& lhs,
> +		  const Array<Dim, T, Remote<Tag> >& rhs,
> +		  const Interval<Dim>& domain)
> +{
> +  if (lhs.engine().owningContext() == rhs.engine().owningContext())
> +    lhs(domain) = rhs(domain);
> +  else {
> +    typedef typename NewEngine<Engine<Dim, T, Tag>, Interval<Dim> >::Type_t ViewEngine_t;
> +    if (lhs.engine().engineIsLocal())
> +      Receive<ViewEngine_t>::receive(ViewEngine_t(lhs.engine().localEngine(), domain),
> +				     rhs.engine().owningContext());
> +    else if (rhs.engine().engineIsLocal())
> +      SendReceive::send(ViewEngine_t(rhs.engine().localEngine(), domain),
> +			lhs.engine().owningContext());
> +  }
> +}
> +
>  template <int Dim, class T, class LayoutTag, class PatchTag>
>  void Engine<Dim, T, MultiPatch<LayoutTag,PatchTag> >::
> -fillGuardsHandler(const WrappedInt<true> &) const
> +fillGuardsHandler(const GuardLayers<Dim>& g, const WrappedInt<true> &) const
>  {
>    if (!isDirty()) return;
> -
> -#if POOMA_PURIFY
> -
> -  // This is here to remove spurious UMRs that result when un-initialized
> -  // guards are copied in the following loop. All of the unitialized data
> -  // is ultimately overwritten with good data, so I don't see why purify
> -  // calls these UMRs in stead of unitialized memory copies, but it does.
> -  // I don't do this in general since it would be slow and since T(0) is
> -  // not generally valid. This does mean that fillGuards() will fail
> -  // with purify for types that do not know what to do with T(0).
> -
> -  setGuards(T(0));
> -
> -#endif
> 
> +  int updated = 0;
>    typename Layout_t::FillIterator_t p = layout_m.beginFillList();
> -
> +
>    while (p != layout_m.endFillList())
>      {
>        int src  = p->ownedID_m;
>        int dest = p->guardID_m;
> 
> -      // Create patch arrays that see the entire patch:
> +      // Skip face, if not dirty.
> +
> +      if (isDirty(p->face_m)) {
> +
> +        // Check, if the p->domain_m is a guard which matches the
> +        // needed guard g.
> +
> +	int d = p->face_m/2;
> +	int guardSizeNeeded = p->face_m & 1 ? g.upper(d) : g.lower(d);
> +        if (!(p->face_m != -1
> +	      && guardSizeNeeded == 0)) {
> +
> +          // Create patch arrays that see the entire patch:
> 
> -      Array<Dim, T, PatchTag> lhs(data()[dest]), rhs(data()[src]);
> +          Array<Dim, T, PatchTag> lhs(data()[dest]), rhs(data()[src]);
> 
> -      // Now do assignment from the subdomains.
> +          // Now do assignment from the subdomains.
> +#if POOMA_MPI
> +          simpleAssign(lhs, rhs, p->domain_m);
> +#else
> +          lhs(p->domain_m) = rhs(p->domain_m);
> +#endif
> +
> +	  // Mark up-to-date.
> +	  updated |= 1<<p->face_m;
> +
> +	}
> +
> +      }
> 
> -      lhs(p->domain_m) = rhs(p->domain_m);
> -
>        ++p;
>      }
> -
> -  *pDirty_m = false;
> +
> +  *pDirty_m &= ~updated;
>  }
> 
> 
> @@ -331,7 +377,7 @@
>        ++p;
>      }
> 
> -  *pDirty_m = true;
> +  setDirty();
>  }
> 
> 
> @@ -366,7 +412,7 @@
>        ++p;
>      }
> 
> -  *pDirty_m = true;
> +  setDirty();
>  }
> 
> 


-- 
Jeffrey D. Oldham
oldham at codesourcery.com




More information about the pooma-dev mailing list