[PATCH] Speed up guard update.

Richard Guenther rguenth at tat.physik.uni-tuebingen.de
Wed Jan 14 20:56:51 UTC 2004


Hi!

This is a refined (aka shorter) patch which unifies the tracking of
up-to-date faces and the special optimized copy for MPI.

Tested on serial ia32 Linux with gcc 3.4; no regressions.

Ok?

Richard.


2004Jan14  Richard Guenther <richard.guenther at uni-tuebingen.de>

	* src/Engine/Intersector.h: Track used guard faces.
	* src/Engine/MultiPatchEngine.h: Track up-to-dateness per
	face using a bitmask.
	* src/Engine/Stencil.h: Track used guard faces.
	* src/Field/DiffOps/FieldStencil.h: Track used guard faces.
	* src/Layout/GridLayout.cpp: Record face of guard update.
	* src/Layout/LayoutBase.h: Add face_m member to guard update
	struct.
	* src/Layout/UniformGridLayout.cpp: Record face of guard update.
	* src/Engine/MultiPatchEngine.cpp: Update only those faces that
	are needed and not up-to-date during fillGuards().  Do manual
	Send/Receive of the inner guards domain for MPI.

--- cvs/r2/src/Engine/Intersector.h	2004-01-14 20:08:06.000000000 +0100
+++ pooma-mpi3/r2/src/Engine/Intersector.h	2004-01-14 20:13:32.000000000 +0100
@@ -129,7 +129,8 @@
   }

   template<class Engine, int Dim2>
-  bool intersect(const Engine &engine, const GuardLayers<Dim2> &guard)
+  bool intersect(const Engine &engine, const GuardLayers<Dim2> &guard,
+		 GuardLayers<Dim2> &usedGuards)
   {
     CTAssert(Engine::dimensions == Dim);

@@ -145,9 +146,7 @@
       // If we've seen this ID before, we're done.

       if (ids_m[i] == layout.ID())
-      {
 	return false;
-      }

       // If we've seen the base ID before and the base domain is the same
       // we're done.
@@ -157,10 +156,27 @@
       {
 	shared(layout.ID(),ids_m[i]);

-	// In this case we are using the guard cells unless this domain
-	// is exactly the same as one we've seen before.
+	// was: return (!sameBaseDomain(i,layout.baseDomain()));

-	return (!sameBaseDomain(i,layout.baseDomain()));
+        // We should be able to find out the actual shape of the
+	// used internal guards here, rather than just returning bool.
+	// Something like:
+
+	// But what should we do if Dim2 > baseDims_m[i]? (usedGuards is left unset on this path.)
+	if (baseDims_m[i] < Dim2)
+	  return true;
+
+	bool used = false;
+	for (int j = 0; j < Dim2; j++)
+	{
+	  usedGuards.lower(j) = std::max(0, baseDomains_m[i][j].first() - layout.baseDomain()[j].first());
+	  if (usedGuards.lower(j) != 0)
+	    used = true;
+	  usedGuards.upper(j) = std::max(0, layout.baseDomain()[j].last() - baseDomains_m[i][j].last());
+	  if (usedGuards.upper(j) != 0)
+	    used = true;
+	}
+	return used;
       }
     }

@@ -437,9 +453,9 @@

   template<class Engine, int Dim2>
   inline
-  bool intersect(const Engine &l, const GuardLayers<Dim2> &guard)
+  bool intersect(const Engine &l, const GuardLayers<Dim2> &guard, GuardLayers<Dim2> &usedGuards)
   {
-    return (data()->intersect(l,guard));
+    return (data()->intersect(l,guard,usedGuards));
   }

 private:
--- cvs/r2/src/Engine/MultiPatchEngine.h	2004-01-14 20:11:36.000000000 +0100
+++ pooma-mpi3/r2/src/Engine/MultiPatchEngine.h	2004-01-14 20:13:32.000000000 +0100
@@ -628,13 +628,18 @@
   //---------------------------------------------------------------------------
   /// Fill the internal guard cells.

-  inline void fillGuards() const
+  inline void fillGuards(const GuardLayers<Dim>& g) const
   {
-    fillGuardsHandler(WrappedInt<Layout_t::supportsGuards>());
+    fillGuardsHandler(g, WrappedInt<Layout_t::supportsGuards>());
+  }
+
+  inline void fillGuards() const
+  {
+    fillGuards(layout().internalGuards());
   }

-  inline void fillGuardsHandler(const WrappedInt<false>&) const { };
-  void fillGuardsHandler(const WrappedInt<true>&) const ;
+  inline void fillGuardsHandler(const GuardLayers<Dim>&, const WrappedInt<false>&) const { };
+  void fillGuardsHandler(const GuardLayers<Dim>&, const WrappedInt<true>&) const ;

   //---------------------------------------------------------------------------
   /// Set the internal guard cells to a particular value.
@@ -650,14 +655,31 @@
   /// Set and get the dirty flag (fillGuards is a no-op unless the
   /// dirty flag is true).

+  inline int dirty() const { return *pDirty_m; }
+
   inline void setDirty() const
   {
-    *pDirty_m = true;
+    *pDirty_m = (1<<(Dim*2))-1;
+  }
+
+  inline void clearDirty(int face = -1) const
+  {
+    if (face == -1)
+      *pDirty_m = 0;
+    else {
+      PAssert(face >= 0 && face <= Dim*2-1);
+      *pDirty_m &= ~(1<<face);
+    }
   }

-  inline bool isDirty() const
+  inline bool isDirty(int face = -1) const
   {
-    return *pDirty_m;
+    if (face == -1)
+      return *pDirty_m != 0;
+    else {
+      PAssert(face >= 0 && face <= Dim*2-1);
+      return *pDirty_m & (1<<face);
+    }
   }

   //============================================================
@@ -874,7 +896,7 @@
   /// must share the same flag. We use the reference count in
   /// data_m to decide whether to clean this up.

-  bool *pDirty_m;
+  int *pDirty_m;
 };


@@ -1193,6 +1215,11 @@
     baseEngine_m.fillGuards();
   }

+  inline void fillGuards(const GuardLayers<Dim2>& g) const
+  {
+    baseEngine_m.fillGuards(g);
+  }
+
   //---------------------------------------------------------------------------
   /// Set the internal guard cells to a particular value (default zero)

@@ -1217,10 +1244,15 @@
   {
     baseEngine_m.setDirty();
   }
+
+  inline void clearDirty(int face=-1) const
+  {
+    baseEngine_m.clearDirty(face);
+  }

-  inline bool isDirty() const
+  inline bool isDirty(int face=-1) const
   {
-    return baseEngine_m.isDirty();
+    return baseEngine_m.isDirty(face);
   }

   //---------------------------------------------------------------------------
@@ -1694,12 +1726,13 @@
   apply(const Engine<Dim,T,MultiPatch<LayoutTag,PatchTag> > &engine,
 	const ExpressionApply<IntersectorTag<Intersect> > &tag)
   {
+    GuardLayers<Dim> usedGuards;
     bool useGuards =
       tag.tag().intersector_m.intersect(engine,
-				  engine.layout().internalGuards());
+				  engine.layout().internalGuards(), usedGuards);

     if (useGuards)
-      engine.fillGuards();
+      engine.fillGuards(usedGuards);

     return 0;
   }
@@ -1725,13 +1758,14 @@
 	       const ExpressionApply<IntersectorTag<Intersect> > &tag,
 	       const WrappedInt<true> &)
   {
+    GuardLayers<BD> usedGuards;
     bool useGuards =
       tag.tag().intersector_m.
       intersect(engine,
-		engine.layout().baseLayout().internalGuards());
+		engine.layout().baseLayout().internalGuards(), usedGuards);

     if (useGuards)
-      engine.fillGuards();
+      engine.fillGuards(usedGuards);

     return 0;
   }
@@ -1741,7 +1775,7 @@
 	       const ExpressionApply<IntersectorTag<Intersect> > &tag,
 	       const WrappedInt<false> &)
   {
-    tag.tag().intersector_m.intersect(engine, GuardLayers<Dim>());
+    tag.tag().intersector_m.intersect(engine);
     return 0;
   }
 };
--- cvs/r2/src/Engine/Stencil.h	2004-01-14 20:08:07.000000000 +0100
+++ pooma-mpi3/r2/src/Engine/Stencil.h	2004-01-14 20:13:32.000000000 +0100
@@ -752,11 +752,14 @@

   StencilIntersector(const This_t &model)
     : domain_m(model.domain_m),
+      stencilExtent_m(model.stencilExtent_m),
       intersector_m(model.intersector_m)
   { }

-  StencilIntersector(const Interval<Dim> &domain, const Intersect &intersect)
+  StencilIntersector(const Interval<Dim> &domain, const Intersect &intersect,
+		  const GuardLayers<Dim> &stencilExtent)
     : domain_m(domain),
+      stencilExtent_m(stencilExtent),
       intersector_m(intersect)
   { }

@@ -766,6 +769,7 @@
     {
       intersector_m = model.intersector_m;
       domain_m = model.domain_m;
+      stencilExtent_m = model.stencilExtent_m;
     }
     return *this;
   }
@@ -807,14 +811,19 @@

   template<class Engine, int Dim2>
   inline
-  bool intersect(const Engine &engine, const GuardLayers<Dim2> &)
+  bool intersect(const Engine &engine, const GuardLayers<Dim2> &g,
+		  GuardLayers<Dim> &usedGuards)
   {
     intersect(engine);
+    // FIXME: accumulate used guards from intersect above and
+    // stencil extent? I.e. allow  Stencil<>(a(i-1)+a(i+1))?
+    usedGuards = stencilExtent_m;
     return true;
   }

 private:
   Interval<Dim> domain_m;
+  GuardLayers<Dim> stencilExtent_m;
   Intersect     intersector_m;
 };

@@ -833,8 +842,14 @@
 	       const ExpressionApply<IntersectorTag<Intersect> > &tag)
   {
     typedef StencilIntersector<D, Intersect> NewIntersector_t;
+    GuardLayers<D> stencilExtent;
+    for (int i=0; i<D; ++i) {
+      stencilExtent.lower(i) = engine.function().lowerExtent(i);
+      stencilExtent.upper(i) = engine.function().upperExtent(i);
+    }
     NewIntersector_t newIntersector(engine.intersectDomain(),
-				    tag.tag().intersector_m);
+				    tag.tag().intersector_m,
+				    stencilExtent);

     expressionApply(engine.expression(),
 		    IntersectorTag<NewIntersector_t>(newIntersector));
--- cvs/r2/src/Field/DiffOps/FieldStencil.h	2004-01-14 20:08:09.000000000 +0100
+++ pooma-mpi3/r2/src/Field/DiffOps/FieldStencil.h	2004-01-14 20:13:32.000000000 +0100
@@ -614,11 +617,13 @@
   // Constructors

   FieldStencilIntersector(const This_t &model)
-    : domain_m(model.domain_m), intersector_m(model.intersector_m)
+    : domain_m(model.domain_m), stencilExtent_m(model.stencilExtent_m),
+      intersector_m(model.intersector_m)
   { }

-  FieldStencilIntersector(const Domain_t &dom, const Intersect &intersect)
-    : domain_m(dom), intersector_m(intersect)
+  FieldStencilIntersector(const Domain_t &dom, const Intersect &intersect,
+		  const GuardLayers<Dim> &stencilExtent)
+    : domain_m(dom), stencilExtent_m(stencilExtent), intersector_m(intersect)
   { }

   This_t &operator=(const This_t &model)
@@ -626,6 +631,7 @@
     if (this != &model)
     {
       domain_m = model.domain_m;
+      stencilExtent_m = model.stencilExtent_m;
       intersector_m = model.intersector_m;
     }
     return *this;
@@ -662,9 +668,13 @@
   }

   template<class Engine, int Dim2>
-  inline bool intersect(const Engine &engine, const GuardLayers<Dim2> &)
+  inline bool intersect(const Engine &engine, const GuardLayers<Dim2> &,
+		        GuardLayers<Dim> &usedGuards)
   {
     intersect(engine);
+    // FIXME: accumulate used guards from intersect above and
+    // stencil extent? I.e. allow  Stencil<>(a(i-1)+a(i+1))?
+    usedGuards = stencilExtent_m;
     return true;
   }

@@ -672,6 +682,7 @@


   Interval<Dim> domain_m;
+  GuardLayers<Dim> stencilExtent_m;
   Intersect     intersector_m;
 };

@@ -699,8 +710,14 @@
     // cells results in an error in the multipatch inode view.)

     typedef FieldStencilIntersector<Dim, Intersect> NewIntersector_t;
+    GuardLayers<Dim> stencilExtent;
+    for (int i=0; i<Dim; ++i) {
+      stencilExtent.lower(i) = engine.functor().lowerExtent(i);
+      stencilExtent.upper(i) = engine.functor().upperExtent(i);
+    }
     NewIntersector_t newIntersector(engine.intersectDomain(),
-				    tag.tag().intersector_m);
+				    tag.tag().intersector_m,
+				    stencilExtent);

     expressionApply(engine.field(),
 		    IntersectorTag<NewIntersector_t>(newIntersector));
--- cvs/r2/src/Layout/GridLayout.cpp	2004-01-14 20:08:10.000000000 +0100
+++ pooma-mpi3/r2/src/Layout/GridLayout.cpp	2004-01-14 20:13:32.000000000 +0100
@@ -429,7 +436,7 @@

 		      // Now, push IDs and source into cache...

-		      this->gcFillList_m.push_back(GCFillInfo_t(gcdom, sourceID, destID));
+		      this->gcFillList_m.push_back(GCFillInfo_t(gcdom, sourceID, destID, d*2));
 		    }
 		}
 	    }
@@ -481,7 +488,7 @@

 		      // Now, push IDs and source into cache...

-		      this->gcFillList_m.push_back(GCFillInfo_t(gcdom, sourceID, destID));
+		      this->gcFillList_m.push_back(GCFillInfo_t(gcdom, sourceID, destID, d*2+1));
 		    }
 		}
 	    }
--- cvs/r2/src/Layout/LayoutBase.h	2004-01-14 20:08:12.000000000 +0100
+++ pooma-mpi3/r2/src/Layout/LayoutBase.h	2004-01-14 20:13:32.000000000 +0100
@@ -119,8 +121,8 @@

   struct GCFillInfo
   {
-    GCFillInfo(const Domain_t &dom, int ownedID, int guardID)
-    : domain_m(dom), ownedID_m(ownedID), guardID_m(guardID) { }
+    GCFillInfo(const Domain_t &dom, int ownedID, int guardID, int face=-1)
+    : domain_m(dom), ownedID_m(ownedID), guardID_m(guardID), face_m(face) { }

     // Get a CW warning about this not having a default constructor
     // when we instantiate the vector<GCFillInfo> below. This never
@@ -131,6 +133,7 @@
     Domain_t domain_m;    // guard layer domain
     int      ownedID_m;   // node ID for which domain_m is owned
     int      guardID_m;   // node ID for which domain_m is in the guards
+    int      face_m;      // destination face of the guard layer (or -1, if unknown)

     Domain_t & domain() { return domain_m;}
     int & ownedID() { return ownedID_m;}
--- cvs/r2/src/Layout/UniformGridLayout.cpp	2004-01-14 20:08:13.000000000 +0100
+++ pooma-mpi3/r2/src/Layout/UniformGridLayout.cpp	2004-01-14 20:13:32.000000000 +0100
@@ -279,7 +279,7 @@
 //-----------------------------------------------------------------------------
 //
 // template <int Dim>
-// void UniformGridLayout<Dim>::calcGCFillList()
+// void UniformGridLayoutData<Dim>::calcGCFillList()
 //
 // Calculates the cached information needed by MultiPatch Engine to
 // fill the guard cells.
@@ -370,7 +370,7 @@
 		    this->all_m[sourceID]->context() == Pooma::context() ||
  		    this->all_m[destID]->context() == Pooma::context()
 		    )
-                this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID));
+                this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID,d*2));
               }
           }

@@ -417,7 +417,7 @@
 		    this->all_m[sourceID]->context() == Pooma::context() ||
  		    this->all_m[destID]->context() == Pooma::context()
 		    )
-		  this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID));
+		  this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID,d*2+1));
               }
           }
       }
--- cvs/r2/src/Engine/MultiPatchEngine.cpp	2004-01-14 20:11:34.000000000 +0100
+++ pooma-mpi3/r2/src/Engine/MultiPatchEngine.cpp	2004-01-14 20:23:23.000000000 +0100
@@ -34,6 +34,7 @@
 #include "Engine/CompressedFraction.h"
 #include "Array/Array.h"
 #include "Tulip/ReduceOverContexts.h"
+#include "Tulip/SendReceive.h"
 #include "Threads/PoomaCSem.h"
 #include "Domain/IteratorPairDomain.h"

@@ -77,16 +78,18 @@
 Engine(const Layout_t &layout)
   : layout_m(layout),
     data_m(layout.sizeGlobal()),
-    pDirty_m(new bool(true))
+    pDirty_m(new int)
 {
   typedef typename Layout_t::Value_t Node_t;

+  setDirty();
+
   // check for correct match of PatchTag and the mapper used to make the
   // layout.
   // THIS IS A HACK! we test on the context of the first patch, and if it
   // is -1, we have a Layout made with the LocalMapper.

-#if POOMA_CHEETAH
+#if POOMA_MESSAGING

   if( layout_m.nodeListGlobal().size() > 0)
   {
@@ -247,7 +250,7 @@
   PAssert(data_m.isValid());
   if (data_m.isShared()) {
     data_m.makeOwnCopy();
-    pDirty_m = new bool(*pDirty_m);
+    pDirty_m = new int(*pDirty_m);
   }

   return *this;
@@ -261,45 +264,88 @@
 //
 //-----------------------------------------------------------------------------

+/// Guard layer assign between non-remote engines, just use the
+/// ET mechanisms
+
+template <int Dim, class T, class Tag>
+static inline
+void simpleAssign(const Array<Dim, T, Tag>& lhs,
+		  const Array<Dim, T, Tag>& rhs,
+		  const Interval<Dim>& domain)
+{
+  lhs(domain) = rhs(domain);
+}
+
+/// Guard layer assign between remote engines, use Send/Receive directly
+/// to avoid one extra copy of the data.
+
+template <int Dim, class T, class Tag>
+static inline
+void simpleAssign(const Array<Dim, T, Remote<Tag> >& lhs,
+		  const Array<Dim, T, Remote<Tag> >& rhs,
+		  const Interval<Dim>& domain)
+{
+  if (lhs.engine().owningContext() == rhs.engine().owningContext())
+    lhs(domain) = rhs(domain);
+  else {
+    typedef typename NewEngine<Engine<Dim, T, Tag>, Interval<Dim> >::Type_t ViewEngine_t;
+    if (lhs.engine().engineIsLocal())
+      Receive<ViewEngine_t>::receive(ViewEngine_t(lhs.engine().localEngine(), domain),
+				     rhs.engine().owningContext());
+    else if (rhs.engine().engineIsLocal())
+      SendReceive::send(ViewEngine_t(rhs.engine().localEngine(), domain),
+			lhs.engine().owningContext());
+  }
+}
+
 template <int Dim, class T, class LayoutTag, class PatchTag>
 void Engine<Dim, T, MultiPatch<LayoutTag,PatchTag> >::
-fillGuardsHandler(const WrappedInt<true> &) const
+fillGuardsHandler(const GuardLayers<Dim>& g, const WrappedInt<true> &) const
 {
   if (!isDirty()) return;
-
-#if POOMA_PURIFY
-
-  // This is here to remove spurious UMRs that result when un-initialized
-  // guards are copied in the following loop. All of the unitialized data
-  // is ultimately overwritten with good data, so I don't see why purify
-  // calls these UMRs in stead of unitialized memory copies, but it does.
-  // I don't do this in general since it would be slow and since T(0) is
-  // not generally valid. This does mean that fillGuards() will fail
-  // with purify for types that do not know what to do with T(0).
-
-  setGuards(T(0));
-
-#endif

+  int updated = 0;
   typename Layout_t::FillIterator_t p = layout_m.beginFillList();
-
+
   while (p != layout_m.endFillList())
     {
       int src  = p->ownedID_m;
       int dest = p->guardID_m;

-      // Create patch arrays that see the entire patch:
+      // Skip face, if not dirty.
+
+      if (isDirty(p->face_m)) {
+
+        // Check whether p->domain_m belongs to a guard face that is
+        // actually needed according to the requested guards g.
+
+	int d = p->face_m/2;
+	int guardSizeNeeded = p->face_m & 1 ? g.upper(d) : g.lower(d);
+        if (!(p->face_m != -1
+	      && guardSizeNeeded == 0)) {
+
+          // Create patch arrays that see the entire patch:

-      Array<Dim, T, PatchTag> lhs(data()[dest]), rhs(data()[src]);
+          Array<Dim, T, PatchTag> lhs(data()[dest]), rhs(data()[src]);

-      // Now do assignment from the subdomains.
+          // Now do assignment from the subdomains.
+#if POOMA_MPI
+          simpleAssign(lhs, rhs, p->domain_m);
+#else
+          lhs(p->domain_m) = rhs(p->domain_m);
+#endif
+
+	  // Mark up-to-date.
+	  updated |= 1<<p->face_m;
+
+	}
+
+      }

-      lhs(p->domain_m) = rhs(p->domain_m);
-
       ++p;
     }
-
-  *pDirty_m = false;
+
+  *pDirty_m &= ~updated;
 }


@@ -331,7 +377,7 @@
       ++p;
     }

-  *pDirty_m = true;
+  setDirty();
 }


@@ -366,7 +412,7 @@
       ++p;
     }

-  *pDirty_m = true;
+  setDirty();
 }





More information about the pooma-dev mailing list