[pooma-dev] [PATCH] Fix PackUnpack bug

Jeffrey D. Oldham oldham at codesourcery.com
Wed Aug 25 01:05:54 UTC 2004


Richard Guenther wrote:

>PackUnpack.h pack/unpack use a functor with LoopApplyEvaluator
>that violate the assumption of independent iterations.  Thus
>the Field/LocalPatch testcase fails for OpenMP.
>
>We have the required infrastructure for ordered LoopApply in
>RemoteEngine.h.  This patch uses that, but before needs to fix
>it as it works with zeroBased domain only due to a bug.
>  
>
I do not understand this last sentence.  I also do not understand the 
patch, mainly because of my ignorance.

Each portion of the patch is internally consistent, but why should a 
local patch ignore its field?

>Tested partly with OpenMP, MPI and serial.
>
>Ok?
>
>Richard.
>
>
>2004Aug24  Richard Guenther <richard.guenther at uni-tuebingen.de>
>
>	* src/Functions/PackUnpack.h: use EngineBlockSerialize from
>	RemoteEngine.h.
>	src/Engine/RemoteEngine.h: fix EngineBlockSerialize for
>	non-zerobased domains.
>  
>
>------------------------------------------------------------------------
>
>Index: Engine/RemoteEngine.h
>===================================================================
>RCS file: /home/pooma/Repository/r2/src/Engine/RemoteEngine.h,v
>retrieving revision 1.42
>diff -u -u -r1.42 RemoteEngine.h
>--- Engine/RemoteEngine.h	19 Jan 2004 22:04:33 -0000	1.42
>+++ Engine/RemoteEngine.h	24 Aug 2004 14:23:53 -0000
>@@ -1055,8 +1055,8 @@
>     {
>       CTAssert(Domain::unitStride == 1);
>       int f0 = domain[0].first();
>-      int e0 = domain[0].length();
>-      for (int i0 = f0; i0<e0; ++i0)
>+      int e0 = domain[0].last();
>+      for (int i0 = f0; i0<=e0; ++i0)
> 	op(engine(i0));
>       return op.total_m;
>     }
>@@ -1068,10 +1068,10 @@
>       CTAssert(Domain::unitStride == 1);
>       int f0 = domain[0].first();
>       int f1 = domain[1].first();
>-      int e0 = domain[0].length();
>-      int e1 = domain[1].length();
>-      for (int i1 = f1; i1<e1; ++i1)
>-	for (int i0 = f0; i0<e0; ++i0)
>+      int e0 = domain[0].last();
>+      int e1 = domain[1].last();
>+      for (int i1 = f1; i1<=e1; ++i1)
>+	for (int i0 = f0; i0<=e0; ++i0)
> 	  op(engine(i0,i1));
>       return op.total_m;
>     }
>@@ -1084,12 +1084,12 @@
>       int f0 = domain[0].first();
>       int f1 = domain[1].first();
>       int f2 = domain[2].first();
>-      int e0 = domain[0].length();
>-      int e1 = domain[1].length();
>-      int e2 = domain[2].length();
>-      for (int i2 = f2; i2<e2; ++i2)
>-	for (int i1 = f1; i1<e1; ++i1)
>-	  for (int i0 = f0; i0<e0; ++i0)
>+      int e0 = domain[0].last();
>+      int e1 = domain[1].last();
>+      int e2 = domain[2].last();
>+      for (int i2 = f2; i2<=e2; ++i2)
>+	for (int i1 = f1; i1<=e1; ++i1)
>+	  for (int i0 = f0; i0<=e0; ++i0)
> 	    op(engine(i0,i1,i2));
>       return op.total_m;
>     }
>@@ -1103,14 +1103,14 @@
>       int f1 = domain[1].first();
>       int f2 = domain[2].first();
>       int f3 = domain[3].first();
>-      int e0 = domain[0].length();
>-      int e1 = domain[1].length();
>-      int e2 = domain[2].length();
>-      int e3 = domain[3].length();
>-      for (int i3 = f3; i3<e3; ++i3)
>-	for (int i2 = f2; i2<e2; ++i2)
>-	  for (int i1 = f1; i1<e1; ++i1)
>-	    for (int i0 = f0; i0<e0; ++i0)
>+      int e0 = domain[0].last();
>+      int e1 = domain[1].last();
>+      int e2 = domain[2].last();
>+      int e3 = domain[3].last();
>+      for (int i3 = f3; i3<=e3; ++i3)
>+	for (int i2 = f2; i2<=e2; ++i2)
>+	  for (int i1 = f1; i1<=e1; ++i1)
>+	    for (int i0 = f0; i0<=e0; ++i0)
> 	      op(engine(i0,i1,i2,i3));
>       return op.total_m;
>     }
>@@ -1125,16 +1125,16 @@
>       int f2 = domain[2].first();
>       int f3 = domain[3].first();
>       int f4 = domain[4].first();
>-      int e0 = domain[0].length();
>-      int e1 = domain[1].length();
>-      int e2 = domain[2].length();
>-      int e3 = domain[3].length();
>-      int e4 = domain[4].length();
>-      for (int i4 = f4; i4<e4; ++i4)
>-	for (int i3 = f3; i3<e3; ++i3)
>-	  for (int i2 = f2; i2<e2; ++i2)
>-	    for (int i1 = f1; i1<e1; ++i1)
>-	      for (int i0 = f0; i0<e0; ++i0)
>+      int e0 = domain[0].last();
>+      int e1 = domain[1].last();
>+      int e2 = domain[2].last();
>+      int e3 = domain[3].last();
>+      int e4 = domain[4].last();
>+      for (int i4 = f4; i4<=e4; ++i4)
>+	for (int i3 = f3; i3<=e3; ++i3)
>+	  for (int i2 = f2; i2<=e2; ++i2)
>+	    for (int i1 = f1; i1<=e1; ++i1)
>+	      for (int i0 = f0; i0<=e0; ++i0)
> 		op(engine(i0,i1,i2,i3,i4));
>       return op.total_m;
>     }
>@@ -1150,18 +1150,18 @@
>       int f3 = domain[3].first();
>       int f4 = domain[4].first();
>       int f5 = domain[5].first();
>-      int e0 = domain[0].length();
>-      int e1 = domain[1].length();
>-      int e2 = domain[2].length();
>-      int e3 = domain[3].length();
>-      int e4 = domain[4].length();
>-      int e5 = domain[5].length();
>-      for (int i5 = f5; i5<e5; ++i5)
>-	for (int i4 = f4; i4<e4; ++i4)
>-	  for (int i3 = f3; i3<e3; ++i3)
>-	    for (int i2 = f2; i2<e2; ++i2)
>-	      for (int i1 = f1; i1<e1; ++i1)
>-		for (int i0 = f0; i0<e0; ++i0)
>+      int e0 = domain[0].last();
>+      int e1 = domain[1].last();
>+      int e2 = domain[2].last();
>+      int e3 = domain[3].last();
>+      int e4 = domain[4].last();
>+      int e5 = domain[5].last();
>+      for (int i5 = f5; i5<=e5; ++i5)
>+	for (int i4 = f4; i4<=e4; ++i4)
>+	  for (int i3 = f3; i3<=e3; ++i3)
>+	    for (int i2 = f2; i2<=e2; ++i2)
>+	      for (int i1 = f1; i1<=e1; ++i1)
>+		for (int i0 = f0; i0<=e0; ++i0)
> 		  op(engine(i0,i1,i2,i3,i4,i5));
>       return op.total_m;
>     }
>@@ -1178,20 +1178,20 @@
>   int f4 = domain[4].first();
>   int f5 = domain[5].first();
>   int f6 = domain[6].first();
>-  int e0 = domain[0].length();
>-  int e1 = domain[1].length();
>-  int e2 = domain[2].length();
>-  int e3 = domain[3].length();
>-  int e4 = domain[4].length();
>-  int e5 = domain[5].length();
>-  int e6 = domain[6].length();
>-  for (int i6 = f6; i6<e6; ++i6)
>-    for (int i5 = f5; i5<e5; ++i5)
>-      for (int i4 = f4; i4<e4; ++i4)
>-	for (int i3 = f3; i3<e3; ++i3)
>-	  for (int i2 = f2; i2<e2; ++i2)
>-	    for (int i1 = f1; i1<e1; ++i1)
>-	      for (int i0 = f0; i0<e0; ++i0)
>+  int e0 = domain[0].last();
>+  int e1 = domain[1].last();
>+  int e2 = domain[2].last();
>+  int e3 = domain[3].last();
>+  int e4 = domain[4].last();
>+  int e5 = domain[5].last();
>+  int e6 = domain[6].last();
>+  for (int i6 = f6; i6<=e6; ++i6)
>+    for (int i5 = f5; i5<=e5; ++i5)
>+      for (int i4 = f4; i4<=e4; ++i4)
>+	for (int i3 = f3; i3<=e3; ++i3)
>+	  for (int i2 = f2; i2<=e2; ++i2)
>+	    for (int i1 = f1; i1<=e1; ++i1)
>+	      for (int i0 = f0; i0<=e0; ++i0)
> 		op(engine(i0,i1,i2,i3,i4,i5,i6));
>   return op.total_m;
> }
>Index: Functions/PackUnpack.h
>===================================================================
>RCS file: /home/pooma/Repository/r2/src/Functions/PackUnpack.h,v
>retrieving revision 1.5
>diff -u -u -r1.5 PackUnpack.h
>--- Functions/PackUnpack.h	25 Oct 2003 12:06:55 -0000	1.5
>+++ Functions/PackUnpack.h	24 Aug 2004 14:23:53 -0000
>@@ -59,6 +59,7 @@
> //-----------------------------------------------------------------------------
> 
> #include "Utilities/RefCountedBlockPtr.h"
>+#include "Engine/RemoteEngine.h"
> #include "Pooma/Pooma.h"
> 
> //-----------------------------------------------------------------------------
>@@ -93,38 +94,19 @@
> {
>   typedef typename InputField::Element_t Element_t;
> 
>-  PackLocalPatches(const InputField &field,
>-                   RefCountedBlockPtr<Element_t> block)
>-    : field_m(field), block_m(block)
>+  PackLocalPatches(RefCountedBlockPtr<Element_t> block)
>+    : block_m(block)
>   {
>   }
> 
>-  void operator()(int i0) const
>+  inline void operator()(const Element_t &t)
>   {
>-    *block_m = field_m.read(i0);
>+    *block_m = t;
>     ++block_m;
>   }
> 
>-  void operator()(int i0, int i1) const
>-  {
>-    *block_m = field_m.read(i0, i1);
>-    ++block_m;
>-  }
>-
>-  void operator()(int i0, int i1, int i2) const
>-  {
>-    *block_m = field_m.read(i0, i1, i2);
>-    ++block_m;
>-  }
>-
>-  void operator()(int i0, int i1, int i2, int i3) const
>-  {
>-    *block_m = field_m.read(i0, i1, i2, i3);
>-    ++block_m;
>-  }
>-
>-  InputField field_m;
>-  mutable RefCountedBlockPtr<Element_t> block_m;
>+  RefCountedBlockPtr<Element_t> block_m;
>+  int total_m;
> };
> 
> template<class InputField>
>@@ -149,8 +131,8 @@
>   {
>     typedef typename Patch<InputField>::Type_t PatchField_t;
>     PatchField_t patch = field.patchLocal(i);
>-    PackLocalPatches<PatchField_t> packFunctor(patch, current);
>-    LoopApplyEvaluator::evaluate(packFunctor, patch.domain());
>+    PackLocalPatches<PatchField_t> packFunctor(current);
>+    EngineBlockSerialize::apply(packFunctor, patch, patch.domain());
>     current += patch.domain().size();
>   }
> 
>@@ -168,44 +150,27 @@
> {
>   typedef typename InputField::Element_t Element_t;
> 
>-  UnPackLocalPatches(const InputField &field,
>-                     RefCountedBlockPtr<Element_t> block)
>-    : field_m(field), block_m(block)
>-  {
>-  }
>-
>-  void operator()(int i0) const
>+  UnPackLocalPatches(RefCountedBlockPtr<Element_t> block)
>+    : block_m(block)
>   {
>-    field_m(i0) = *block_m;
>-    ++block_m;
>-  }
>-
>-  void operator()(int i0, int i1) const
>-  {
>-    field_m(i0, i1) = *block_m;
>-    ++block_m;
>   }
> 
>-  void operator()(int i0, int i1, int i2) const
>+  inline void operator()(Element_t &t)
>   {
>-    field_m(i0, i1, i2) = *block_m;
>+    t = *block_m;
>     ++block_m;
>   }
> 
>-  void operator()(int i0, int i1, int i2, int i3) const
>-  {
>-    field_m(i0, i1, i2, i3) = *block_m;
>-    ++block_m;
>-  }
>-
>-  InputField field_m;
>-  mutable RefCountedBlockPtr<Element_t> block_m;
>+  RefCountedBlockPtr<Element_t> block_m;
>+  int total_m;
> };
> 
> template<class InputField, class T>
> void
> unpack(const InputField &field, RefCountedBlockPtr<T> block)
> {
>+  Pooma::blockAndEvaluate();
>+
>   int i;
> 
>   RefCountedBlockPtr<T> current = block;
>@@ -214,8 +179,8 @@
>   {
>     typedef typename Patch<InputField>::Type_t PatchField_t;
>     PatchField_t patch = field.patchLocal(i);
>-    UnPackLocalPatches<PatchField_t> unpackFunctor(patch, current);
>-    LoopApplyEvaluator::evaluate(unpackFunctor, patch.physicalDomain());
>+    UnPackLocalPatches<PatchField_t> unpackFunctor(current);
>+    EngineBlockSerialize::apply(unpackFunctor, patch, patch.physicalDomain());
>     current += patch.physicalDomain().size();
>   }
> }
>  
>


-- 
Jeffrey D. Oldham
oldham at codesourcery.com




More information about the pooma-dev mailing list