[pooma-dev] [PATCH] Fix PackUnpack bug
Jeffrey D. Oldham
oldham at codesourcery.com
Wed Aug 25 01:05:54 UTC 2004
Richard Guenther wrote:
>PackUnpack.h pack/unpack use a functor with LoopApplyEvaluator
>that violate the assumption of independent iterations. Thus
>the Field/LocalPatch testcase fails for OpenMP.
>
>We have the required infrastructure for ordered LoopApply in
>RemoteEngine.h. This patch uses that, but before needs to fix
>it as it works with zeroBased domain only due to a bug.
>
>
I do not understand this last sentence. I also do not understand the
patch, mainly because of my ignorance.
Each portion of the patch is internally consistent, but why should a
local patch ignore its field?
>Tested partly with OpenMP, MPI and serial.
>
>Ok?
>
>Richard.
>
>
>2004Aug24 Richard Guenther <richard.guenther at uni-tuebingen.de>
>
> * src/Functions/PackUnpack.h: use EngineBlockSerialize from
> RemoteEngine.h.
> src/Engine/RemoteEngine.h: fix EngineBlockSerialize for
> non-zerobased domains.
>
>
>------------------------------------------------------------------------
>
>Index: Engine/RemoteEngine.h
>===================================================================
>RCS file: /home/pooma/Repository/r2/src/Engine/RemoteEngine.h,v
>retrieving revision 1.42
>diff -u -u -r1.42 RemoteEngine.h
>--- Engine/RemoteEngine.h 19 Jan 2004 22:04:33 -0000 1.42
>+++ Engine/RemoteEngine.h 24 Aug 2004 14:23:53 -0000
>@@ -1055,8 +1055,8 @@
> {
> CTAssert(Domain::unitStride == 1);
> int f0 = domain[0].first();
>- int e0 = domain[0].length();
>- for (int i0 = f0; i0<e0; ++i0)
>+ int e0 = domain[0].last();
>+ for (int i0 = f0; i0<=e0; ++i0)
> op(engine(i0));
> return op.total_m;
> }
>@@ -1068,10 +1068,10 @@
> CTAssert(Domain::unitStride == 1);
> int f0 = domain[0].first();
> int f1 = domain[1].first();
>- int e0 = domain[0].length();
>- int e1 = domain[1].length();
>- for (int i1 = f1; i1<e1; ++i1)
>- for (int i0 = f0; i0<e0; ++i0)
>+ int e0 = domain[0].last();
>+ int e1 = domain[1].last();
>+ for (int i1 = f1; i1<=e1; ++i1)
>+ for (int i0 = f0; i0<=e0; ++i0)
> op(engine(i0,i1));
> return op.total_m;
> }
>@@ -1084,12 +1084,12 @@
> int f0 = domain[0].first();
> int f1 = domain[1].first();
> int f2 = domain[2].first();
>- int e0 = domain[0].length();
>- int e1 = domain[1].length();
>- int e2 = domain[2].length();
>- for (int i2 = f2; i2<e2; ++i2)
>- for (int i1 = f1; i1<e1; ++i1)
>- for (int i0 = f0; i0<e0; ++i0)
>+ int e0 = domain[0].last();
>+ int e1 = domain[1].last();
>+ int e2 = domain[2].last();
>+ for (int i2 = f2; i2<=e2; ++i2)
>+ for (int i1 = f1; i1<=e1; ++i1)
>+ for (int i0 = f0; i0<=e0; ++i0)
> op(engine(i0,i1,i2));
> return op.total_m;
> }
>@@ -1103,14 +1103,14 @@
> int f1 = domain[1].first();
> int f2 = domain[2].first();
> int f3 = domain[3].first();
>- int e0 = domain[0].length();
>- int e1 = domain[1].length();
>- int e2 = domain[2].length();
>- int e3 = domain[3].length();
>- for (int i3 = f3; i3<e3; ++i3)
>- for (int i2 = f2; i2<e2; ++i2)
>- for (int i1 = f1; i1<e1; ++i1)
>- for (int i0 = f0; i0<e0; ++i0)
>+ int e0 = domain[0].last();
>+ int e1 = domain[1].last();
>+ int e2 = domain[2].last();
>+ int e3 = domain[3].last();
>+ for (int i3 = f3; i3<=e3; ++i3)
>+ for (int i2 = f2; i2<=e2; ++i2)
>+ for (int i1 = f1; i1<=e1; ++i1)
>+ for (int i0 = f0; i0<=e0; ++i0)
> op(engine(i0,i1,i2,i3));
> return op.total_m;
> }
>@@ -1125,16 +1125,16 @@
> int f2 = domain[2].first();
> int f3 = domain[3].first();
> int f4 = domain[4].first();
>- int e0 = domain[0].length();
>- int e1 = domain[1].length();
>- int e2 = domain[2].length();
>- int e3 = domain[3].length();
>- int e4 = domain[4].length();
>- for (int i4 = f4; i4<e4; ++i4)
>- for (int i3 = f3; i3<e3; ++i3)
>- for (int i2 = f2; i2<e2; ++i2)
>- for (int i1 = f1; i1<e1; ++i1)
>- for (int i0 = f0; i0<e0; ++i0)
>+ int e0 = domain[0].last();
>+ int e1 = domain[1].last();
>+ int e2 = domain[2].last();
>+ int e3 = domain[3].last();
>+ int e4 = domain[4].last();
>+ for (int i4 = f4; i4<=e4; ++i4)
>+ for (int i3 = f3; i3<=e3; ++i3)
>+ for (int i2 = f2; i2<=e2; ++i2)
>+ for (int i1 = f1; i1<=e1; ++i1)
>+ for (int i0 = f0; i0<=e0; ++i0)
> op(engine(i0,i1,i2,i3,i4));
> return op.total_m;
> }
>@@ -1150,18 +1150,18 @@
> int f3 = domain[3].first();
> int f4 = domain[4].first();
> int f5 = domain[5].first();
>- int e0 = domain[0].length();
>- int e1 = domain[1].length();
>- int e2 = domain[2].length();
>- int e3 = domain[3].length();
>- int e4 = domain[4].length();
>- int e5 = domain[5].length();
>- for (int i5 = f5; i5<e5; ++i5)
>- for (int i4 = f4; i4<e4; ++i4)
>- for (int i3 = f3; i3<e3; ++i3)
>- for (int i2 = f2; i2<e2; ++i2)
>- for (int i1 = f1; i1<e1; ++i1)
>- for (int i0 = f0; i0<e0; ++i0)
>+ int e0 = domain[0].last();
>+ int e1 = domain[1].last();
>+ int e2 = domain[2].last();
>+ int e3 = domain[3].last();
>+ int e4 = domain[4].last();
>+ int e5 = domain[5].last();
>+ for (int i5 = f5; i5<=e5; ++i5)
>+ for (int i4 = f4; i4<=e4; ++i4)
>+ for (int i3 = f3; i3<=e3; ++i3)
>+ for (int i2 = f2; i2<=e2; ++i2)
>+ for (int i1 = f1; i1<=e1; ++i1)
>+ for (int i0 = f0; i0<=e0; ++i0)
> op(engine(i0,i1,i2,i3,i4,i5));
> return op.total_m;
> }
>@@ -1178,20 +1178,20 @@
> int f4 = domain[4].first();
> int f5 = domain[5].first();
> int f6 = domain[6].first();
>- int e0 = domain[0].length();
>- int e1 = domain[1].length();
>- int e2 = domain[2].length();
>- int e3 = domain[3].length();
>- int e4 = domain[4].length();
>- int e5 = domain[5].length();
>- int e6 = domain[6].length();
>- for (int i6 = f6; i6<e6; ++i6)
>- for (int i5 = f5; i5<e5; ++i5)
>- for (int i4 = f4; i4<e4; ++i4)
>- for (int i3 = f3; i3<e3; ++i3)
>- for (int i2 = f2; i2<e2; ++i2)
>- for (int i1 = f1; i1<e1; ++i1)
>- for (int i0 = f0; i0<e0; ++i0)
>+ int e0 = domain[0].last();
>+ int e1 = domain[1].last();
>+ int e2 = domain[2].last();
>+ int e3 = domain[3].last();
>+ int e4 = domain[4].last();
>+ int e5 = domain[5].last();
>+ int e6 = domain[6].last();
>+ for (int i6 = f6; i6<=e6; ++i6)
>+ for (int i5 = f5; i5<=e5; ++i5)
>+ for (int i4 = f4; i4<=e4; ++i4)
>+ for (int i3 = f3; i3<=e3; ++i3)
>+ for (int i2 = f2; i2<=e2; ++i2)
>+ for (int i1 = f1; i1<=e1; ++i1)
>+ for (int i0 = f0; i0<=e0; ++i0)
> op(engine(i0,i1,i2,i3,i4,i5,i6));
> return op.total_m;
> }
>Index: Functions/PackUnpack.h
>===================================================================
>RCS file: /home/pooma/Repository/r2/src/Functions/PackUnpack.h,v
>retrieving revision 1.5
>diff -u -u -r1.5 PackUnpack.h
>--- Functions/PackUnpack.h 25 Oct 2003 12:06:55 -0000 1.5
>+++ Functions/PackUnpack.h 24 Aug 2004 14:23:53 -0000
>@@ -59,6 +59,7 @@
> //-----------------------------------------------------------------------------
>
> #include "Utilities/RefCountedBlockPtr.h"
>+#include "Engine/RemoteEngine.h"
> #include "Pooma/Pooma.h"
>
> //-----------------------------------------------------------------------------
>@@ -93,38 +94,19 @@
> {
> typedef typename InputField::Element_t Element_t;
>
>- PackLocalPatches(const InputField &field,
>- RefCountedBlockPtr<Element_t> block)
>- : field_m(field), block_m(block)
>+ PackLocalPatches(RefCountedBlockPtr<Element_t> block)
>+ : block_m(block)
> {
> }
>
>- void operator()(int i0) const
>+ inline void operator()(const Element_t &t)
> {
>- *block_m = field_m.read(i0);
>+ *block_m = t;
> ++block_m;
> }
>
>- void operator()(int i0, int i1) const
>- {
>- *block_m = field_m.read(i0, i1);
>- ++block_m;
>- }
>-
>- void operator()(int i0, int i1, int i2) const
>- {
>- *block_m = field_m.read(i0, i1, i2);
>- ++block_m;
>- }
>-
>- void operator()(int i0, int i1, int i2, int i3) const
>- {
>- *block_m = field_m.read(i0, i1, i2, i3);
>- ++block_m;
>- }
>-
>- InputField field_m;
>- mutable RefCountedBlockPtr<Element_t> block_m;
>+ RefCountedBlockPtr<Element_t> block_m;
>+ int total_m;
> };
>
> template<class InputField>
>@@ -149,8 +131,8 @@
> {
> typedef typename Patch<InputField>::Type_t PatchField_t;
> PatchField_t patch = field.patchLocal(i);
>- PackLocalPatches<PatchField_t> packFunctor(patch, current);
>- LoopApplyEvaluator::evaluate(packFunctor, patch.domain());
>+ PackLocalPatches<PatchField_t> packFunctor(current);
>+ EngineBlockSerialize::apply(packFunctor, patch, patch.domain());
> current += patch.domain().size();
> }
>
>@@ -168,44 +150,27 @@
> {
> typedef typename InputField::Element_t Element_t;
>
>- UnPackLocalPatches(const InputField &field,
>- RefCountedBlockPtr<Element_t> block)
>- : field_m(field), block_m(block)
>- {
>- }
>-
>- void operator()(int i0) const
>+ UnPackLocalPatches(RefCountedBlockPtr<Element_t> block)
>+ : block_m(block)
> {
>- field_m(i0) = *block_m;
>- ++block_m;
>- }
>-
>- void operator()(int i0, int i1) const
>- {
>- field_m(i0, i1) = *block_m;
>- ++block_m;
> }
>
>- void operator()(int i0, int i1, int i2) const
>+ inline void operator()(Element_t &t)
> {
>- field_m(i0, i1, i2) = *block_m;
>+ t = *block_m;
> ++block_m;
> }
>
>- void operator()(int i0, int i1, int i2, int i3) const
>- {
>- field_m(i0, i1, i2, i3) = *block_m;
>- ++block_m;
>- }
>-
>- InputField field_m;
>- mutable RefCountedBlockPtr<Element_t> block_m;
>+ RefCountedBlockPtr<Element_t> block_m;
>+ int total_m;
> };
>
> template<class InputField, class T>
> void
> unpack(const InputField &field, RefCountedBlockPtr<T> block)
> {
>+ Pooma::blockAndEvaluate();
>+
> int i;
>
> RefCountedBlockPtr<T> current = block;
>@@ -214,8 +179,8 @@
> {
> typedef typename Patch<InputField>::Type_t PatchField_t;
> PatchField_t patch = field.patchLocal(i);
>- UnPackLocalPatches<PatchField_t> unpackFunctor(patch, current);
>- LoopApplyEvaluator::evaluate(unpackFunctor, patch.physicalDomain());
>+ UnPackLocalPatches<PatchField_t> unpackFunctor(current);
>+ EngineBlockSerialize::apply(unpackFunctor, patch, patch.physicalDomain());
> current += patch.physicalDomain().size();
> }
> }
>
>
--
Jeffrey D. Oldham
oldham at codesourcery.com
More information about the pooma-dev
mailing list