[PATCH] Optimize guard update copy
Richard Guenther
rguenth at tat.physik.uni-tuebingen.de
Tue Dec 30 20:47:27 UTC 2003
Hi!
This patch removes the fourth of the copies done for the guard update.
Basically, in addition to the three copies I mentioned in the previous
mail, we're doing one extra copy during the RemoteView expressionApply of the
data-parallel assignment we're doing for the guard domains. Ugh. Fixed by
manually sending from and receiving into the views. This doesn't work for
Cheetah, so it is conditionalized on POOMA_MPI.
Tested as usual, ok to apply?
Richard.
2003Dec30 Richard Guenther <richard.guenther at uni-tuebingen.de>
* src/Engine/MultiPatchEngine.cpp: optimize remote to local and
local to remote copy in guard update.
===== MultiPatchEngine.cpp 1.6 vs 1.7 =====
--- 1.6/r2/src/Engine/MultiPatchEngine.cpp Tue Dec 9 12:16:07 2003
+++ 1.7/r2/src/Engine/MultiPatchEngine.cpp Thu Dec 18 16:41:50 2003
@@ -34,6 +34,7 @@
#include "Engine/CompressedFraction.h"
#include "Array/Array.h"
#include "Tulip/ReduceOverContexts.h"
+#include "Tulip/SendReceive.h"
#include "Threads/PoomaCSem.h"
#include "Domain/IteratorPairDomain.h"
@@ -261,6 +262,40 @@
//
//-----------------------------------------------------------------------------
+/// Guard layer assign between non-remote engines, just use the
+/// ET mechanisms.
+///
+/// Assigns the view of rhs over domain to the view of lhs over the same
+/// domain with a single data-parallel assignment expression.
+///
+/// lhs    - destination array; it is written through although it is
+///          passed by const reference (presumably Array views share the
+///          underlying data -- confirm against Array's semantics).
+/// rhs    - source array.
+/// domain - interval used to take the view on both sides.
+
+template <int Dim, class T, class Tag>
+static inline
+void simpleAssign(const Array<Dim, T, Tag>& lhs,
+ const Array<Dim, T, Tag>& rhs,
+ const Interval<Dim>& domain)
+{
+ lhs(domain) = rhs(domain);
+}
+
+/// Guard layer assign between remote engines, use Send/Receive directly
+/// to avoid one extra copy of the data.
+///
+/// lhs    - destination array with a Remote<Tag> engine.
+/// rhs    - source array with a Remote<Tag> engine.
+/// domain - interval to transfer; the same interval is used on both sides.
+///
+/// When both engines report the same owning context the ordinary
+/// data-parallel assignment is used. Otherwise this context receives
+/// into lhs if the lhs engine is local, or sends from rhs if the rhs
+/// engine is local; if neither engine is local nothing is done here.
+
+template <int Dim, class T, class Tag>
+static inline
+void simpleAssign(const Array<Dim, T, Remote<Tag> >& lhs,
+ const Array<Dim, T, Remote<Tag> >& rhs,
+ const Interval<Dim>& domain)
+{
+ // Same owner on both sides: fall back to the plain view assignment.
+ if (lhs.engine().owningContext() == rhs.engine().owningContext())
+ lhs(domain) = rhs(domain);
+ else {
+ // Engine type NewEngine yields for Engine<Dim, T, Tag> with an
+ // Interval<Dim>; constructed below from a local engine and domain.
+ typedef typename NewEngine<Engine<Dim, T, Tag>, Interval<Dim> >::Type_t ViewEngine_t;
+ // Destination is local: receive into the lhs view from the context
+ // that owns rhs.
+ if (lhs.engine().engineIsLocal())
+ Receive<ViewEngine_t>::receive(ViewEngine_t(lhs.engine().localEngine(), domain),
+ rhs.engine().owningContext());
+ // Source is local: send the rhs view to the context that owns lhs.
+ // NOTE(review): presumably the owning context of lhs executes the
+ // matching receive branch above -- confirm.
+ else if (rhs.engine().engineIsLocal())
+ SendReceive::send(ViewEngine_t(rhs.engine().localEngine(), domain),
+ lhs.engine().owningContext());
+ }
+}
+
template <int Dim, class T, class LayoutTag, class PatchTag>
void Engine<Dim, T, MultiPatch<LayoutTag,PatchTag> >::
fillGuardsHandler(const WrappedInt<true> &) const
@@ -293,8 +328,12 @@
Array<Dim, T, PatchTag> lhs(data()[dest]), rhs(data()[src]);
// Now do assignment from the subdomains.
-
+ // Optimized lhs(p->domain_m) = rhs(p->domain_m);
+#if POOMA_MPI
+ simpleAssign(lhs, rhs, p->domain_m);
+#else
lhs(p->domain_m) = rhs(p->domain_m);
+#endif
++p;
}
More information about the pooma-dev
mailing list