[PATCH] Reduction cleanup (2nd try)
Richard Guenther
rguenth at tat.physik.uni-tuebingen.de
Sun Oct 26 20:34:22 UTC 2003
Hi!
The following patch cleans up the reduction evaluator by introducing
a ReductionTraits<Op, T> class that specifies the neutral element for a
reduction operation. This enables "trivial" OpenMP parallelization of
reductions (patch to come). It also simplifies the loop structure of
the reductions and thus exposes more optimization opportunities to the
compiler. This second approach does not prevent the use of arbitrarily
complex types; it only requires an appropriate specialization of
ReductionTraits. The only open question is the default specializations
for OpBitwiseOrAssign and OpBitwiseAndAssign, which construct from 0ULL
and ~0ULL - any better ideas?
Reductions tested on ppc, full test tomorrow.
Ok?
Richard.
2003Oct26 Richard Guenther <richard.guenther at uni-tuebingen.de>
* src/Evaluator/ReductionEvaluator.h: simplify reduction loops
by introducing ReductionTraits<Op, T> class.
===== ReductionEvaluator.h 1.1 vs edited =====
--- 1.1/r2/src/Evaluator/ReductionEvaluator.h Mon May 13 17:47:34 2002
+++ edited/ReductionEvaluator.h Sun Oct 26 21:22:58 2003
@@ -63,6 +63,58 @@
template<class KernelTag>
struct ReductionEvaluator;
+/**
+ * Traits class defining the neutral element for type T under operation Op.
+ * Primary template is empty; specialize per Op (and possibly T) with neutral().
+ */
+
+template<class Op, class T>
+struct ReductionTraits {
+};
+
+template<class T>
+struct ReductionTraits<OpAddAssign, T> {
+ static inline T neutral() { return static_cast<T>(0); } // 0 is the additive identity: x + 0 == x
+};
+
+template<class T>
+struct ReductionTraits<OpMultiplyAssign, T> {
+ static inline T neutral() { return static_cast<T>(1); } // 1 is the multiplicative identity: x * 1 == x
+};
+
+template<class T>
+struct ReductionTraits<FnMinAssign, T> {
+ static inline T neutral() { return std::numeric_limits<T>::max(); } // largest finite T: min(x, max()) == x for finite x
+};
+
+/**
+ * Neutral element for maximum reductions: the lowest finite value of T.
+ * For floating point types std::numeric_limits<T>::min() is the smallest
+ * positive normalized value, not the most negative one, so min() is only
+ * used for integer types; for all other types -max() is returned.
+ */
+template<class T>
+struct ReductionTraits<FnMaxAssign, T> {
+ static inline T neutral() {
+   // Both branches are compiled for every T; only the selected one is evaluated.
+   return std::numeric_limits<T>::is_integer
+     ? std::numeric_limits<T>::min()
+     : static_cast<T>(-std::numeric_limits<T>::max());
+ }
+};
+
+template<class T>
+struct ReductionTraits<FnOrAssign, T> {
+ static inline T neutral() { return static_cast<T>(false); } // false is the identity of logical OR: (a || false) == a
+};
+
+template<class T>
+struct ReductionTraits<FnAndAssign, T> {
+ static inline T neutral() { return static_cast<T>(true); } // true is the identity of logical AND: (a && true) == a
+};
+
+template<class T>
+struct ReductionTraits<OpBitwiseOrAssign, T> {
+ static inline T neutral() { return static_cast<T>(0ULL); } // all bits clear: (x | 0) == x
+};
+
+template<class T>
+struct ReductionTraits<OpBitwiseAndAssign, T> {
+ static inline T neutral() { return static_cast<T>(~0ULL); } // all bits of unsigned long long set, then converted to T; NOTE(review): all-ones only if T is no wider than unsigned long long
+};
+
+
+
+
//-----------------------------------------------------------------------------
// The point of this class is to input an expression with the
// 'evaluate' member function and reduce it by looping over the
@@ -127,8 +179,8 @@
Expr localExpr(e);
int e0 = domain[0].length();
- T answer(localExpr.read(0));
- for (int i0 = 1; i0 < e0; ++i0)
+ T answer = ReductionTraits<Op, T>::neutral();
+ for (int i0 = 0; i0 < e0; ++i0)
op(answer, localExpr.read(i0));
ret = answer;
@@ -145,22 +197,10 @@
int e0 = domain[0].length();
int e1 = domain[1].length();
- int i00;
- bool firstLoop = true;
-
- T answer(localExpr.read(0, 0));
+ T answer = ReductionTraits<Op, T>::neutral();
for (int i1 = 0; i1 < e1; ++i1)
- {
- if (firstLoop)
- {
- firstLoop = false;
- i00 = 1;
- }
- else
- i00 = 0;
- for (int i0 = i00; i0 < e0; ++i0)
- op(answer, localExpr.read(i0, i1));
- }
+ for (int i0 = 0; i0 < e0; ++i0)
+ op(answer, localExpr.read(i0, i1));
ret = answer;
}
@@ -177,24 +217,12 @@
int e0 = domain[0].length();
int e1 = domain[1].length();
int e2 = domain[2].length();
-
- int i00;
- bool firstLoop = true;
- T answer(localExpr.read(0, 0, 0));
+ T answer = ReductionTraits<Op, T>::neutral();
for (int i2 = 0; i2 < e2; ++i2)
for (int i1 = 0; i1 < e1; ++i1)
- {
- if (firstLoop)
- {
- firstLoop = false;
- i00 = 1;
- }
- else
- i00 = 0;
- for (int i0 = i00; i0 < e0; ++i0)
- op(answer, localExpr.read(i0, i1, i2));
- }
+ for (int i0 = 0; i0 < e0; ++i0)
+ op(answer, localExpr.read(i0, i1, i2));
ret = answer;
}
@@ -213,25 +241,13 @@
int e1 = domain[1].length();
int e2 = domain[2].length();
int e3 = domain[3].length();
-
- int i00;
- bool firstLoop = true;
- T answer(localExpr.read(0, 0, 0, 0));
+ T answer = ReductionTraits<Op, T>::neutral();
for (int i3 = 0; i3 < e3; ++i3)
for (int i2 = 0; i2 < e2; ++i2)
for (int i1 = 0; i1 < e1; ++i1)
- {
- if (firstLoop)
- {
- firstLoop = false;
- i00 = 1;
- }
- else
- i00 = 0;
- for (int i0 = i00; i0 < e0; ++i0)
- op(answer, localExpr.read(i0, i1, i2, i3));
- }
+ for (int i0 = 0; i0 < e0; ++i0)
+ op(answer, localExpr.read(i0, i1, i2, i3));
ret = answer;
}
@@ -252,26 +268,14 @@
int e2 = domain[2].length();
int e3 = domain[3].length();
int e4 = domain[4].length();
-
- int i00;
- bool firstLoop = true;
- T answer(localExpr.read(0, 0, 0, 0, 0));
+ T answer = ReductionTraits<Op, T>::neutral();
for (int i4 = 0; i4 < e4; ++i4)
for (int i3 = 0; i3 < e3; ++i3)
for (int i2 = 0; i2 < e2; ++i2)
for (int i1 = 0; i1 < e1; ++i1)
- {
- if (firstLoop)
- {
- firstLoop = false;
- i00 = 1;
- }
- else
- i00 = 0;
- for (int i0 = i00; i0 < e0; ++i0)
- op(answer, localExpr.read(i0, i1, i2, i3, i4));
- }
+ for (int i0 = 0; i0 < e0; ++i0)
+ op(answer, localExpr.read(i0, i1, i2, i3, i4));
ret = answer;
}
@@ -294,27 +298,15 @@
int e3 = domain[3].length();
int e4 = domain[4].length();
int e5 = domain[5].length();
-
- int i00;
- bool firstLoop = true;
- T answer(localExpr.read(0, 0, 0, 0, 0, 0));
+ T answer = ReductionTraits<Op, T>::neutral();
for (int i5 = 0; i5 < e5; ++i5)
for (int i4 = 0; i4 < e4; ++i4)
for (int i3 = 0; i3 < e3; ++i3)
for (int i2 = 0; i2 < e2; ++i2)
for (int i1 = 0; i1 < e1; ++i1)
- {
- if (firstLoop)
- {
- firstLoop = false;
- i00 = 1;
- }
- else
- i00 = 0;
- for (int i0 = i00; i0 < e0; ++i0)
- op(answer, localExpr.read(i0, i1, i2, i3, i4, i5));
- }
+ for (int i0 = 0; i0 < e0; ++i0)
+ op(answer, localExpr.read(i0, i1, i2, i3, i4, i5));
ret = answer;
}
@@ -339,28 +331,16 @@
int e4 = domain[4].length();
int e5 = domain[5].length();
int e6 = domain[6].length();
-
- int i00;
- bool firstLoop = true;
- T answer(localExpr.read(0, 0, 0, 0, 0, 0, 0));
+ T answer = ReductionTraits<Op, T>::neutral();
for (int i6 = 0; i6 < e6; ++i6)
for (int i5 = 0; i5 < e5; ++i5)
for (int i4 = 0; i4 < e4; ++i4)
for (int i3 = 0; i3 < e3; ++i3)
for (int i2 = 0; i2 < e2; ++i2)
for (int i1 = 0; i1 < e1; ++i1)
- {
- if (firstLoop)
- {
- firstLoop = false;
- i00 = 1;
- }
- else
- i00 = 0;
- for (int i0 = i00; i0 < e0; ++i0)
- op(answer, localExpr.read(i0, i1, i2, i3, i4, i5, i6));
- }
+ for (int i0 = 0; i0 < e0; ++i0)
+ op(answer, localExpr.read(i0, i1, i2, i3, i4, i5, i6));
ret = answer;
}
More information about the pooma-dev
mailing list