[PATCH] Fix deadlocks in MPI reduction evaluators

Jeffrey D. Oldham oldham at codesourcery.com
Tue Jan 13 19:06:55 UTC 2004


Richard Guenther wrote:
> Hi!
> 
> The following patch is necessary to avoid deadlocks with the MPI
> implementation and multi-patch setups where one context does not
> participate in the reduction.
> 
> Fixes a failure of array_test_.. (I don't remember which one) with MPI.
> 
> Basically the scenario is that the collective, synchronous MPI_Gather
> is called from ReduceOverContexts<> on the non-participating (and thus
> not receiving) contexts while the SendIterates are still in the
> scheduler's queue.  The contexts participating in the calculation then
> wait forever on the CSem for the ReceiveIterates and patch reductions
> to complete.
> 
> So the fix is to make the non-participating contexts wait on the CSem,
> too, by queueing a fake write iterate after the send iterates; it
> triggers as soon as the send iterates complete.

Instead of adding a fake write iterate, can we adjust the MPI_Gather so
that non-participating contexts do not participate?
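Something like the sketch below is what I have in mind, written against
the plain MPI C API rather than POOMA internals (the helper name, the
participation and root flags, and the double payload are all made up for
illustration).  One caveat: MPI_Comm_split is itself collective over the
parent communicator, so the split would have to happen once at a point
where every context participates, e.g. when the multi-patch layout is
built, with the sub-communicator cached after that.

#include <mpi.h>
#include <vector>

// Hypothetical helper: gather one value per participating context onto
// the computation context, skipping non-participants entirely.
double gatherOnParticipants(double local, bool participates, bool isRoot)
{
  // Non-participants pass MPI_UNDEFINED and get MPI_COMM_NULL back.
  // Key 0 makes the computation context rank 0 in the sub-communicator.
  MPI_Comm sub;
  MPI_Comm_split(MPI_COMM_WORLD,
                 participates ? 0 : MPI_UNDEFINED,
                 isRoot ? 0 : 1,
                 &sub);

  double result = local;
  if (sub != MPI_COMM_NULL)
  {
    int n;
    MPI_Comm_size(sub, &n);
    std::vector<double> vals(n);
    MPI_Gather(&local, 1, MPI_DOUBLE,
               vals.data(), 1, MPI_DOUBLE, /* root */ 0, sub);
    // On rank 0 of sub, vals now holds every participant's value and
    // can be combined with the reduction operator.
    MPI_Comm_free(&sub);
  }
  return result;
}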

> Tested using MPI, Cheetah and serial some time ago.
> 
> Ok?
> 
> Richard.
> 
> 
> 2004Jan08  Richard Guenther <richard.guenther at uni-tuebingen.de>
> 
> 	* src/Engine/RemoteEngine.h: Use a waiting iterate to wait for
> 	reduction completion in the remote single- and multi-patch
> 	reduction evaluators.
> 	Do begin/endGeneration at the toplevel evaluate.
> 	* src/Evaluator/Reduction.h: Do begin/endGeneration at the
> 	toplevel evaluate.
> 
> --- src/Engine/RemoteEngine.h	2004-01-02 12:57:48.000000000 +0100
> +++ /home/richard/src/pooma/pooma-mpi3/r2/src/Engine/RemoteEngine.h	2004-01-08 23:00:40.000000000 +0100
> @@ -1954,6 +1962,29 @@
>    }
>  };
> 
> +
> +template <class Expr>
> +struct WaitingIterate : public Pooma::Iterate_t {
> +  WaitingIterate(const Expr& e, Pooma::CountingSemaphore& csem)
> +    : Pooma::Iterate_t(Pooma::scheduler()),
> +      e_m(e), csem_m(csem)
> +  {
> +    DataObjectRequest<WriteRequest> writeReq(*this);
> +    engineFunctor(e_m, writeReq);
> +  }
> +  virtual void run()
> +  {
> +    csem_m.incr();
> +  }
> +  virtual ~WaitingIterate()
> +  {
> +    DataObjectRequest<WriteRelease> writeRel;
> +    engineFunctor(e_m, writeRel);
> +  }
> +  Expr e_m;
> +  Pooma::CountingSemaphore& csem_m;
> +};
> +
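For the archive, the way this works: constructing the iterate with a
DataObjectRequest<WriteRequest> on the expression makes the scheduler
order it after every iterate already queued on the same data objects,
i.e. after the SendIterates, and run() then releases the semaphore.
Stripped of the POOMA machinery, the completion-signal pattern looks
like this (a C++20 sketch only; POOMA sequences iterates by data-object
requests, not a plain FIFO):

#include <functional>
#include <queue>
#include <semaphore>
#include <thread>

int main()
{
  std::counting_semaphore<1> csem(0);       // plays Pooma::CountingSemaphore
  std::queue<std::function<void()>> sched;  // plays the iterate queue

  sched.push([]  { /* SendIterate: post the MPI sends */ });
  sched.push([&] { csem.release(); });      // the "waiting iterate"

  std::thread worker([&] {                  // drains the queue in order
    while (!sched.empty()) { sched.front()(); sched.pop(); }
  });

  csem.acquire();  // like csem.wait(): only now is MPI_Gather safe to enter
  worker.join();
}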
>  //-----------------------------------------------------------------------------
>  // Single-patch Reductions involving remote engines:
>  //
> @@ -1998,12 +2029,11 @@
>      Pooma::CountingSemaphore csem;
>      csem.height(1);
> 
> -    Pooma::scheduler().beginGeneration();
> -
>      if (Pooma::context() != computationContext)
>      {
>        expressionApply(e, RemoteSend(computationContext));
> -      csem.incr();
> +      Pooma::Iterate_t *it = new WaitingIterate<Expr>(e, csem);
> +      Pooma::scheduler().handOff(it);
>      }
>      else
>      {
> @@ -2013,8 +2043,7 @@
>  		 forEach(e, view, TreeCombine()), csem);
>      }
> 
> -    Pooma::scheduler().endGeneration();
> -
> +    // Wait for RemoteSend or Reduction to complete.
>      csem.wait();
> 
>      RemoteProxy<T> globalRet(ret, computationContext);
> @@ -2102,8 +2131,6 @@
>      csem.height(n);
>      T *vals = new T[n];
> 
> -    Pooma::scheduler().beginGeneration();
> -
>      i = inter.begin();
>      k = 0;
>      for (j = 0; j < inter.size(); j++)
> @@ -2129,13 +2156,19 @@
>  	    else
>  	    {
>  	      expressionApply(e(*i), RemoteSend(computationalContext[j]));
> +              // One extra RemoteSend to wait for. Maybe we can combine these
> +              // iterates, but maybe not. Play safe for now.
> +	      csem.raise_height(1);
> +	      Pooma::Iterate_t *it = new WaitingIterate
> +		<typename View1<Expr, INode<Expr::dimensions> >::Type_t>(e(*i), csem);
> +	      Pooma::scheduler().handOff(it);
>  	    }
>  	  }
> 
>  	++i;
>        }
> 
> -    Pooma::scheduler().endGeneration();
> +    // Wait for RemoteSends and Reductions to complete.
>      csem.wait();
> 
>      if (n > 0)
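A note for readers of the multi-patch hunk above: each remote send
raises the semaphore height by one and queues its own waiting iterate,
so csem.wait() blocks until both the local patch reductions and all
sends have drained.  For reference, here is a minimal stand-in with the
four operations the patch relies on (the real Pooma::CountingSemaphore
may differ; this only makes the protocol concrete):

#include <condition_variable>
#include <mutex>

class CountingSemaphore
{
public:
  void height(int h)       { std::lock_guard<std::mutex> l(m_); height_ = h; }
  void raise_height(int d) { std::lock_guard<std::mutex> l(m_); height_ += d; }
  void incr()              // signal one completion
  {
    std::lock_guard<std::mutex> l(m_);
    if (++count_ >= height_)
      cv_.notify_all();
  }
  void wait()              // block until incr() has run height_ times
  {
    std::unique_lock<std::mutex> l(m_);
    cv_.wait(l, [this] { return count_ >= height_; });
  }
private:
  std::mutex m_;
  std::condition_variable cv_;
  int height_ = 0;
  int count_ = 0;
};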
> --- src/Evaluator/Reduction.h	2003-11-21 22:30:38.000000000 +0100
> +++ /home/richard/src/pooma/pooma-mpi3/r2/src/Evaluator/Reduction.h	2004-01-02 00:40:14.000000000 +0100
> @@ -128,10 +128,15 @@
>    void evaluate(T &ret, const Op &op, const Expr &e) const
>    {
>      typedef typename EvaluatorTag1<Expr>::Evaluator_t Evaluator_t;
> +
> +    Pooma::scheduler().beginGeneration();
> +
>      PAssert(checkValidity(e, WrappedInt<Expr::hasRelations>()));
>      forEach(e, PerformUpdateTag(), NullCombine());
>      Reduction<Evaluator_t>().evaluate(ret, op, e());
> 
> +    Pooma::scheduler().endGeneration();
> +
>      POOMA_INCREMENT_STATISTIC(NumReductions)
>    }
>  };
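With this hunk the generation is opened and closed exactly once per
reduction, in the one top-level evaluate, and it now stays open across
the csem.wait() in the specialized evaluators below.  If exception
safety is a concern, a small RAII guard would keep the bracket paired
(hypothetical, not part of the patch):

// Hypothetical scope guard; assumes only the Pooma::scheduler()
// begin/endGeneration calls already used in the patch above.
struct GenerationGuard
{
  GenerationGuard()  { Pooma::scheduler().beginGeneration(); }
  ~GenerationGuard() { Pooma::scheduler().endGeneration(); }
};

Declaring a GenerationGuard at the top of evaluate() would then replace
the explicit begin/end pair.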
> @@ -184,12 +189,8 @@
>      Pooma::CountingSemaphore csem;
>      csem.height(1);
> 
> -    Pooma::scheduler().beginGeneration();
> -
>      evaluate(ret, op, e, csem);
> 
> -    Pooma::scheduler().endGeneration();
> -
>      csem.wait();
>    }
>  };
> @@ -237,12 +238,10 @@
> 
>      expressionApply(e, IntersectorTag<Inter_t>(inter));
> 
> -    const int n = std::distance(inter.begin(), inter.end());
> +    const int n = inter.size();
>      Pooma::CountingSemaphore csem;
>      csem.height(n);
>      T *vals = new T[n];
> -
> -    Pooma::scheduler().beginGeneration();
> 
>      typename Inter_t::const_iterator i = inter.begin();
>      int j = 0;
> @@ -253,8 +252,6 @@
>          ++i; ++j;
>        }
> 
> -    Pooma::scheduler().endGeneration();
> -
>      csem.wait();
> 
>      ret = vals[0];


-- 
Jeffrey D. Oldham
oldham at codesourcery.com
