Sierra Toolkit  Version of the Day
MPI.cpp
1 
10 #include <sstream>
11 
12 #include <stk_util/util/FeatureTest.hpp>
13 #include <stk_util/parallel/MPI.hpp>
14 
15 namespace sierra {
16 namespace MPI {
17 
18 template struct Loc<int>;
19 template struct Loc<double>;
20 template struct Loc<float>;
21 
22 MPI_Datatype
24 {
25  static MPI_Datatype s_mpi_double_complex;
26  static bool initialized = false;
27 
28  if (!initialized) {
29  initialized = true;
30 
31  MPI_Type_contiguous(2, MPI_DOUBLE, &s_mpi_double_complex);
32  MPI_Type_commit(&s_mpi_double_complex);
33  }
34  return s_mpi_double_complex;
35 }
36 
37 MPI_Datatype
39 {
40  static MPI_Datatype s_mpi_float_complex;
41  static bool initialized = false;
42 
43  if (!initialized) {
44  initialized = true;
45 
46  MPI_Type_contiguous(2, MPI_FLOAT, &s_mpi_float_complex);
47  MPI_Type_commit(&s_mpi_float_complex);
48  }
49  return s_mpi_float_complex;
50 }
51 
52 
53 // #ifdef MPI_LONG_LONG_INT
54 // MPI_Datatype
55 // long_long_int_int_type()
56 // {
57 // static MPI_Datatype s_mpi_long_long_int_int;
58 // static bool initialized = false;
59 
60 // int B[] = {2, 1};
61 // MPI_Aint D[] = {0, 8};
62 // MPI_Datatype T[] = {MPI_LONG_LONG_INT, MPI_INT};
63 
64 // if (!initialized) {
65 // initialized = true;
66 
67 // MPI_Type_struct(2, B, D, T, &s_mpi_long_long_int_int);
68 // MPI_Type_commit(&s_mpi_long_long_int_int);
69 // }
70 // return s_mpi_long_long_int_int;
71 // }
72 // #endif
73 
74 
75 MPI_Datatype
77 {
78  static MPI_Datatype s_mpi_double_double_int;
79  static bool initialized = false;
80 
81  int B[] = {2, 1};
82  MPI_Aint D[] = {0, 16};
83  MPI_Datatype T[] = {MPI_DOUBLE, MPI_INT};
84 
85 
86  if (!initialized) {
87  initialized = true;
88 
89  MPI_Type_struct(2, B, D, T, &s_mpi_double_double_int);
90  MPI_Type_commit(&s_mpi_double_double_int);
91  }
92  return s_mpi_double_double_int;
93 }
94 
95 
96 namespace {
97 
98 extern "C" {
99  void
100  mpi_double_complex_sum(
101  void * invec,
102  void * inoutvec,
103  int * len,
104  MPI_Datatype * datatype)
105  {
106  std::complex<double> *complex_in = static_cast<std::complex<double> *>(invec);
107  std::complex<double> *complex_inout = static_cast<std::complex<double> *>(inoutvec);
108 
109  for (int i = 0; i < *len; ++i)
110  complex_inout[i] += complex_in[i];
111  }
112 } // extern "C"
113 
114 } // namespace <unnamed>
115 
116 
117 MPI_Op
119 {
120  static MPI_Op s_mpi_double_complex_sum;
121  static bool initialized = false;
122 
123  if (!initialized) {
124  initialized = true;
125 
126  MPI_Op_create(mpi_double_complex_sum, true, &s_mpi_double_complex_sum);
127  }
128  return s_mpi_double_complex_sum;
129 }
130 
131 namespace {
132 
133 const MPI::ReduceSet *s_currentReduceSet = 0;
134 
135 extern "C" {
136  typedef void (*ParallelReduceOp)
137  (void * inv, void * outv, int *, MPI_Datatype *);
138 }
139 
140 
141 void
142 all_reduce(
143  MPI_Comm arg_comm,
144  ParallelReduceOp arg_op,
145  void * arg_in,
146  void * arg_out,
147  unsigned arg_len)
148 {
149  MPI_Op mpi_op = MPI_OP_NULL ;
150 
151  MPI_Op_create(arg_op, 0, & mpi_op);
152 
153  // The SUN was buggy when combinng an
154  // MPI_Allreduce with a user defined operator,
155  // use reduce/broadcast instead.
156 
157 #ifdef SIERRA_MPI_ALLREDUCE_USER_FUNCTION_BUG
158  const int result_reduce = MPI_Reduce(arg_in,arg_out,arg_len,MPI_BYTE,mpi_op,0,arg_comm);
159  const int result_bcast = MPI_Bcast(arg_out,arg_len,MPI_BYTE,0,arg_comm);
160 
161  MPI_Op_free(& mpi_op);
162 
163  if (MPI_SUCCESS != result_reduce || MPI_SUCCESS != result_bcast) {
164  std::ostringstream msg ;
165  msg << "sierra::MPI::all_reduce FAILED: MPI_Reduce = " << result_reduce
166  << " MPI_Bcast = " << result_bcast ;
167  throw std::runtime_error(msg.str());
168  }
169 #else
170  const int result = MPI_Allreduce(arg_in,arg_out,arg_len,MPI_BYTE,mpi_op,arg_comm);
171 
172  MPI_Op_free(& mpi_op);
173 
174  if (MPI_SUCCESS != result) {
175  std::ostringstream msg ;
176  msg << "sierra::MPI::all_reduce FAILED: MPI_Allreduce = " << result;
177  throw std::runtime_error(msg.str());
178  }
179 #endif
180 }
181 
182 struct ReduceCheck : public ReduceInterface
183 {
184  ReduceCheck()
185  {}
186 
187  void setSize(unsigned size) {
188  m_size = size;
189  }
190 
191  virtual void size(void *&inbuf) const {
192  unsigned *t = align_cast<unsigned>(inbuf);
193  t += sizeof(unsigned);
194  inbuf = t;
195  }
196 
197  virtual void copyin(void *&inbuf) const {
198  unsigned *t = align_cast<unsigned>(inbuf);
199  *t++ = m_size;
200  inbuf = t;
201  }
202 
203  virtual void copyout(void *&outbuf) const {
204  unsigned *t = align_cast<unsigned>(outbuf);
205 
206  unsigned size = *t++;
207  if (m_size != size)
208  throw std::runtime_error("size mismatch");
209 
210  outbuf = t;
211  }
212 
213  virtual void op(void *&inbuf, void *&outbuf) const {
214  unsigned *tin = align_cast<unsigned>(inbuf);
215  unsigned *tout = align_cast<unsigned>(outbuf);
216 
217  *tout = std::min(*tout, *tin);
218 
219  inbuf = ++tin;
220  outbuf = ++tout;
221  }
222 
223 private:
224  unsigned m_size;
225 };
226 
227 } // namespace <unnamed>
228 
229 
230 ReduceSet::ReduceSet()
231 {
232  add(new ReduceCheck);
233 }
234 
235 
236 ReduceSet::~ReduceSet()
237 {
238  for (ReduceVector::const_iterator it = m_reduceVector.begin(); it != m_reduceVector.end(); ++it)
239  delete (*it);
240 }
241 
242 
243 size_t
244 ReduceSet::size() const {
245  void *buffer_end = 0;
246 
247  for (ReduceVector::const_iterator it = m_reduceVector.begin(); it != m_reduceVector.end(); ++it)
248  (*it)->size(buffer_end);
249 
250  ReduceCheck *reduce_check = static_cast<ReduceCheck *>(m_reduceVector.front());
251  reduce_check->setSize(reinterpret_cast<char *>(buffer_end) - (char *) 0);
252 
253  return reinterpret_cast<char *>(buffer_end) - (char *) 0;
254 }
255 
256 void
257 ReduceSet::copyin(void * const buffer_in) const {
258  void *inbuf = buffer_in;
259 
260  for (ReduceVector::const_iterator it = m_reduceVector.begin(); it != m_reduceVector.end(); ++it)
261  (*it)->copyin(inbuf);
262 }
263 
264 void
265 ReduceSet::copyout(void * const buffer_out) const {
266  void *outbuf = buffer_out;
267 
268  for (ReduceVector::const_iterator it = m_reduceVector.begin(); it != m_reduceVector.end(); ++it)
269  (*it)->copyout(outbuf);
270 }
271 
272 void
273 ReduceSet::op(void * const buffer_in, void * const buffer_out) const {
274  void *inbuf = buffer_in;
275  void *outbuf = buffer_out;
276 
277  for (ReduceVector::const_iterator it = m_reduceVector.begin(); it != m_reduceVector.end(); ++it)
278  (*it)->op(inbuf, outbuf);
279 }
280 
281 void ReduceSet::void_op(void * inv, void * outv, int *, MPI_Datatype *) {
282  s_currentReduceSet->op(inv, outv);
283 }
284 
285 
286 void
287 ReduceSet::add(
288  ReduceInterface * reduce_interface)
289 {
290  m_reduceVector.push_back(reduce_interface);
291 }
292 
293 
294 void
296  MPI_Comm comm,
297  const ReduceSet & reduce_set)
298 {
299  size_t size = reduce_set.size();
300 
301  if (size) {
302  char *input_buffer = new char[size];
303  char *output_buffer = new char[size];
304  void *inbuf = (void *) input_buffer;
305  void *outbuf = (void *) output_buffer;
306 
307  s_currentReduceSet = &reduce_set;
308 
309  ParallelReduceOp f = reinterpret_cast<ParallelReduceOp>(& ReduceSet::void_op);
310 
311  reduce_set.copyin(inbuf);
312  all_reduce(comm, f, inbuf, outbuf, size);
313  reduce_set.copyout(outbuf);
314  delete [] output_buffer;
315  delete [] input_buffer;
316  }
317 }
318 
319 
320 } // namespace MPI
321 } // namespace sierra
Definition: Env.cpp:53
void AllReduce(MPI_Comm comm, const ReduceSet &reduce_set)
Member function AllReduce ...
Definition: MPI.cpp:295
MPI_Datatype float_complex_type()
Function float_complex_type returns an MPI complex data type for C++.
Definition: MPI.cpp:38
MPI_Datatype double_double_int_type()
Member function double_double_int_type ...
Definition: MPI.cpp:76
std::ostream & tout()
Regression test textual output stream.
Definition: OutputLog.cpp:682
MPI_Datatype double_complex_type()
Function double_complex_type returns an MPI complex data type for C++.
Definition: MPI.cpp:23
T * align_cast(void *p)
Function align_cast returns a pointer that has been aligned to the specified alignment or double if t...
Definition: MPI.hpp:451
void all_reduce(ParallelMachine, const ReduceOp &)
MPI_Op double_complex_sum_op()
Function double_complex_sum_op returns a sum operation for the C++ complex MPI data type...
Definition: MPI.cpp:118
Class ReduceSet ...
Definition: MPI.hpp:625