ExaDG
Loading...
Searching...
No Matches
permutation.h
1/* ______________________________________________________________________
2 *
3 * ExaDG - High-Order Discontinuous Galerkin for the Exa-Scale
4 *
5 * Copyright (C) 2021 by the ExaDG authors
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <https://www.gnu.org/licenses/>.
19 * ______________________________________________________________________
20 */
21
22#ifndef DEAL_SPECTRUM_PERMUTATION
23#define DEAL_SPECTRUM_PERMUTATION
24
25// std
26#include <math.h>
27#include <mpi.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <time.h>
31#include <map>
32#include <numeric>
33#include <vector>
34
35// ExaDG
36#include <exadg/postprocessor/spectral_analysis/setup.h>
37
38namespace dealspectrum
39{
// Data needed by the comparator used for index sorting below. qsort() has no
// user-context parameter, so these file-scope variables are set immediately
// before each qsort() call (see Permutator::init()).
//
// NOTE: declared 'inline' (C++17) so that this header can be included in
// several translation units with a single program-wide definition; without
// 'inline', each inclusion would emit its own definition and violate the
// one-definition rule at link time.
inline unsigned long int * array_list; // position in local array ...
inline int *               array_proc; // element resides on process ...
inline int                 __rank;     // rank of this process

/**
 * qsort() comparator establishing the order of dof indices used for the
 * send/receive index lists:
 *   1) entries owned by the same process are ordered by their position in
 *      the local array (array_list),
 *   2) entries owned by this process (__rank) come before all remote ones,
 *   3) remote entries are grouped by ascending owner rank (array_proc).
 *
 * @param a pointer to the first index (indexes array_list/array_proc)
 * @param b pointer to the second index
 * @return  negative/zero/positive, strcmp-style
 */
inline int
cmp(const void * a, const void * b)
{
  int const ia = *static_cast<const int *>(a);
  int const ib = *static_cast<const int *>(b);

  if(array_proc[ia] == array_proc[ib])
  {
    // same process: order by position in the local array
    if(array_list[ia] < array_list[ib])
      return -1;
    return array_list[ia] > array_list[ib] ? 1 : 0;
  }

  // different processes: the local process comes first, ...
  if(array_proc[ia] == __rank)
    return -1;
  if(array_proc[ib] == __rank)
    return +1;
  // ... remote processes are ordered by ascending rank
  return array_proc[ia] < array_proc[ib] ? -1 : 1;
}
69
{
  // communicator used for all point-to-point communication in this class
  MPI_Comm const comm;
  // reference to DEAL.SPECTRUM setup
  Setup & s;
  // has init() been executed? (guards against double setup and cleanup of
  // never-allocated buffers in the destructor)
  bool initialized;

public:
  /**
   * Constructor. Only stores the references; all actual setup work is
   * deferred to init().
   *
   * @param comm MPI communicator
   * @param s    DEAL.SPECTRUM setup object (dimensions, rank, cells, points)
   */
  Permutator(MPI_Comm const & comm, Setup & s) : comm(comm), s(s), initialized(false)
  {
  }
96
103 template<typename MAPPING, typename FFTW>
104 void
105 init(MAPPING & MAP, FFTW & FFT)
106 {
107 // check if already initialized
108 if(this->initialized)
109 return;
110 this->initialized = true;
111
112 // help variables...
113 dim = s.dim;
114 int rank = s.rank;
115 unsigned long int points = s.points_dst;
116 int n = s.cells;
117
118 int start, end, start_, end_;
119 // ... my range of the space filling curve
120 MAP.getLocalRange(start, end);
121 // ... my rows for FFT
122 FFT.getLocalRange(start_, end_);
123 bsize = FFT.bsize;
124
125 // data structures for determining the communication partners...
126 int has_length = (end - start) * dealii::Utilities::pow(points, dim);
127 unsigned long int * has = new unsigned long int[has_length];
128 int * has_procs = new int[has_length];
129 send_buffer = new double[has_length * dim];
130 send_index = new int[has_length];
131
132 int want_length = (end_ - start_) * dealii::Utilities::pow(n * points, dim - 1);
133 unsigned long int * want = new unsigned long int[want_length];
134 int * want_procs = new int[want_length];
135 recv_buffer = new double[want_length * dim];
136 recv_index = new int[want_length];
137
138 // S.1: determine all dofs this process posses (+procs)
139 for(int ii = start, counter = 0; ii < end; ii++)
140 {
141 int i = MAP.indices(ii);
142 for(unsigned long int K = 0; K < (dim == 3 ? points : 1); K++)
143 for(unsigned long int J = 0; J < points; J++)
144 for(unsigned long int I = 0; I < points; I++, counter++)
145 {
146 // ... determine dof
147 unsigned long int temp =
148 ((dim == 3 ? ((MAP.lbf(i * dim + 2) * points + K) * points * n) : 0) +
149 MAP.lbf(i * dim + 1) * points + J) *
150 points * n +
151 (MAP.lbf(i * dim + 0) * points + I);
152 has[counter] = temp;
153 // ... which process does need this dof?
154 int proc = FFT.indices_proc_rows(temp / pow(n * points, dim - 1));
155 has_procs[counter] = proc;
156 }
157 }
158
159
160 // S.2: which processes needs how many dofs
161 std::map<int, int> has_map;
162 has_map[rank] = 0; // add this rank
163 for(int counter = 0; counter < has_length; counter++)
164 {
165 int key = has_procs[counter];
166 if(has_map.find(key) == has_map.end())
167 has_map[key] = 1;
168 else
169 has_map[key]++;
170 }
171
172
173 // S.3: determine local indices for collecting data
174 for(int i = 0; i < has_length; i++)
175 send_index[i] = i;
176 array_list = has;
177 array_proc = has_procs;
178 __rank = rank;
179
180 qsort(send_index, has_length, sizeof(int), cmp);
181
182
183 // S.4: determine offsets
184 for(auto & i : has_map)
185 if(i.first == rank)
186 {
187 send_offset.insert(send_offset.begin(), i.second);
188 send_procs.insert(send_procs.begin(), i.first);
189 }
190 else
191 {
192 send_offset.push_back(i.second);
193 send_procs.push_back(i.first);
194 }
195
196 send_offset.insert(send_offset.begin(), 0);
197
198 std::partial_sum(send_offset.begin(), send_offset.end(), send_offset.begin());
199
200
201
202 // R.1: determine all dofs this process wants (+procs)
203 // ... loop over all local rows
204 for(int ii = start_, pn = points * n, counter = 0; ii < end_; ii++)
205 // ... loop inside row
206 for(int jj = 0; jj < pow(n * points, dim - 1); jj++, counter++)
207 {
208 // ... determine dof
209 unsigned long int temp = ii * dealii::Utilities::pow(n * points, dim - 1) + jj;
210 want[counter] = temp;
211 // ... determine owning process
212 int t = dim == 3 ? (temp % pn) / points + ((temp % (pn * pn)) / pn) / points * n +
213 (temp / pn / pn) / points * n * n :
214 (temp % pn) / points + (temp / pn) / points * n;
215 int proc = MAP.indices_proc(MAP.indices_inv(t));
216 want_procs[counter] = proc;
217 }
218
219
220 // R.2: which processes needs how many dofs
221 std::map<int, int> want_map;
222 want_map[rank] = 0; // add this rank
223 for(int counter = 0; counter < want_length; counter++)
224 {
225 int key = want_procs[counter];
226 if(want_map.find(key) == want_map.end())
227 want_map[key] = 1;
228 else
229 want_map[key]++;
230 }
231
232 // R.3: determine local indices for distributing received data
233 for(int i = 0; i < want_length; i++)
234 recv_index[i] = i;
235 array_list = want;
236 array_proc = want_procs;
237
238 qsort(recv_index, want_length, sizeof(int), cmp);
239
240 // R.4: add padding
241 {
242 // consider padding needed by FFTW...
243 int N = points * n;
244 int Nx = (N / 2 + 1) * 2;
245 for(int i = 0; i < want_length; i++)
246 recv_index[i] += (recv_index[i] / N) * (Nx - N);
247 }
248
249 // R.5: determine offsets
250 for(auto & i : want_map)
251 {
252 if(i.first == rank)
253 {
254 recv_offset.insert(recv_offset.begin(), i.second);
255 recv_procs.insert(recv_procs.begin(), i.first);
256 }
257 else
258 {
259 recv_offset.push_back(i.second);
260 recv_procs.push_back(i.first);
261 }
262 }
263
264 recv_offset.insert(recv_offset.begin(), 0);
265
266 std::partial_sum(recv_offset.begin(), recv_offset.end(), recv_offset.begin());
267
268 // allocate memory for requests
269 recv_requests = new MPI_Request[recv_procs.size()];
270 send_requests = new MPI_Request[send_procs.size()];
271
272 // print info
273 {
274#ifdef SIMPLE_VIEW
275 char filename[100];
276 sprintf(filename, "build/file.%d.out", rank);
277 FILE * f = fopen(filename, "w");
278
279 fprintf(f, "Proc %d: \n", rank);
280
281
282# ifdef EXTENDED_VIEW
283 for(auto & i : send_offset)
284 fprintf(f, "%d ", i);
285 fprintf(f, "\n");
286
287 fprintf(f, "has: ");
288 for(auto & i : has_map)
289 fprintf(f, "%d:%d ", i.first, i.second);
290 fprintf(f, "\n");
291
292 fprintf(f, "wants: ");
293 for(auto & i : want_map)
294 fprintf(f, "%d:%d ", i.first, i.second);
295 fprintf(f, "\n\n");
296# endif
297
298
299
300 fprintf(f, " ");
301 if(send_offset[1] != 0)
302 fprintf(f, " ");
303 for(int i = 0; i < send_procs.size(); i++)
304 {
305 fprintf(f, "%4d", send_procs[i]);
306 for(int j = 0; j < (send_offset[i + 1] - send_offset[i] - 1) * 5 + 1; j++)
307 fprintf(f, " ");
308 }
309 fprintf(f, "\n");
310 fprintf(f, "has-index: ");
311 for(int counter = 0; counter < has_length; counter++)
312 {
313 fprintf(f, "%4d ", send_index[counter]);
314 }
315
316 fprintf(f, "\n");
317 fprintf(f, " ");
318 if(recv_offset[1] != 0)
319 fprintf(f, " ");
320 for(int i = 0; i < recv_procs.size(); i++)
321 {
322 fprintf(f, "%4d", recv_procs[i]);
323 for(int j = 0; j < (recv_offset[i + 1] - recv_offset[i] - 1) * 5 + 1; j++)
324 fprintf(f, " ");
325 }
326 fprintf(f, "\n");
327 fprintf(f, "wan-index: ");
328 for(int counter = 0; counter < want_length; counter++)
329 {
330 fprintf(f, "%4d ", recv_index[counter]);
331 }
332
333# ifdef EXTENDED_VIEW
334 fprintf(f, "\n\n");
335
336 fprintf(f, "has: ");
337
338 for(int counter = 0; counter < has_length; counter++)
339 {
340 fprintf(f, "%4d ", has[counter]);
341 }
342
343 fprintf(f, "\n");
344 fprintf(f, " ");
345
346 for(int counter = 0; counter < has_length; counter++)
347 {
348 fprintf(f, "%4d ", has_procs[counter]);
349 }
350
351 fprintf(f, "\n");
352
353 fprintf(f, "wants: ");
354 for(int counter = 0; counter < want_length; counter++)
355 {
356 fprintf(f, "%4d ", want[counter]);
357 }
358 fprintf(f, "\n");
359 fprintf(f, " ");
360 for(int counter = 0; counter < want_length; counter++)
361 {
362 fprintf(f, "%4d ", want_procs[counter]);
363 }
364# endif
365
366 fprintf(f, "\n\n");
367 fclose(f);
368#endif
369 }
370
371 delete[] has;
372 delete[] has_procs;
373 delete[] want;
374 delete[] want_procs;
375 }
376
380 virtual ~Permutator()
381 {
382 // not initialized -> nothing to clean up
383 if(not initialized)
384 return;
385
386 delete[] recv_index;
387 delete[] send_index;
388 delete[] send_requests;
389 delete[] recv_requests;
390 }
391
  /**
   * Start a nonblocking permutation of the source array (ordered along the
   * space filling curve, dim interleaved components per point) into the
   * target array (FFT row layout, components separated by bsize). All
   * receives are posted first, then the per-process chunks are packed and
   * sent, and finally the purely local portion is copied directly. Must be
   * completed by iwait() before target is read or the buffers are reused.
   *
   * @param source array to read from
   * @param target array to write into
   */
  void
  ipermute(double * source, double * target)
  {
    // save references to vectors (for iwait)
    this->source = source;
    this->target = target;

    // request gathering of data: post all receives up front; index 0 of
    // recv_procs/send_procs is this process itself, handled by the direct
    // copy at the end (hence loops start at 1, requests at offset i-1)
    for(unsigned int i = 1; i < recv_procs.size(); i++)
      MPI_Irecv(recv_buffer + recv_offset[i] * dim,
                (recv_offset[i + 1] - recv_offset[i]) * dim,
                MPI_DOUBLE,
                recv_procs[i],
                0,
                comm,
                recv_requests + i - 1);

    // scatter data ...
    for(unsigned int i = 1; i < send_procs.size(); i++)
    {
      // ... copy data into buffer (gather via send_index) and ...
      for(int j = send_offset[i]; j < send_offset[i + 1]; j++)
        for(int d = 0; d < dim; d++)
          send_buffer[dim * j + d] = source[send_index[j] * dim + d];

      // ... send right away
      MPI_Isend(send_buffer + send_offset[i] * dim,
                (send_offset[i + 1] - send_offset[i]) * dim,
                MPI_DOUBLE,
                send_procs[i],
                0,
                comm,
                send_requests + i - 1);
    }

    // perform copy operation on local process, if necessary: the range
    // [send_offset[0], send_offset[1]) holds the dofs this process keeps
    for(int s = send_offset[0], t = recv_offset[0]; s < send_offset[1]; s++, t++)
      for(int d = 0; d < dim; d++)
        target[recv_index[t] + d * bsize] = source[dim * send_index[s] + d];
  }
440
445 void
447 {
448 // wait for messages...
449 for(unsigned int c = 1; c < recv_procs.size(); c++)
450 {
451 int i = 0;
452 // ... a message received: extract index
453 MPI_Waitany(recv_procs.size() - 1, recv_requests, &i, MPI_STATUSES_IGNORE);
454 // ... copy data from buffers
455 for(int j = recv_offset[i + 1]; j < recv_offset[i + 2]; j++)
456 for(int d = 0; d < dim; d++)
457 target[recv_index[j] + d * bsize] = recv_buffer[j * dim + d];
458 }
459
460 // wait that e.th has been send away
461 MPI_Waitall(send_procs.size() - 1, send_requests, MPI_STATUSES_IGNORE);
462 }
463
private:
  // reference to source array (set by ipermute(), read by iwait())
  double * source;
  // ... to target array (written by ipermute() and iwait())
  double * target;
  // buffer for sending dofs (packed in ipermute())
  double * send_buffer;
  // buffer for receiving dofs (unpacked in iwait())
  double * recv_buffer;

  // send: requests (one per remote receiving process)
  MPI_Request * send_requests;
  // ... indices for copying from source into send buffer
  int * send_index;
  // ... receiving processes (entry 0 is this process itself)
  std::vector<int> send_procs;
  // ... range of buffer to be send to a process (prefix sums over counts)
  std::vector<int> send_offset;

  // receive: requests (one per remote sending process)
  MPI_Request * recv_requests;
  // ... indices for copying from recv buffer to target
  int * recv_index;
  // ... sending processes (entry 0 is this process itself)
  std::vector<int> recv_procs;
  // ... range of buffer to be received from a process (prefix sums)
  std::vector<int> recv_offset;

  // dimensions
  int dim;
  // portion of target dedicated for each direction
  int bsize;
};
497
498} // namespace dealspectrum
499
500#endif
Definition permutation.h:81
Permutator(MPI_Comm const &comm, Setup &s)
Definition permutation.h:93
void iwait()
Definition permutation.h:446
void init(MAPPING &MAP, FFTW &FFT)
Definition permutation.h:105
void ipermute(double *source, double *target)
Definition permutation.h:401
virtual ~Permutator()
Definition permutation.h:380
Definition setup.h:56