105 init(MAPPING & MAP, FFTW & FFT)
108 if(this->initialized)
110 this->initialized =
true;
115 unsigned long int points = s.points_dst;
118 int start, end, start_, end_;
120 MAP.getLocalRange(start, end);
122 FFT.getLocalRange(start_, end_);
126 int has_length = (end - start) * dealii::Utilities::pow(points, dim);
127 unsigned long int * has =
new unsigned long int[has_length];
128 int * has_procs =
new int[has_length];
129 send_buffer =
new double[has_length * dim];
130 send_index =
new int[has_length];
132 int want_length = (end_ - start_) * dealii::Utilities::pow(n * points, dim - 1);
133 unsigned long int * want =
new unsigned long int[want_length];
134 int * want_procs =
new int[want_length];
135 recv_buffer =
new double[want_length * dim];
136 recv_index =
new int[want_length];
139 for(
int ii = start, counter = 0; ii < end; ii++)
141 int i = MAP.indices(ii);
142 for(
unsigned long int K = 0; K < (dim == 3 ? points : 1); K++)
143 for(
unsigned long int J = 0; J < points; J++)
144 for(
unsigned long int I = 0; I < points; I++, counter++)
147 unsigned long int temp =
148 ((dim == 3 ? ((MAP.lbf(i * dim + 2) * points + K) * points * n) : 0) +
149 MAP.lbf(i * dim + 1) * points + J) *
151 (MAP.lbf(i * dim + 0) * points + I);
154 int proc = FFT.indices_proc_rows(temp / pow(n * points, dim - 1));
155 has_procs[counter] = proc;
161 std::map<int, int> has_map;
163 for(
int counter = 0; counter < has_length; counter++)
165 int key = has_procs[counter];
166 if(has_map.find(key) == has_map.end())
174 for(
int i = 0; i < has_length; i++)
177 array_proc = has_procs;
180 qsort(send_index, has_length,
sizeof(
int), cmp);
184 for(
auto & i : has_map)
187 send_offset.insert(send_offset.begin(), i.second);
188 send_procs.insert(send_procs.begin(), i.first);
192 send_offset.push_back(i.second);
193 send_procs.push_back(i.first);
196 send_offset.insert(send_offset.begin(), 0);
198 std::partial_sum(send_offset.begin(), send_offset.end(), send_offset.begin());
204 for(
int ii = start_, pn = points * n, counter = 0; ii < end_; ii++)
206 for(
int jj = 0; jj < pow(n * points, dim - 1); jj++, counter++)
209 unsigned long int temp = ii * dealii::Utilities::pow(n * points, dim - 1) + jj;
210 want[counter] = temp;
212 int t = dim == 3 ? (temp % pn) / points + ((temp % (pn * pn)) / pn) / points * n +
213 (temp / pn / pn) / points * n * n :
214 (temp % pn) / points + (temp / pn) / points * n;
215 int proc = MAP.indices_proc(MAP.indices_inv(t));
216 want_procs[counter] = proc;
221 std::map<int, int> want_map;
223 for(
int counter = 0; counter < want_length; counter++)
225 int key = want_procs[counter];
226 if(want_map.find(key) == want_map.end())
233 for(
int i = 0; i < want_length; i++)
236 array_proc = want_procs;
238 qsort(recv_index, want_length,
sizeof(
int), cmp);
244 int Nx = (N / 2 + 1) * 2;
245 for(
int i = 0; i < want_length; i++)
246 recv_index[i] += (recv_index[i] / N) * (Nx - N);
250 for(
auto & i : want_map)
254 recv_offset.insert(recv_offset.begin(), i.second);
255 recv_procs.insert(recv_procs.begin(), i.first);
259 recv_offset.push_back(i.second);
260 recv_procs.push_back(i.first);
264 recv_offset.insert(recv_offset.begin(), 0);
266 std::partial_sum(recv_offset.begin(), recv_offset.end(), recv_offset.begin());
269 recv_requests =
new MPI_Request[recv_procs.size()];
270 send_requests =
new MPI_Request[send_procs.size()];
276 sprintf(filename,
"build/file.%d.out", rank);
277 FILE * f = fopen(filename,
"w");
279 fprintf(f,
"Proc %d: \n", rank);
283 for(
auto & i : send_offset)
284 fprintf(f,
"%d ", i);
288 for(
auto & i : has_map)
289 fprintf(f,
"%d:%d ", i.first, i.second);
292 fprintf(f,
"wants: ");
293 for(
auto & i : want_map)
294 fprintf(f,
"%d:%d ", i.first, i.second);
301 if(send_offset[1] != 0)
303 for(
int i = 0; i < send_procs.size(); i++)
305 fprintf(f,
"%4d", send_procs[i]);
306 for(
int j = 0; j < (send_offset[i + 1] - send_offset[i] - 1) * 5 + 1; j++)
310 fprintf(f,
"has-index: ");
311 for(
int counter = 0; counter < has_length; counter++)
313 fprintf(f,
"%4d ", send_index[counter]);
318 if(recv_offset[1] != 0)
320 for(
int i = 0; i < recv_procs.size(); i++)
322 fprintf(f,
"%4d", recv_procs[i]);
323 for(
int j = 0; j < (recv_offset[i + 1] - recv_offset[i] - 1) * 5 + 1; j++)
327 fprintf(f,
"wan-index: ");
328 for(
int counter = 0; counter < want_length; counter++)
330 fprintf(f,
"%4d ", recv_index[counter]);
338 for(
int counter = 0; counter < has_length; counter++)
340 fprintf(f,
"%4d ", has[counter]);
346 for(
int counter = 0; counter < has_length; counter++)
348 fprintf(f,
"%4d ", has_procs[counter]);
353 fprintf(f,
"wants: ");
354 for(
int counter = 0; counter < want_length; counter++)
356 fprintf(f,
"%4d ", want[counter]);
360 for(
int counter = 0; counter < want_length; counter++)
362 fprintf(f,
"%4d ", want_procs[counter]);
404 this->source = source;
405 this->target = target;
408 for(
unsigned int i = 1; i < recv_procs.size(); i++)
409 MPI_Irecv(recv_buffer + recv_offset[i] * dim,
410 (recv_offset[i + 1] - recv_offset[i]) * dim,
415 recv_requests + i - 1);
418 for(
unsigned int i = 1; i < send_procs.size(); i++)
421 for(
int j = send_offset[i]; j < send_offset[i + 1]; j++)
422 for(
int d = 0; d < dim; d++)
423 send_buffer[dim * j + d] = source[send_index[j] * dim + d];
426 MPI_Isend(send_buffer + send_offset[i] * dim,
427 (send_offset[i + 1] - send_offset[i]) * dim,
432 send_requests + i - 1);
436 for(
int s = send_offset[0], t = recv_offset[0]; s < send_offset[1]; s++, t++)
437 for(
int d = 0; d < dim; d++)
438 target[recv_index[t] + d * bsize] = source[dim * send_index[s] + d];