AdjoinableMPI
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
modified.c
Go to the documentation of this file.
1 /*
2 ##########################################################
3 # This file is part of the AdjoinableMPI library #
4 # released under the MIT License. #
5 # The full COPYRIGHT notice can be found in the top #
6 # level directory of the AdjoinableMPI distribution. #
7 ##########################################################
8 */
9 #include <stdlib.h>
10 #include <assert.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <mpi.h>
16 #include "ampi/adTool/support.h"
17 
18 MPI_Datatype AMPI_ADOUBLE;
19 MPI_Datatype AMPI_AFLOAT;
20 
21 #ifdef AMPI_FORTRANCOMPATIBLE
22 MPI_Datatype AMPI_ADOUBLE_PRECISION;
23 MPI_Datatype AMPI_AREAL;
24 #endif
25 
26 int FW_AMPI_Recv(void* buf,
27  int count,
28  MPI_Datatype datatype,
29  int src,
30  int tag,
32  MPI_Comm comm,
33  MPI_Status* status) {
34  int rc=0;
35  if (!(
36  pairedWith==AMPI_FROM_SEND
37  ||
38  pairedWith==AMPI_FROM_BSEND
39  ||
40  pairedWith==AMPI_FROM_RSEND
41  ||
42  pairedWith==AMPI_FROM_ISEND_WAIT
43  ||
44  pairedWith==AMPI_FROM_ISEND_WAITALL
45  )) rc=MPI_Abort(comm, MPI_ERR_ARG);
46  else {
47  MPI_Status myStatus;
48  double* mappedbuf=NULL;
49  int dt_idx = derivedTypeIdx(datatype);
50  int is_derived = isDerivedType(dt_idx);
52  mappedbuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(buf,&count);
53  }
54  else if(is_derived) {
55  mappedbuf=(*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(count,datatype,comm);
56  }
57  else {
58  mappedbuf=buf;
59  }
60  rc=MPI_Recv(mappedbuf,
61  count,
63  src,
64  tag,
65  comm,
66  &myStatus); /* because status as passed in may be MPI_STATUS_IGNORE */
67  if (rc==MPI_SUCCESS && ((*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(datatype)==AMPI_ACTIVE || is_derived)) {
68  if (is_derived) {
69  (*ourADTOOL_AMPI_FPCollection.unpackDType_fp)(mappedbuf,buf,count,dt_idx);
71  }
73  if(tag==MPI_ANY_TAG) tag=myStatus.MPI_TAG;
74  if(src==MPI_ANY_SOURCE) src=myStatus.MPI_SOURCE;
77  count,
78  datatype,
79  src,
80  tag,
81  pairedWith,
82  comm);
83  }
84  if (status!=MPI_STATUS_IGNORE) *status=myStatus;
85  }
86  return rc;
87 }
88 
89 int BW_AMPI_Recv(void* buf,
90  int count,
91  MPI_Datatype datatype,
92  int src,
93  int tag,
95  MPI_Comm comm,
96  MPI_Status* status) {
97  int rc=0;
98  void *idx=NULL;
100  &count,
101  &datatype,
102  &src,
103  &tag,
104  &pairedWith,
105  &comm,
106  &idx);
107  if (!(
108  pairedWith==AMPI_FROM_SEND
109  ||
110  pairedWith==AMPI_FROM_BSEND
111  ||
112  pairedWith==AMPI_FROM_RSEND
113  ||
114  pairedWith==AMPI_FROM_ISEND_WAIT
115  ||
116  pairedWith==AMPI_FROM_ISEND_WAITALL
117  )) rc=MPI_Abort(comm, MPI_ERR_ARG);
118  else {
119  switch(pairedWith) {
121  case AMPI_FROM_SEND: {
122  MPI_Datatype mappedtype = (*ourADTOOL_AMPI_FPCollection.BW_rawType_fp)(datatype);
123  (*ourADTOOL_AMPI_FPCollection.getAdjointCount_fp)(&count,datatype);
124  rc=MPI_Send(buf,
125  count,
126  mappedtype,
127  src,
128  tag,
129  comm);
130  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(count,mappedtype,comm, buf);
131  break;
132  }
133  default:
134  rc=MPI_Abort(comm, MPI_ERR_TYPE);
135  break;
136  }
137  }
138  return rc;
139 }
140 
141 int TLM_AMPI_Recv(void* buf,
142  int count,
143  MPI_Datatype datatype,
144  int src,
145  int tag,
147  MPI_Comm comm,
148  MPI_Status* status) {
149  int rc;
150  rc = MPI_Recv(buf,count,datatype,src,tag,comm,status);
151  return rc;
152 }
153 
157 int TLS_AMPI_Recv(void* buf, void* shadowbuf,
158  int count,
159  MPI_Datatype datatype, MPI_Datatype shadowdatatype,
160  int src,
161  int tag,
163  MPI_Comm comm,
164  MPI_Status* status) {
165  MPI_Status status1 ;
166  int rc = MPI_Recv(buf, count, datatype, src, tag, comm, &status1) ;
167  assert(rc==MPI_SUCCESS);
168  MPI_Comm shadowcomm = (*ourADTOOL_AMPI_FPCollection.getShadowComm_fp)(comm) ;
169  rc = MPI_Recv(shadowbuf, count, shadowdatatype,
170  (src==MPI_ANY_SOURCE?status1.MPI_SOURCE:src),
171  (tag==MPI_ANY_TAG?status1.MPI_TAG:tag),
172  shadowcomm, status) ;
173  assert(rc==MPI_SUCCESS);
174  return rc ;
175 }
176 
177 
178 int FW_AMPI_Irecv (void* buf,
179  int count,
180  MPI_Datatype datatype,
181  int source,
182  int tag,
184  MPI_Comm comm,
185  AMPI_Request* request) {
186  int rc=0;
187  if (!(
188  pairedWith==AMPI_FROM_SEND
189  ||
190  pairedWith==AMPI_FROM_ISEND_WAIT
191  ||
192  pairedWith==AMPI_FROM_ISEND_WAITALL
193  )) rc=MPI_Abort(comm, MPI_ERR_ARG);
194  else {
195  double* mappedbuf=NULL;
197  mappedbuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(buf,&count);
198  }
199  else {
200  mappedbuf=buf;
201  }
202  rc= MPI_Irecv(mappedbuf,
203  count,
204  datatype,
205  source,
206  tag,
207  comm,
208 #ifdef AMPI_FORTRANCOMPATIBLE
209  request
210 #else
211  &(request->plainRequest)
212 #endif
213  );
214  struct AMPI_Request_S *ampiRequest;
215 #ifdef AMPI_FORTRANCOMPATIBLE
216  struct AMPI_Request_S ampiRequestInst;
217  ampiRequest=&ampiRequestInst;
218  ampiRequest->plainRequest=*request;
219 #else
220  ampiRequest=request;
221 #endif
222  /* fill in the other info */
223  ampiRequest->endPoint=source;
224  ampiRequest->tag=tag;
225  ampiRequest->count=count;
226  ampiRequest->datatype=datatype;
227  ampiRequest->comm=comm;
228  ampiRequest->origin=AMPI_RECV_ORIGIN;
229  ampiRequest->pairedWith=pairedWith;
231  ampiRequest->tracedRequest=ampiRequest->plainRequest;
232 #ifdef AMPI_FORTRANCOMPATIBLE
233  BK_AMPI_put_AMPI_Request(ampiRequest);
234 #endif
237 #ifdef AMPI_REQUESTONTRACE
238  (*ourADTOOL_AMPI_FPCollection.push_request_fp)(ampiRequest->tracedRequest);
239 #endif
240  }
241  }
242  return rc;
243 }
244 
245 int BW_AMPI_Irecv (void* buf,
246  int count,
247  MPI_Datatype datatype,
248  int source,
249  int tag,
251  MPI_Comm comm,
252  AMPI_Request* request) {
253  int rc=0;
254  MPI_Request *plainRequest;
255  struct AMPI_Request_S *ampiRequest;
256 #ifdef AMPI_REQUESTONTRACE
257  MPI_Request tracedRequest;
258 #endif
259 #ifdef AMPI_FORTRANCOMPATIBLE
260  struct AMPI_Request_S ampiRequestInst;
261  ampiRequest=&ampiRequestInst;
262  plainRequest=request;
263 #else
264  plainRequest=&(request->plainRequest) ;
265  ampiRequest=request;
266 #endif
267 #if defined AMPI_FORTRANCOMPATIBLE || defined AMPI_REQUESTONTRACE
268 #ifdef AMPI_REQUESTONTRACE
269  tracedRequest=(*ourADTOOL_AMPI_FPCollection.pop_request_fp)();
270  BK_AMPI_get_AMPI_Request(&tracedRequest,ampiRequest,1);
271 #else
272  BK_AMPI_get_AMPI_Request(plainRequest,ampiRequest,0);
273 #endif
274 #endif
275  assert(ampiRequest->origin==AMPI_RECV_ORIGIN) ;
276  if (!(
277  ampiRequest->pairedWith==AMPI_FROM_SEND
278  ||
279  ampiRequest->pairedWith==AMPI_FROM_ISEND_WAIT
280  ||
281  ampiRequest->pairedWith==AMPI_FROM_ISEND_WAITALL
282  )) rc=MPI_Abort(comm, MPI_ERR_ARG);
283  else {
284  switch(ampiRequest->pairedWith) {
285  case AMPI_FROM_SEND:
286  case AMPI_FROM_ISEND_WAIT: {
287  rc=MPI_Wait(plainRequest,
288  MPI_STATUS_IGNORE);
289  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(ampiRequest->adjointCount,
290  ampiRequest->datatype,
291  ampiRequest->comm,
292  ampiRequest->adjointBuf);
293  break ;
294  }
295  default:
296  rc=MPI_Abort(ampiRequest->comm, MPI_ERR_TYPE);
297  break;
298  }
299  }
300  return rc;
301 }
302 
303 int TLM_AMPI_Irecv (void* buf,
304  int count,
305  MPI_Datatype datatype,
306  int source,
307  int tag,
309  MPI_Comm comm,
310  AMPI_Request* request) {
311  int rc=0;
312  assert(0);
313  return rc;
314 }
315 
319 int TLS_AMPI_Irecv (void* buf, void* shadowbuf,
320  int count,
321  MPI_Datatype datatype, MPI_Datatype shadowdatatype,
322  int source,
323  int tag,
325  MPI_Comm comm,
326  AMPI_Request* request) {
327 
328  int rc=0;
329  struct AMPI_Request_S *ampiRequest;
330 #ifdef AMPI_FORTRANCOMPATIBLE
331  struct AMPI_Request_S ampiRequestInst;
332  ampiRequest=&ampiRequestInst;
333  ampiRequest->plainRequest=*request;
334 #else
335  ampiRequest=request;
336 #endif
337  /* fill in the info needed to Recv the shadowbuf later.
338  * [llh]: I don't need pairedWith nor tracedRequest... */
339  ampiRequest->endPoint=source;
340  ampiRequest->tag=tag;
341  ampiRequest->count=count;
342  ampiRequest->datatype=shadowdatatype;
343  ampiRequest->comm=comm;
344  ampiRequest->origin=AMPI_RECV_ORIGIN;
345  ampiRequest->pairedWith=pairedWith;
346  ampiRequest->adjointBuf=shadowbuf ;
347  ampiRequest->tracedRequest=ampiRequest->plainRequest;
348  rc= MPI_Irecv(buf,
349  count,
350  datatype,
351  source,
352  tag,
353  comm,
354  &(ampiRequest->plainRequest));
355 #ifdef AMPI_FORTRANCOMPATIBLE
356  *request = ampiRequest->plainRequest ;
357  BK_AMPI_put_AMPI_Request(ampiRequest);
358 #endif
359  return rc;
360 }
361 
362 int FW_AMPI_Send (void* buf,
363  int count,
364  MPI_Datatype datatype,
365  int dest,
366  int tag,
368  MPI_Comm comm) {
369  int rc=0;
370  if (!(
371  pairedWith==AMPI_TO_RECV
372  ||
373  pairedWith==AMPI_TO_IRECV_WAIT
374  ||
375  pairedWith==AMPI_TO_IRECV_WAITALL
376  )) rc=MPI_Abort(comm, MPI_ERR_ARG);
377  else {
378  double* mappedbuf=NULL;
379  int dt_idx = derivedTypeIdx(datatype);
380  int is_derived = isDerivedType(dt_idx);
382  mappedbuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(buf,&count);
383  }
384  else if(is_derived) {
385  mappedbuf=(*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(count,datatype,comm);
386  (*ourADTOOL_AMPI_FPCollection.packDType_fp)(buf,mappedbuf,count,dt_idx);
387  }
388  else {
389  mappedbuf=buf;
390  }
391  rc=MPI_Send(mappedbuf,
392  count,
394  /* if derived then need to replace typemap */
395  dest,
396  tag,
397  comm);
398  if (is_derived) (*ourADTOOL_AMPI_FPCollection.releaseAdjointTempBuf_fp)(mappedbuf);
399  if (rc==MPI_SUCCESS && ((*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(datatype)==AMPI_ACTIVE || is_derived)) {
402  count,
403  datatype,
404  dest,
405  tag,
406  pairedWith,
407  comm);
408  }
409  }
410  return rc;
411 }
412 
413 int BW_AMPI_Send (void* buf,
414  int count,
415  MPI_Datatype datatype,
416  int dest,
417  int tag,
419  MPI_Comm comm) {
420  int rc=0;
421  void *idx=NULL;
423  &count,
424  &datatype,
425  &dest,
426  &tag,
427  &pairedWith,
428  &comm,
429  &idx);
430  if (!(
431  pairedWith==AMPI_TO_RECV
432  ||
433  pairedWith==AMPI_TO_IRECV_WAIT
434  ||
435  pairedWith==AMPI_TO_IRECV_WAITALL
436  )) rc=MPI_Abort(comm, MPI_ERR_ARG);
437  else {
438  switch(pairedWith) {
439  case AMPI_TO_IRECV_WAIT:
440  case AMPI_TO_RECV: {
441  MPI_Datatype mappedtype = (*ourADTOOL_AMPI_FPCollection.BW_rawType_fp)(datatype);
443  void *tempBuf = (*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(count,mappedtype,comm) ;
444  rc=MPI_Recv(tempBuf,
445  count,
446  mappedtype,
447  dest,
448  tag,
449  comm,
450  MPI_STATUS_IGNORE) ;
452  mappedtype,
453  comm,
454  buf,
455  tempBuf, idx);
457  break;
458  }
459  default:
460  rc=MPI_Abort(comm, MPI_ERR_TYPE);
461  break;
462  }
463  }
464  return rc;
465 }
466 
467 int TLM_AMPI_Send (void* buf,
468  int count,
469  MPI_Datatype datatype,
470  int dest,
471  int tag,
473  MPI_Comm comm) {
474  int rc=0;
475  MPI_Send(buf,count,datatype,dest,tag,comm);
476  return rc;
477 }
478 
482 int TLS_AMPI_Send (void* buf, void* shadowbuf,
483  int count,
484  MPI_Datatype datatype, MPI_Datatype shadowdatatype,
485  int dest,
486  int tag,
488  MPI_Comm comm) {
489  int rc = MPI_Send(buf, count, datatype, dest, tag, comm) ;
490  assert(rc==MPI_SUCCESS);
491  MPI_Comm shadowcomm = (*ourADTOOL_AMPI_FPCollection.getShadowComm_fp)(comm) ;
492  rc = MPI_Send(shadowbuf, count, shadowdatatype, dest, tag, shadowcomm) ;
493  assert(rc==MPI_SUCCESS);
494  return rc ;
495 }
496 
497 int FW_AMPI_Isend (void* buf,
498  int count,
499  MPI_Datatype datatype,
500  int dest,
501  int tag,
503  MPI_Comm comm,
504  AMPI_Request* request) {
505  int rc=0;
506  if (!(
507  pairedWith==AMPI_TO_RECV
508  ||
509  pairedWith==AMPI_TO_IRECV_WAIT
510  ||
511  pairedWith==AMPI_TO_IRECV_WAITALL
512  )) rc=MPI_Abort(comm, MPI_ERR_ARG);
513  else {
514  double* mappedbuf=NULL;
516  mappedbuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(buf,&count);
517  }
518  else {
519  mappedbuf=buf;
520  }
521  rc= MPI_Isend(mappedbuf,
522  count,
523  datatype,
524  dest,
525  tag,
526  comm,
527 #ifdef AMPI_FORTRANCOMPATIBLE
528  request
529 #else
530  &(request->plainRequest)
531 #endif
532  );
533  struct AMPI_Request_S *ampiRequest;
534 #ifdef AMPI_FORTRANCOMPATIBLE
535  struct AMPI_Request_S ampiRequestInst;
536  ampiRequest=&ampiRequestInst;
537  ampiRequest->plainRequest=*request;
538 #else
539  ampiRequest=request;
540 #endif
541  /* fill in the other info */
542  ampiRequest->endPoint=dest;
543  ampiRequest->tag=tag;
544  ampiRequest->count=count;
545  ampiRequest->datatype=datatype;
546  ampiRequest->comm=comm;
547  ampiRequest->origin=AMPI_SEND_ORIGIN;
548  ampiRequest->pairedWith=pairedWith;
550  ampiRequest->tracedRequest=ampiRequest->plainRequest;
551 #ifdef AMPI_FORTRANCOMPATIBLE
552  BK_AMPI_put_AMPI_Request(ampiRequest);
553 #endif
556 #ifdef AMPI_REQUESTONTRACE
557  (*ourADTOOL_AMPI_FPCollection.push_request_fp)(ampiRequest->tracedRequest);
558 #endif
559  }
560  }
561  return rc;
562 }
563 
564 int BW_AMPI_Isend (void* buf,
565  int count,
566  MPI_Datatype datatype,
567  int dest,
568  int tag,
570  MPI_Comm comm,
571  AMPI_Request* request) {
572  int rc=0;
573  MPI_Request *plainRequest;
574  struct AMPI_Request_S *ampiRequest;
575 #ifdef AMPI_REQUESTONTRACE
576  MPI_Request tracedRequest;
577 #endif
578 #ifdef AMPI_FORTRANCOMPATIBLE
579  struct AMPI_Request_S ampiRequestInst;
580  ampiRequest=&ampiRequestInst;
581  plainRequest=request;
582 #else
583  ampiRequest=request;
584  plainRequest=&(ampiRequest->plainRequest);
585 #endif
586 #if defined AMPI_FORTRANCOMPATIBLE || defined AMPI_REQUESTONTRACE
587 #ifdef AMPI_REQUESTONTRACE
588  tracedRequest=(*ourADTOOL_AMPI_FPCollection.pop_request_fp)();
589  BK_AMPI_get_AMPI_Request(&tracedRequest,ampiRequest,1);
590 #else
591  BK_AMPI_get_AMPI_Request(plainRequest,ampiRequest,0);
592 #endif
593 #endif
594  assert(ampiRequest->origin==AMPI_SEND_ORIGIN) ;
595  if (!(
596  ampiRequest->pairedWith==AMPI_TO_RECV
597  ||
598  ampiRequest->pairedWith==AMPI_TO_IRECV_WAIT
599  ||
600  ampiRequest->pairedWith==AMPI_TO_IRECV_WAITALL
601  )) rc=MPI_Abort(comm, MPI_ERR_ARG);
602  else {
603  switch(ampiRequest->pairedWith) {
604  case AMPI_TO_RECV:
605  case AMPI_TO_IRECV_WAIT: {
606  rc=MPI_Wait(plainRequest,
607  MPI_STATUS_IGNORE);
608  (*ourADTOOL_AMPI_FPCollection.incrementAdjoint_fp)(ampiRequest->adjointCount,
609  ampiRequest->datatype,
610  ampiRequest->comm,
611  ampiRequest->adjointBuf,
612  ampiRequest->adjointTempBuf,
613  ampiRequest->idx);
614  (*ourADTOOL_AMPI_FPCollection.releaseAdjointTempBuf_fp)(ampiRequest->adjointTempBuf);
615  break;
616  }
617  default:
618  rc=MPI_Abort(ampiRequest->comm, MPI_ERR_TYPE);
619  break;
620  }
621  }
622  return rc;
623 }
624 
625 int TLM_AMPI_Isend (void* buf,
626  int count,
627  MPI_Datatype datatype,
628  int dest,
629  int tag,
631  MPI_Comm comm,
632  AMPI_Request* request) {
633  int rc=0;
634  assert(0);
635  return rc;
636 }
637 
641 int TLS_AMPI_Isend (void* buf, void* shadowbuf,
642  int count,
643  MPI_Datatype datatype, MPI_Datatype shadowdatatype,
644  int dest,
645  int tag,
647  MPI_Comm comm,
648  AMPI_Request* request) {
649  int rc = 0 ;
650  MPI_Comm shadowcomm ;
651  struct AMPI_Request_S *ampiRequest;
652 #ifdef AMPI_FORTRANCOMPATIBLE
653  struct AMPI_Request_S ampiRequestInst;
654  ampiRequest=&ampiRequestInst;
655  ampiRequest->plainRequest=*request;
656 #else
657  ampiRequest=request;
658 #endif
659  /* fill in the other info. [llh]:I need none of those... */
660  ampiRequest->endPoint=dest;
661  ampiRequest->tag=tag;
662  ampiRequest->count=count;
663  ampiRequest->datatype=datatype;
664  ampiRequest->comm=comm;
665  ampiRequest->origin=AMPI_SEND_ORIGIN;
666  ampiRequest->pairedWith=pairedWith;
667  (*ourADTOOL_AMPI_FPCollection.mapBufForAdjoint_fp)(ampiRequest,shadowbuf);
668  ampiRequest->tracedRequest=ampiRequest->plainRequest;
669  rc = MPI_Isend(buf, count, datatype, dest, tag, comm,
670  &(ampiRequest->plainRequest)) ;
671  assert(rc==MPI_SUCCESS);
672  shadowcomm = (*ourADTOOL_AMPI_FPCollection.getShadowComm_fp)(comm) ;
673  rc = MPI_Isend(shadowbuf, count, shadowdatatype, dest, tag, shadowcomm,
674  &(ampiRequest->shadowRequest)) ;
675 #ifdef AMPI_FORTRANCOMPATIBLE
676  *request = ampiRequest->plainRequest ;
677  BK_AMPI_put_AMPI_Request(ampiRequest);
678 #endif
679  return rc ;
680 }
681 
683  MPI_Status *status) {
684  int rc=0;
685  MPI_Request *plainRequest;
686  struct AMPI_Request_S *ampiRequest;
687 #ifdef AMPI_FORTRANCOMPATIBLE
688  struct AMPI_Request_S ampiRequestInst;
689  ampiRequest=&ampiRequestInst;
690  plainRequest=request;
691  /*[llh] doubt about the 3rd argument (0?) for the OO traced case: */
692  BK_AMPI_get_AMPI_Request(plainRequest,ampiRequest,0);
693 #else
694  plainRequest=&(request->plainRequest);
695  ampiRequest=request;
696 #endif
697  rc=MPI_Wait(plainRequest,
698  status);
699  if (rc==MPI_SUCCESS && (*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(ampiRequest->datatype)==AMPI_ACTIVE) {
700  (*ourADTOOL_AMPI_FPCollection.writeData_fp)(ampiRequest->buf,&ampiRequest->count);
701  if(ampiRequest->tag==MPI_ANY_TAG) ampiRequest->tag=status->MPI_TAG;
702  if(ampiRequest->endPoint==MPI_ANY_SOURCE) ampiRequest->endPoint=status->MPI_SOURCE;
705  }
706  return rc;
707 }
708 
710  MPI_Status *status) {
711  int rc=0;
712  struct AMPI_Request_S *ampiRequest;
713 #ifdef AMPI_FORTRANCOMPATIBLE
714  struct AMPI_Request_S ampiRequestInst;
715  ampiRequest=&ampiRequestInst;
716 #else
717  ampiRequest=request;
718 #endif
719  /* pop request */
721  switch(ampiRequest->origin) {
722  case AMPI_SEND_ORIGIN: {
724  rc=MPI_Irecv(ampiRequest->adjointTempBuf,
725  ampiRequest->adjointCount,
726  ampiRequest->datatype,
727  ampiRequest->endPoint,
728  ampiRequest->tag,
729  ampiRequest->comm,
730  &(ampiRequest->plainRequest));
731  break;
732  }
733  case AMPI_RECV_ORIGIN: {
735  rc=MPI_Isend((*ourADTOOL_AMPI_FPCollection.rawAdjointData_fp)(ampiRequest->adjointBuf),
736  ampiRequest->adjointCount,
737  ampiRequest->datatype,
738  ampiRequest->endPoint,
739  ampiRequest->tag,
740  ampiRequest->comm,
741  &(ampiRequest->plainRequest));
742  break;
743  }
744  default:
745  rc=MPI_Abort(ampiRequest->comm, MPI_ERR_TYPE);
746  break;
747  }
748 #ifdef AMPI_FORTRANCOMPATIBLE
749  *request=ampiRequest->plainRequest;
750 #endif
751 #if defined AMPI_FORTRANCOMPATIBLE || defined AMPI_REQUESTONTRACE
752  BK_AMPI_put_AMPI_Request(ampiRequest);
753 #endif
754  return rc;
755 }
756 
758  MPI_Status *status) {
759  int rc=0;
760  assert(0);
761  return rc;
762 }
763 
768  MPI_Status *status) {
769  int rc=0;
770  MPI_Status status1 ;
771  struct AMPI_Request_S *ampiRequest;
772 #ifdef AMPI_FORTRANCOMPATIBLE
773  struct AMPI_Request_S ampiRequestInst;
774  ampiRequest=&ampiRequestInst;
775  /*[llh] doubt about the 3rd argument (0?) for the OO traced case: */
776  BK_AMPI_get_AMPI_Request(request,ampiRequest,0);
777 #else
778  ampiRequest=request;
779 #endif
780  rc=MPI_Wait(&(ampiRequest->plainRequest), &status1);
781  assert(rc==MPI_SUCCESS);
782  switch(ampiRequest->origin) {
783  case AMPI_SEND_ORIGIN: {
784  rc=MPI_Wait(&(ampiRequest->shadowRequest), status);
785  break ;
786  }
787  case AMPI_RECV_ORIGIN: {
788  MPI_Comm shadowcomm = (*ourADTOOL_AMPI_FPCollection.getShadowComm_fp)(ampiRequest->comm) ;
789  rc = MPI_Recv(ampiRequest->adjointBuf, ampiRequest->count, ampiRequest->datatype,
790  (ampiRequest->endPoint==MPI_ANY_SOURCE?status1.MPI_SOURCE:ampiRequest->endPoint),
791  (ampiRequest->tag==MPI_ANY_TAG?status1.MPI_TAG:ampiRequest->tag),
792  shadowcomm, status) ;
793  break ;
794  }
795  default:
796  rc=MPI_Abort(ampiRequest->comm, MPI_ERR_ARG);
797  break ;
798  }
799  return rc;
800 }
801 
802 int FW_AMPI_Barrier(MPI_Comm comm){
803  int rc=0;
804  rc=MPI_Barrier(comm);
807  return rc;
808 }
809 
810 int BW_AMPI_Barrier(MPI_Comm comm){
811  int rc;
813  rc=MPI_Barrier(comm);
814  return rc;
815 }
816 
817 int TLM_AMPI_Barrier(MPI_Comm comm){
818  int rc=0;
819  assert(0);
820  return rc;
821 }
822 
823 int TLS_AMPI_Barrier(MPI_Comm comm){
824  int rc=0;
825  rc=MPI_Barrier(comm);
826  assert(rc==MPI_SUCCESS);
827  MPI_Comm shadowcomm = (*ourADTOOL_AMPI_FPCollection.getShadowComm_fp)(comm) ;
828  rc=MPI_Barrier(shadowcomm);
829  return rc;
830 }
831 
832 int FW_AMPI_Gather(void *sendbuf,
833  int sendcnt,
834  MPI_Datatype sendtype,
835  void *recvbuf,
836  int recvcnt,
837  MPI_Datatype recvtype,
838  int root,
839  MPI_Comm comm) {
840  void *rawSendBuf=sendbuf, *rawRecvBuf=recvbuf;
841  int rc=MPI_SUCCESS;
842  int isInPlace=(sendbuf==MPI_IN_PLACE);
843  int myRank, myCommSize;
844  MPI_Comm_rank(comm, &myRank);
845  MPI_Comm_size(comm, &myCommSize);
846  if (!isInPlace && (*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(sendtype)!=(*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(recvtype)) {
847  rc=MPI_Abort(comm, MPI_ERR_ARG);
848  }
849  else {
850  if (!isInPlace && (*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(sendtype)==AMPI_ACTIVE) rawSendBuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(sendbuf,&sendcnt);
851  if (myRank==root) {
852  if ((*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(recvtype)==AMPI_ACTIVE) rawRecvBuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(recvbuf,&recvcnt);
853  }
854  rc=MPI_Gather(rawSendBuf,
855  sendcnt,
856  sendtype,
857  rawRecvBuf,
858  recvcnt,
859  recvtype,
860  root,
861  comm);
862  if (rc==MPI_SUCCESS && (*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(recvtype)==AMPI_ACTIVE) {
863  if (myRank==root) (*ourADTOOL_AMPI_FPCollection.writeData_fp)(recvbuf,&recvcnt);
865  (*ourADTOOL_AMPI_FPCollection.pushGSinfo_fp)(((myRank==root)?myCommSize:0),
866  recvbuf,
867  recvcnt,
868  recvtype,
869  sendbuf,
870  sendcnt,
871  sendtype,
872  root,
873  comm);
874  }
875  }
876  return rc;
877 }
878 
879 int BW_AMPI_Gather(void *sendbuf,
880  int sendcnt,
881  MPI_Datatype sendtype,
882  void *recvbuf,
883  int recvcnt,
884  MPI_Datatype recvtype,
885  int root,
886  MPI_Comm comm) {
887  void *idx=NULL;
888  int rc=MPI_SUCCESS;
889  int commSizeForRootOrNull, rTypeSize,i;
891  (*ourADTOOL_AMPI_FPCollection.popGSinfo_fp)(commSizeForRootOrNull,
892  &recvbuf,
893  &recvcnt,
894  &recvtype,
895  &sendbuf,
896  &sendcnt,
897  &sendtype,
898  &root,
899  &comm);
900  (*ourADTOOL_AMPI_FPCollection.getAdjointCount_fp)(&sendcnt,sendtype);
901  void *tempBuf = 0;
902  if (sendcnt>0) tempBuf = (*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(sendcnt,sendtype,comm) ;
903  else {
904  if (commSizeForRootOrNull)
905  tempBuf=MPI_IN_PLACE;
906  else
907  tempBuf=0;
908  }
909  rc=MPI_Scatter(recvbuf,
910  recvcnt,
911  recvtype,
912  tempBuf,
913  sendcnt,
914  sendtype,
915  root,
916  comm);
918  sendtype,
919  comm,
920  sendbuf,
921  tempBuf, idx);
922  if (commSizeForRootOrNull) {
923  MPI_Type_size(recvtype,&rTypeSize);
924  for (i=0;i<commSizeForRootOrNull;++i) {
925  if (! (i==root && sendcnt==0)) { /* don't nullify the segment if "in place" on root */
926  void *recvbufSegment=(char*)recvbuf+(i*recvcnt*rTypeSize);
927  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(recvcnt,recvtype,comm,
928  recvbufSegment);
929  }
930  }
931  }
932  if (tempBuf!=MPI_IN_PLACE && tempBuf!=0) (*ourADTOOL_AMPI_FPCollection.releaseAdjointTempBuf_fp)(tempBuf);
933  return rc;
934 }
935 
936 int TLM_AMPI_Gather(void *sendbuf,
937  int sendcnt,
938  MPI_Datatype sendtype,
939  void *recvbuf,
940  int recvcnt,
941  MPI_Datatype recvtype,
942  int root,
943  MPI_Comm comm) {
944  int rc;
945  rc = MPI_Gather(sendbuf,sendcnt,sendtype,recvbuf,recvcnt,recvtype,root,comm);
946  return rc;
947 }
948 
949 int FW_AMPI_Scatter(void *sendbuf,
950  int sendcnt,
951  MPI_Datatype sendtype,
952  void *recvbuf,
953  int recvcnt,
954  MPI_Datatype recvtype,
955  int root,
956  MPI_Comm comm) {
957  int rc=MPI_SUCCESS;
958  int myRank, myCommSize;
959  int isInPlace=(recvbuf==MPI_IN_PLACE);
960  void *rawSendBuf=sendbuf, *rawRecvBuf=recvbuf;
961  MPI_Comm_rank(comm, &myRank);
962  MPI_Comm_size(comm, &myCommSize);
963  if (!isInPlace && (*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(sendtype)!=(*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(recvtype)) {
964  rc=MPI_Abort(comm, MPI_ERR_ARG);
965  }
966  else {
967  if (myRank==root) {
968  if ((*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(sendtype)==AMPI_ACTIVE) rawSendBuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(sendbuf,&sendcnt);
969  }
970  if (!isInPlace && (*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(recvtype)==AMPI_ACTIVE) rawRecvBuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(recvbuf,&recvcnt);
971  rc=MPI_Scatter(rawSendBuf,
972  sendcnt,
973  sendtype,
974  rawRecvBuf,
975  recvcnt,
976  recvtype,
977  root,
978  comm);
979  if (rc==MPI_SUCCESS && (*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(sendtype)==AMPI_ACTIVE) {
980  (*ourADTOOL_AMPI_FPCollection.writeData_fp)(recvbuf,&recvcnt);
982  (*ourADTOOL_AMPI_FPCollection.pushGSinfo_fp)(((myRank==root)?myCommSize:0),
983  sendbuf,
984  sendcnt,
985  sendtype,
986  recvbuf,
987  recvcnt,
988  recvtype,
989  root,
990  comm);
991  }
992  }
993  return rc;
994 }
995 
996 int BW_AMPI_Scatter(void *sendbuf,
997  int sendcnt,
998  MPI_Datatype sendtype,
999  void *recvbuf,
1000  int recvcnt,
1001  MPI_Datatype recvtype,
1002  int root,
1003  MPI_Comm comm) {
1004  int rc=MPI_SUCCESS;
1005  void *idx=NULL;
1006  int commSizeForRootOrNull,i,rTypeSize;
1008  (*ourADTOOL_AMPI_FPCollection.popGSinfo_fp)(commSizeForRootOrNull,
1009  &sendbuf,
1010  &sendcnt,
1011  &sendtype,
1012  &recvbuf,
1013  &recvcnt,
1014  &recvtype,
1015  &root,
1016  &comm);
1017  void *tempBuf = NULL;
1018  if (commSizeForRootOrNull>0) tempBuf=(*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(sendcnt*commSizeForRootOrNull,sendtype,comm);
1019  rc=MPI_Gather(recvbuf,
1020  recvcnt,
1021  recvtype,
1022  tempBuf,
1023  sendcnt,
1024  sendtype,
1025  root,
1026  comm);
1027  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(recvcnt,recvtype,comm, recvbuf);
1028  if (commSizeForRootOrNull>0) MPI_Type_size(recvtype,&rTypeSize);
1029  for (i=0;i<commSizeForRootOrNull;++i) {
1030  if (! (i==root && recvcnt==0)) { /* don't increment the segment if "in place" on root */
1031  void *tempBufSeqment=(char*)tempBuf+i*sendcnt*rTypeSize;
1032  void *sendBufSegment=(char*)sendbuf+i*sendcnt*rTypeSize;
1034  sendtype,
1035  comm,
1036  sendBufSegment,
1037  tempBufSeqment, idx);
1038  }
1039  }
1040  if (commSizeForRootOrNull>0 && tempBuf)(*ourADTOOL_AMPI_FPCollection.releaseAdjointTempBuf_fp)(tempBuf);
1041  return rc;
1042 }
1043 
1044 int TLM_AMPI_Scatter(void *sendbuf,
1045  int sendcnt,
1046  MPI_Datatype sendtype,
1047  void *recvbuf,
1048  int recvcnt,
1049  MPI_Datatype recvtype,
1050  int root,
1051  MPI_Comm comm){
1052  int rc;
1053  rc = MPI_Scatter(sendbuf,sendcnt,sendtype,recvbuf,recvcnt,recvtype,root,comm);
1054  return rc;
1055 }
1056 
1057 int FW_AMPI_Allgather(void *sendbuf,
1058  int sendcount,
1059  MPI_Datatype sendtype,
1060  void *recvbuf,
1061  int recvcount,
1062  MPI_Datatype recvtype,
1063  MPI_Comm comm) {
1064  void *rawSendBuf=NULL, *rawRecvBuf=NULL;
1065  int rc=MPI_SUCCESS;
1066  int myRank, myCommSize;
1067  MPI_Comm_rank(comm, &myRank);
1068  MPI_Comm_size(comm, &myCommSize);
1070  rc=MPI_Abort(comm, MPI_ERR_ARG);
1071  }
1072  else {
1073  if ((*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(sendtype)==AMPI_ACTIVE) rawSendBuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(sendbuf,&sendcount);
1074  else rawSendBuf=sendbuf;
1075  if ((*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(recvtype)==AMPI_ACTIVE) rawRecvBuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(recvbuf,&recvcount);
1076  else rawRecvBuf=recvbuf;
1077  rc=MPI_Allgather(rawSendBuf,
1078  sendcount,
1079  sendtype,
1080  rawRecvBuf,
1081  recvcount,
1082  recvtype,
1083  comm);
1084  if (rc==MPI_SUCCESS && (*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(recvtype)==AMPI_ACTIVE) {
1085  (*ourADTOOL_AMPI_FPCollection.writeData_fp)(recvbuf,&recvcount);
1088  recvbuf,
1089  recvcount,
1090  recvtype,
1091  sendbuf,
1092  sendcount,
1093  sendtype,
1094  0,
1095  comm);
1096  }
1097  }
1098  return rc;
1099 }
1100 
1101 int BW_AMPI_Allgather(void *sendbuf,
1102  int sendcount,
1103  MPI_Datatype sendtype,
1104  void *recvbuf,
1105  int recvcount,
1106  MPI_Datatype recvtype,
1107  MPI_Comm comm) {
1108  void *idx=NULL;
1109  int rc=MPI_SUCCESS, rootPlaceholder;
1110  int commSizeForRootOrNull, rTypeSize, *recvcounts,i;
1112  (*ourADTOOL_AMPI_FPCollection.popGSinfo_fp)(commSizeForRootOrNull,
1113  &recvbuf,
1114  &recvcount,
1115  &recvtype,
1116  &sendbuf,
1117  &sendcount,
1118  &sendtype,
1119  &rootPlaceholder,
1120  &comm);
1121  recvcounts=(int*)malloc(sizeof(int)*commSizeForRootOrNull);
1122  for (i=0;i<commSizeForRootOrNull;++i) recvcounts[i]=sendcount;
1123  void *tempBuf = (*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(sendcount,sendtype,comm);
1127  rc=MPI_Reduce_scatter(recvbuf,
1128  tempBuf,
1129  recvcounts,
1130  MPI_DOUBLE, /* <<< here is the offending bit */
1131  MPI_SUM,
1132  comm);
1134  sendtype,
1135  comm,
1136  sendbuf,
1137  tempBuf, idx);
1138  if (commSizeForRootOrNull) {
1139  MPI_Type_size(recvtype,&rTypeSize);
1140  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(recvcount*commSizeForRootOrNull,
1141  recvtype,comm,recvbuf);
1142  }
1144  if (recvcounts) free((void*)recvcounts);
1145  return rc;
1146 }
1147 
1148 int TLM_AMPI_Allgather(void *sendbuf,
1149  int sendcount,
1150  MPI_Datatype sendtype,
1151  void *recvbuf,
1152  int recvcount,
1153  MPI_Datatype recvtype,
1154  MPI_Comm comm) {
1155  int rc;
1156  rc = MPI_Allgather(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,comm);
1157  return rc;
1158 }
1159 
1160 int FW_AMPI_Gatherv(void *sendbuf,
1161  int sendcnt,
1162  MPI_Datatype sendtype,
1163  void *recvbuf,
1164  int *recvcnts,
1165  int *displs,
1166  MPI_Datatype recvtype,
1167  int root,
1168  MPI_Comm comm) {
1169  void *rawSendBuf=sendbuf, *rawRecvBuf=recvbuf;
1170  int rc=MPI_SUCCESS;
1171  int isInPlace=(sendbuf==MPI_IN_PLACE);
1172  int myRank, myCommSize;
1173  MPI_Comm_rank(comm, &myRank);
1174  MPI_Comm_size(comm, &myCommSize);
1175  if (!isInPlace && (*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(sendtype)!=(*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(recvtype)) {
1176  rc=MPI_Abort(comm, MPI_ERR_ARG);
1177  }
1178  else {
1179  if (!isInPlace && (*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(sendtype)==AMPI_ACTIVE) rawSendBuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(sendbuf,&sendcnt);
1180  if (myRank==root) {
1181  if ((*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(recvtype)==AMPI_ACTIVE) rawRecvBuf=(*ourADTOOL_AMPI_FPCollection.rawDataV_fp)(recvbuf, myCommSize, recvcnts, displs);
1182  }
1183  rc=MPI_Gatherv(rawSendBuf,
1184  sendcnt,
1185  sendtype,
1186  rawRecvBuf,
1187  recvcnts,
1188  displs,
1189  recvtype,
1190  root,
1191  comm);
1192  if (rc==MPI_SUCCESS && (*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(recvtype)==AMPI_ACTIVE) {
1193  if (myRank==root) (*ourADTOOL_AMPI_FPCollection.writeDataV_fp)(recvbuf,recvcnts, displs);
1194  (*ourADTOOL_AMPI_FPCollection.push_CallCodeReserve_fp)(AMPI_GATHERV,((myRank==root)?myCommSize:0)*2);
1195  (*ourADTOOL_AMPI_FPCollection.pushGSVinfo_fp)(((myRank==root)?myCommSize:0),
1196  recvbuf,
1197  recvcnts,
1198  displs,
1199  recvtype,
1200  sendbuf,
1201  sendcnt,
1202  sendtype,
1203  root,
1204  comm);
1205  }
1206  }
1207  return rc;
1208 }
1209 
1210 int BW_AMPI_Gatherv(void *sendbuf,
1211  int sendcnt,
1212  MPI_Datatype sendtype,
1213  void *recvbuf,
1214  int *recvcnts,
1215  int *displs,
1216  MPI_Datatype recvtype,
1217  int root,
1218  MPI_Comm comm) {
1219  void *idx=NULL;
1220  int i;
1221  int rc=MPI_SUCCESS;
1222  int myRank, commSizeForRootOrNull, rTypeSize;
1223  int *tRecvCnts=recvcnts, *tDispls=displs;
1224  char tRecvCntsFlag=0, tDisplsFlag=0;
1226  if (tRecvCnts==NULL) {
1227  tRecvCnts=(int*)malloc(sizeof(int)*commSizeForRootOrNull);
1228  tRecvCntsFlag=1;
1229  }
1230  if (tDispls==NULL) {
1231  tDispls=(int*)malloc(sizeof(int)*commSizeForRootOrNull);
1232  tDisplsFlag=1;
1233  }
1234  (*ourADTOOL_AMPI_FPCollection.popGSVinfo_fp)(commSizeForRootOrNull,
1235  &recvbuf,
1236  tRecvCnts,
1237  tDispls,
1238  &recvtype,
1239  &sendbuf,
1240  &sendcnt,
1241  &sendtype,
1242  &root,
1243  &comm);
1244  MPI_Comm_rank(comm, &myRank);
1245  (*ourADTOOL_AMPI_FPCollection.getAdjointCount_fp)(&sendcnt,sendtype);
1246  void *tempBuf = 0;
1247  if (sendcnt>0) tempBuf = (*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(sendcnt,sendtype,comm) ;
1248  else {
1249  if (commSizeForRootOrNull)
1250  tempBuf=MPI_IN_PLACE;
1251  else
1252  tempBuf=0;
1253  }
1254  rc=MPI_Scatterv(recvbuf,
1255  tRecvCnts,
1256  tDispls,
1257  recvtype,
1258  tempBuf,
1259  sendcnt,
1260  sendtype,
1261  root,
1262  comm);
1264  sendtype,
1265  comm,
1266  sendbuf,
1267  tempBuf, idx);
1268  if (commSizeForRootOrNull) {
1269  MPI_Type_size(recvtype,&rTypeSize);
1270  for (i=0;i<commSizeForRootOrNull;++i) {
1271  if (! (i==root && sendcnt==0)) { /* don't nullify the segment if "in place" on root */
1272  void* recvbufSegment=(char*)recvbuf+(rTypeSize*tDispls[i]); /* <---------- very iffy! */
1273  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(tRecvCnts[i],recvtype,comm,
1274  recvbufSegment);
1275  }
1276  }
1277  }
1278  if (tempBuf!=MPI_IN_PLACE && tempBuf!=0) (*ourADTOOL_AMPI_FPCollection.releaseAdjointTempBuf_fp)(tempBuf);
1279  if (tRecvCntsFlag) free((void*)(tRecvCnts));
1280  if (tDisplsFlag) free((void*)(tDispls));
1281  return rc;
1282 }
1283 
1284 int TLM_AMPI_Gatherv(void *sendbuf,
1285  int sendcnt,
1286  MPI_Datatype sendtype,
1287  void *recvbuf,
1288  int *recvcnts,
1289  int *displs,
1290  MPI_Datatype recvtype,
1291  int root,
1292  MPI_Comm comm) {
1293  int rc;
1294  rc = MPI_Gatherv(sendbuf,sendcnt,sendtype,recvbuf,recvcnts,displs,recvtype,root,comm);
1295  return rc;
1296 }
1297 
1298 int FW_AMPI_Scatterv(void *sendbuf,
1299  int *sendcnts,
1300  int *displs,
1301  MPI_Datatype sendtype,
1302  void *recvbuf,
1303  int recvcnt,
1304  MPI_Datatype recvtype,
1305  int root,
1306  MPI_Comm comm) {
1307  int rc=MPI_SUCCESS;
1308  int myRank, myCommSize;
1309  int isInPlace=(recvbuf==MPI_IN_PLACE);
1310  void *rawSendBuf=sendbuf, *rawRecvBuf=recvbuf;
1311  MPI_Comm_rank(comm, &myRank);
1312  MPI_Comm_size(comm, &myCommSize);
1313  if (!isInPlace && (*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(sendtype)!=(*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(recvtype)) {
1314  rc=MPI_Abort(comm, MPI_ERR_ARG);
1315  }
1316  else {
1317  if (myRank==root) {
1318  if ((*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(sendtype)==AMPI_ACTIVE) rawSendBuf=(*ourADTOOL_AMPI_FPCollection.rawDataV_fp)(sendbuf,myCommSize,sendcnts,displs);
1319  }
1320  if (!isInPlace && (*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(recvtype)==AMPI_ACTIVE) rawRecvBuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(recvbuf,&recvcnt);
1321  rc=MPI_Scatterv(rawSendBuf,
1322  sendcnts,
1323  displs,
1324  sendtype,
1325  rawRecvBuf,
1326  recvcnt,
1327  recvtype,
1328  root,
1329  comm);
1330  if (rc==MPI_SUCCESS && (*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(sendtype)==AMPI_ACTIVE) {
1331  (*ourADTOOL_AMPI_FPCollection.writeData_fp)(recvbuf,&recvcnt);
1332  (*ourADTOOL_AMPI_FPCollection.push_CallCodeReserve_fp)(AMPI_SCATTERV,((myRank==root)?myCommSize:0)*2);
1333  (*ourADTOOL_AMPI_FPCollection.pushGSVinfo_fp)(((myRank==root)?myCommSize:0),
1334  sendbuf,
1335  sendcnts,
1336  displs,
1337  sendtype,
1338  recvbuf,
1339  recvcnt,
1340  recvtype,
1341  root,
1342  comm);
1343  }
1344  }
1345  return rc;
1346 }
1347 
1348 int BW_AMPI_Scatterv(void *sendbuf,
1349  int *sendcnts,
1350  int *displs,
1351  MPI_Datatype sendtype,
1352  void *recvbuf,
1353  int recvcnt,
1354  MPI_Datatype recvtype,
1355  int root,
1356  MPI_Comm comm) {
1357  int rc=MPI_SUCCESS;
1358  void *idx=NULL;
1359  int sendSize=0,i, typeSize;
1360  int myRank, commSizeForRootOrNull, *tempDispls;
1361  int *tSendCnts=sendcnts, *tDispls=displs;
1362  char tSendCntsFlag=0, tDisplsFlag=0;
1364  if (tSendCnts==NULL && commSizeForRootOrNull>0) {
1365  tSendCnts=(int*)malloc(sizeof(int)*commSizeForRootOrNull);
1366  tSendCntsFlag=1;
1367  }
1368  if (tDispls==NULL && commSizeForRootOrNull>0) {
1369  tDispls=(int*)malloc(sizeof(int)*commSizeForRootOrNull);
1370  tDisplsFlag=1;
1371  }
1372  (*ourADTOOL_AMPI_FPCollection.popGSVinfo_fp)(commSizeForRootOrNull,
1373  &sendbuf,
1374  tSendCnts,
1375  tDispls,
1376  &sendtype,
1377  &recvbuf,
1378  &recvcnt,
1379  &recvtype,
1380  &root,
1381  &comm);
1382  MPI_Comm_rank(comm, &myRank);
1383  tempDispls=(int*)malloc(sizeof(int)*commSizeForRootOrNull);
1384  for (i=0;i<commSizeForRootOrNull;++i) {
1385  tempDispls[i]=sendSize;
1386  sendSize+=tSendCnts[i];
1387  }
1388  void *tempBuf = NULL;
1389  if (commSizeForRootOrNull>0) tempBuf=(*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(sendSize,sendtype,comm);
1390  rc=MPI_Gatherv(recvbuf,
1391  recvcnt,
1392  recvtype,
1393  tempBuf,
1394  tSendCnts,
1395  tempDispls,
1396  sendtype,
1397  root,
1398  comm);
1399  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(recvcnt,recvtype,comm,recvbuf);
1400  if (commSizeForRootOrNull>0) {
1401  MPI_Type_size(sendtype,&typeSize);
1402  for (i=0;i<commSizeForRootOrNull;++i) {
1403  if (! (i==root && recvcnt==0)) { /* don't increment the segment if "in place" on root */
1404  void* buf=(char*)sendbuf+(typeSize*tDispls[i]); /* <---------- very iffy! */
1405  void* sourceBuf=(char*)tempBuf+(typeSize*tempDispls[i]);
1407  sendtype,
1408  comm,
1409  buf,
1410  sourceBuf, idx);
1411  }
1412  }
1414  }
1415  if (tempDispls) free((void*)tempDispls);
1416  if (tSendCntsFlag) free((void*)(tSendCnts));
1417  if (tDisplsFlag) free((void*)(tDispls));
1418  return rc;
1419 }
1420 
1421 int TLM_AMPI_Scatterv(void *sendbuf,
1422  int *sendcnts,
1423  int *displs,
1424  MPI_Datatype sendtype,
1425  void *recvbuf,
1426  int recvcnt,
1427  MPI_Datatype recvtype,
1428  int root, MPI_Comm comm){
1429  int rc;
1430  rc = MPI_Scatterv(sendbuf,sendcnts,displs,sendtype,recvbuf,recvcnt,recvtype,root,comm);
1431  return rc;
1432 }
1433 
1434 int FW_AMPI_Allgatherv(void *sendbuf,
1435  int sendcnt,
1436  MPI_Datatype sendtype,
1437  void *recvbuf,
1438  int *recvcnts,
1439  int *displs,
1440  MPI_Datatype recvtype,
1441  MPI_Comm comm) {
1442  void *rawSendBuf=NULL, *rawRecvBuf=NULL;
1443  int rc=MPI_SUCCESS;
1444  int myRank, myCommSize;
1445  MPI_Comm_rank(comm, &myRank);
1446  MPI_Comm_size(comm, &myCommSize);
1448  rc=MPI_Abort(comm, MPI_ERR_ARG);
1449  }
1450  else {
1451  if ((*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(sendtype)==AMPI_ACTIVE) rawSendBuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(sendbuf,&sendcnt);
1452  else rawSendBuf=sendbuf;
1453  if ((*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(recvtype)==AMPI_ACTIVE) rawRecvBuf=(*ourADTOOL_AMPI_FPCollection.rawDataV_fp)(recvbuf, myCommSize, recvcnts, displs);
1454  else rawRecvBuf=recvbuf;
1455  rc=MPI_Allgatherv(rawSendBuf,
1456  sendcnt,
1457  sendtype,
1458  rawRecvBuf,
1459  recvcnts,
1460  displs,
1461  recvtype,
1462  comm);
1463  if (rc==MPI_SUCCESS && (*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(recvtype)==AMPI_ACTIVE) {
1464  (*ourADTOOL_AMPI_FPCollection.writeDataV_fp)(recvbuf,recvcnts, displs);
1467  recvbuf,
1468  recvcnts,
1469  displs,
1470  recvtype,
1471  sendbuf,
1472  sendcnt,
1473  sendtype,
1474  0,
1475  comm);
1476  }
1477  }
1478  return rc;
1479 }
1480 
1481 int BW_AMPI_Allgatherv(void *sendbuf,
1482  int sendcnt,
1483  MPI_Datatype sendtype,
1484  void *recvbuf,
1485  int *recvcnts,
1486  int *displs,
1487  MPI_Datatype recvtype,
1488  MPI_Comm comm) {
1489  void *idx=NULL;
1490  int i;
1491  int rc=MPI_SUCCESS;
1492  int myRank, commSizeForRootOrNull, rTypeSize,rootPlaceholder;
1493  int *tRecvCnts=recvcnts, *tDispls=displs;
1494  char tRecvCntsFlag=0, tDisplsFlag=0;
1496  if (tRecvCnts==NULL) {
1497  tRecvCnts=(int*)malloc(sizeof(int)*commSizeForRootOrNull);
1498  tRecvCntsFlag=1;
1499  }
1500  if (tDispls==NULL) {
1501  tDispls=(int*)malloc(sizeof(int)*commSizeForRootOrNull);
1502  tDisplsFlag=1;
1503  }
1504  (*ourADTOOL_AMPI_FPCollection.popGSVinfo_fp)(commSizeForRootOrNull,
1505  &recvbuf,
1506  tRecvCnts,
1507  tDispls,
1508  &recvtype,
1509  &sendbuf,
1510  &sendcnt,
1511  &sendtype,
1512  &rootPlaceholder,
1513  &comm);
1514  MPI_Comm_rank(comm, &myRank);
1515  void *tempBuf = (*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(tRecvCnts[myRank],sendtype,comm) ;
1519  rc=MPI_Reduce_scatter(recvbuf,
1520  tempBuf,
1521  tRecvCnts,
1522  MPI_DOUBLE, /* <<< here is the offending bit */
1523  MPI_SUM,
1524  comm);
1526  sendtype,
1527  comm,
1528  sendbuf,
1529  tempBuf, idx);
1530  MPI_Type_size(recvtype,&rTypeSize);
1531  for (i=0;i<commSizeForRootOrNull;++i) {
1532  void* buf=(char*)recvbuf+(rTypeSize*tDispls[i]); /* <---------- very iffy! */
1533  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(tRecvCnts[i],recvtype,comm,buf);
1534  }
1536  if (tRecvCntsFlag) free((void*)(tRecvCnts));
1537  if (tDisplsFlag) free((void*)(tDispls));
1538  return rc;
1539 }
1540 
1541 int TLM_AMPI_Allgatherv(void *sendbuf,
1542  int sendcnt,
1543  MPI_Datatype sendtype,
1544  void *recvbuf,
1545  int *recvcnts,
1546  int *displs,
1547  MPI_Datatype recvtype,
1548  MPI_Comm comm) {
1549  int rc;
1550  rc = MPI_Allgatherv(sendbuf,sendcnt,sendtype,recvbuf,recvcnts,displs,recvtype,comm);
1551  return rc;
1552 }
1553 
1554 int FW_AMPI_Bcast (void* buf,
1555  int count,
1556  MPI_Datatype datatype,
1557  int root,
1558  MPI_Comm comm) {
1559  int rc=0;
1560  double* mappedbuf=NULL;
1561  int dt_idx = derivedTypeIdx(datatype);
1562  int is_derived = isDerivedType(dt_idx);
1564  mappedbuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(buf,&count);
1565  }
1566  else if(is_derived) {
1567  mappedbuf=(*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(count,datatype,comm);
1568  (*ourADTOOL_AMPI_FPCollection.packDType_fp)(buf,mappedbuf,count,dt_idx);
1569  }
1570  else {
1571  mappedbuf=buf;
1572  }
1573  rc=MPI_Bcast(mappedbuf,
1574  count,
1576  root,
1577  comm);
1578  if (rc==MPI_SUCCESS && ((*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(datatype)==AMPI_ACTIVE || is_derived )) {
1579  if (is_derived) {
1580  (*ourADTOOL_AMPI_FPCollection.unpackDType_fp)(mappedbuf,buf,count,dt_idx);
1582  }
1585  count,
1586  datatype,
1587  root,
1588  comm);
1589  }
1590  return rc;
1591 }
1592 
1593 int BW_AMPI_Bcast (void* buf,
1594  int count,
1595  MPI_Datatype datatype,
1596  int root,
1597  MPI_Comm comm) {
1598  int rc,rank;
1599  void *idx=NULL;
1601  &count,
1602  &datatype,
1603  &root,
1604  &comm,
1605  &idx);
1606  MPI_Comm_rank(comm,&rank);
1607  MPI_Datatype mappedtype = (*ourADTOOL_AMPI_FPCollection.BW_rawType_fp)(datatype);
1608  (*ourADTOOL_AMPI_FPCollection.getAdjointCount_fp)(&count,datatype);
1609  void *tempBuf = (*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(count,datatype,comm);
1610  rc=MPI_Reduce(buf,
1611  tempBuf,
1612  count,
1613  mappedtype,
1614  MPI_SUM,
1615  root,
1616  comm);
1617  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(count, mappedtype, comm, buf);
1618  if (rank==root) {
1619  (*ourADTOOL_AMPI_FPCollection.incrementAdjoint_fp)(count, mappedtype, comm,
1620  buf, tempBuf, idx);
1621  }
1623  return rc;
1624 }
1625 
1627  int count,
1628  MPI_Datatype datatype,
1629  int root,
1630  MPI_Comm comm){
1631  int rc=MPI_Bcast(buf,
1632  count,
1633  datatype,
1634  root,
1635  comm);
1636  return rc;
1637 }
1638 
1644 int PEDESTRIAN_AMPI_Reduce(void* sbuf, void* sbufd, void* sbufb,
1645  void* rbuf, void* rbufd, void* rbufb,
1646  int count,
1647  MPI_Datatype datatype, MPI_Datatype datatyped, MPI_Datatype datatypeb,
1648  MPI_Op op, TLM_userFunctionF* uopd, ADJ_userFunctionF* uopb,
1649  int split_mode,
1650  int root,
1651  MPI_Comm comm) {
1652  if (count == 0) return MPI_SUCCESS;
1653  int rc, rank ;
1654  MPI_Comm_rank(comm,&rank) ;
1655  void *idx=NULL; /* only for compatibility in incrementAdjoint_fp(..., idx) */
1656  int reduceTgt = (sbufd!=NULL) ;
1657  int reduceAdj = (sbufb!=NULL) ;
1658  MPI_Comm shadowcomm = comm ;
1659  if (reduceTgt)
1660  shadowcomm = (*ourADTOOL_AMPI_FPCollection.getShadowComm_fp)(comm) ;
1661 
1662  if (uopd || uopb || op!=MPI_SUM) {
1663 
1664  int uop_idx = userDefinedOpIdx(op);
1665  userDefinedOpData* uopdata = (isUserDefinedOp(uop_idx)?getUOpData():NULL) ;
1666  int is_commutative = (uopdata?uopdata->commutes[uop_idx]:1) ;
1667  if (uopdata) {
1668  if (reduceTgt) assert(uopd) ;
1669  if (reduceAdj) assert(uopb) ;
1670  }
1671 
1672  void *exch_buf=NULL ;
1673  int switched = 0 ;
1674 
1675  void *obuf=NULL, *obufd=NULL ;
1676 
1677  int dt_idx = derivedTypeIdx(datatype);
1678  MPI_Aint lb = (isDerivedType(dt_idx)?getDTypeData()->lbs[dt_idx]:0) ;
1679  int dt_idxd = 0 ;
1680  MPI_Aint lbd = 0 ;
1681  obuf =
1682  (*ourADTOOL_AMPI_FPCollection.allocateTempActiveBuf_fp)(count,datatype,comm);
1683  obuf = (void*)((char*)obuf - lb);
1684  if (reduceTgt) {
1685  dt_idxd = derivedTypeIdx(datatyped);
1686  lbd = (isDerivedType(dt_idxd)?getDTypeData()->lbs[dt_idxd]:0) ;
1687  obufd =
1688  (*ourADTOOL_AMPI_FPCollection.allocateTempActiveBuf_fp)(count,datatyped,shadowcomm);
1689  obufd = (void*)((char*)obufd - lbd);
1690  }
1691 
1692  if (sbuf==MPI_IN_PLACE) {
1693  if (rank != root) {
1694  exch_buf = rbuf ;
1695  rbuf = (*ourADTOOL_AMPI_FPCollection.allocateTempActiveBuf_fp)(count,datatype,comm);
1696  rbuf = (void*)((char*)rbuf - lb);
1697  (*ourADTOOL_AMPI_FPCollection.copyActiveBuf_fp)(exch_buf, rbuf, count, datatype, comm);
1698  if (reduceTgt) {
1699  exch_buf = rbufd ;
1700  rbufd = (*ourADTOOL_AMPI_FPCollection.allocateTempActiveBuf_fp)(count,datatyped,shadowcomm);
1701  rbufd = (void*)((char*)rbufd - lbd);
1702  (*ourADTOOL_AMPI_FPCollection.copyActiveBuf_fp)(exch_buf, rbufd, count, datatyped, shadowcomm);
1703  }
1704  }
1705  } else { /* Standard case: sbuf != MPI_IN_PLACE */
1706  if (rank != root) {
1707  rbuf = (*ourADTOOL_AMPI_FPCollection.allocateTempActiveBuf_fp)(count,datatype,comm);
1708  rbuf = (void*)((char*)rbuf - lb);
1709  if (reduceTgt) {
1710  rbufd = (*ourADTOOL_AMPI_FPCollection.allocateTempActiveBuf_fp)(count,datatyped,shadowcomm);
1711  rbufd = (void*)((char*)rbufd - lbd);
1712  }
1713  }
1714  (*ourADTOOL_AMPI_FPCollection.copyActiveBuf_fp)(sbuf, rbuf, count, datatype, comm);
1715  if (reduceTgt)
1716  (*ourADTOOL_AMPI_FPCollection.copyActiveBuf_fp)(sbufd, rbufd, count, datatyped, shadowcomm);
1717  }
1718 
1719  MPI_Status status;
1720  int comm_size ;
1721  MPI_Comm_size(comm,&comm_size);
1722  int other, action ;
1723  int maskup = 0xffffffff ;
1724  int mask = 0x1;
1725 
1726  if (!reduceAdj || split_mode==0) {
1727  while (mask < comm_size) {
1728  if ((rank&mask) == 0) { /* Typical action is RECV */
1729  other = (rank==root?root&maskup:rank) | mask ;
1730  if (other >= comm_size)
1731  action = 0/*NOACTION*/ ;
1732  else if ((other&maskup) == (root&maskup)) {
1733  other = root ;
1734  action = 1/*SEND*/ ;
1735  } else {
1736  action = -1/*RECV*/ ;
1737  }
1738  } else { /* mask&rank == 1: Typical action is SEND */
1739  other = (rank==root?root&maskup:rank) & ~mask ;
1740  if ((other&maskup) == (root&maskup)) other = root ;
1741  if (rank==root)
1742  action = -1/*RECV*/ ;
1743  else
1744  action = 1/*SEND*/ ;
1745  }
1746  maskup = maskup & ~mask ;
1747  mask<<=1;
1748 
1749  if (action==1/*SEND*/) {
1750  /* TODO Not sure this "(..., 11, comm)" is correct. Would better use shadowcomm ? */
1751  rc = MPI_Send(rbuf, count, datatype, other, 11, comm) ;
1752  assert(rc==MPI_SUCCESS);
1753  if (reduceTgt) {
1754  rc = MPI_Send(rbufd, count, datatyped, other, 11, shadowcomm) ;
1755  assert(rc==MPI_SUCCESS);
1756  }
1757  break;
1758  } else if (action==-1/*RECV*/) {
1759  rc = MPI_Recv(obuf, count, datatype, other, 11, comm, &status);
1760  assert(rc==MPI_SUCCESS);
1761  if (reduceTgt) {
1762  rc = MPI_Recv(obufd, count, datatyped, other, 11, shadowcomm, &status);
1763  assert(rc==MPI_SUCCESS);
1764  }
1765  if (is_commutative || (other<rank)) {
1766  /* Save obuf and rbuf for future use in the adjoint sweep */
1767  (*ourADTOOL_AMPI_FPCollection.pushBuffer_fp)(count,datatype,comm,obuf) ;
1768  if (split_mode!=2) {
1769  (*ourADTOOL_AMPI_FPCollection.pushBuffer_fp)(count,datatype,comm,rbuf) ;
1770  }
1771  if (isUserDefinedOp(uop_idx)) {
1772  if (reduceTgt)
1773  (*uopd)(obuf, obufd, rbuf, rbufd, &count, &datatype, &datatyped);
1774  else
1775  (*(uopdata->functions[uop_idx]))(obuf, rbuf, &count, &datatype) ;
1776  } else {
1777  if (op==MPI_PROD) {
1779  (count, datatype, comm, obuf, obufd, rbuf, rbufd) ;
1780  } else if (op==MPI_MIN) {
1782  (count, datatype, comm, obuf, obufd, rbuf, rbufd) ;
1783  } else if (op==MPI_MAX) {
1785  (count, datatype, comm, obuf, obufd, rbuf, rbufd) ;
1786  } else {
1787  printf(__FILE__ ": tangent AMPI reduction not yet implemented for std op==%i\n",uop_idx) ;
1788  }
1789  }
1790  } else {
1791  /* Save obuf and rbuf for future use in the adjoint sweep */
1792  (*ourADTOOL_AMPI_FPCollection.pushBuffer_fp)(count,datatype,comm,rbuf) ;
1793  if (split_mode!=2) {
1794  (*ourADTOOL_AMPI_FPCollection.pushBuffer_fp)(count,datatype,comm,obuf) ;
1795  }
1796  if (reduceTgt)
1797  (*uopd)(rbuf, rbufd, obuf, obufd, &count, &datatype, &datatyped);
1798  else
1799  (*(uopdata->functions[uop_idx]))(rbuf, obuf, &count, &datatype) ;
1800  exch_buf = obuf ; obuf = rbuf ; rbuf = exch_buf ;
1801  if (reduceTgt) {
1802  exch_buf = obufd ; obufd = rbufd ; rbufd = exch_buf ;
1803  }
1804  switched = ~switched ;
1805  }
1806  }
1807  }
1808 
1809  if (switched) {
1810  if (!reduceAdj) { /* Adjoint joint mode does not need to return a correct rbuf */
1811  exch_buf = obuf ; obuf = rbuf ; rbuf = exch_buf ;
1812  if (rank==root)
1813  (*ourADTOOL_AMPI_FPCollection.copyActiveBuf_fp)(obuf, rbuf, count, datatype, comm) ;
1814  }
1815  if (reduceTgt) {
1816  exch_buf = obufd ; obufd = rbufd ; rbufd = exch_buf ;
1817  if (rank==root)
1818  (*ourADTOOL_AMPI_FPCollection.copyActiveBuf_fp)(obufd, rbufd, count, datatyped, shadowcomm) ;
1819  }
1820  }
1821 
1822  }
1823 
1824  if (split_mode!=0) {
1825  if (!reduceAdj) {
1826  (*ourADTOOL_AMPI_FPCollection.pushBuffer_fp)(1,MPI_INT,comm,&switched) ;
1827  (*ourADTOOL_AMPI_FPCollection.pushBuffer_fp)(1,MPI_INT,comm,&maskup) ;
1828  (*ourADTOOL_AMPI_FPCollection.pushBuffer_fp)(1,MPI_INT,comm,&mask) ;
1829  } else {
1830  (*ourADTOOL_AMPI_FPCollection.popBuffer_fp)(1,MPI_INT,comm,&mask) ;
1831  (*ourADTOOL_AMPI_FPCollection.popBuffer_fp)(1,MPI_INT,comm,&maskup) ;
1832  (*ourADTOOL_AMPI_FPCollection.popBuffer_fp)(1,MPI_INT,comm,&switched) ;
1833  }
1834  }
1835 
1836  if (reduceAdj) {
1837  void *rbufb_initial=NULL ;
1838  int dt_idxb = derivedTypeIdx(datatypeb);
1839  MPI_Aint lbb = (isDerivedType(dt_idxb)?getDTypeData()->lbs[dt_idxb]:0) ;
1840  void *obufb =
1841  (*ourADTOOL_AMPI_FPCollection.allocateTempActiveBuf_fp)(count,datatypeb,comm);
1842  obufb = (void*)((char*)obufb - lbb);
1843  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(count,datatypeb,comm,obufb);
1844  if (rank != root) {
1845  rbufb_initial = rbufb ;
1846  rbufb = (*ourADTOOL_AMPI_FPCollection.allocateTempActiveBuf_fp)(count,datatypeb,comm);
1847  rbufb = (void*)((char*)rbufb - lbb);
1848  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(count,datatypeb,comm,rbufb);
1849  }
1850  if (switched && rank==root) {
1851  (*ourADTOOL_AMPI_FPCollection.copyActiveBuf_fp)(rbufb, obufb, count, datatypeb, comm) ;
1852  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(count,datatypeb,comm,rbufb);
1853  exch_buf = obufb ; obufb = rbufb ; rbufb = exch_buf ;
1854  }
1855  while (mask!=0x1) {
1856  mask>>=1 ;
1857  maskup = maskup | mask ;
1858  if ((rank&mask) == 0) { /* Typical action fw is RECV */
1859  other = (rank==root?root&maskup:rank) | mask ;
1860  if (other >= comm_size)
1861  action = 0/*NOACTION*/ ;
1862  else if ((other&maskup) == (root&maskup)) {
1863  other = root ;
1864  action = 1/* fw SEND*/ ;
1865  } else {
1866  action = -1/* fw RECV*/ ;
1867  }
1868  } else { /* mask&rank == 1: Typical action is SEND */
1869  other = (rank==root?root&maskup:rank) & ~mask ;
1870  if ((other&maskup) == (root&maskup)) other = root ;
1871  if (rank==root)
1872  action = -1/* fw RECV*/ ;
1873  else
1874  action = 1/* fw SEND*/ ;
1875  }
1876 
1877  if (action==1/* fw SEND*/) {
1878  rc = MPI_Recv(obufb, count, datatypeb, other, 11, comm, &status);
1879  assert(rc==MPI_SUCCESS);
1880  (*ourADTOOL_AMPI_FPCollection.incrementAdjoint_fp)(count,datatypeb,comm,rbufb,obufb, idx) ;
1881  } else if (action==-1/* fw RECV*/) {
1882  if (is_commutative || (other<rank)) {
1883  /* Retrieve obuf and rbuf for the adjoint call */
1884  if (split_mode!=2) {
1885  (*ourADTOOL_AMPI_FPCollection.popBuffer_fp)(count,datatype,comm,rbuf) ;
1886  }
1887  (*ourADTOOL_AMPI_FPCollection.popBuffer_fp)(count,datatype,comm,obuf) ;
1888  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(count,datatypeb,comm,obufb);
1889  if (isUserDefinedOp(uop_idx)) {
1890  (*uopb)(obuf, obufb, rbuf, rbufb, &count, &datatype, &datatypeb) ;
1891  } else {
1892  if (op==MPI_PROD) {
1894  (count, datatype, comm, obuf, obufb, rbuf, rbufb) ;
1895  } else if (op==MPI_MIN) {
1897  (count, datatype, comm, obuf, obufb, rbuf, rbufb) ;
1898  } else if (op==MPI_MAX) {
1900  (count, datatype, comm, obuf, obufb, rbuf, rbufb) ;
1901  } else {
1902  printf(__FILE__ ": adjoint AMPI reduction not yet implemented for std op==%i\n",uop_idx) ;
1903  }
1904  }
1905  } else {
1906  exch_buf = obuf ; obuf = rbuf ; rbuf = exch_buf ;
1907  exch_buf = obufb ; obufb = rbufb ; rbufb = exch_buf ;
1908  /* Retrieve obuf and rbuf for the adjoint call */
1909  if (split_mode!=2) {
1910  (*ourADTOOL_AMPI_FPCollection.popBuffer_fp)(count,datatype,comm,obuf) ;
1911  }
1912  (*ourADTOOL_AMPI_FPCollection.popBuffer_fp)(count,datatype,comm,rbuf) ;
1913  (*uopb)(rbuf, rbufb, obuf, obufb, &count, &datatype, &datatypeb) ;
1914  }
1915  rc = MPI_Send(obufb, count, datatypeb, other, 11, comm);
1916  assert(rc==MPI_SUCCESS);
1917  }
1918  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(count,datatypeb,comm,obufb);
1919  }
1920  if (sbuf==MPI_IN_PLACE) {
1921  if (rank != root) {
1922  (*ourADTOOL_AMPI_FPCollection.incrementAdjoint_fp)(count,datatypeb,comm,rbufb_initial,rbufb, idx) ;
1923  }
1924  } else {
1925  (*ourADTOOL_AMPI_FPCollection.incrementAdjoint_fp)(count,datatypeb,comm,sbufb,rbufb, idx) ;
1926  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(count,datatypeb,comm,rbufb);
1927  }
1928  if (rank!=root) (*ourADTOOL_AMPI_FPCollection.releaseTempActiveBuf_fp)(rbufb,count,datatypeb);
1929  (*ourADTOOL_AMPI_FPCollection.releaseTempActiveBuf_fp)(obufb,count,datatypeb);
1930  }
1931 
1932  (*ourADTOOL_AMPI_FPCollection.releaseTempActiveBuf_fp)(obuf,count,datatype);
1933  if (reduceTgt) (*ourADTOOL_AMPI_FPCollection.releaseTempActiveBuf_fp)(obufd,count,datatyped);
1934  if (rank!=root) {
1935  (*ourADTOOL_AMPI_FPCollection.releaseTempActiveBuf_fp)(rbuf,count,datatype);
1936  if (reduceTgt) (*ourADTOOL_AMPI_FPCollection.releaseTempActiveBuf_fp)(rbufd,count,datatyped);
1937  }
1938  return MPI_SUCCESS;
1939  } else { /* i.e. op==MPI_SUM and no user-given derivative */
1940  if (!reduceAdj) {
1941  rc=MPI_Reduce(sbuf,
1942  rbuf,
1943  count,
1944  datatype,
1945  op,
1946  root,
1947  comm);
1948  assert(rc==MPI_SUCCESS);
1949  }
1950  if (reduceTgt)
1951  rc=MPI_Reduce(sbufd,
1952  rbufd,
1953  count,
1954  datatyped,
1955  op,
1956  root,
1957  shadowcomm);
1958  if (reduceAdj) {
1959  int dt_idxb = derivedTypeIdx(datatypeb);
1960  MPI_Aint lbb = (isDerivedType(dt_idxb)?getDTypeData()->lbs[dt_idxb]:0) ;
1961  void *tmp_bufb =
1962  (*ourADTOOL_AMPI_FPCollection.allocateTempActiveBuf_fp)(count,datatypeb,comm);
1963  tmp_bufb = (void*)((char*)tmp_bufb - lbb);
1964  if (rank==root)
1966  count, datatypeb, comm);
1967  rc=MPI_Bcast(tmp_bufb, count, datatypeb, root, comm) ;
1968  assert(rc==MPI_SUCCESS) ;
1969  (*ourADTOOL_AMPI_FPCollection.incrementAdjoint_fp)(count,datatypeb,comm,sbufb,tmp_bufb, idx) ;
1970  (*ourADTOOL_AMPI_FPCollection.releaseTempActiveBuf_fp)(tmp_bufb,count,datatypeb);
1971  rc = MPI_SUCCESS ;
1972  }
1973  return rc;
1974  }
1975 }
1976 
1977 int FWB_AMPI_Reduce (void* sbuf,
1978  void* rbuf,
1979  int count,
1980  MPI_Datatype datatype,
1981  MPI_Op op,
1982  int root,
1983  MPI_Comm comm) {
1984  int rc,rank;
1985  MPI_Comm_rank(comm,&rank);
1986  int uop_idx = userDefinedOpIdx(op);
1987  if (isUserDefinedOp(uop_idx)) {
1988  int comm_size, is_commutative;
1989  int mask, relrank, source, lroot;
1990  int dt_idx = derivedTypeIdx(datatype);
1991  MPI_Status status;
1992  MPI_Aint lb;
1993  void *tmp_buf;
1994  userDefinedOpData* uopd = getUOpData();
1995  MPI_User_function* uop = uopd->functions[uop_idx];
1996  if (count == 0) return MPI_SUCCESS;
1997  MPI_Comm_size(comm,&comm_size);
1998  if (isDerivedType(dt_idx)) lb = getDTypeData()->lbs[dt_idx];
1999  else lb = 0;
2000  is_commutative = uopd->commutes[uop_idx];
2001  tmp_buf = (*ourADTOOL_AMPI_FPCollection.allocateTempActiveBuf_fp)(count,datatype,comm);
2002  tmp_buf = (void*)((char*)tmp_buf - lb);
2003  if (rank != root) {
2004  rbuf = (*ourADTOOL_AMPI_FPCollection.allocateTempActiveBuf_fp)(count,datatype,comm);
2005  rbuf = (void*)((char*)rbuf - lb);
2006  }
2007  if ((rank != root) || (sbuf != MPI_IN_PLACE)) {
2008  (*ourADTOOL_AMPI_FPCollection.copyActiveBuf_fp)(sbuf, rbuf, count, datatype, comm);
2009  }
2010  mask = 0x1;
2011  if (is_commutative)
2012  lroot = root;
2013  else
2014  lroot = 0;
2015  relrank = (rank - lroot + comm_size) % comm_size;
2016  while (mask < comm_size) {
2017  if ((mask & relrank) == 0) {
2018  source = (relrank | mask);
2019  if (source < comm_size) {
2020 
2021  source = (source + lroot) % comm_size;
2022  rc = FW_AMPI_Recv(tmp_buf, count, datatype, source,
2023  11, AMPI_FROM_SEND, comm, &status);
2024  assert(rc==MPI_SUCCESS);
2025  if (is_commutative) {
2026  (*uop)(tmp_buf, rbuf, &count, &datatype);
2027  }
2028  else {
2029  (*uop)(rbuf, tmp_buf, &count, &datatype);
2030  (*ourADTOOL_AMPI_FPCollection.copyActiveBuf_fp)(sbuf, rbuf, count, datatype, comm);
2031  }
2032  }
2033  }
2034  else {
2035  source = ((relrank & (~mask)) + lroot) % comm_size;
2036  rc = FW_AMPI_Send(rbuf, count, datatype, source,
2037  11, AMPI_TO_RECV, comm);
2038  assert(rc==MPI_SUCCESS);
2039  break;
2040  }
2041  mask<<=1;
2042  }
2043  if (!is_commutative && (root != 0)) {
2044  if (rank == 0) rc = FW_AMPI_Send(rbuf, count, datatype, root,
2045  11, AMPI_TO_RECV, comm);
2046  else if (rank==root) rc = FW_AMPI_Recv(rbuf, count, datatype, 0,
2047  11, AMPI_FROM_SEND, comm, &status);
2048  assert(rc==MPI_SUCCESS);
2049  }
2050  (*ourADTOOL_AMPI_FPCollection.releaseTempActiveBuf_fp)(tmp_buf,count,datatype);
2051  if (rank != root) {
2052  (*ourADTOOL_AMPI_FPCollection.releaseTempActiveBuf_fp)(rbuf,count,datatype);
2053  }
2054  return 0;
2055  }
2056  else {
2057  double* mappedsbuf=NULL;
2058  double* mappedrbuf=NULL;
2060  mappedsbuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(sbuf,&count);
2061  mappedrbuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(rbuf,&count);
2062  }
2063  else {
2064  mappedsbuf=sbuf;
2065  mappedrbuf=rbuf;
2066  }
2067  rc=MPI_Reduce(mappedsbuf,
2068  mappedrbuf,
2069  count,
2071  op,
2072  root,
2073  comm);
2074  if (rc==MPI_SUCCESS && (*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(datatype)==AMPI_ACTIVE) {
2077  rbuf,
2078  rbuf,
2079  rank==root, /* also push contents of rbuf for root */
2080  count,
2081  datatype,
2082  op,
2083  root,
2084  comm);
2085  }
2086  return rc;
2087  }
2088 }
2089 
2091 int FWS_AMPI_Reduce(void* sbuf,
2092  void* rbuf,
2093  int count,
2094  MPI_Datatype datatype,
2095  MPI_Op op,
2096  int root,
2097  MPI_Comm comm) {
2098  return PEDESTRIAN_AMPI_Reduce(sbuf, NULL, NULL,
2099  rbuf, NULL, NULL,
2100  count,
2101  datatype, datatype, datatype,
2102  op, NULL, NULL,
2103  1,
2104  root,
2105  comm) ;
2106 }
2107 
2108 int BWB_AMPI_Reduce (void* sbuf,
2109  void* rbuf,
2110  int count,
2111  MPI_Datatype datatype,
2112  MPI_Op op,
2113  int root,
2114  MPI_Comm comm) {
2115  int rc,rank;
2116  void *idx=NULL;
2118  MPI_Datatype mappedtype = (*ourADTOOL_AMPI_FPCollection.BW_rawType_fp)(datatype);
2119  (*ourADTOOL_AMPI_FPCollection.getAdjointCount_fp)(&count,datatype);
2120  void *prevValBuf = (*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(count,datatype,comm);
2121  void *reduceResultBuf = (*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(count,datatype,comm);
2123  &rbuf,
2124  &prevValBuf,
2125  &reduceResultBuf,
2126  &count,
2127  &op,
2128  &root,
2129  &comm,
2130  &idx);
2131  MPI_Comm_rank(comm,&rank);
2132  rc=MPI_Bcast(reduceResultBuf,
2133  count,
2134  mappedtype,
2135  root,
2136  comm);
2137  if (rc!=MPI_SUCCESS) MPI_Abort(comm, MPI_ERR_ARG);
2138  void *tempBuf = (*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(count,mappedtype,comm);
2139  if (rank==root) {
2140  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(count, mappedtype, comm, tempBuf);
2141  (*ourADTOOL_AMPI_FPCollection.incrementAdjoint_fp)(count, mappedtype, comm,
2142  tempBuf, rbuf, idx);
2143  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(count, mappedtype, comm, rbuf);
2144  }
2145  rc=MPI_Bcast(tempBuf,
2146  count,
2147  mappedtype,
2148  root,
2149  comm);
2150  if (op==MPI_PROD) {
2151  (*ourADTOOL_AMPI_FPCollection.multiplyAdjoint_fp)(count, mappedtype, comm,
2152  tempBuf, reduceResultBuf, &idx);
2153  (*ourADTOOL_AMPI_FPCollection.divideAdjoint_fp)(count, mappedtype, comm,
2154  tempBuf, prevValBuf, &idx);
2155  }
2156  else if (op==MPI_MAX || op==MPI_MIN) {
2157  void *equalsResultBuf = (*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(count,mappedtype,comm);
2158  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(count, mappedtype, comm,
2159  equalsResultBuf);
2160  (*ourADTOOL_AMPI_FPCollection.equalAdjoints_fp)(count, mappedtype, comm,
2161  equalsResultBuf, prevValBuf, reduceResultBuf, &idx);
2162  void *contributionTotalsBuf = (*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(count,mappedtype,comm);
2163  MPI_Allreduce(equalsResultBuf,
2164  contributionTotalsBuf,
2165  count,
2166  mappedtype,
2167  MPI_SUM,
2168  comm);
2169  (*ourADTOOL_AMPI_FPCollection.multiplyAdjoint_fp)(count, mappedtype, comm,
2170  tempBuf, equalsResultBuf, &idx);
2171  (*ourADTOOL_AMPI_FPCollection.divideAdjoint_fp)(count, mappedtype, comm,
2172  tempBuf, contributionTotalsBuf, &idx);
2174  (*ourADTOOL_AMPI_FPCollection.releaseAdjointTempBuf_fp)(contributionTotalsBuf);
2175  }
2176  else {}
2177  (*ourADTOOL_AMPI_FPCollection.incrementAdjoint_fp)(count, mappedtype, comm,
2178  sbuf, tempBuf, idx);
2182  return rc;
2183 }
2184 
2186 int BWS_AMPI_Reduce(void* sbuf, void* sbufb,
2187  void* rbuf, void* rbufb,
2188  int count,
2189  MPI_Datatype datatype, MPI_Datatype datatypeb,
2190  MPI_Op op, TLM_userFunctionF* uopb,
2191  int root,
2192  MPI_Comm comm) {
2193  return PEDESTRIAN_AMPI_Reduce(sbuf, NULL, sbufb,
2194  rbuf, NULL, rbufb,
2195  count,
2196  datatype, datatype, datatypeb,
2197  op, NULL, uopb,
2198  1,
2199  root,
2200  comm) ;
2201 }
2202 
2203 int TLB_AMPI_Reduce(void* sbuf,
2204  void* rbuf,
2205  int count,
2206  MPI_Datatype datatype,
2207  MPI_Op op,
2208  int root,
2209  MPI_Comm comm){
2210  int rc=MPI_Reduce(sbuf,
2211  rbuf,
2212  count,
2213  datatype,
2214  op,
2215  root,
2216  comm);
2217  return rc;
2218 }
2219 
2223 int TLS_AMPI_Reduce(void* sbuf, void* sbufd,
2224  void* rbuf, void* rbufd,
2225  int count,
2226  MPI_Datatype datatype, MPI_Datatype datatyped,
2227  MPI_Op op, TLM_userFunctionF* uopd,
2228  int root,
2229  MPI_Comm comm) {
2230  return PEDESTRIAN_AMPI_Reduce(sbuf, sbufd, NULL,
2231  rbuf, rbufd, NULL,
2232  count,
2233  datatype, datatyped, datatype,
2234  op, uopd, NULL,
2235  0,
2236  root,
2237  comm) ;
2238 }
2239 
2240 int FW_AMPI_Allreduce (void* sbuf,
2241  void* rbuf,
2242  int count,
2243  MPI_Datatype datatype,
2244  MPI_Op op,
2245  MPI_Comm comm) {
2246  int rc,rank;
2247  MPI_Comm_rank(comm,&rank);
2248  double* mappedsbuf=NULL;
2249  double* mappedrbuf=NULL;
2251  mappedsbuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(sbuf,&count);
2252  mappedrbuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(rbuf,&count);
2253  }
2254  else {
2255  mappedsbuf=sbuf;
2256  mappedrbuf=rbuf;
2257  }
2258  rc=MPI_Allreduce(mappedsbuf,
2259  mappedrbuf,
2260  count,
2262  op,
2263  comm);
2264  if (rc==MPI_SUCCESS && (*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(datatype)==AMPI_ACTIVE) {
2267  rbuf,
2268  rbuf,
2269  1,
2270  count,
2271  datatype,
2272  op,
2273  0,
2274  comm);
2275  }
2276  return rc;
2277 }
2278 
2279 int BW_AMPI_Allreduce (void* sbuf,
2280  void* rbuf,
2281  int count,
2282  MPI_Datatype datatype,
2283  MPI_Op op,
2284  MPI_Comm comm) {
2285  int rc=0,rank, rootPlaceHolder;
2286  void *idx=NULL;
2288  MPI_Datatype mappedtype = (*ourADTOOL_AMPI_FPCollection.BW_rawType_fp)(datatype);
2289  (*ourADTOOL_AMPI_FPCollection.getAdjointCount_fp)(&count,datatype);
2290  void *prevValBuf = (*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(count,mappedtype,comm);
2291  void *reduceResultBuf = (*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(count,mappedtype,comm);
2293  &rbuf,
2294  &prevValBuf,
2295  &reduceResultBuf,
2296  &count,
2297  &op,
2298  &rootPlaceHolder,
2299  &comm,
2300  &idx);
2301  MPI_Comm_rank(comm,&rank);
2302  void *tempBuf = (*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(count,mappedtype,comm);
2303  MPI_Allreduce(rbuf,
2304  tempBuf,
2305  count,
2306  mappedtype,
2307  MPI_SUM,
2308  comm);
2309  if (op==MPI_SUM) {
2310  ; /* nothing extra to be done here */
2311  }
2312  else if (op==MPI_PROD) {
2313  (*ourADTOOL_AMPI_FPCollection.multiplyAdjoint_fp)(count, mappedtype, comm,
2314  tempBuf, reduceResultBuf, &idx);
2315  (*ourADTOOL_AMPI_FPCollection.divideAdjoint_fp)(count, mappedtype, comm,
2316  tempBuf, prevValBuf, &idx);
2317  }
2318  else if (op==MPI_MAX || op==MPI_MIN) {
2319  void *equalsResultBuf = (*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp)(count,mappedtype,comm);
2320  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(count, mappedtype, comm,
2321  equalsResultBuf);
2322  (*ourADTOOL_AMPI_FPCollection.equalAdjoints_fp)(count, mappedtype, comm,
2323  equalsResultBuf, prevValBuf, reduceResultBuf, &idx);
2324  void *contributionTotalsBuf = (*ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp) (count,mappedtype,comm);
2325  MPI_Allreduce(equalsResultBuf,
2326  contributionTotalsBuf,
2327  count,
2328  mappedtype,
2329  MPI_SUM,
2330  comm);
2331  (*ourADTOOL_AMPI_FPCollection.multiplyAdjoint_fp)(count, mappedtype, comm,
2332  tempBuf, equalsResultBuf, &idx);
2333  (*ourADTOOL_AMPI_FPCollection.divideAdjoint_fp)(count, mappedtype, comm,
2334  tempBuf, contributionTotalsBuf, &idx);
2336  (*ourADTOOL_AMPI_FPCollection.releaseAdjointTempBuf_fp)(contributionTotalsBuf);
2337  }
2338  else {
2339  assert(0); /* unimplemented */
2340  }
2341  (*ourADTOOL_AMPI_FPCollection.incrementAdjoint_fp)(count, mappedtype, comm,
2342  sbuf, tempBuf, idx);
2343  (*ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp)(count, mappedtype, comm, rbuf);
2347  return rc;
2348 }
2349 
2350 int TLM_AMPI_Allreduce(void* sbuf,
2351  void* rbuf,
2352  int count,
2353  MPI_Datatype datatype,
2354  MPI_Op op,
2355  MPI_Comm comm) {
2356  int rc=0;
2357  assert(0);
2358  return rc;
2359 }
2360 
2362  static derivedTypeData* dat = NULL;
2363  if (dat==NULL) {
2364  derivedTypeData* newdat = (derivedTypeData*)malloc(sizeof(derivedTypeData));
2365  newdat->size = 0;
2366  newdat->preAlloc = 0;
2367  newdat->num_actives = NULL;
2368  newdat->first_active_blocks = NULL;
2369  newdat->last_active_blocks = NULL;
2370  newdat->last_active_block_lengths = NULL;
2371  newdat->derived_types = NULL;
2372  newdat->counts = NULL;
2373  newdat->arrays_of_blocklengths = NULL;
2374  newdat->arrays_of_displacements = NULL;
2375  newdat->arrays_of_types = NULL;
2376  newdat->lbs = NULL;
2377  newdat->extents = NULL;
2378  newdat->packed_types = NULL;
2379  newdat->arrays_of_p_blocklengths = NULL;
2380  newdat->arrays_of_p_displacements = NULL;
2381  newdat->arrays_of_p_types = NULL;
2382  newdat->p_extents = NULL;
2383  dat = newdat;
2384  }
2385  return dat;
2386 }
2387 
2389  int i;
2390  derivedTypeData* dat = getDTypeData();
2391  for (i=0;i<dat->size;i++) {
2392  free(dat->arrays_of_blocklengths[i]);
2393  free(dat->arrays_of_displacements[i]);
2394  free(dat->arrays_of_types[i]);
2395  free(dat->arrays_of_p_blocklengths[i]);
2396  free(dat->arrays_of_p_displacements[i]);
2397  free(dat->arrays_of_p_types[i]);
2398  if (dat->packed_types[i]!=MPI_DATATYPE_NULL) MPI_Type_free(dat->packed_types+i);
2399  }
2400  free(dat->num_actives);
2401  free(dat->first_active_blocks);
2402  free(dat->last_active_blocks);
2403  free(dat->last_active_block_lengths);
2404  free(dat->derived_types);
2405  free(dat->counts);
2406  free(dat->arrays_of_blocklengths);
2407  free(dat->arrays_of_displacements);
2408  free(dat->arrays_of_types);
2409  free(dat->lbs);
2410  free(dat->extents);
2411  free(dat->packed_types);
2412  free(dat->arrays_of_p_blocklengths);
2413  free(dat->arrays_of_p_displacements);
2414  free(dat->arrays_of_p_types);
2415  free(dat->p_extents);
2416  free(dat);
2417 }
2418 
2420  userDefinedOpData* dat = getUOpData();
2421  free(dat->ops);
2422  free(dat->functions);
2423  free(dat->commutes);
2424  free(dat);
2425 }
2426 
2428  int count,
2429  int array_of_blocklengths[],
2430  MPI_Aint array_of_displacements[],
2431  MPI_Datatype array_of_types[],
2432  MPI_Aint lb,
2433  MPI_Aint extent,
2434  int array_of_p_blocklengths[],
2435  MPI_Aint array_of_p_displacements[],
2436  MPI_Datatype array_of_p_types[],
2437  MPI_Aint p_extent,
2438  MPI_Datatype* newtype,
2439  MPI_Datatype* packed_type) {
2440  assert(dat);
2441  int i, dt_idx;
2442  int num_actives=0, fst_ablk_set=0;
2443  MPI_Aint fst_active_blk=0, lst_active_blk=0, lst_active_blk_len=0;
2444  for (i=0;i<count;i++) {
2445  if ((*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(array_of_types[i])==AMPI_ACTIVE) {
2446  num_actives += array_of_blocklengths[i];
2447  if (!fst_ablk_set) {
2448  fst_active_blk = array_of_displacements[i];
2449  fst_ablk_set = 1;
2450  }
2451  lst_active_blk = array_of_displacements[i];
2452  lst_active_blk_len = array_of_blocklengths[i];
2453  continue;
2454  }
2455  dt_idx = derivedTypeIdx(array_of_types[i]);
2456  if (isDerivedType(dt_idx)) {
2457  num_actives += dat->num_actives[dt_idx]*array_of_blocklengths[i];
2458  if (!fst_ablk_set) {
2459  fst_active_blk = array_of_displacements[i] + dat->first_active_blocks[dt_idx];
2460  fst_ablk_set = 1;
2461  }
2462  lst_active_blk = array_of_displacements[i] + (array_of_blocklengths[i]-1)*dat->extents[dt_idx] + dat->last_active_blocks[dt_idx];
2463  lst_active_blk_len = dat->last_active_block_lengths[dt_idx];
2464  }
2465  }
2466  if (dat->preAlloc == dat->size) {
2467  dat->preAlloc += 16;
2468  dat->num_actives = realloc(dat->num_actives, (dat->preAlloc)*sizeof(int));
2469  dat->first_active_blocks = realloc(dat->first_active_blocks, (dat->preAlloc)*sizeof(MPI_Aint));
2470  dat->last_active_blocks = realloc(dat->last_active_blocks, (dat->preAlloc)*sizeof(MPI_Aint));
2471  dat->last_active_block_lengths = realloc(dat->last_active_block_lengths, (dat->preAlloc)*sizeof(int));
2472  dat->derived_types = realloc(dat->derived_types,
2473  (dat->preAlloc)*sizeof(MPI_Datatype));
2474  dat->counts = realloc(dat->counts, (dat->preAlloc)*sizeof(int));
2475  dat->arrays_of_blocklengths = realloc(dat->arrays_of_blocklengths,
2476  (dat->preAlloc)*sizeof(int*));
2478  (dat->preAlloc)*sizeof(MPI_Aint*));
2479  dat->arrays_of_types = realloc(dat->arrays_of_types,
2480  (dat->preAlloc)*sizeof(MPI_Datatype*));
2481  dat->lbs = realloc(dat->lbs, (dat->preAlloc)*sizeof(MPI_Aint));
2482  dat->extents = realloc(dat->extents, (dat->preAlloc)*sizeof(MPI_Aint));
2483  dat->packed_types = realloc(dat->packed_types,
2484  (dat->preAlloc)*sizeof(MPI_Datatype));
2486  (dat->preAlloc)*sizeof(int*));
2488  (dat->preAlloc)*sizeof(MPI_Aint*));
2489  dat->arrays_of_p_types = realloc(dat->arrays_of_p_types,
2490  (dat->preAlloc)*sizeof(MPI_Datatype*));
2491  dat->p_extents = realloc(dat->p_extents, (dat->preAlloc)*sizeof(MPI_Aint));
2492  }
2493  dat->num_actives[dat->size] = num_actives;
2494  dat->first_active_blocks[dat->size] = fst_active_blk;
2495  dat->last_active_blocks[dat->size] = lst_active_blk;
2496  dat->last_active_block_lengths[dat->size] = lst_active_blk_len;
2497  dat->derived_types[dat->size] = *newtype;
2498  dat->counts[dat->size] = count;
2499  dat->arrays_of_blocklengths[dat->size] = malloc(count*sizeof(int));
2500  memcpy(dat->arrays_of_blocklengths[dat->size], array_of_blocklengths, count*sizeof(int));
2501  dat->arrays_of_displacements[dat->size] = malloc(count*sizeof(MPI_Aint));
2502  memcpy(dat->arrays_of_displacements[dat->size], array_of_displacements, count*sizeof(MPI_Aint));
2503  dat->arrays_of_types[dat->size] = malloc(count*sizeof(MPI_Datatype));
2504  memcpy(dat->arrays_of_types[dat->size], array_of_types, count*sizeof(MPI_Datatype));
2505  dat->lbs[dat->size] = lb;
2506  dat->extents[dat->size] = extent;
2507  dat->packed_types[dat->size] = *packed_type;
2508  dat->arrays_of_p_blocklengths[dat->size] = malloc(count*sizeof(int));
2509  memcpy(dat->arrays_of_p_blocklengths[dat->size], array_of_p_blocklengths, count*sizeof(int));
2510  dat->arrays_of_p_displacements[dat->size] = malloc(count*sizeof(MPI_Aint));
2511  memcpy(dat->arrays_of_p_displacements[dat->size], array_of_p_displacements, count*sizeof(MPI_Aint));
2512  dat->arrays_of_p_types[dat->size] = malloc(count*sizeof(MPI_Datatype));
2513  memcpy(dat->arrays_of_p_types[dat->size], array_of_p_types, count*sizeof(MPI_Datatype));
2514  dat->p_extents[dat->size] = p_extent;
2515  dat->size += 1;
2516 }
2517 
2518 int derivedTypeIdx(MPI_Datatype datatype) {
2519  int i;
2520  derivedTypeData* dtdata = getDTypeData();
2521  for (i=0;i<dtdata->size;i++) {
2522  if (dtdata->derived_types[i]==datatype) return i;
2523  }
2524  return -1;
2525 }
2526 
2527 int isDerivedType(int dt_idx) {
2528  return dt_idx!=-1;
2529 }
2530 
2532  static userDefinedOpData* dat = NULL;
2533  if (dat==NULL) {
2534  userDefinedOpData* newdat = (userDefinedOpData*)malloc(sizeof(userDefinedOpData));
2535  newdat->size = 0;
2536  newdat->preAlloc = 0;
2537  newdat->ops = NULL;
2538  newdat->functions = NULL;
2539  newdat->commutes = NULL;
2540  dat = newdat;
2541  }
2542  return dat;
2543 }
2544 
2546  MPI_Op* op,
2547  MPI_User_function* function,
2548  int commute) {
2549  assert(dat);
2550  if (dat->preAlloc == dat->size) {
2551  dat->preAlloc += 16;
2552  dat->ops = realloc(dat->ops,(dat->preAlloc)*sizeof(MPI_Op));
2553  dat->functions = realloc(dat->functions,(dat->preAlloc)*sizeof(MPI_User_function*));
2554  dat->commutes = realloc(dat->commutes,(dat->preAlloc)*sizeof(int));
2555  }
2556  dat->ops[dat->size] = *op;
2557  dat->functions[dat->size] = function;
2558  dat->commutes[dat->size] = commute;
2559  dat->size += 1;
2560 }
2561 
2562 int userDefinedOpIdx(MPI_Op op) {
2563  int i;
2564  userDefinedOpData* uopdata = getUOpData();
2565  for (i=0;i<uopdata->size;i++) {
2566  if (uopdata->ops[i]==op) return i;
2567  }
2568  return -1;
2569 }
2570 
2571 int isUserDefinedOp(int uop_idx) {
2572  return uop_idx!=-1;
2573 }
2574 
2575 /* One-sided AMPI */
2576 int FW_AMPI_Win_create( void *base,
2577  MPI_Aint size,
2578  int disp_unit,
2579  MPI_Info info,
2580  MPI_Comm comm,
2581  AMPI_Win *win
2582  )
2583 {
2584  int rc=0;
2585  win->req_stack=(AMPI_Win_stack *) malloc(sizeof(AMPI_Win_stack));
2587  win->map=malloc(sizeof(void*));
2588  *win->map=(ourADTOOL_AMPI_FPCollection.createWinMap_fp)(base,size);
2589  win->plainWindow=(MPI_Win**) malloc(sizeof(MPI_Win*));
2590  *win->plainWindow= (MPI_Win*) malloc(sizeof(MPI_Win));
2591  win->base=base;
2592  win->num_reqs=0;
2593  win->idx=NULL;
2595  win->disp=disp_unit;
2596  win->comm=comm;
2598  rc=MPI_Win_create(*win->map, win->size, win->disp, info, win->comm, *win->plainWindow);
2600  /*BK_AMPI_put_AMPI_Win(win);*/
2601  return rc;
2602 }
2603 
2604 int BW_AMPI_Win_create( void *base,
2605  MPI_Aint size,
2606  int disp_unit,
2607  MPI_Info info,
2608  MPI_Comm comm,
2609  AMPI_Win *win
2610  ) {
2611  int rc=0;
2614  free(win->req_stack);
2615  free(*win->map);
2616  free(win->map);
2617  rc=MPI_Win_free(*win->plainWindow);
2618  free(*win->plainWindow);
2619  free(win->plainWindow);
2620  return rc;
2621 }
2622 
2624  free(*win->map);
2627  return MPI_Win_free(*win->plainWindow);
2628 }
2629 
2631  int rc=0;
2632  AMPI_Win ampiWin;
2634  *ampiWin.map=malloc(sizeof(ampiWin.size));
2636  rc=MPI_Win_create(*ampiWin.map, ampiWin.size, ampiWin.disp, MPI_INFO_NULL, ampiWin.comm, *ampiWin.plainWindow);
2637  double *map_=(double*) *ampiWin.map;
2638  if(ampiWin.size!=0)
2639  map_[0]=0;
2640  return rc;
2641 }
2642 
2643 int FW_AMPI_Get( void *origin_addr,
2644  int origin_count,
2645  MPI_Datatype origin_datatype,
2646  int target_rank,
2647  MPI_Aint target_disp,
2648  int target_count,
2649  MPI_Datatype target_datatype,
2650  AMPI_Win win
2651  )
2652 {
2653  int rc=MPI_SUCCESS;
2654  double* mappedbuf=NULL;
2655  if((*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(origin_datatype)==AMPI_ACTIVE) {
2656  mappedbuf=(*ourADTOOL_AMPI_FPCollection.rawData_fp)(origin_addr,&origin_count);
2657  }
2658  else {
2659  mappedbuf=origin_addr;
2660  }
2661  rc=MPI_Get( mappedbuf,
2662  /*rc=MPI_Get( origin_addr,*/
2663  origin_count,
2664  origin_datatype,
2665  target_rank,
2666  target_disp,
2667  target_count,
2668  target_datatype,
2669  **win.plainWindow
2670  );
2671  AMPI_WinRequest winRequest;
2672  /* fill in the other info */
2673  winRequest.origin_addr=origin_addr;
2674  winRequest.origin_count=origin_count;
2675  winRequest.origin_datatype=origin_datatype;
2676  winRequest.target_rank=target_rank;
2677  winRequest.target_disp=target_disp;
2678  winRequest.target_count=target_count;
2679  winRequest.target_datatype=target_datatype;
2680  (*ourADTOOL_AMPI_FPCollection.mapWinBufForAdjoint_fp)(&winRequest,origin_addr);
2681  if ((*ourADTOOL_AMPI_FPCollection.isActiveType_fp)(origin_datatype)==AMPI_ACTIVE) {
2683  AMPI_WIN_STACK_push(win.req_stack,winRequest);
2684  }
2685  return rc;
2686 }
2687 
2688 int BW_AMPI_Get( void *origin_addr,
2689  int origin_count,
2690  MPI_Datatype origin_datatype,
2691  int target_rank,
2692  MPI_Aint target_disp,
2693  int target_count,
2694  MPI_Datatype target_datatype,
2695  AMPI_Win win
2696  ) {
2697  return MPI_SUCCESS;
2698 }
2699 
2700 int FW_AMPI_Put( void *origin_addr,
2701  int origin_count,
2702  MPI_Datatype origin_datatype,
2703  int target_rank,
2704  MPI_Aint target_disp,
2705  int target_count,
2706  MPI_Datatype target_datatype,
2707  AMPI_Win win
2708  ) {
2709  return MPI_SUCCESS;
2710 }
2711 
2712 int BW_AMPI_Put( void *origin_addr,
2713  int origin_count,
2714  MPI_Datatype origin_datatype,
2715  int target_rank,
2716  MPI_Aint target_disp,
2717  int target_count,
2718  MPI_Datatype target_datatype,
2719  AMPI_Win win
2720  ) {
2721  return MPI_SUCCESS;
2722 }
2723 
2724 int FW_AMPI_Win_fence( int assert,
2725  AMPI_Win win
2726  )
2727 {
2728  AMPI_WinRequest winRequest;
2729  int rc=MPI_SUCCESS;
2730  int i=0;
2731  int num_reqs=0;
2732  printf("FW win ptr: %p\n", *win.plainWindow);
2733  MPI_Win tmp=**win.plainWindow;
2734  /*Sync window*/
2735  rc=MPI_Win_fence( assert, tmp);
2737 
2738  num_reqs=win.req_stack->num_reqs;
2740  for(i=num_reqs; i>0 ; i=i-1) {
2741  winRequest=AMPI_WIN_STACK_pop(win.req_stack);
2744  }
2745  win.num_reqs=num_reqs;
2746  printf("FW num_reqs: %d\n", win.num_reqs);
2747  win.req_stack->num_reqs=0;
2749  rc=MPI_Win_fence( assert, **win.plainWindow );
2750  return rc;
2751 }
2752 
2753 int BW_AMPI_Win_fence( int assert,
2754  AMPI_Win win
2755  )
2756 {
2757  AMPI_WinRequest winRequest;
2758  int rc=MPI_SUCCESS;
2759  int i=0;
2760  int num_reqs=0;
2761  assert=0;
2762 
2763  /* We pop the window from the tape. Here we save the MPI_Win for the adjoints */
2764 
2766  printf("BW win ptr: %p\n", *win.plainWindow);
2767 
2768  /* First part is copying the adjoints. With booking we look up how many 1sided
2769  * adjoint comms took place*/
2770 
2771  rc=MPI_Win_fence( assert, **win.plainWindow );
2772  /*AMPI_Win bk_win;*/
2773  /*BK_AMPI_read_AMPI_Win(*win.plainWindow,&bk_win);*/
2774  /*printf("BW bk_num_reqs: %ld\n", bk_win.req_stack->num_reqs);*/
2775  /*printf("BW bk_win.size: %ld\n", bk_win.size);*/
2776 
2777 
2778  /*(ourADTOOL_AMPI_FPCollection.writeWinData_fp)(win.map,win.base,win.size);*/
2779  /*double *tmp=(double *) win.map;*/
2780  double *tmp=(double *) *win.map;
2781 
2782  /* if window size is nonzero we sync the incoming adjoints in the window map
2783  * and set the map to zero again */
2784 
2785  if(win.size!=0) {
2786  printf("BW Fence map: %f\n", tmp[0]);
2788  }
2789  /*num_reqs=bk_win.req_stack->num_reqs;*/
2790 
2791  /* placeholder for adjoints that are receveived through get. have to copy them
2792  * back here */
2793 
2794  /*for(i=num_reqs; i>0 ; i=i-1) {*/
2795  /*bk_winRequest=AMPI_WIN_STACK_pop(bk_win.req_stack);*/
2796  /*}*/
2797 
2798  /* We dispatch the next adjoint communications. These are popped from the
2799  * tape.*/
2800 
2801  rc=MPI_Win_fence( assert, **win.plainWindow );
2802  num_reqs=win.num_reqs;
2803  printf("BW num_reqs: %d\n", num_reqs);
2804  for(i=num_reqs; i>0 ; i=i-1) {
2807  double *tmp=(double *) ((*ourADTOOL_AMPI_FPCollection.rawAdjointData_fp)(winRequest.adjointTempBuf));
2808  printf("BW Put adj: %f\n", tmp[0]);
2810  winRequest.origin_count,
2811  winRequest.origin_datatype,
2812  winRequest.target_rank,
2813  winRequest.target_disp,
2814  winRequest.target_count,
2815  winRequest.target_datatype,
2816  **win.plainWindow
2817  );
2818 
2819  /*And we save the adjoint comms in our window that is in the bk system*/
2820 
2821  /*AMPI_WIN_STACK_push(bk_win.req_stack,*winRequest);*/
2822  }
2823  return rc;
2824 }