21 #ifdef AMPI_FORTRANCOMPATIBLE
45 )) rc=MPI_Abort(comm, MPI_ERR_ARG);
48 double* mappedbuf=NULL;
60 rc=MPI_Recv(mappedbuf,
73 if(tag==MPI_ANY_TAG) tag=myStatus.MPI_TAG;
74 if(src==MPI_ANY_SOURCE) src=myStatus.MPI_SOURCE;
84 if (status!=MPI_STATUS_IGNORE) *status=myStatus;
117 )) rc=MPI_Abort(comm, MPI_ERR_ARG);
134 rc=MPI_Abort(comm, MPI_ERR_TYPE);
148 MPI_Status* status) {
150 rc = MPI_Recv(buf,count,datatype,src,tag,comm,status);
159 MPI_Datatype
datatype, MPI_Datatype shadowdatatype,
164 MPI_Status* status) {
166 int rc = MPI_Recv(buf, count, datatype, src, tag, comm, &status1) ;
167 assert(rc==MPI_SUCCESS);
169 rc = MPI_Recv(shadowbuf, count, shadowdatatype,
170 (src==MPI_ANY_SOURCE?status1.MPI_SOURCE:src),
171 (tag==MPI_ANY_TAG?status1.MPI_TAG:tag),
172 shadowcomm, status) ;
173 assert(rc==MPI_SUCCESS);
193 )) rc=MPI_Abort(comm, MPI_ERR_ARG);
195 double* mappedbuf=NULL;
202 rc= MPI_Irecv(mappedbuf,
208 #ifdef AMPI_FORTRANCOMPATIBLE
215 #ifdef AMPI_FORTRANCOMPATIBLE
217 ampiRequest=&iRequestInst;
224 ampiRequest->tag=
tag;
225 ampiRequest->count=
count;
227 ampiRequest->comm=
comm;
231 ampiRequest->tracedRequest=ampiRequest->plainRequest;
232 #ifdef AMPI_FORTRANCOMPATIBLE
237 #ifdef AMPI_REQUESTONTRACE
256 #ifdef AMPI_REQUESTONTRACE
259 #ifdef AMPI_FORTRANCOMPATIBLE
261 ampiRequest=&iRequestInst;
262 plainRequest=request;
267 #if defined AMPI_FORTRANCOMPATIBLE || defined AMPI_REQUESTONTRACE
268 #ifdef AMPI_REQUESTONTRACE
282 )) rc=MPI_Abort(comm, MPI_ERR_ARG);
284 switch(ampiRequest->pairedWith) {
287 rc=MPI_Wait(plainRequest,
290 ampiRequest->datatype,
292 ampiRequest->adjointBuf);
296 rc=MPI_Abort(ampiRequest->comm, MPI_ERR_TYPE);
321 MPI_Datatype
datatype, MPI_Datatype shadowdatatype,
330 #ifdef AMPI_FORTRANCOMPATIBLE
332 ampiRequest=&iRequestInst;
340 ampiRequest->tag=
tag;
341 ampiRequest->count=
count;
342 ampiRequest->datatype=shadowdatatype;
343 ampiRequest->comm=
comm;
346 ampiRequest->adjointBuf=shadowbuf ;
347 ampiRequest->tracedRequest=ampiRequest->plainRequest;
354 &(ampiRequest->plainRequest));
355 #ifdef AMPI_FORTRANCOMPATIBLE
376 )) rc=MPI_Abort(comm, MPI_ERR_ARG);
378 double* mappedbuf=NULL;
384 else if(is_derived) {
391 rc=MPI_Send(mappedbuf,
436 )) rc=MPI_Abort(comm, MPI_ERR_ARG);
460 rc=MPI_Abort(comm, MPI_ERR_TYPE);
475 MPI_Send(buf,count,datatype,dest,tag,comm);
484 MPI_Datatype
datatype, MPI_Datatype shadowdatatype,
489 int rc = MPI_Send(buf, count, datatype, dest, tag, comm) ;
490 assert(rc==MPI_SUCCESS);
492 rc = MPI_Send(shadowbuf, count, shadowdatatype, dest, tag, shadowcomm) ;
493 assert(rc==MPI_SUCCESS);
512 )) rc=MPI_Abort(comm, MPI_ERR_ARG);
514 double* mappedbuf=NULL;
521 rc= MPI_Isend(mappedbuf,
527 #ifdef AMPI_FORTRANCOMPATIBLE
534 #ifdef AMPI_FORTRANCOMPATIBLE
536 ampiRequest=&iRequestInst;
543 ampiRequest->tag=
tag;
544 ampiRequest->count=
count;
546 ampiRequest->comm=
comm;
550 ampiRequest->tracedRequest=ampiRequest->plainRequest;
551 #ifdef AMPI_FORTRANCOMPATIBLE
556 #ifdef AMPI_REQUESTONTRACE
575 #ifdef AMPI_REQUESTONTRACE
578 #ifdef AMPI_FORTRANCOMPATIBLE
580 ampiRequest=&iRequestInst;
581 plainRequest=request;
584 plainRequest=&(ampiRequest->plainRequest);
586 #if defined AMPI_FORTRANCOMPATIBLE || defined AMPI_REQUESTONTRACE
587 #ifdef AMPI_REQUESTONTRACE
601 )) rc=MPI_Abort(comm, MPI_ERR_ARG);
603 switch(ampiRequest->pairedWith) {
606 rc=MPI_Wait(plainRequest,
609 ampiRequest->datatype,
611 ampiRequest->adjointBuf,
612 ampiRequest->adjointTempBuf,
618 rc=MPI_Abort(ampiRequest->comm, MPI_ERR_TYPE);
643 MPI_Datatype
datatype, MPI_Datatype shadowdatatype,
650 MPI_Comm shadowcomm ;
652 #ifdef AMPI_FORTRANCOMPATIBLE
654 ampiRequest=&iRequestInst;
661 ampiRequest->tag=
tag;
662 ampiRequest->count=
count;
664 ampiRequest->comm=
comm;
668 ampiRequest->tracedRequest=ampiRequest->plainRequest;
669 rc = MPI_Isend(buf, count, datatype, dest, tag, comm,
670 &(ampiRequest->plainRequest)) ;
671 assert(rc==MPI_SUCCESS);
673 rc = MPI_Isend(shadowbuf, count, shadowdatatype, dest, tag, shadowcomm,
674 &(ampiRequest->shadowRequest)) ;
675 #ifdef AMPI_FORTRANCOMPATIBLE
683 MPI_Status *status) {
687 #ifdef AMPI_FORTRANCOMPATIBLE
689 ampiRequest=&iRequestInst;
690 plainRequest=request;
697 rc=MPI_Wait(plainRequest,
701 if(ampiRequest->tag==MPI_ANY_TAG) ampiRequest->tag=status->MPI_TAG;
702 if(ampiRequest->endPoint==MPI_ANY_SOURCE) ampiRequest->endPoint=status->MPI_SOURCE;
710 MPI_Status *status) {
713 #ifdef AMPI_FORTRANCOMPATIBLE
715 ampiRequest=&iRequestInst;
721 switch(ampiRequest->origin) {
724 rc=MPI_Irecv(ampiRequest->adjointTempBuf,
725 ampiRequest->adjointCount,
726 ampiRequest->datatype,
727 ampiRequest->endPoint,
730 &(ampiRequest->plainRequest));
736 ampiRequest->adjointCount,
737 ampiRequest->datatype,
738 ampiRequest->endPoint,
741 &(ampiRequest->plainRequest));
745 rc=MPI_Abort(ampiRequest->comm, MPI_ERR_TYPE);
748 #ifdef AMPI_FORTRANCOMPATIBLE
751 #if defined AMPI_FORTRANCOMPATIBLE || defined AMPI_REQUESTONTRACE
758 MPI_Status *status) {
768 MPI_Status *status) {
772 #ifdef AMPI_FORTRANCOMPATIBLE
774 ampiRequest=&iRequestInst;
780 rc=MPI_Wait(&(ampiRequest->plainRequest), &status1);
781 assert(rc==MPI_SUCCESS);
782 switch(ampiRequest->origin) {
784 rc=MPI_Wait(&(ampiRequest->shadowRequest), status);
789 rc = MPI_Recv(ampiRequest->adjointBuf, ampiRequest->count, ampiRequest->datatype,
790 (ampiRequest->endPoint==MPI_ANY_SOURCE?status1.MPI_SOURCE:ampiRequest->endPoint),
791 (ampiRequest->tag==MPI_ANY_TAG?status1.MPI_TAG:ampiRequest->tag),
792 shadowcomm, status) ;
796 rc=MPI_Abort(ampiRequest->comm, MPI_ERR_ARG);
804 rc=MPI_Barrier(comm);
813 rc=MPI_Barrier(comm);
825 rc=MPI_Barrier(comm);
826 assert(rc==MPI_SUCCESS);
828 rc=MPI_Barrier(shadowcomm);
834 MPI_Datatype sendtype,
837 MPI_Datatype recvtype,
840 void *rawSendBuf=sendbuf, *rawRecvBuf=recvbuf;
842 int isInPlace=(sendbuf==MPI_IN_PLACE);
843 int myRank, myCommSize;
844 MPI_Comm_rank(comm, &myRank);
845 MPI_Comm_size(comm, &myCommSize);
847 rc=MPI_Abort(comm, MPI_ERR_ARG);
854 rc=MPI_Gather(rawSendBuf,
881 MPI_Datatype sendtype,
884 MPI_Datatype recvtype,
889 int commSizeForRootOrNull, rTypeSize,i;
904 if (commSizeForRootOrNull)
905 tempBuf=MPI_IN_PLACE;
909 rc=MPI_Scatter(recvbuf,
922 if (commSizeForRootOrNull) {
923 MPI_Type_size(recvtype,&rTypeSize);
924 for (i=0;i<commSizeForRootOrNull;++i) {
925 if (! (i==root && sendcnt==0)) {
926 void *recvbufSegment=(
char*)recvbuf+(i*recvcnt*rTypeSize);
938 MPI_Datatype sendtype,
941 MPI_Datatype recvtype,
945 rc = MPI_Gather(sendbuf,sendcnt,sendtype,recvbuf,recvcnt,recvtype,root,comm);
951 MPI_Datatype sendtype,
954 MPI_Datatype recvtype,
958 int myRank, myCommSize;
959 int isInPlace=(recvbuf==MPI_IN_PLACE);
960 void *rawSendBuf=sendbuf, *rawRecvBuf=recvbuf;
961 MPI_Comm_rank(comm, &myRank);
962 MPI_Comm_size(comm, &myCommSize);
964 rc=MPI_Abort(comm, MPI_ERR_ARG);
971 rc=MPI_Scatter(rawSendBuf,
998 MPI_Datatype sendtype,
1001 MPI_Datatype recvtype,
1006 int commSizeForRootOrNull,i,rTypeSize;
1017 void *tempBuf = NULL;
1019 rc=MPI_Gather(recvbuf,
1028 if (commSizeForRootOrNull>0) MPI_Type_size(recvtype,&rTypeSize);
1029 for (i=0;i<commSizeForRootOrNull;++i) {
1030 if (! (i==root && recvcnt==0)) {
1031 void *tempBufSeqment=(
char*)tempBuf+i*sendcnt*rTypeSize;
1032 void *sendBufSegment=(
char*)sendbuf+i*sendcnt*rTypeSize;
1037 tempBufSeqment, idx);
1046 MPI_Datatype sendtype,
1049 MPI_Datatype recvtype,
1053 rc = MPI_Scatter(sendbuf,sendcnt,sendtype,recvbuf,recvcnt,recvtype,root,comm);
1059 MPI_Datatype sendtype,
1062 MPI_Datatype recvtype,
1064 void *rawSendBuf=NULL, *rawRecvBuf=NULL;
1066 int myRank, myCommSize;
1067 MPI_Comm_rank(comm, &myRank);
1068 MPI_Comm_size(comm, &myCommSize);
1070 rc=MPI_Abort(comm, MPI_ERR_ARG);
1074 else rawSendBuf=sendbuf;
1076 else rawRecvBuf=recvbuf;
1077 rc=MPI_Allgather(rawSendBuf,
1103 MPI_Datatype sendtype,
1106 MPI_Datatype recvtype,
1109 int rc=MPI_SUCCESS, rootPlaceholder;
1110 int commSizeForRootOrNull, rTypeSize, *recvcounts,i;
1121 recvcounts=(
int*)malloc(
sizeof(
int)*commSizeForRootOrNull);
1122 for (i=0;i<commSizeForRootOrNull;++i) recvcounts[i]=sendcount;
1127 rc=MPI_Reduce_scatter(recvbuf,
1138 if (commSizeForRootOrNull) {
1139 MPI_Type_size(recvtype,&rTypeSize);
1141 recvtype,comm,recvbuf);
1144 if (recvcounts) free((
void*)recvcounts);
1150 MPI_Datatype sendtype,
1153 MPI_Datatype recvtype,
1156 rc = MPI_Allgather(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,comm);
1162 MPI_Datatype sendtype,
1166 MPI_Datatype recvtype,
1169 void *rawSendBuf=sendbuf, *rawRecvBuf=recvbuf;
1171 int isInPlace=(sendbuf==MPI_IN_PLACE);
1172 int myRank, myCommSize;
1173 MPI_Comm_rank(comm, &myRank);
1174 MPI_Comm_size(comm, &myCommSize);
1176 rc=MPI_Abort(comm, MPI_ERR_ARG);
1183 rc=MPI_Gatherv(rawSendBuf,
1212 MPI_Datatype sendtype,
1216 MPI_Datatype recvtype,
1222 int myRank, commSizeForRootOrNull, rTypeSize;
1223 int *tRecvCnts=recvcnts, *tDispls=displs;
1224 char tRecvCntsFlag=0, tDisplsFlag=0;
1226 if (tRecvCnts==NULL) {
1227 tRecvCnts=(
int*)malloc(
sizeof(
int)*commSizeForRootOrNull);
1230 if (tDispls==NULL) {
1231 tDispls=(
int*)malloc(
sizeof(
int)*commSizeForRootOrNull);
1244 MPI_Comm_rank(comm, &myRank);
1249 if (commSizeForRootOrNull)
1250 tempBuf=MPI_IN_PLACE;
1254 rc=MPI_Scatterv(recvbuf,
1268 if (commSizeForRootOrNull) {
1269 MPI_Type_size(recvtype,&rTypeSize);
1270 for (i=0;i<commSizeForRootOrNull;++i) {
1271 if (! (i==root && sendcnt==0)) {
1272 void* recvbufSegment=(
char*)recvbuf+(rTypeSize*tDispls[i]);
1279 if (tRecvCntsFlag) free((
void*)(tRecvCnts));
1280 if (tDisplsFlag) free((
void*)(tDispls));
1286 MPI_Datatype sendtype,
1290 MPI_Datatype recvtype,
1294 rc = MPI_Gatherv(sendbuf,sendcnt,sendtype,recvbuf,recvcnts,displs,recvtype,root,comm);
1301 MPI_Datatype sendtype,
1304 MPI_Datatype recvtype,
1308 int myRank, myCommSize;
1309 int isInPlace=(recvbuf==MPI_IN_PLACE);
1310 void *rawSendBuf=sendbuf, *rawRecvBuf=recvbuf;
1311 MPI_Comm_rank(comm, &myRank);
1312 MPI_Comm_size(comm, &myCommSize);
1314 rc=MPI_Abort(comm, MPI_ERR_ARG);
1321 rc=MPI_Scatterv(rawSendBuf,
1351 MPI_Datatype sendtype,
1354 MPI_Datatype recvtype,
1359 int sendSize=0,i, typeSize;
1360 int myRank, commSizeForRootOrNull, *tempDispls;
1361 int *tSendCnts=sendcnts, *tDispls=displs;
1362 char tSendCntsFlag=0, tDisplsFlag=0;
1364 if (tSendCnts==NULL && commSizeForRootOrNull>0) {
1365 tSendCnts=(
int*)malloc(
sizeof(
int)*commSizeForRootOrNull);
1368 if (tDispls==NULL && commSizeForRootOrNull>0) {
1369 tDispls=(
int*)malloc(
sizeof(
int)*commSizeForRootOrNull);
1382 MPI_Comm_rank(comm, &myRank);
1383 tempDispls=(
int*)malloc(
sizeof(
int)*commSizeForRootOrNull);
1384 for (i=0;i<commSizeForRootOrNull;++i) {
1385 tempDispls[i]=sendSize;
1386 sendSize+=tSendCnts[i];
1388 void *tempBuf = NULL;
1390 rc=MPI_Gatherv(recvbuf,
1400 if (commSizeForRootOrNull>0) {
1401 MPI_Type_size(sendtype,&typeSize);
1402 for (i=0;i<commSizeForRootOrNull;++i) {
1403 if (! (i==root && recvcnt==0)) {
1404 void*
buf=(
char*)sendbuf+(typeSize*tDispls[i]);
1405 void* sourceBuf=(
char*)tempBuf+(typeSize*tempDispls[i]);
1415 if (tempDispls) free((
void*)tempDispls);
1416 if (tSendCntsFlag) free((
void*)(tSendCnts));
1417 if (tDisplsFlag) free((
void*)(tDispls));
1424 MPI_Datatype sendtype,
1427 MPI_Datatype recvtype,
1428 int root, MPI_Comm
comm){
1430 rc = MPI_Scatterv(sendbuf,sendcnts,displs,sendtype,recvbuf,recvcnt,recvtype,root,comm);
1436 MPI_Datatype sendtype,
1440 MPI_Datatype recvtype,
1442 void *rawSendBuf=NULL, *rawRecvBuf=NULL;
1444 int myRank, myCommSize;
1445 MPI_Comm_rank(comm, &myRank);
1446 MPI_Comm_size(comm, &myCommSize);
1448 rc=MPI_Abort(comm, MPI_ERR_ARG);
1452 else rawSendBuf=sendbuf;
1454 else rawRecvBuf=recvbuf;
1455 rc=MPI_Allgatherv(rawSendBuf,
1483 MPI_Datatype sendtype,
1487 MPI_Datatype recvtype,
1492 int myRank, commSizeForRootOrNull, rTypeSize,rootPlaceholder;
1493 int *tRecvCnts=recvcnts, *tDispls=displs;
1494 char tRecvCntsFlag=0, tDisplsFlag=0;
1496 if (tRecvCnts==NULL) {
1497 tRecvCnts=(
int*)malloc(
sizeof(
int)*commSizeForRootOrNull);
1500 if (tDispls==NULL) {
1501 tDispls=(
int*)malloc(
sizeof(
int)*commSizeForRootOrNull);
1514 MPI_Comm_rank(comm, &myRank);
1519 rc=MPI_Reduce_scatter(recvbuf,
1530 MPI_Type_size(recvtype,&rTypeSize);
1531 for (i=0;i<commSizeForRootOrNull;++i) {
1532 void*
buf=(
char*)recvbuf+(rTypeSize*tDispls[i]);
1536 if (tRecvCntsFlag) free((
void*)(tRecvCnts));
1537 if (tDisplsFlag) free((
void*)(tDispls));
1543 MPI_Datatype sendtype,
1547 MPI_Datatype recvtype,
1550 rc = MPI_Allgatherv(sendbuf,sendcnt,sendtype,recvbuf,recvcnts,displs,recvtype,comm);
1560 double* mappedbuf=NULL;
1566 else if(is_derived) {
1573 rc=MPI_Bcast(mappedbuf,
1606 MPI_Comm_rank(comm,&rank);
1631 int rc=MPI_Bcast(buf,
1645 void* rbuf,
void* rbufd,
void* rbufb,
1647 MPI_Datatype
datatype, MPI_Datatype datatyped, MPI_Datatype datatypeb,
1652 if (count == 0)
return MPI_SUCCESS;
1654 MPI_Comm_rank(comm,&rank) ;
1656 int reduceTgt = (sbufd!=NULL) ;
1657 int reduceAdj = (sbufb!=NULL) ;
1658 MPI_Comm shadowcomm =
comm ;
1662 if (uopd || uopb || op!=MPI_SUM) {
1666 int is_commutative = (uopdata?uopdata->
commutes[uop_idx]:1) ;
1668 if (reduceTgt) assert(uopd) ;
1669 if (reduceAdj) assert(uopb) ;
1672 void *exch_buf=NULL ;
1675 void *obuf=NULL, *obufd=NULL ;
1683 obuf = (
void*)((
char*)obuf - lb);
1689 obufd = (
void*)((
char*)obufd - lbd);
1692 if (sbuf==MPI_IN_PLACE) {
1696 rbuf = (
void*)((
char*)rbuf - lb);
1701 rbufd = (
void*)((
char*)rbufd - lbd);
1708 rbuf = (
void*)((
char*)rbuf - lb);
1711 rbufd = (
void*)((
char*)rbufd - lbd);
1721 MPI_Comm_size(comm,&comm_size);
1723 int maskup = 0xffffffff ;
1726 if (!reduceAdj || split_mode==0) {
1727 while (mask < comm_size) {
1728 if ((rank&mask) == 0) {
1729 other = (rank==root?root&maskup:rank) | mask ;
1730 if (other >= comm_size)
1732 else if ((other&maskup) == (root&maskup)) {
1739 other = (rank==root?root&maskup:rank) & ~mask ;
1740 if ((other&maskup) == (root&maskup)) other = root ;
1746 maskup = maskup & ~mask ;
1751 rc = MPI_Send(rbuf, count, datatype, other, 11, comm) ;
1752 assert(rc==MPI_SUCCESS);
1754 rc = MPI_Send(rbufd, count, datatyped, other, 11, shadowcomm) ;
1755 assert(rc==MPI_SUCCESS);
1758 }
else if (action==-1) {
1759 rc = MPI_Recv(obuf, count, datatype, other, 11, comm, &status);
1760 assert(rc==MPI_SUCCESS);
1762 rc = MPI_Recv(obufd, count, datatyped, other, 11, shadowcomm, &status);
1763 assert(rc==MPI_SUCCESS);
1765 if (is_commutative || (other<rank)) {
1768 if (split_mode!=2) {
1773 (*uopd)(obuf, obufd, rbuf, rbufd, &
count, &
datatype, &datatyped);
1775 (*(uopdata->
functions[uop_idx]))(obuf, rbuf, &count, &datatype) ;
1779 (count, datatype, comm, obuf, obufd, rbuf, rbufd) ;
1780 }
else if (op==MPI_MIN) {
1782 (count, datatype, comm, obuf, obufd, rbuf, rbufd) ;
1783 }
else if (op==MPI_MAX) {
1785 (count, datatype, comm, obuf, obufd, rbuf, rbufd) ;
1787 printf(__FILE__
": tangent AMPI reduction not yet implemented for std op==%i\n",uop_idx) ;
1793 if (split_mode!=2) {
1797 (*uopd)(rbuf, rbufd, obuf, obufd, &
count, &
datatype, &datatyped);
1799 (*(uopdata->
functions[uop_idx]))(rbuf, obuf, &count, &datatype) ;
1800 exch_buf = obuf ; obuf = rbuf ; rbuf = exch_buf ;
1802 exch_buf = obufd ; obufd = rbufd ; rbufd = exch_buf ;
1804 switched = ~switched ;
1811 exch_buf = obuf ; obuf = rbuf ; rbuf = exch_buf ;
1816 exch_buf = obufd ; obufd = rbufd ; rbufd = exch_buf ;
1824 if (split_mode!=0) {
1837 void *rbufb_initial=NULL ;
1842 obufb = (
void*)((
char*)obufb - lbb);
1845 rbufb_initial = rbufb ;
1847 rbufb = (
void*)((
char*)rbufb - lbb);
1850 if (switched && rank==root) {
1853 exch_buf = obufb ; obufb = rbufb ; rbufb = exch_buf ;
1857 maskup = maskup | mask ;
1858 if ((rank&mask) == 0) {
1859 other = (rank==root?root&maskup:rank) | mask ;
1860 if (other >= comm_size)
1862 else if ((other&maskup) == (root&maskup)) {
1869 other = (rank==root?root&maskup:rank) & ~mask ;
1870 if ((other&maskup) == (root&maskup)) other = root ;
1878 rc = MPI_Recv(obufb, count, datatypeb, other, 11, comm, &status);
1879 assert(rc==MPI_SUCCESS);
1881 }
else if (action==-1) {
1882 if (is_commutative || (other<rank)) {
1884 if (split_mode!=2) {
1890 (*uopb)(obuf, obufb, rbuf, rbufb, &
count, &
datatype, &datatypeb) ;
1894 (count, datatype, comm, obuf, obufb, rbuf, rbufb) ;
1895 }
else if (op==MPI_MIN) {
1897 (count, datatype, comm, obuf, obufb, rbuf, rbufb) ;
1898 }
else if (op==MPI_MAX) {
1900 (count, datatype, comm, obuf, obufb, rbuf, rbufb) ;
1902 printf(__FILE__
": adjoint AMPI reduction not yet implemented for std op==%i\n",uop_idx) ;
1906 exch_buf = obuf ; obuf = rbuf ; rbuf = exch_buf ;
1907 exch_buf = obufb ; obufb = rbufb ; rbufb = exch_buf ;
1909 if (split_mode!=2) {
1913 (*uopb)(rbuf, rbufb, obuf, obufb, &
count, &
datatype, &datatypeb) ;
1915 rc = MPI_Send(obufb, count, datatypeb, other, 11, comm);
1916 assert(rc==MPI_SUCCESS);
1920 if (sbuf==MPI_IN_PLACE) {
1948 assert(rc==MPI_SUCCESS);
1951 rc=MPI_Reduce(sbufd,
1963 tmp_bufb = (
void*)((
char*)tmp_bufb - lbb);
1966 count, datatypeb, comm);
1967 rc=MPI_Bcast(tmp_bufb, count, datatypeb, root, comm) ;
1968 assert(rc==MPI_SUCCESS) ;
1985 MPI_Comm_rank(comm,&rank);
1988 int comm_size, is_commutative;
1989 int mask, relrank, source, lroot;
1995 MPI_User_function* uop = uopd->
functions[uop_idx];
1996 if (count == 0)
return MPI_SUCCESS;
1997 MPI_Comm_size(comm,&comm_size);
2000 is_commutative = uopd->
commutes[uop_idx];
2002 tmp_buf = (
void*)((
char*)tmp_buf - lb);
2005 rbuf = (
void*)((
char*)rbuf - lb);
2007 if ((rank != root) || (sbuf != MPI_IN_PLACE)) {
2015 relrank = (rank - lroot + comm_size) % comm_size;
2016 while (mask < comm_size) {
2017 if ((mask & relrank) == 0) {
2018 source = (relrank | mask);
2019 if (source < comm_size) {
2021 source = (source + lroot) % comm_size;
2024 assert(rc==MPI_SUCCESS);
2025 if (is_commutative) {
2035 source = ((relrank & (~mask)) + lroot) % comm_size;
2038 assert(rc==MPI_SUCCESS);
2043 if (!is_commutative && (root != 0)) {
2044 if (rank == 0) rc =
FW_AMPI_Send(rbuf, count, datatype, root,
2046 else if (rank==root) rc =
FW_AMPI_Recv(rbuf, count, datatype, 0,
2048 assert(rc==MPI_SUCCESS);
2057 double* mappedsbuf=NULL;
2058 double* mappedrbuf=NULL;
2067 rc=MPI_Reduce(mappedsbuf,
2101 datatype, datatype, datatype,
2131 MPI_Comm_rank(comm,&rank);
2132 rc=MPI_Bcast(reduceResultBuf,
2137 if (rc!=MPI_SUCCESS) MPI_Abort(comm, MPI_ERR_ARG);
2142 tempBuf, rbuf, idx);
2145 rc=MPI_Bcast(tempBuf,
2152 tempBuf, reduceResultBuf, &idx);
2154 tempBuf, prevValBuf, &idx);
2156 else if (op==MPI_MAX || op==MPI_MIN) {
2161 equalsResultBuf, prevValBuf, reduceResultBuf, &idx);
2163 MPI_Allreduce(equalsResultBuf,
2164 contributionTotalsBuf,
2170 tempBuf, equalsResultBuf, &idx);
2172 tempBuf, contributionTotalsBuf, &idx);
2178 sbuf, tempBuf, idx);
2187 void* rbuf,
void* rbufb,
2189 MPI_Datatype
datatype, MPI_Datatype datatypeb,
2196 datatype, datatype, datatypeb,
2210 int rc=MPI_Reduce(sbuf,
2224 void* rbuf,
void* rbufd,
2226 MPI_Datatype
datatype, MPI_Datatype datatyped,
2233 datatype, datatyped, datatype,
2247 MPI_Comm_rank(comm,&rank);
2248 double* mappedsbuf=NULL;
2249 double* mappedrbuf=NULL;
2258 rc=MPI_Allreduce(mappedsbuf,
2285 int rc=0,rank, rootPlaceHolder;
2301 MPI_Comm_rank(comm,&rank);
2312 else if (op==MPI_PROD) {
2314 tempBuf, reduceResultBuf, &idx);
2316 tempBuf, prevValBuf, &idx);
2318 else if (op==MPI_MAX || op==MPI_MIN) {
2323 equalsResultBuf, prevValBuf, reduceResultBuf, &idx);
2325 MPI_Allreduce(equalsResultBuf,
2326 contributionTotalsBuf,
2332 tempBuf, equalsResultBuf, &idx);
2334 tempBuf, contributionTotalsBuf, &idx);
2342 sbuf, tempBuf, idx);
2391 for (i=0;i<dat->
size;i++) {
2429 int array_of_blocklengths[],
2430 MPI_Aint array_of_displacements[],
2431 MPI_Datatype array_of_types[],
2434 int array_of_p_blocklengths[],
2435 MPI_Aint array_of_p_displacements[],
2436 MPI_Datatype array_of_p_types[],
2438 MPI_Datatype* newtype,
2439 MPI_Datatype* packed_type) {
2442 int num_actives=0, fst_ablk_set=0;
2443 MPI_Aint fst_active_blk=0, lst_active_blk=0, lst_active_blk_len=0;
2444 for (i=0;i<
count;i++) {
2446 num_actives += array_of_blocklengths[i];
2447 if (!fst_ablk_set) {
2448 fst_active_blk = array_of_displacements[i];
2451 lst_active_blk = array_of_displacements[i];
2452 lst_active_blk_len = array_of_blocklengths[i];
2457 num_actives += dat->
num_actives[dt_idx]*array_of_blocklengths[i];
2458 if (!fst_ablk_set) {
2462 lst_active_blk = array_of_displacements[i] + (array_of_blocklengths[i]-1)*dat->
extents[dt_idx] + dat->
last_active_blocks[dt_idx];
2473 (dat->
preAlloc)*
sizeof(MPI_Datatype));
2478 (dat->
preAlloc)*
sizeof(MPI_Aint*));
2480 (dat->
preAlloc)*
sizeof(MPI_Datatype*));
2484 (dat->
preAlloc)*
sizeof(MPI_Datatype));
2488 (dat->
preAlloc)*
sizeof(MPI_Aint*));
2490 (dat->
preAlloc)*
sizeof(MPI_Datatype*));
2521 for (i=0;i<dtdata->
size;i++) {
2547 MPI_User_function*
function,
2565 for (i=0;i<uopdata->
size;i++) {
2566 if (uopdata->
ops[i]==op)
return i;
2587 win->
map=malloc(
sizeof(
void*));
2589 win->
plainWindow=(MPI_Win**) malloc(
sizeof(MPI_Win*));
2590 *win->
plainWindow= (MPI_Win*) malloc(
sizeof(MPI_Win));
2595 win->
disp=disp_unit;
2634 *ampiWin.
map=malloc(
sizeof(ampiWin.
size));
2637 double *map_=(
double*) *ampiWin.
map;
2645 MPI_Datatype origin_datatype,
2647 MPI_Aint target_disp,
2649 MPI_Datatype target_datatype,
2654 double* mappedbuf=NULL;
2659 mappedbuf=origin_addr;
2661 rc=MPI_Get( mappedbuf,
2690 MPI_Datatype origin_datatype,
2692 MPI_Aint target_disp,
2694 MPI_Datatype target_datatype,
2702 MPI_Datatype origin_datatype,
2704 MPI_Aint target_disp,
2706 MPI_Datatype target_datatype,
2714 MPI_Datatype origin_datatype,
2716 MPI_Aint target_disp,
2718 MPI_Datatype target_datatype,
2735 rc=MPI_Win_fence( assert, tmp);
2740 for(i=num_reqs; i>0 ; i=i-1) {
2746 printf(
"FW num_reqs: %d\n", win.
num_reqs);
2780 double *tmp=(
double *) *win.
map;
2786 printf(
"BW Fence map: %f\n", tmp[0]);
2803 printf(
"BW num_reqs: %d\n", num_reqs);
2804 for(i=num_reqs; i>0 ; i=i-1) {
2808 printf(
"BW Put adj: %f\n", tmp[0]);