/* compile-time settings:

   FORTRAN naming convention
     default      cpgs_setup, etc.
     -DUPCASE     CPGS_SETUP, etc.
     -DUNDERSCORE cpgs_setup_, etc.

   -DMPI             parallel version (sequential otherwise)
   -DCRYSTAL_STATIC  avoid some message exchange at the risk of
                     crashing b/c of insufficient buffer size

   -DINITIAL_BUFFER_SIZE=expression
      ignored unless CRYSTAL_STATIC is defined.
      arithmetic expression controlling the initial buffer size for the crystal
      router; this needs to be large enough to hold the intermediate messages
      during all stages of the crystal router

      variables that can be used in expression include
         num - the number of processors
         n   - the length of the global index array

*/

/* default for INITIAL_BUFFER_SIZE */
#ifdef CRYSTAL_STATIC
#  ifndef INITIAL_BUFFER_SIZE
#    define INITIAL_BUFFER_SIZE 2*(3*num+n*9)
#  endif
#endif

/* FORTRAN usage:

   integer hc, np
   call crystal_new(hc,comm,np)  ! get a crystal router handle (see fcrystal.c)

   integer hgs
   integer n, max_vec_dim
   integer*? global_index_array(1:n) ! type corresponding to slong in "types.h"

   call cpgs_setup(hgs,hc,global_index_array,n,max_vec_dim)
     sets hgs to new handle

   !ok to call crystal_done(hc) here, or any later time

   call cpgs_op(hgs, u, op)
     integer handle, op : 1-add, 2-multiply, 3-min, 4-max
     real    u(1:n) - same layout as global_index_array provided to cpgs_setup

   call cpgs_op_vec(hgs, u, d, op)
     integer op : 1-add, 2-multiply, 3-min, 4-max
     integer d <= max_vec_dim
     real    u(1:d, 1:n) - vector components for each node stored together

   call cpgs_op_many(hgs, u1, u2, u3, u4, u5, u6, d, op)
     integer op : 1-add, 2-multiply, 3-min, 4-max
     integer d : in {1,2,3,4,5,6}, <= max_vec_dim
     real    u1(1:n), u2(1:n), u3(1:n), etc.

     same effect as: call cpgs_op(hgs, u1, op)
                     if(d.gt.1) call cpgs_op(hgs, u2, op)
                     if(d.gt.2) call cpgs_op(hgs, u3, op)
                     etc.
     with possibly some savings as fewer messages are exchanged

   call cpgs_free(hgs)
*/
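/* C++ usage (a minimal sketch, not part of the original sources; it only uses
   names that appear in this file and "moab/gs.hpp", and all error handling is
   omitted):

     gs_data::crystal_data cd;
     cd.initialize(MPI_COMM_WORLD);    // wrap the communicator

     gs_data gs;
     // label[i] = global id of local node i; nodes sharing a label combine
     gs.initialize(n, label, ulabel, maxv, nlabels, nulabels, &cd);

     gs.gs_data_op(u, GS_OP_ADD);      // u[0..n-1] combined across procs

     gs.reset();
     cd.reset();
*/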
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <math.h>
#ifdef USE_MPI
#  include "moab_mpi.h"
#endif

#include "moab/gs.hpp"
#ifdef USE_MPI

#ifdef VALGRIND
#  include <valgrind/memcheck.h>
#elif !defined(VALGRIND_CHECK_MEM_IS_DEFINED)
#  define VALGRIND_CHECK_MEM_IS_DEFINED(a,b)
#endif

#endif

namespace moab {

/*--------------------------------------------------------------------------
   Local Execution Phases
  --------------------------------------------------------------------------*/

#define DO_SET(a,b) b=a
#define DO_ADD(a,b) a+=b
#define DO_MUL(a,b) a*=b
#define DO_MIN(a,b) if(b<a) a=b
#define DO_MAX(a,b) if(b>a) a=b
#define DO_BPR(a,b)   \
  do                  \
  { uint a_ = a;      \
    uint b_ = b;      \
    for(;;) {         \
      if(a_<b_)       \
        b_>>=1;       \
      else if(b_<a_)  \
        a_>>=1;       \
      else break; }   \
    a = a_;           \
  } while(0)

#define LOOP(op) do {        \
    sint i,j;                \
    while((i=*cm++) != -1)   \
      while((j=*cm++) != -1) \
        op(u[i],u[j]);       \
  } while(0)

static void local_condense(realType *u, int op, const sint *cm)
{
  switch (op)
  {
    case GS_OP_ADD:
      LOOP(DO_ADD);
      break;
    case GS_OP_MUL:
      LOOP(DO_MUL);
      break;
    case GS_OP_MIN:
      LOOP(DO_MIN);
      break;
    case GS_OP_MAX:
      LOOP(DO_MAX);
      break;
    case GS_OP_BPR:
      LOOP(DO_BPR);
      break;
  }
}

static void local_uncondense(realType *u, const sint *cm)
{
  LOOP(DO_SET);
}

#undef LOOP

#define LOOP(op) do {          \
    sint i,j,k;                \
    while((i=*cm++) != -1) {   \
      realType *pi=u+n*i;      \
      while((j=*cm++) != -1) { \
        realType *pj=u+n*j;    \
        for(k=n;k;--k) {       \
          op(*pi,*pj);         \
          ++pi;                \
          ++pj;                \
        }                      \
      }                        \
    }                          \
  } while(0)

static void local_condense_vec(realType *u, uint n, int op, const sint *cm)
{
  switch (op)
  {
    case GS_OP_ADD:
      LOOP(DO_ADD);
      break;
    case GS_OP_MUL:
      LOOP(DO_MUL);
      break;
    case GS_OP_MIN:
      LOOP(DO_MIN);
      break;
    case GS_OP_MAX:
      LOOP(DO_MAX);
      break;
    case GS_OP_BPR:
      LOOP(DO_BPR);
      break;
  }
}

static void local_uncondense_vec(realType *u, uint n, const sint *cm)
{
  LOOP(DO_SET);
}

#undef LOOP
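/* Condense-map encoding (illustrative note, derived from the LOOP macros
   above): cm is a -1-terminated list of -1-terminated groups of local indices
   sharing a label.  For cm = { 0, 3, 5, -1, 1, 4, -1, -1 } and op=GS_OP_ADD,
   local_condense folds u[3] and u[5] into u[0] (and u[4] into u[1]); after
   the nonlocal exchange, local_uncondense copies the combined value in u[0]
   back to u[3] and u[5] (and u[1] back to u[4]). */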
/*--------------------------------------------------------------------------
   Non-local Execution Phases
  --------------------------------------------------------------------------*/

#ifdef USE_MPI

void gs_data::nonlocal_info::initialize(uint np, uint count,
                                        uint nlabels, uint nulabels, uint maxv)
{
  _target = NULL;
  _nshared = NULL;
  _sh_ind = NULL;
  _slabels = NULL;
  _ulabels = NULL;
  _reqs = NULL;
  _buf = NULL;
  _np = np;
  _target = (uint*) malloc((2*np+count)*sizeof(uint));
  _nshared = _target + np;
  _sh_ind = _nshared + np;
  if (1 < nlabels)
    _slabels = (slong*) malloc(((nlabels-1)*count)*sizeof(slong));
  else
    _slabels = NULL;
  _ulabels = (ulong*) malloc((nulabels*count)*sizeof(ulong));
  _reqs = (MPI_Request*) malloc(2*np*sizeof(MPI_Request));
  _buf = (realType*) malloc((2*count*maxv)*sizeof(realType));
  _maxv = maxv;
}

void gs_data::nonlocal_info::nlinfo_free()
{
  //Free the ptrs
  free(_buf);
  free(_reqs);
  free(_target);
  free(_slabels);
  free(_ulabels);
  //Set to null
  _ulabels=NULL;
  _buf=NULL;
  _reqs=NULL;
  _target=NULL;
  _slabels=NULL;
  _nshared = NULL;
  _sh_ind = NULL;
}

void gs_data::nonlocal_info::nonlocal(realType *u, int op, MPI_Comm comm)
{
  MPI_Status status;
  uint np = this->_np;
  MPI_Request *reqs = this->_reqs;
  uint *targ = this->_target;
  uint *nshared = this->_nshared;
  uint *sh_ind = this->_sh_ind;
  uint id;
  realType *buf = this->_buf, *start;
  unsigned int i;
  { MPI_Comm_rank(comm,(int *)&i); id=i; }
  for (i=0; i<np; ++i)
  {
    uint c = nshared[i];
    start = buf;
    for (;c;--c)
      *buf++ = u[*sh_ind++];
    MPI_Isend(start,nshared[i]*sizeof(realType),
              MPI_UNSIGNED_CHAR, targ[i],id,comm,reqs++);
  }
  start = buf;
  for(i=0; i<np; ++i)
  {
    MPI_Irecv(start,nshared[i]*sizeof(realType),MPI_UNSIGNED_CHAR,
              targ[i],targ[i],comm,reqs++);
    start+=nshared[i];
  }
  for (reqs=this->_reqs,i=np*2;i;--i)
    MPI_Wait(reqs++,&status);
  sh_ind = this->_sh_ind;
# define LOOP(OP)             \
  do {                        \
    for(i=0;i<np;++i) {       \
      uint c;                 \
      for(c=nshared[i];c;--c) \
      {                       \
        OP(u[*sh_ind],*buf);  \
        ++sh_ind, ++buf;      \
      }                       \
    }                         \
  } while(0)
  switch(op)
  {
    case GS_OP_ADD: LOOP(DO_ADD); break;
    case GS_OP_MUL: LOOP(DO_MUL); break;
    case GS_OP_MIN: LOOP(DO_MIN); break;
    case GS_OP_MAX: LOOP(DO_MAX); break;
    case GS_OP_BPR: LOOP(DO_BPR); break;
  }
# undef LOOP
}
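/* Layout note (derived from initialize() above): _target, _nshared and
   _sh_ind live in one malloc'd block -- _target[0.._np-1] holds the partner
   ranks, _nshared[p] how many entries are exchanged with partner p, and
   _sh_ind the concatenated local indices, grouped by partner.  Message tags
   are the sender's rank, so a receive posted with tag targ[i] matches the
   send coming from that partner. */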
void gs_data::nonlocal_info::nonlocal_vec(realType *u, uint n,
                                          int op, MPI_Comm comm)
{
  MPI_Status status;
  uint np = this->_np;
  MPI_Request *reqs = this->_reqs;
  uint *targ = this->_target;
  uint *nshared = this->_nshared;
  uint *sh_ind = this->_sh_ind;
  uint id;
  realType *buf = this->_buf, *start;
  uint size = n*sizeof(realType);
  unsigned int i;
  {
    MPI_Comm_rank(comm,(int *)&i);
    id=i;
  }
  for (i=0; i<np; ++i)
  {
    uint ns=nshared[i], c=ns;
    start = buf;
    for (;c;--c) {
      memcpy(buf,u+n*(*sh_ind++),size);
      buf+=n;
    }
    MPI_Isend(start,ns*size,MPI_UNSIGNED_CHAR,targ[i],id,comm,reqs++);
  }
  start = buf;
  for (i=0; i<np; ++i)
  {
    int nsn=n*nshared[i];
    MPI_Irecv(start,nsn*sizeof(realType),MPI_UNSIGNED_CHAR,
              targ[i],targ[i],comm,reqs++);
    start+=nsn;
  }
  for (reqs=this->_reqs,i=np*2;i;--i)
    MPI_Wait(reqs++,&status);
  sh_ind = this->_sh_ind;
# define LOOP(OP)                     \
  do {                                \
    for(i=0;i<np;++i) {               \
      uint c,j;                       \
      for(c=nshared[i];c;--c) {       \
        realType *uu=u+n*(*sh_ind++); \
        for(j=n;j;--j) {              \
          OP(*uu,*buf);               \
          ++uu;                       \
          ++buf;                      \
        }                             \
      }                               \
    }                                 \
  } while(0)
  switch(op)
  {
    case GS_OP_ADD: LOOP(DO_ADD); break;
    case GS_OP_MUL: LOOP(DO_MUL); break;
    case GS_OP_MIN: LOOP(DO_MIN); break;
    case GS_OP_MAX: LOOP(DO_MAX); break;
    case GS_OP_BPR: LOOP(DO_BPR); break;
  }
# undef LOOP
}

void gs_data::nonlocal_info::nonlocal_many(realType **u, uint n, int op,
                                           MPI_Comm comm)
{
  MPI_Status status;
  uint np = this->_np;
  MPI_Request *reqs = this->_reqs;
  uint *targ = this->_target;
  uint *nshared = this->_nshared;
  uint *sh_ind = this->_sh_ind;
  uint id;
  realType *buf = this->_buf, *start;
  unsigned int i;
  {
    MPI_Comm_rank(comm,(int *)&i);
    id=i;
  }
  for (i=0; i<np; ++i)
  {
    uint c, j, ns = nshared[i];
    start = buf;
    for (j=0; j<n; ++j)
    {
      realType *uu=u[j];
      for(c=0;c<ns;++c)
        *buf++=uu[sh_ind[c]];
    }
    sh_ind+=ns;
    MPI_Isend(start,n*ns*sizeof(realType),MPI_UNSIGNED_CHAR,targ[i],id,comm,reqs++);
  }
  start = buf;
  for (i=0; i<np; ++i)
  {
    int nsn = n*nshared[i];
    MPI_Irecv(start,nsn*sizeof(realType),MPI_UNSIGNED_CHAR,
              targ[i],targ[i],comm,reqs++);
    start+=nsn;
  }
  for (reqs=this->_reqs,i=np*2;i;--i)
    MPI_Wait(reqs++,&status);
  sh_ind = this->_sh_ind;
# define LOOP(OP)                 \
  do {                            \
    for(i=0;i<np;++i) {           \
      uint c,j,ns=nshared[i];     \
      for(j=0;j<n;++j) {          \
        realType *uu=u[j];        \
        for(c=0;c<ns;++c) {       \
          OP(uu[sh_ind[c]],*buf); \
          ++buf;                  \
        }                         \
      }                           \
      sh_ind+=ns;                 \
    }                             \
  } while(0)
  switch(op)
  {
    case GS_OP_ADD: LOOP(DO_ADD); break;
    case GS_OP_MUL: LOOP(DO_MUL); break;
    case GS_OP_MIN: LOOP(DO_MIN); break;
    case GS_OP_MAX: LOOP(DO_MAX); break;
    case GS_OP_BPR: LOOP(DO_BPR); break;
  }
# undef LOOP
}
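/* Note on nonlocal_many (derived from the pack loop above): within each
   partner's message the arrays are packed field-major -- all of u[0]'s shared
   entries, then u[1]'s, and so on -- so a single send per partner replaces
   the n separate exchanges that repeated gs_data_op calls would issue. */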
/*---------------------------------------------------------------------------
   MOAB Crystal Router
  ---------------------------------------------------------------------------*/
gs_data::crystal_data::crystal_data()
{
}

void gs_data::crystal_data::initialize(MPI_Comm comm)
{
  int num,id;
  buffers[0].buf.buffer_init(1024);
  buffers[1].buf.buffer_init(1024);
  buffers[2].buf.buffer_init(1024);
  all=&buffers[0];
  keep=&buffers[1];
  send=&buffers[2];
  memcpy(&(this->_comm),&comm,sizeof(MPI_Comm));
  MPI_Comm_rank(comm,&id );
  this->_id =id;
  MPI_Comm_size(comm,&num);
  this->_num=num;
}

void gs_data::crystal_data::reset()
{
  buffers[0].buf.reset();
  buffers[1].buf.reset();
  buffers[2].buf.reset();
  keep = NULL;
  all = NULL;
  send = NULL;
}

//Partition data before sending
void gs_data::crystal_data::partition(uint cutoff,
                                      crystal_buf *lo, crystal_buf *hi)
{
  const uint *src = (uint*) all->buf.ptr;
  const uint *end = (uint*) src+all->n;
  uint *target, *lop, *hip;
  lo->n=hi->n=0;
  lo->buf.buffer_reserve(all->n*sizeof(uint));
  hi->buf.buffer_reserve(all->n*sizeof(uint));
  lop = (uint*) lo->buf.ptr;
  hip = (uint*) hi->buf.ptr;
  while(src!=end)
  {
    uint chunk_len = 3 + src[2];
    if(src[0]<cutoff) {
      target=lop;
      lo->n+=chunk_len;
      lop+=chunk_len;
    }
    else {
      target=hip;
      hi->n+=chunk_len;
      hip+=chunk_len;
    }
    memcpy(target,src,chunk_len*sizeof(uint));
    src+=chunk_len;
  }
}

//Send message to target process
void gs_data::crystal_data::send_(uint target, int recvn)
{
  MPI_Request req[3] =
      { MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL };
  MPI_Status status[3];
  uint count[2]={0,0}, sum, *recv[2];
  crystal_buf *t;
  int i;

  VALGRIND_CHECK_MEM_IS_DEFINED( &send->n, sizeof(uint) );
  MPI_Isend(&send->n,sizeof(uint),MPI_UNSIGNED_CHAR,
            target,_id,_comm,&req[0]);
  for (i=0; i<recvn; ++i)
    MPI_Irecv(&count[i],sizeof(uint),MPI_UNSIGNED_CHAR,
              target+i,target+i,_comm,&req[i+1]);
  MPI_Waitall(recvn+1,req,status);
  sum = keep->n;
  for (i=0; i<recvn; ++i)
    sum+=count[i];
  keep->buf.buffer_reserve(sum*sizeof(uint));
  recv[0]= (uint*) keep->buf.ptr;
  recv[0]+=keep->n;
  recv[1]=recv[0]+count[0];
  keep->n=sum;

  VALGRIND_CHECK_MEM_IS_DEFINED( send->buf.ptr,send->n*sizeof(uint) );
  MPI_Isend(send->buf.ptr,send->n*sizeof(uint),
            MPI_UNSIGNED_CHAR,target,_id,_comm,&req[0]);
  if (recvn)
  {
    MPI_Irecv(recv[0],count[0]*sizeof(uint),MPI_UNSIGNED_CHAR,
              target,target,_comm,&req[1]);
    if (recvn==2)
      MPI_Irecv(recv[1],count[1]*sizeof(uint),MPI_UNSIGNED_CHAR,
                target+1,target+1,_comm,&req[2]);
  }
  MPI_Waitall(recvn+1,req,status);

  t=all;
  all=keep;
  keep=t;
}

void gs_data::crystal_data::crystal_router()
{
  uint bl=0, bh, n=_num, nl, target;
  int recvn;
  crystal_buf *lo, *hi;
  while (n>1)
  {
    nl = n/2, bh = bl+nl;
    if (_id < bh)
    {
      target=_id+nl;
      recvn=(n&1 && _id==bh-1)?2:1;
      lo=keep;
      hi=send;
    }
    else {
      target=_id-nl;
      recvn=(target==bh)?(--target,0):1;
      hi=keep;
      lo=send;
    }
    partition(bh,lo,hi);
    send_(target,recvn);
    if(_id<bh)
      n=nl;
    else {
      n-=nl;
      bl=bh;
    }
  }
}
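/* Worked example of crystal_router() (illustrative): with _num = 5, the first
   pass splits ranks [0,5) at bh = 2.  Ranks 0,1 keep chunks whose target is
   below 2 and send the rest up (rank 0 to rank 2, rank 1 to rank 3); ranks
   2,3,4 keep chunks targeted at 2..4 and send the rest down.  Rank 4's
   nominal partner would be rank 2 (== bh), so it redirects its send to
   rank 1 and receives nothing (recvn = 0), while rank 1, the last rank of
   the lower half with an odd total, posts two receives (from ranks 3 and 4).
   Each half then recurses, so every chunk reaches its target rank in about
   log2(_num) rounds. */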
#define UINT_PER_X(X) ((sizeof(X)+sizeof(uint)-1)/sizeof(uint))
#define UINT_PER_REAL UINT_PER_X(realType)
#define UINT_PER_LONG UINT_PER_X(slong)

/*-------------------------------------------------------------------------
   Transfer
  -------------------------------------------------------------------------*/
ErrorCode gs_data::crystal_data::gs_transfer(int dynamic,
                                             moab::TupleList &tl, unsigned pf)
{
  unsigned mi,ml,mul,mr;
  tl.getTupleSize(mi, ml, mul, mr);

  const unsigned tsize = (mi-1) + ml*UINT_PER_LONG + mul*UINT_PER_LONG +
                         mr*UINT_PER_REAL;
  sint p, lp = -1;
  sint *ri;
  slong *rl;
  ulong *rul;
  realType *rr;
  uint i, j, *buf, *len=0, *buf_end;

  /* sort to group by target proc */
  if (pf >= mi)
    return moab::MB_MEMORY_ALLOCATION_FAILED;
  //fail("pf expected to be in vi array (%d, %d)", pf, mi);
  tl.sort(pf,&all->buf);

  /* pack into buffer for crystal router */
  all->buf.buffer_reserve((tl.get_n()*(3+tsize))*sizeof(uint));
  all->n=0;
  buf = (uint*) all->buf.ptr;

  bool canWrite = tl.get_writeEnabled();
  if(!canWrite) tl.enableWriteAccess();

  ri=tl.vi_wr;
  rl=tl.vl_wr;
  rul=tl.vul_wr;
  rr=tl.vr_wr;

  for (i=tl.get_n();i;--i)
  {
    p = ri[pf];
    if (p!=lp)
    {
      lp = p;
      *buf++ = p;   /* target */
      *buf++ = _id; /* source */
      len = buf++;
      *len=0;       /* length */
      all->n += 3;
    }
    for (j=0;j<mi;++j,++ri)
      if(j!=pf)
        *buf++ = *ri;
    for (j=ml;j;--j,++rl)
    {
      memcpy(buf,rl,sizeof(slong));
      buf+=UINT_PER_LONG;
    }
    for (j=mul;j;--j,++rul)
    {
      memcpy(buf,rul,sizeof(ulong));
      buf+=UINT_PER_LONG;
    }
    for (j=mr;j;--j,++rr)
    {
      memcpy(buf,rr,sizeof(realType));
      buf+=UINT_PER_REAL;
    }
    *len += tsize, all->n += tsize;
  }

  crystal_router();

  /* unpack */
  buf = (uint*)all->buf.ptr;
  buf_end = buf + all->n;
  tl.set_n(0);
  ri=tl.vi_wr;
  rl=tl.vl_wr;
  rul=tl.vul_wr;
  rr=tl.vr_wr;

  while (buf != buf_end)
  {
    sint llen;
    buf++;         /* target ( == this proc ) */
    p = *buf++;    /* source */
    llen = *buf++; /* length */
    while (llen>0)
    {
      if (tl.get_n()==tl.get_max())
      {
        if (!dynamic)
        {
          tl.set_n(tl.get_max() + 1);
          if(!canWrite)
            tl.disableWriteAccess();
          return moab::MB_SUCCESS;
        }
        ErrorCode rval = tl.resize(tl.get_max() + (1+tl.get_max())/2 + 1);
        if(rval != moab::MB_SUCCESS)
        {
          if(!canWrite)
            tl.disableWriteAccess();
          return rval;
        }
        ri = tl.vi_wr + mi*tl.get_n();
        rl = tl.vl_wr + ml*tl.get_n();
        rul = tl.vul_wr + mul*tl.get_n();
        rr = tl.vr_wr + mr*tl.get_n();
      }
      tl.inc_n();
      for (j=0;j<mi;++j)
        if(j!=pf)
          *ri++ = *buf++;
        else
          *ri++ = p;
      for (j=ml;j;--j) {
        memcpy(rl++,buf,sizeof(slong));
        buf+=UINT_PER_LONG;
      }
      for (j=mul;j;--j) {
        memcpy(rul++,buf,sizeof(ulong));
        buf+=UINT_PER_LONG;
      }
      for (j=mr;j;--j) {
        memcpy(rr++,buf,sizeof(realType));
        buf+=UINT_PER_REAL;
      }
      llen-=tsize;
    }
  }

  if(!canWrite) tl.disableWriteAccess();
  return moab::MB_SUCCESS;
}
#endif
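/* Wire format used by gs_transfer()/crystal_router() (derived from the pack
   loop above): the working buffer is a sequence of chunks
      [ target, source, length, payload... ]
   where a chunk occupies 3 + length uints, and the payload holds each
   tuple's remaining mi-1 ints, then its longs, ulongs and reals, each
   rounded up to whole uints by the UINT_PER_* macros.  On the receiving
   side the packed target field is replaced by the source rank, so tuple
   component pf ends up recording where each tuple came from. */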
/*--------------------------------------------------------------------------
   Combined Execution
  --------------------------------------------------------------------------*/

void gs_data::gs_data_op(realType *u, int op)
{
  local_condense(u, op, this->local_cm);
#ifdef USE_MPI
  this->nlinfo->nonlocal(u,op,_comm);
#endif
  local_uncondense(u, local_cm);
}

void gs_data::gs_data_op_vec(realType *u, uint n, int op)
{
#ifdef USE_MPI
  if (n>nlinfo->_maxv)
    moab::fail("%s: initialized with max vec size = %d,"
               " but called with vec size = %d\n",__FILE__,nlinfo->_maxv,n);
#endif
  local_condense_vec(u, n, op, local_cm);
#ifdef USE_MPI
  this->nlinfo->nonlocal_vec(u,n,op,_comm);
#endif
  local_uncondense_vec(u, n, local_cm);
}

void gs_data::gs_data_op_many(realType **u, uint n, int op)
{
  uint i;
#ifdef USE_MPI
  if (n>nlinfo->_maxv)
    moab::fail("%s: initialized with max vec size = %d,"
               " but called with vec size = %d\n",__FILE__,nlinfo->_maxv,n);
#endif
  for (i = 0; i < n; ++i)
    local_condense(u[i], op, local_cm);
#ifdef USE_MPI
  this->nlinfo->nonlocal_many(u,n,op,_comm);
#endif
  for (i = 0; i < n; ++i)
    local_uncondense(u[i], local_cm);
}
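/* Usage sketch for the "many" form (illustrative; assumes a gs_data handle
   named gs that was set up with maxv >= 3):

     realType *xyz[3] = { x, y, z };  // three n-entry arrays
     gs.gs_data_op_many(xyz, 3, GS_OP_ADD);

   is equivalent to calling gs.gs_data_op() on x, y and z separately, but
   exchanges one message per partner instead of three. */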
/*--------------------------------------------------------------------------
   Setup
  --------------------------------------------------------------------------*/

ErrorCode gs_data::initialize(uint n, const long *label, const ulong *ulabel,
    uint maxv, const unsigned int nlabels, const unsigned int nulabels,
    crystal_data *crystal)
{
  nlinfo = NULL;
  unsigned int j;
  TupleList nonzero, primary;
  ErrorCode rval;
#ifdef USE_MPI
  TupleList shared;
#else
  moab::TupleList::buffer buf;
#endif
  VALGRIND_CHECK_MEM_IS_DEFINED(label, nlabels * sizeof(long));
  VALGRIND_CHECK_MEM_IS_DEFINED(ulabel, nlabels * sizeof(ulong));
#ifdef USE_MPI
  MPI_Comm_dup(crystal->_comm,&this->_comm);
#else
  buf.buffer_init(1024);
#endif

  /* construct list of nonzeros: (index ^, label) */
  nonzero.initialize(1, nlabels, nulabels, 0, n);
  nonzero.enableWriteAccess();
  {
    uint i;
    sint *nzi;
    long *nzl;
    unsigned long *nzul;
    nzi = nonzero.vi_wr;
    nzl = nonzero.vl_wr;
    nzul = nonzero.vul_wr;
    for (i = 0; i < n; ++i)
      if (label[nlabels * i] != 0)
      {
        nzi[0] = i;
        for (j = 0; j < nlabels; j++)
          nzl[j] = label[nlabels * i + j];
        for (j = 0; j < nulabels; j++)
          nzul[j] = ulabel[nulabels * i + j];
        nzi++;
        nzl += nlabels;
        nzul += nulabels;
        nonzero.inc_n();
      }
  }

  /* sort nonzeros by label: (index ^2, label ^1) */
#ifndef USE_MPI
  nonzero.sort(1, &buf);
#else
  nonzero.sort(1,&crystal->all->buf);
#endif

  /* build list of unique labels w/ lowest associated index:
     (index in nonzero ^, primary (lowest) index in label, count, label(s),
     ulabel(s)) */
  primary.initialize(3, nlabels, nulabels, 0, nonzero.get_n());
  primary.enableWriteAccess();
  {
    uint i;
    sint *nzi = nonzero.vi_wr, *pi = primary.vi_wr;
    slong *nzl = nonzero.vl_wr, *pl = primary.vl_wr;
    ulong *nzul = nonzero.vul_wr, *pul = primary.vul_wr;
    slong last = -1;
    for (i = 0; i < nonzero.get_n();
         ++i, nzi += 1, nzl += nlabels, nzul += nulabels)
    {
      if (nzl[0] == last)
      {
        ++pi[-1];
        continue;
      }
      last = nzl[0];
      pi[0] = i;
      pi[1] = nzi[0];
      for (j = 0; j < nlabels; j++)
        pl[j] = nzl[j];
      for (j = 0; j < nulabels; j++)
        pul[j] = nzul[j];
      pi[2] = 1;
      pi += 3, pl += nlabels;
      pul += nulabels;
      primary.inc_n();
    }
  }

  /* calculate size of local condense map */
  {
    uint i, count = 1;
    sint *pi = primary.vi_wr;
    for (i = primary.get_n(); i; --i, pi += 3)
      if (pi[2] > 1)
        count += pi[2] + 1;
    this->local_cm = (sint*) malloc(count * sizeof(sint));
  }

  /* sort unique labels by primary index:
     (nonzero index ^2, primary index ^1, count, label ^2) */
#ifndef USE_MPI
  primary.sort(0, &buf);
  buf.reset();
  //buffer_free(&buf);
#else
  primary.sort(0,&crystal->all->buf);
#endif

  /* construct local condense map */
  {
    uint i, ln;
    sint *pi = primary.vi_wr;
    sint *cm = this->local_cm;
    for (i = primary.get_n(); i > 0; --i, pi += 3)
      if ((ln = pi[2]) > 1)
      {
        sint *nzi = nonzero.vi_wr + 1 * pi[0];
        for (j = ln; j > 0; --j, nzi += 1)
          *cm++ = nzi[0];
        *cm++ = -1;
      }
    *cm++ = -1;
  }
  nonzero.reset();
#ifndef USE_MPI
  primary.reset();
#else
  /* assign work proc by label modulo np */
  {
    uint i;
    sint *pi=primary.vi_wr;
    slong *pl=primary.vl_wr;
    for (i=primary.get_n(); i; --i, pi+=3, pl+=nlabels)
      pi[0]=pl[0]%crystal->_num;
  }
  rval = crystal->gs_transfer(1,primary,0); /* transfer to work procs */
  if (rval != MB_SUCCESS)
    return rval;
  /* primary: (source proc, index on src, useless, label) */
  /* sort by label */
  primary.sort(3,&crystal->all->buf);
  /* add sentinel to primary list */
  if (primary.get_n()==primary.get_max())
    primary.resize( (primary.get_max() ? primary.get_max()+(primary.get_max()+1)/2+1 : 2));
  primary.vl_wr[nlabels*primary.get_n()] = -1;
  /* construct shared list: (proc1, proc2, index1, label) */
#ifdef USE_MPI
  shared.initialize(3,nlabels,nulabels,0,primary.get_n());
  shared.enableWriteAccess();
#endif
  {
    sint *pi1=primary.vi_wr, *si=shared.vi_wr;
    slong lbl, *pl1=primary.vl_wr, *sl=shared.vl_wr;
    ulong *pul1=primary.vul_wr, *sul=shared.vul_wr;
    for ( ;(lbl=pl1[0])!=-1; pi1+=3, pl1+=nlabels, pul1+=nulabels)
    {
      sint *pi2=pi1+3;
      slong *pl2=pl1+nlabels;
      ulong *pul2=pul1+nulabels;
      for ( ;pl2[0]==lbl; pi2+=3, pl2+=nlabels, pul2+=nulabels)
      {
        if (shared.get_n()+2 > shared.get_max())
          shared.resize((shared.get_max() ? shared.get_max()+(shared.get_max()+1)/2+1 : 2)),
          si=shared.vi_wr+shared.get_n()*3;
        sl=shared.vl_wr+shared.get_n()*nlabels;
        sul=shared.vul_wr+shared.get_n()*nulabels;
        si[0] = pi1[0];
        si[1] = pi2[0];
        si[2] = pi1[1];
        for (j = 0; j < nlabels; j++)
          sl[j] = pl2[j];
        for (j = 0; j < nulabels; j++)
          sul[j] = pul2[j];
        si+=3;
        sl+=nlabels;
        sul+=nulabels;
        shared.inc_n();
        si[0] = pi2[0];
        si[1] = pi1[0];
        si[2] = pi2[1];
        for (j = 0; j < nlabels; j++)
          sl[j] = pl1[j];
        for (j = 0; j < nulabels; j++)
          sul[j] = pul1[j];
        si+=3;
        sl+=nlabels;
        sul+=nulabels;
        shared.inc_n();
      }
    }
  }
  primary.reset();
  rval = crystal->gs_transfer(1,shared,0); /* transfer to dest procs */
  if (rval != MB_SUCCESS)
    return rval;
  /* shared list: (useless, proc2, index, label) */
  /* sort by label */
  shared.sort(3,&crystal->all->buf);
  /* sort by partner proc */
  shared.sort(1,&crystal->all->buf);
  /* count partner procs */
  {
    uint i, count=0;
    sint proc=-1,*si=shared.vi_wr;
    for (i=shared.get_n(); i; --i,si+=3)
      if (si[1]!=proc) {
        ++count;
        proc=si[1];
      }
    //this->nlinfo = new nonlocal_info();
    //this->nlinfo->initialize(count,shared.get_n(),
    //                         nlabels, nulabels, maxv);
    this->nlinfo = new nonlocal_info(count,shared.get_n(),
                                     nlabels, nulabels, maxv);
  }
  /* construct non-local info */
  {
    uint i;
    sint proc=-1,*si=shared.vi_wr;
    slong *sl = shared.vl_wr;
    ulong *ul = shared.vul_wr;
    uint *target = this->nlinfo->_target;
    uint *nshared = this->nlinfo->_nshared;
    uint *sh_ind = this->nlinfo->_sh_ind;
    slong *slabels = this->nlinfo->_slabels;
    ulong *ulabels = this->nlinfo->_ulabels;
    for (i=shared.get_n(); i; --i,si+=3)
    {
      if (si[1]!=proc)
      {
        proc=si[1];
        *target++ = proc;
        *nshared++ = 0;
      }
      ++nshared[-1];
      *sh_ind++=si[2];
      // don't store 1st slabel
      sl++;
      for (j = 0; j < nlabels-1; j++)
        slabels[j] = sl[j];
      for (j = 0; j < nulabels; j++)
        ulabels[j] = ul[j];
      slabels+=nlabels-1;
      ulabels+=nulabels;
      sl+=nlabels-1;
      ul+=nulabels;
    }
  }
  shared.reset();
#endif
  return MB_SUCCESS;
}
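/* Setup pipeline summary (derived from the stages above): nonzero lists every
   locally labeled node; primary keeps one entry per distinct label with its
   lowest local index and a count, from which local_cm is built.  In parallel,
   primary entries are routed to a "work proc" (label % np), matched there
   pairwise into shared (proc1, proc2, index1, label) records, and routed back
   so each rank can fill nlinfo with its partner ranks, counts, and
   shared-node indices. */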
void gs_data::reset()
{
  free(local_cm);
  local_cm = NULL;
#ifdef USE_MPI
  if(nlinfo != NULL)
  {
    nlinfo->nlinfo_free();
    delete this->nlinfo;
    MPI_Comm_free(&_comm);
    nlinfo = NULL;
  }
#endif
}

} //namespace