moab
|
00001 #include "moab/ParallelMergeMesh.hpp" 00002 #include "moab/Core.hpp" 00003 #include "moab/CartVect.hpp" 00004 #include "moab/BoundBox.hpp" 00005 #include "moab/Skinner.hpp" 00006 #include "moab/MergeMesh.hpp" 00007 #include "moab/CN.hpp" 00008 #include "float.h" 00009 #include <algorithm> 00010 00011 #ifdef USE_MPI 00012 #include "moab_mpi.h" 00013 #endif 00014 00015 namespace moab{ 00016 00017 //Constructor 00018 /*Get Merge Data and tolerance*/ 00019 ParallelMergeMesh::ParallelMergeMesh(ParallelComm *pc, 00020 const double epsilon) : 00021 myPcomm(pc), myEps(epsilon) 00022 { 00023 myMB = pc->get_moab(); 00024 mySkinEnts.resize(4); 00025 } 00026 00027 00028 //Have a wrapper function on the actual merge to avoid memory leaks 00029 //Merges elements within a proximity of epsilon 00030 ErrorCode ParallelMergeMesh::merge() 00031 { 00032 ErrorCode rval = PerformMerge(); 00033 CleanUp(); 00034 return rval; 00035 } 00036 00037 //Perform the merge 00038 ErrorCode ParallelMergeMesh::PerformMerge() 00039 { 00040 //Get the mesh dimension 00041 int dim; 00042 ErrorCode rval = myMB->get_dimension(dim); 00043 if(rval != MB_SUCCESS){ 00044 return rval; 00045 } 00046 00047 //Get the local skin elements 00048 rval = PopulateMySkinEnts(0,dim); 00049 //If there is only 1 proc, we can return now 00050 if(rval != MB_SUCCESS || myPcomm->size() == 1){ 00051 return rval; 00052 } 00053 00054 //Determine the global bounding box 00055 double gbox[6]; 00056 rval = GetGlobalBox(gbox); 00057 if(rval != MB_SUCCESS){ 00058 return rval; 00059 } 00060 00061 /* Assemble The Destination Tuples */ 00062 //Get a list of tuples which contain (toProc, handle, x,y,z) 00063 myTup.initialize(1,0,1,3,mySkinEnts[0].size()); 00064 rval = PopulateMyTup(gbox); 00065 if(rval != MB_SUCCESS){ 00066 return rval; 00067 } 00068 00069 /* Gather-Scatter Tuple 00070 -tup comes out as (remoteProc,handle,x,y,z) */ 00071 myCD.initialize(myPcomm->comm()); 00072 00073 //1 represents dynamic tuple, 0 represents index of the processor to send to 00074 myCD.gs_transfer(1, myTup, 0); 00075 00076 /* Sort By X,Y,Z 00077 -Utilizes a custom quick sort incoroporating eplison*/ 00078 SortTuplesByReal(myTup,myEps); 00079 00080 //Initilize another tuple list for matches 00081 myMatches.initialize(2,0,2,0,mySkinEnts[0].size()); 00082 00083 //ID the matching tuples 00084 rval = PopulateMyMatches(); 00085 if(rval != MB_SUCCESS){ 00086 return rval; 00087 } 00088 00089 //We can free up the tuple myTup now 00090 myTup.reset(); 00091 00092 /*Gather-Scatter Again*/ 00093 //1 represents dynamic list, 0 represents proc index to send tuple to 00094 myCD.gs_transfer(1,myMatches,0); 00095 //We can free up the crystal router now 00096 myCD.reset(); 00097 00098 //Sort the matches tuple list 00099 SortMyMatches(); 00100 00101 //Tag the shared elements 00102 rval = TagSharedElements(dim); 00103 00104 //Free up the matches tuples 00105 myMatches.reset(); 00106 return rval; 00107 } 00108 00109 //Sets mySkinEnts with all of the skin entities on the processor 00110 ErrorCode ParallelMergeMesh::PopulateMySkinEnts(const EntityHandle meshset,int dim) 00111 { 00112 /*Merge Mesh Locally*/ 00113 //Get all dim dimensional entities 00114 Range ents; 00115 ErrorCode rval = myMB->get_entities_by_dimension(meshset,dim,ents); 00116 if(rval != MB_SUCCESS){ 00117 return rval; 00118 } 00119 00120 //Merge Mesh Locally 00121 MergeMesh merger(myMB, false); 00122 merger.merge_entities(ents,myEps); 00123 //We can return if there is only 1 proc 00124 if(rval != MB_SUCCESS || myPcomm->size() == 1){ 00125 return rval; 00126 } 00127 00128 //Rebuild the ents range 00129 ents.clear(); 00130 rval = myMB->get_entities_by_dimension(0,dim,ents); 00131 if(rval != MB_SUCCESS){ 00132 return rval; 00133 } 00134 00135 /*Get Skin 00136 -Get Range of all dimensional entities 00137 -skinEnts[i] is the skin entities of dimension i*/ 00138 Skinner skinner(myMB); 00139 for(int skin_dim = dim; skin_dim >= 0; skin_dim--){ 00140 rval = skinner.find_skin(meshset,ents,skin_dim,mySkinEnts[skin_dim]); 00141 if(rval != MB_SUCCESS){ 00142 return rval; 00143 } 00144 } 00145 return MB_SUCCESS; 00146 } 00147 00148 //Determine the global assembly box 00149 ErrorCode ParallelMergeMesh::GetGlobalBox(double *gbox) 00150 { 00151 ErrorCode rval; 00152 00153 /*Get Bounding Box*/ 00154 BoundBox box; 00155 if(mySkinEnts[0].size() != 0){ 00156 rval = box.update(*myMB, mySkinEnts[0]); 00157 if(rval != MB_SUCCESS) 00158 return rval; 00159 } 00160 00161 //Invert the max 00162 box.bMax *= -1; 00163 00164 /*Communicate to all processors*/ 00165 MPI_Allreduce(&box, gbox, 6, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); 00166 00167 /*Assemble Global Bounding Box*/ 00168 //Flip the max back 00169 for(int i=3; i<6; i++){ 00170 gbox[i] *= -1; 00171 } 00172 return MB_SUCCESS; 00173 } 00174 00175 //Assemble the tuples with their processor destination 00176 ErrorCode ParallelMergeMesh::PopulateMyTup(double * gbox) 00177 { 00178 /*Figure out how do partition the global box*/ 00179 double lengths[3]; 00180 int parts[3]; 00181 ErrorCode rval = PartitionGlobalBox(gbox, lengths, parts); 00182 if(rval != MB_SUCCESS){ 00183 return rval; 00184 } 00185 00186 /* Get Skin Coordinates, Vertices */ 00187 double *x = new double[mySkinEnts[0].size()]; 00188 double *y = new double[mySkinEnts[0].size()]; 00189 double *z = new double[mySkinEnts[0].size()]; 00190 rval = myMB->get_coords(mySkinEnts[0],x,y,z); 00191 if(rval != MB_SUCCESS){ 00192 //Prevent Memory Leak 00193 delete []x; delete []y; delete []z; 00194 return rval; 00195 } 00196 00197 //Initialize variable to be used in the loops 00198 std::vector<int> toProcs; 00199 int xPart, yPart, zPart, xEps, yEps, zEps, baseProc; 00200 unsigned long long tup_i=0, tup_ul=0, tup_r=0, count=0; 00201 //These are boolean to determine if the vertice is on close enought to a given border 00202 bool xDup, yDup, zDup; 00203 bool canWrite = myTup.get_writeEnabled(); 00204 if(!canWrite) myTup.enableWriteAccess(); 00205 //Go through each vertice 00206 for(Range::iterator it = mySkinEnts[0].begin(); it != mySkinEnts[0].end(); it++){ 00207 xDup = false; yDup = false; zDup = false; 00208 //Figure out which x,y,z partition the element is in. 00209 xPart = static_cast<int>(floor((x[count]-gbox[0])/lengths[0])); 00210 xPart = (xPart<parts[0]?xPart:parts[0]-1);//Make sure it stays within the bounds 00211 00212 yPart = static_cast<int>(floor((y[count]-gbox[1])/lengths[1])); 00213 yPart = (yPart<parts[1]?yPart:parts[1]-1);//Make sure it stays within the bounds 00214 00215 zPart = static_cast<int>(floor((z[count]-gbox[2])/lengths[2])); 00216 zPart = (zPart<parts[2]?zPart:parts[2]-1);//Make sure it stays within the bounds 00217 00218 //Figure out the partition with the addition of Epsilon 00219 xEps = static_cast<int>(floor((x[count]-gbox[0]+myEps)/lengths[0])); 00220 yEps = static_cast<int>(floor((y[count]-gbox[1]+myEps)/lengths[1])); 00221 zEps = static_cast<int>(floor((z[count]-gbox[2]+myEps)/lengths[2])); 00222 00223 //Figure out if the vertice needs to be sent to multiple procs 00224 xDup = (xPart != xEps && xEps < parts[0]); 00225 yDup = (yPart != yEps && yEps < parts[1]); 00226 zDup = (zPart != zEps && zEps < parts[2]); 00227 00228 //Add appropriate processors to the vector 00229 baseProc = xPart+ yPart * parts[0] + zPart * parts[0] * parts[1]; 00230 toProcs.push_back(baseProc); 00231 if(xDup){ 00232 toProcs.push_back(baseProc + 1);//Get partition to the right 00233 } 00234 if(yDup){ 00235 //Partition up 1 00236 toProcs.push_back(baseProc + parts[0]); 00237 } 00238 if(zDup){ 00239 //Partition above 1 00240 toProcs.push_back(baseProc + parts[0] * parts[1]); 00241 } 00242 if(xDup && yDup){ 00243 //Partition up 1 and right 1 00244 toProcs.push_back(baseProc + parts[0] + 1); 00245 } 00246 if(xDup && zDup){ 00247 //Partition right 1 and above 1 00248 toProcs.push_back(baseProc + parts[0] * parts[1] + 1); 00249 } 00250 if(yDup && zDup){ 00251 //Partition up 1 and above 1 00252 toProcs.push_back(baseProc + parts[0] * parts[1] + parts[0]); 00253 } 00254 if(xDup && yDup && zDup){ 00255 //Partition right 1, up 1, and above 1 00256 toProcs.push_back(baseProc + parts[0] * parts[1] + parts[0] + 1); 00257 } 00258 //Grow the tuple list if necessary 00259 while(myTup.get_n() + toProcs.size() >= myTup.get_max()){ 00260 myTup.resize(myTup.get_max() ? 00261 myTup.get_max() + myTup.get_max()/2 + 1 00262 : 2); 00263 } 00264 00265 //Add each proc as a tuple 00266 for(std::vector<int>::iterator proc = toProcs.begin(); 00267 proc != toProcs.end(); 00268 proc++){ 00269 myTup.vi_wr[tup_i++] = *proc; 00270 myTup.vul_wr[tup_ul++] = *it; 00271 myTup.vr_wr[tup_r++] = x[count]; 00272 myTup.vr_wr[tup_r++] = y[count]; 00273 myTup.vr_wr[tup_r++] = z[count]; 00274 myTup.inc_n(); 00275 } 00276 count++; 00277 toProcs.clear(); 00278 } 00279 if(!canWrite) myTup.disableWriteAccess(); 00280 return MB_SUCCESS; 00281 } 00282 00283 //Partition the global box by the number of procs 00284 ErrorCode ParallelMergeMesh::PartitionGlobalBox(double *gbox, double *lengths, int *parts) 00285 { 00286 //Determine the length of each side 00287 double xLen = gbox[3]-gbox[0]; 00288 double yLen = gbox[4]-gbox[1]; 00289 double zLen = gbox[5]-gbox[2]; 00290 unsigned numProcs = myPcomm->size(); 00291 00292 //Partition sides from the longest to shortest lengths 00293 //If x is the longest side 00294 if(xLen >= yLen && xLen >= zLen){ 00295 parts[0] = PartitionSide(xLen, yLen * zLen, numProcs, true); 00296 numProcs /= parts[0]; 00297 //If y is second longest 00298 if(yLen >= zLen){ 00299 parts[1] = PartitionSide(yLen, zLen, numProcs, false); 00300 parts[2] = numProcs/parts[1]; 00301 } 00302 //If z is the longer 00303 else{ 00304 parts[2] = PartitionSide(zLen, yLen, numProcs, false); 00305 parts[1] = numProcs/parts[2]; 00306 } 00307 } 00308 //If y is the longest side 00309 else if (yLen >= zLen){ 00310 parts[1] = PartitionSide(yLen, xLen * zLen, numProcs, true); 00311 numProcs /= parts[1]; 00312 //If x is the second longest 00313 if(xLen >= zLen){ 00314 parts[0] = PartitionSide(xLen, zLen, numProcs, false); 00315 parts[2] = numProcs/parts[0]; 00316 } 00317 //If z is the second longest 00318 else{ 00319 parts[2] = PartitionSide(zLen, xLen, numProcs, false); 00320 parts[0] = numProcs/parts[2]; 00321 } 00322 } 00323 //If z is the longest side 00324 else{ 00325 parts[2] = PartitionSide(zLen, xLen * yLen, numProcs, true); 00326 numProcs /= parts[2]; 00327 //If x is the second longest 00328 if(xLen >= yLen){ 00329 parts[0] = PartitionSide(xLen, yLen, numProcs, false); 00330 parts[1] = numProcs/parts[0]; 00331 } 00332 //If y is the second longest 00333 else{ 00334 parts[1] = PartitionSide(yLen, xLen, numProcs, false); 00335 parts[0] = numProcs/parts[1]; 00336 } 00337 } 00338 00339 //Divide up each side to give the lengths 00340 lengths[0] = xLen/(double)parts[0]; 00341 lengths[1] = yLen/(double)parts[1]; 00342 lengths[2] = zLen/(double)parts[2]; 00343 return MB_SUCCESS; 00344 } 00345 00346 //Partition a side based on the length ratios 00347 int ParallelMergeMesh::PartitionSide(double sideLen, double restLen, unsigned numProcs, bool altRatio) 00348 { 00349 //If theres only 1 processor, then just return 1 00350 if(numProcs == 1){ 00351 return 1; 00352 } 00353 //Initialize with the ratio of 1 proc 00354 double ratio = -DBL_MAX; 00355 unsigned factor = 1; 00356 //We need to be able to save the last ratio and factor (for comparison) 00357 double oldRatio = ratio; 00358 double oldFactor = 1; 00359 00360 //This is the ratio were shooting for 00361 double goalRatio = sideLen/restLen; 00362 00363 //Calculate the divisor and numerator power 00364 //This avoid if statements in the loop and is useful since both calculations are similar 00365 double divisor, p; 00366 if(altRatio){ 00367 divisor = (double)numProcs * sideLen; 00368 p = 3; 00369 } 00370 else{ 00371 divisor = (double)numProcs; 00372 p = 2; 00373 } 00374 00375 //Find each possible factor 00376 for (unsigned i = 2; i <= numProcs/2; i++){ 00377 //If it is a factor... 00378 if (numProcs % i == 0){ 00379 //We need to save the past factor 00380 oldRatio = ratio; 00381 oldFactor = factor; 00382 //There are 2 different ways to calculate the ratio: 00383 //Comparing 1 side to 2 sides: (i*i*i)/(numProcs*x) 00384 //Justification: We have a ratio x:y:z (side Lengths) == a:b:c (procs). So a=kx, b=ky, c=kz. 00385 //Also, abc=n (numProcs) => bc = n/a. Also, a=kx => k=a/x => 1/k=x/a 00386 //And so x/(yz) == (kx)/(kyz) == (kx)/(kykz(1/k)) == a/(bc(x/a)) == a/((n/a)(x/a)) == a^3/(nx). 00387 //Comparing 1 side to 1 side: (i*i)/numprocs 00388 //Justification: i/(n/i) == i^2/n 00389 ratio = pow((double)i, p)/divisor; 00390 factor = i; 00391 //Once we have passed the goal ratio, we can break since we'll only move away from the goal ratio 00392 if(ratio >= goalRatio){ 00393 break; 00394 } 00395 } 00396 } 00397 //If we havent reached the goal ratio yet, check out factor = numProcs 00398 if(ratio < goalRatio){ 00399 oldRatio = ratio; 00400 oldFactor = factor; 00401 factor = numProcs; 00402 ratio = pow((double)numProcs, p)/divisor; 00403 } 00404 00405 //Figure out if our oldRatio is better than ratio 00406 if(fabs(ratio - goalRatio) > fabs(oldRatio-goalRatio)){ 00407 factor = oldFactor; 00408 } 00409 //Return our findings 00410 return factor; 00411 } 00412 00413 //Id the tuples that are matching 00414 ErrorCode ParallelMergeMesh::PopulateMyMatches() 00415 { 00416 //Counters for accessing tuples more efficiently 00417 unsigned long i = 0, mat_i=0, mat_ul=0, j=0, tup_r=0; 00418 double eps2 = myEps*myEps; 00419 00420 uint tup_mi, tup_ml, tup_mul, tup_mr; 00421 myTup.getTupleSize(tup_mi, tup_ml, tup_mul, tup_mr); 00422 00423 bool canWrite = myMatches.get_writeEnabled(); 00424 if(!canWrite) myMatches.enableWriteAccess(); 00425 00426 while((i+1)<myTup.get_n()){ 00427 //Proximity Comparison 00428 double xi = myTup.vr_rd[tup_r], 00429 yi = myTup.vr_rd[tup_r+1], 00430 zi = myTup.vr_rd[tup_r+2]; 00431 00432 bool done = false; 00433 while(!done){ 00434 j++; tup_r += tup_mr; 00435 if(j >= myTup.get_n()){ 00436 break; 00437 } 00438 CartVect cv(myTup.vr_rd[tup_r]-xi, 00439 myTup.vr_rd[tup_r+1]-yi, 00440 myTup.vr_rd[tup_r+2]-zi); 00441 if(cv.length_squared() > eps2){ 00442 done = true; 00443 } 00444 } 00445 //Allocate the tuple list before adding matches 00446 while(myMatches.get_n()+(j-i)*(j-i-1) >= myMatches.get_max()){ 00447 myMatches.resize(myMatches.get_max() ? 00448 myMatches.get_max() + myMatches.get_max()/2 + 1 : 00449 2); 00450 } 00451 00452 //We now know that tuples [i to j) exclusive match. 00453 //If n tuples match, n*(n-1) match tuples will be made 00454 //tuples are of the form (proc1,proc2,handle1,handle2) 00455 if(i+1 < j){ 00456 int kproc = i*tup_mi; 00457 unsigned long khand = i*tup_mul; 00458 for(unsigned long k = i; k<j; k++){ 00459 int lproc = kproc+tup_mi; 00460 unsigned long lhand = khand+tup_mul; 00461 for(unsigned long l=k+1; l<j; l++){ 00462 myMatches.vi_wr[mat_i++]=myTup.vi_rd[kproc];//proc1 00463 myMatches.vi_wr[mat_i++]=myTup.vi_rd[lproc];//proc2 00464 myMatches.vul_wr[mat_ul++]=myTup.vul_rd[khand];//handle1 00465 myMatches.vul_wr[mat_ul++]=myTup.vul_rd[lhand];//handle2 00466 myMatches.inc_n(); 00467 00468 myMatches.vi_wr[mat_i++]=myTup.vi_rd[lproc];//proc1 00469 myMatches.vi_wr[mat_i++]=myTup.vi_rd[kproc];//proc2 00470 myMatches.vul_wr[mat_ul++]=myTup.vul_rd[lhand];//handle1 00471 myMatches.vul_wr[mat_ul++]=myTup.vul_rd[khand];//handle2 00472 myMatches.inc_n(); 00473 lproc += tup_mi; 00474 lhand += tup_mul; 00475 } 00476 kproc += tup_mi; 00477 khand += tup_mul; 00478 }//End for(int k... 00479 } 00480 i = j; 00481 }//End while(i+1<tup.n) 00482 00483 if(!canWrite) myMatches.disableWriteAccess(); 00484 return MB_SUCCESS; 00485 } 00486 00487 //Sort the matching tuples so that vertices can be tagged accurately 00488 ErrorCode ParallelMergeMesh::SortMyMatches() 00489 { 00490 TupleList::buffer buf(mySkinEnts[0].size()); 00491 //Sorts are necessary to check for doubles 00492 //Sort by remote handle 00493 myMatches.sort(3,&buf); 00494 //Sort by matching proc 00495 myMatches.sort(1,&buf); 00496 //Sort by local handle 00497 myMatches.sort(2,&buf); 00498 buf.reset(); 00499 return MB_SUCCESS; 00500 } 00501 00502 //Tag the shared elements using existing PComm functionality 00503 ErrorCode ParallelMergeMesh::TagSharedElements(int dim) 00504 { 00505 //Manipulate the matches list to tag vertices and entities 00506 //Set up proc ents 00507 Range proc_ents; 00508 ErrorCode rval; 00509 00510 // get the entities in the partition sets 00511 for (Range::iterator rit = myPcomm->partitionSets.begin(); rit != myPcomm->partitionSets.end(); rit++) { 00512 Range tmp_ents; 00513 rval = myMB->get_entities_by_handle(*rit, tmp_ents, true); 00514 if (MB_SUCCESS != rval){ 00515 return rval; 00516 } 00517 proc_ents.merge(tmp_ents); 00518 } 00519 if (myMB->dimension_from_handle(*proc_ents.rbegin()) != 00520 myMB->dimension_from_handle(*proc_ents.begin())) { 00521 Range::iterator lower = proc_ents.lower_bound(CN::TypeDimensionMap[0].first), 00522 upper = proc_ents.upper_bound(CN::TypeDimensionMap[dim-1].second); 00523 proc_ents.erase(lower, upper); 00524 } 00525 00526 00527 //This vector doesnt appear to be used but its in resolve_shared_ents 00528 int maxp = -1; 00529 std::vector<int> sharing_procs(MAX_SHARING_PROCS); 00530 std::fill(sharing_procs.begin(), sharing_procs.end(), maxp); 00531 00532 // get ents shared by 1 or n procs 00533 std::map<std::vector<int>, std::vector<EntityHandle> > proc_nranges; 00534 Range proc_verts; 00535 rval = myMB->get_adjacencies(proc_ents, 0, false, proc_verts, 00536 Interface::UNION); 00537 if(rval != MB_SUCCESS){ 00538 return rval; 00539 } 00540 00541 rval = myPcomm->tag_shared_verts(myMatches, proc_nranges, proc_verts); 00542 if(rval != MB_SUCCESS){ 00543 return rval; 00544 } 00545 00546 // get entities shared by 1 or n procs 00547 rval = myPcomm->get_proc_nvecs(dim,dim-1, &mySkinEnts[0],proc_nranges); 00548 if(rval != MB_SUCCESS){ 00549 return rval; 00550 } 00551 00552 // create the sets for each interface; store them as tags on 00553 // the interface instance 00554 Range iface_sets; 00555 rval = myPcomm->create_interface_sets(proc_nranges); 00556 if(rval != MB_SUCCESS){ 00557 return rval; 00558 } 00559 // establish comm procs and buffers for them 00560 std::set<unsigned int> procs; 00561 rval = myPcomm->get_interface_procs(procs, true); 00562 if(rval != MB_SUCCESS){ 00563 return rval; 00564 } 00565 00566 // resolve shared entity remote handles; implemented in ghost cell exchange 00567 // code because it's so similar 00568 rval = myPcomm->exchange_ghost_cells(-1, -1, 0, true, true); 00569 if(rval != MB_SUCCESS){ 00570 return rval; 00571 } 00572 // now build parent/child links for interface sets 00573 rval = myPcomm->create_iface_pc_links(); 00574 return rval; 00575 } 00576 00577 //Make sure to free up any allocated data 00578 //Need to avoid a double free 00579 void ParallelMergeMesh::CleanUp() 00580 { 00581 //The reset operation is now safe and avoids a double free() 00582 myMatches.reset(); 00583 myTup.reset(); 00584 myCD.reset(); 00585 } 00586 00587 //Simple quick sort to real 00588 void ParallelMergeMesh::SortTuplesByReal(TupleList &tup, 00589 double eps) 00590 { 00591 bool canWrite = tup.get_writeEnabled(); 00592 if(!canWrite) tup.enableWriteAccess(); 00593 00594 uint mi, ml, mul, mr; 00595 tup.getTupleSize(mi,ml,mul,mr); 00596 PerformRealSort(tup, 0, tup.get_n(), eps, mr); 00597 00598 if(!canWrite) tup.disableWriteAccess(); 00599 } 00600 00601 //Swap around tuples 00602 void ParallelMergeMesh::SwapTuples(TupleList &tup, 00603 unsigned long a, unsigned long b) 00604 { 00605 if(a==b) return; 00606 00607 uint mi, ml, mul, mr; 00608 tup.getTupleSize(mi, ml, mul, mr); 00609 00610 //Swap mi 00611 unsigned long a_val = a*mi, b_val=b*mi; 00612 for(unsigned long i=0; i< mi;i++){ 00613 sint t =tup.vi_rd[a_val]; 00614 tup.vi_wr[a_val] = tup.vi_rd[b_val]; 00615 tup.vi_wr[b_val] = t; 00616 a_val++; 00617 b_val++; 00618 } 00619 //Swap ml 00620 a_val = a*ml; 00621 b_val = b*ml; 00622 for(unsigned long i=0; i< ml;i++){ 00623 slong t =tup.vl_rd[a_val]; 00624 tup.vl_wr[a_val] = tup.vl_rd[b_val]; 00625 tup.vl_wr[b_val] = t; 00626 a_val++; 00627 b_val++; 00628 } 00629 //Swap mul 00630 a_val = a*mul; 00631 b_val = b*mul; 00632 for(unsigned long i=0; i< mul;i++){ 00633 ulong t =tup.vul_rd[a_val]; 00634 tup.vul_wr[a_val] = tup.vul_rd[b_val]; 00635 tup.vul_wr[b_val] = t; 00636 a_val++; 00637 b_val++; 00638 } 00639 //Swap mr 00640 a_val = a*mr; 00641 b_val = b*mr; 00642 for(unsigned long i=0; i< mr;i++){ 00643 realType t =tup.vr_rd[a_val]; 00644 tup.vr_wr[a_val] = tup.vr_rd[b_val]; 00645 tup.vr_wr[b_val] = t; 00646 a_val++; 00647 b_val++; 00648 } 00649 } 00650 00651 //Perform the sorting of a tuple by real 00652 //To sort an entire tuple_list, call (tup,0,tup.n,epsilon) 00653 void ParallelMergeMesh::PerformRealSort(TupleList &tup, 00654 unsigned long left, 00655 unsigned long right, 00656 double eps, 00657 uint tup_mr) 00658 { 00659 //If list size is only 1 or 0 return 00660 if(left+1 >= right){ 00661 return; 00662 } 00663 unsigned long swap = left, tup_l = left*tup_mr, 00664 tup_t = tup_l + tup_mr; 00665 00666 //Swap the median with the left position for a (hopefully) better split 00667 SwapTuples(tup, left, (left+right)/2); 00668 00669 //Partition the data 00670 for(unsigned long t=left+1;t<right;t++){ 00671 //If the left value(pivot) is greater than t_val, swap it into swap 00672 if(TupleGreaterThan(tup,tup_l,tup_t,eps, tup_mr)){ 00673 swap++; 00674 SwapTuples(tup, swap,t); 00675 } 00676 tup_t+=tup_mr; 00677 } 00678 00679 //Swap so that position swap is in the correct position 00680 SwapTuples(tup,left,swap); 00681 00682 //Sort left and right of swap 00683 PerformRealSort(tup, left ,swap, eps, tup_mr); 00684 PerformRealSort(tup, swap+1,right,eps, tup_mr); 00685 } 00686 00687 //Note, this takes the actual tup.vr[] index (aka i*tup.mr) 00688 bool ParallelMergeMesh::TupleGreaterThan(TupleList &tup, 00689 unsigned long vrI, 00690 unsigned long vrJ, 00691 double eps, 00692 uint tup_mr){ 00693 unsigned check=0; 00694 while(check < tup_mr){ 00695 //If the values are the same 00696 if(fabs(tup.vr_rd[vrI+check]-tup.vr_rd[vrJ+check]) <= eps){ 00697 check++; 00698 continue; 00699 } 00700 //If I greater than J 00701 else if(tup.vr_rd[vrI+check] > tup.vr_rd[vrJ+check]){ 00702 return true; 00703 } 00704 //If J greater than I 00705 else{ 00706 return false; 00707 } 00708 } 00709 //All Values are the same 00710 return false; 00711 } 00712 00713 }//End namespace moab