moab-docs/html/ParallelMergeMesh_8cpp_source.html

00001 #include "moab/ParallelMergeMesh.hpp"
00002 #include "moab/Core.hpp"
00003 #include "moab/CartVect.hpp"
00004 #include "moab/BoundBox.hpp"
00005 #include "moab/Skinner.hpp"
00006 #include "moab/MergeMesh.hpp"
00007 #include "moab/CN.hpp"
00008 #include "float.h"
00009 #include <algorithm>
00010
00011 #ifdef USE_MPI
00012 #include "moab_mpi.h"
00013 #endif
00014
00015 namespace moab{
00016
00017   //Constructor
00018   /*Get Merge Data and tolerance*/
00019   ParallelMergeMesh::ParallelMergeMesh(ParallelComm *pc,
00020                        const double epsilon) :
00021     myPcomm(pc), myEps(epsilon)
00022   {
00023     myMB = pc->get_moab();
00024     mySkinEnts.resize(4);
00025   }
00026
00027
00028   //Have a wrapper function on the actual merge to avoid memory leaks
00029   //Merges elements within a proximity of epsilon
00030   ErrorCode ParallelMergeMesh::merge()
00031   {
00032     ErrorCode rval = PerformMerge();
00033     CleanUp();
00034     return rval;
00035   }
00036
00037   //Perform the merge
00038   ErrorCode ParallelMergeMesh::PerformMerge()
00039   {
00040     //Get the mesh dimension
00041     int dim;
00042     ErrorCode rval = myMB->get_dimension(dim);
00043     if(rval != MB_SUCCESS){
00044       return rval;
00045     }
00046
00047     //Get the local skin elements
00048     rval = PopulateMySkinEnts(0,dim);
00049     //If there is only 1 proc, we can return now
00050     if(rval != MB_SUCCESS || myPcomm->size() == 1){
00051       return rval;
00052     }
00053
00054     //Determine the global bounding box
00055     double gbox[6];
00056     rval = GetGlobalBox(gbox);
00057     if(rval != MB_SUCCESS){
00058       return rval;
00059     }
00060
00061     /* Assemble The Destination Tuples */
00062     //Get a list of tuples which contain (toProc, handle, x,y,z)
00063     myTup.initialize(1,0,1,3,mySkinEnts[0].size());
00064     rval = PopulateMyTup(gbox);
00065     if(rval != MB_SUCCESS){
00066       return rval;
00067     }
00068
00069     /* Gather-Scatter Tuple
00070        -tup comes out as (remoteProc,handle,x,y,z) */
00071     myCD.initialize(myPcomm->comm());
00072
00073     //1 represents dynamic tuple, 0 represents index of the processor to send to
00074     myCD.gs_transfer(1, myTup, 0);
00075
00076     /* Sort By X,Y,Z
00077        -Utilizes a custom quick sort incoroporating eplison*/
00078     SortTuplesByReal(myTup,myEps);
00079
00080     //Initilize another tuple list for matches
00081     myMatches.initialize(2,0,2,0,mySkinEnts[0].size());
00082
00083     //ID the matching tuples
00084     rval = PopulateMyMatches();
00085     if(rval != MB_SUCCESS){
00086       return rval;
00087     }
00088
00089     //We can free up the tuple myTup now
00090     myTup.reset();
00091
00092     /*Gather-Scatter Again*/
00093     //1 represents dynamic list, 0 represents proc index to send tuple to
00094     myCD.gs_transfer(1,myMatches,0);
00095     //We can free up the crystal router now
00096     myCD.reset();
00097
00098     //Sort the matches tuple list
00099     SortMyMatches();
00100
00101     //Tag the shared elements
00102     rval = TagSharedElements(dim);
00103
00104     //Free up the matches tuples
00105     myMatches.reset();
00106     return rval;
00107   }
00108
00109   //Sets mySkinEnts with all of the skin entities on the processor
00110   ErrorCode ParallelMergeMesh::PopulateMySkinEnts(const EntityHandle meshset,int dim)
00111   {
00112     /*Merge Mesh Locally*/
00113     //Get all dim dimensional entities
00114     Range ents;
00115     ErrorCode rval = myMB->get_entities_by_dimension(meshset,dim,ents);
00116     if(rval != MB_SUCCESS){
00117       return rval;
00118     }
00119
00120     //Merge Mesh Locally
00121     MergeMesh merger(myMB, false);
00122     merger.merge_entities(ents,myEps);
00123     //We can return if there is only 1 proc
00124     if(rval != MB_SUCCESS || myPcomm->size() == 1){
00125       return rval;
00126     }
00127
00128     //Rebuild the ents range
00129     ents.clear();
00130     rval = myMB->get_entities_by_dimension(0,dim,ents);
00131     if(rval != MB_SUCCESS){
00132       return rval;
00133     }
00134
00135     /*Get Skin
00136       -Get Range of all dimensional entities
00137       -skinEnts[i] is the skin entities of dimension i*/
00138     Skinner skinner(myMB);
00139     for(int skin_dim = dim; skin_dim >= 0; skin_dim--){
00140       rval = skinner.find_skin(meshset,ents,skin_dim,mySkinEnts[skin_dim]);
00141       if(rval != MB_SUCCESS){
00142     return rval;
00143       }
00144     }
00145     return MB_SUCCESS;
00146   }
00147
00148   //Determine the global assembly box
00149   ErrorCode ParallelMergeMesh::GetGlobalBox(double *gbox)
00150   {
00151     ErrorCode rval;
00152
00153     /*Get Bounding Box*/
00154     BoundBox box;
00155     if(mySkinEnts[0].size() != 0){
00156       rval = box.update(*myMB, mySkinEnts[0]);
00157       if(rval != MB_SUCCESS)
00158         return rval;
00159     }
00160
00161     //Invert the max
00162     box.bMax *= -1;
00163
00164     /*Communicate to all processors*/
00165     MPI_Allreduce(&box, gbox, 6, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
00166
00167     /*Assemble Global Bounding Box*/
00168     //Flip the max back
00169     for(int i=3; i<6; i++){
00170       gbox[i] *= -1;
00171     }
00172     return MB_SUCCESS;
00173   }
00174
00175   //Assemble the tuples with their processor destination
00176   ErrorCode ParallelMergeMesh::PopulateMyTup(double * gbox)
00177   {
00178     /*Figure out how do partition the global box*/
00179     double lengths[3];
00180     int parts[3];
00181     ErrorCode rval = PartitionGlobalBox(gbox, lengths, parts);
00182     if(rval != MB_SUCCESS){
00183       return rval;
00184     }
00185
00186     /* Get Skin Coordinates, Vertices */
00187     double *x = new double[mySkinEnts[0].size()];
00188     double *y = new double[mySkinEnts[0].size()];
00189     double *z = new double[mySkinEnts[0].size()];
00190     rval = myMB->get_coords(mySkinEnts[0],x,y,z);
00191     if(rval != MB_SUCCESS){
00192       //Prevent Memory Leak
00193       delete []x; delete []y; delete []z;
00194       return rval;
00195     }
00196
00197     //Initialize variable to be used in the loops
00198     std::vector<int> toProcs;
00199     int xPart, yPart, zPart, xEps, yEps, zEps, baseProc;
00200     unsigned long long tup_i=0, tup_ul=0, tup_r=0, count=0;
00201     //These are boolean to determine if the vertice is on close enought to a given border
00202     bool xDup, yDup, zDup;
00203     bool canWrite = myTup.get_writeEnabled();
00204     if(!canWrite) myTup.enableWriteAccess();
00205     //Go through each vertice
00206     for(Range::iterator it = mySkinEnts[0].begin(); it != mySkinEnts[0].end(); it++){
00207       xDup = false; yDup = false; zDup = false;
00208       //Figure out which x,y,z partition the element is in.
00209       xPart = static_cast<int>(floor((x[count]-gbox[0])/lengths[0]));
00210       xPart = (xPart<parts[0]?xPart:parts[0]-1);//Make sure it stays within the bounds
00211
00212       yPart = static_cast<int>(floor((y[count]-gbox[1])/lengths[1]));
00213       yPart = (yPart<parts[1]?yPart:parts[1]-1);//Make sure it stays within the bounds
00214
00215       zPart = static_cast<int>(floor((z[count]-gbox[2])/lengths[2]));
00216       zPart = (zPart<parts[2]?zPart:parts[2]-1);//Make sure it stays within the bounds
00217
00218       //Figure out the partition with the addition of Epsilon
00219       xEps = static_cast<int>(floor((x[count]-gbox[0]+myEps)/lengths[0]));
00220       yEps = static_cast<int>(floor((y[count]-gbox[1]+myEps)/lengths[1]));
00221       zEps = static_cast<int>(floor((z[count]-gbox[2]+myEps)/lengths[2]));
00222
00223       //Figure out if the vertice needs to be sent to multiple procs
00224       xDup = (xPart != xEps && xEps < parts[0]);
00225       yDup = (yPart != yEps && yEps < parts[1]);
00226       zDup = (zPart != zEps && zEps < parts[2]);
00227
00228       //Add appropriate processors to the vector
00229       baseProc = xPart+ yPart * parts[0] + zPart * parts[0] * parts[1];
00230       toProcs.push_back(baseProc);
00231       if(xDup){
00232     toProcs.push_back(baseProc + 1);//Get partition to the right
00233       }
00234       if(yDup){
00235     //Partition up 1
00236     toProcs.push_back(baseProc + parts[0]);
00237       }
00238       if(zDup){
00239     //Partition above 1
00240     toProcs.push_back(baseProc + parts[0] * parts[1]);
00241       }
00242       if(xDup && yDup){
00243     //Partition up 1 and right 1
00244     toProcs.push_back(baseProc + parts[0] + 1);
00245       }
00246       if(xDup && zDup){
00247     //Partition right 1 and above 1
00248     toProcs.push_back(baseProc + parts[0] * parts[1] + 1);
00249       }
00250       if(yDup && zDup){
00251     //Partition up 1 and above 1
00252     toProcs.push_back(baseProc + parts[0] * parts[1] + parts[0]);
00253       }
00254       if(xDup && yDup && zDup){
00255     //Partition right 1, up 1, and above 1
00256     toProcs.push_back(baseProc + parts[0] * parts[1] + parts[0] + 1);
00257       }
00258       //Grow the tuple list if necessary
00259       while(myTup.get_n() + toProcs.size() >= myTup.get_max()){
00260     myTup.resize(myTup.get_max() ?
00261              myTup.get_max() + myTup.get_max()/2 + 1
00262              : 2);
00263       }
00264
00265       //Add each proc as a tuple
00266       for(std::vector<int>::iterator proc = toProcs.begin();
00267       proc != toProcs.end();
00268       proc++){
00269     myTup.vi_wr[tup_i++] = *proc;
00270     myTup.vul_wr[tup_ul++] = *it;
00271     myTup.vr_wr[tup_r++] = x[count];
00272     myTup.vr_wr[tup_r++] = y[count];
00273     myTup.vr_wr[tup_r++] = z[count];
00274     myTup.inc_n();
00275       }
00276       count++;
00277       toProcs.clear();
00278     }
00279     if(!canWrite) myTup.disableWriteAccess();
00280     return MB_SUCCESS;
00281   }
00282
00283   //Partition the global box by the number of procs
00284   ErrorCode ParallelMergeMesh::PartitionGlobalBox(double *gbox, double *lengths, int *parts)
00285   {
00286     //Determine the length of each side
00287     double xLen = gbox[3]-gbox[0];
00288     double yLen = gbox[4]-gbox[1];
00289     double zLen = gbox[5]-gbox[2];
00290     unsigned numProcs = myPcomm->size();
00291
00292     //Partition sides from the longest to shortest lengths
00293     //If x is the longest side
00294     if(xLen >= yLen && xLen >= zLen){
00295       parts[0] = PartitionSide(xLen, yLen * zLen, numProcs, true);
00296       numProcs /= parts[0];
00297       //If y is second longest
00298       if(yLen >= zLen){
00299     parts[1] = PartitionSide(yLen, zLen, numProcs, false);
00300     parts[2] = numProcs/parts[1];
00301       }
00302       //If z is the longer
00303       else{
00304     parts[2] = PartitionSide(zLen, yLen, numProcs, false);
00305     parts[1] = numProcs/parts[2];
00306       }
00307     }
00308     //If y is the longest side
00309     else if (yLen >= zLen){
00310       parts[1] = PartitionSide(yLen, xLen * zLen, numProcs, true);
00311       numProcs /= parts[1];
00312       //If x is the second longest
00313       if(xLen >= zLen){
00314     parts[0] = PartitionSide(xLen, zLen, numProcs, false);
00315     parts[2] = numProcs/parts[0];
00316       }
00317       //If z is the second longest
00318       else{
00319     parts[2] = PartitionSide(zLen, xLen, numProcs, false);
00320     parts[0] = numProcs/parts[2];
00321       }
00322     }
00323     //If z is the longest side
00324     else{
00325       parts[2] = PartitionSide(zLen, xLen * yLen, numProcs, true);
00326       numProcs /= parts[2];
00327       //If x is the second longest
00328       if(xLen >= yLen){
00329     parts[0] = PartitionSide(xLen, yLen, numProcs, false);
00330     parts[1] = numProcs/parts[0];
00331       }
00332       //If y is the second longest
00333       else{
00334     parts[1] = PartitionSide(yLen, xLen, numProcs, false);
00335     parts[0] = numProcs/parts[1];
00336       }
00337     }
00338
00339     //Divide up each side to give the lengths
00340     lengths[0] = xLen/(double)parts[0];
00341     lengths[1] = yLen/(double)parts[1];
00342     lengths[2] = zLen/(double)parts[2];
00343     return MB_SUCCESS;
00344   }
00345
00346   //Partition a side based on the length ratios
00347   int ParallelMergeMesh::PartitionSide(double sideLen, double restLen, unsigned numProcs, bool altRatio)
00348   {
00349     //If theres only 1 processor, then just return 1
00350     if(numProcs == 1){
00351       return 1;
00352     }
00353     //Initialize with the ratio of 1 proc
00354     double ratio = -DBL_MAX;
00355     unsigned factor = 1;
00356     //We need to be able to save the last ratio and factor (for comparison)
00357     double oldRatio = ratio;
00358     double oldFactor = 1;
00359
00360     //This is the ratio were shooting for
00361     double goalRatio = sideLen/restLen;
00362
00363     //Calculate the divisor and numerator power
00364     //This avoid if statements in the loop and is useful since both calculations are similar
00365     double divisor, p;
00366     if(altRatio){
00367       divisor = (double)numProcs * sideLen;
00368       p = 3;
00369     }
00370     else{
00371       divisor = (double)numProcs;
00372       p = 2;
00373     }
00374
00375     //Find each possible factor
00376     for (unsigned i = 2; i <= numProcs/2; i++){
00377       //If it is a factor...
00378       if (numProcs % i == 0){
00379     //We need to save the past factor
00380     oldRatio = ratio;
00381     oldFactor = factor;
00382     //There are 2 different ways to calculate the ratio:
00383     //Comparing 1 side to 2 sides: (i*i*i)/(numProcs*x)
00384     //Justification:  We have a ratio x:y:z (side Lengths) == a:b:c (procs).  So a=kx, b=ky, c=kz.
00385     //Also, abc=n (numProcs) => bc = n/a.  Also, a=kx => k=a/x => 1/k=x/a
00386     //And so x/(yz) == (kx)/(kyz) == (kx)/(kykz(1/k)) == a/(bc(x/a)) == a/((n/a)(x/a)) == a^3/(nx).
00387     //Comparing 1 side to 1 side: (i*i)/numprocs
00388     //Justification: i/(n/i) == i^2/n
00389     ratio = pow((double)i, p)/divisor;
00390     factor = i;
00391     //Once we have passed the goal ratio, we can break since we'll only move away from the goal ratio
00392     if(ratio >= goalRatio){
00393       break;
00394     }
00395       }
00396     }
00397     //If we havent reached the goal ratio yet, check out factor = numProcs
00398     if(ratio < goalRatio){
00399       oldRatio = ratio;
00400       oldFactor = factor;
00401       factor = numProcs;
00402       ratio = pow((double)numProcs, p)/divisor;
00403     }
00404
00405     //Figure out if our oldRatio is better than ratio
00406     if(fabs(ratio - goalRatio) > fabs(oldRatio-goalRatio)){
00407       factor = oldFactor;
00408     }
00409     //Return our findings
00410     return factor;
00411   }
00412
00413   //Id the tuples that are matching
00414   ErrorCode ParallelMergeMesh::PopulateMyMatches()
00415   {
00416     //Counters for accessing tuples more efficiently
00417     unsigned long i = 0, mat_i=0, mat_ul=0, j=0, tup_r=0;
00418     double eps2 = myEps*myEps;
00419
00420     uint tup_mi, tup_ml, tup_mul, tup_mr;
00421     myTup.getTupleSize(tup_mi, tup_ml, tup_mul, tup_mr);
00422
00423     bool canWrite = myMatches.get_writeEnabled();
00424     if(!canWrite) myMatches.enableWriteAccess();
00425
00426     while((i+1)<myTup.get_n()){
00427       //Proximity Comparison
00428       double xi = myTup.vr_rd[tup_r],
00429     yi = myTup.vr_rd[tup_r+1],
00430     zi = myTup.vr_rd[tup_r+2];
00431
00432       bool done = false;
00433       while(!done){
00434     j++; tup_r += tup_mr;
00435     if(j >= myTup.get_n()){
00436       break;
00437     }
00438     CartVect cv(myTup.vr_rd[tup_r]-xi,
00439             myTup.vr_rd[tup_r+1]-yi,
00440             myTup.vr_rd[tup_r+2]-zi);
00441     if(cv.length_squared() > eps2){
00442       done = true;
00443     }
00444       }
00445       //Allocate the tuple list before adding matches
00446       while(myMatches.get_n()+(j-i)*(j-i-1) >= myMatches.get_max()){
00447     myMatches.resize(myMatches.get_max() ?
00448              myMatches.get_max() + myMatches.get_max()/2 + 1 :
00449              2);
00450       }
00451
00452       //We now know that tuples [i to j) exclusive match.
00453       //If n tuples match, n*(n-1) match tuples will be made
00454       //tuples are of the form (proc1,proc2,handle1,handle2)
00455       if(i+1 < j){
00456     int kproc = i*tup_mi;
00457     unsigned long khand = i*tup_mul;
00458     for(unsigned long k = i; k<j; k++){
00459       int lproc = kproc+tup_mi;
00460       unsigned long lhand = khand+tup_mul;
00461       for(unsigned long l=k+1; l<j; l++){
00462         myMatches.vi_wr[mat_i++]=myTup.vi_rd[kproc];//proc1
00463         myMatches.vi_wr[mat_i++]=myTup.vi_rd[lproc];//proc2
00464         myMatches.vul_wr[mat_ul++]=myTup.vul_rd[khand];//handle1
00465         myMatches.vul_wr[mat_ul++]=myTup.vul_rd[lhand];//handle2
00466         myMatches.inc_n();
00467
00468         myMatches.vi_wr[mat_i++]=myTup.vi_rd[lproc];//proc1
00469         myMatches.vi_wr[mat_i++]=myTup.vi_rd[kproc];//proc2
00470         myMatches.vul_wr[mat_ul++]=myTup.vul_rd[lhand];//handle1
00471         myMatches.vul_wr[mat_ul++]=myTup.vul_rd[khand];//handle2
00472         myMatches.inc_n();
00473         lproc += tup_mi;
00474         lhand += tup_mul;
00475       }
00476       kproc += tup_mi;
00477       khand += tup_mul;
00478     }//End for(int k...
00479       }
00480       i = j;
00481     }//End while(i+1<tup.n)
00482
00483     if(!canWrite) myMatches.disableWriteAccess();
00484     return MB_SUCCESS;
00485   }
00486
00487   //Sort the matching tuples so that vertices can be tagged accurately
00488   ErrorCode ParallelMergeMesh::SortMyMatches()
00489   {
00490     TupleList::buffer buf(mySkinEnts[0].size());
00491     //Sorts are necessary to check for doubles
00492     //Sort by remote handle
00493     myMatches.sort(3,&buf);
00494     //Sort by matching proc
00495     myMatches.sort(1,&buf);
00496     //Sort by local handle
00497     myMatches.sort(2,&buf);
00498     buf.reset();
00499     return MB_SUCCESS;
00500   }
00501
00502   //Tag the shared elements using existing PComm functionality
00503   ErrorCode ParallelMergeMesh::TagSharedElements(int dim)
00504   {
00505     //Manipulate the matches list to tag vertices and entities
00506     //Set up proc ents
00507     Range proc_ents;
00508     ErrorCode rval;
00509
00510     // get the entities in the partition sets
00511     for (Range::iterator rit = myPcomm->partitionSets.begin(); rit != myPcomm->partitionSets.end(); rit++) {
00512       Range tmp_ents;
00513       rval = myMB->get_entities_by_handle(*rit, tmp_ents, true);
00514       if (MB_SUCCESS != rval){
00515     return rval;
00516       }
00517       proc_ents.merge(tmp_ents);
00518     }
00519     if (myMB->dimension_from_handle(*proc_ents.rbegin()) !=
00520     myMB->dimension_from_handle(*proc_ents.begin())) {
00521       Range::iterator lower = proc_ents.lower_bound(CN::TypeDimensionMap[0].first),
00522     upper = proc_ents.upper_bound(CN::TypeDimensionMap[dim-1].second);
00523       proc_ents.erase(lower, upper);
00524     }
00525
00526
00527     //This vector doesnt appear to be used but its in resolve_shared_ents
00528     int maxp = -1;
00529     std::vector<int> sharing_procs(MAX_SHARING_PROCS);
00530     std::fill(sharing_procs.begin(), sharing_procs.end(), maxp);
00531
00532     // get ents shared by 1 or n procs
00533     std::map<std::vector<int>, std::vector<EntityHandle> > proc_nranges;
00534     Range proc_verts;
00535     rval = myMB->get_adjacencies(proc_ents, 0, false, proc_verts,
00536                  Interface::UNION);
00537     if(rval != MB_SUCCESS){
00538       return rval;
00539     }
00540
00541     rval = myPcomm->tag_shared_verts(myMatches, proc_nranges, proc_verts);
00542     if(rval != MB_SUCCESS){
00543       return rval;
00544     }
00545
00546     // get entities shared by 1 or n procs
00547     rval = myPcomm->get_proc_nvecs(dim,dim-1, &mySkinEnts[0],proc_nranges);
00548     if(rval != MB_SUCCESS){
00549       return rval;
00550     }
00551
00552     // create the sets for each interface; store them as tags on
00553     // the interface instance
00554     Range iface_sets;
00555     rval = myPcomm->create_interface_sets(proc_nranges);
00556     if(rval != MB_SUCCESS){
00557       return rval;
00558     }
00559     // establish comm procs and buffers for them
00560     std::set<unsigned int> procs;
00561     rval = myPcomm->get_interface_procs(procs, true);
00562     if(rval != MB_SUCCESS){
00563       return rval;
00564     }
00565
00566     // resolve shared entity remote handles; implemented in ghost cell exchange
00567     // code because it's so similar
00568     rval = myPcomm->exchange_ghost_cells(-1, -1, 0, true, true);
00569     if(rval != MB_SUCCESS){
00570       return rval;
00571     }
00572     // now build parent/child links for interface sets
00573     rval = myPcomm->create_iface_pc_links();
00574     return rval;
00575   }
00576
00577   //Make sure to free up any allocated data
00578   //Need to avoid a double free
00579   void ParallelMergeMesh::CleanUp()
00580   {
00581     //The reset operation is now safe and avoids a double free()
00582     myMatches.reset();
00583     myTup.reset();
00584     myCD.reset();
00585   }
00586
00587   //Simple quick  sort to real
00588   void ParallelMergeMesh::SortTuplesByReal(TupleList &tup,
00589                        double eps)
00590   {
00591     bool canWrite = tup.get_writeEnabled();
00592     if(!canWrite) tup.enableWriteAccess();
00593
00594     uint mi, ml, mul, mr;
00595     tup.getTupleSize(mi,ml,mul,mr);
00596     PerformRealSort(tup, 0, tup.get_n(), eps, mr);
00597
00598     if(!canWrite) tup.disableWriteAccess();
00599   }
00600
00601   //Swap around tuples
00602   void ParallelMergeMesh::SwapTuples(TupleList &tup,
00603                      unsigned long a, unsigned long b)
00604   {
00605     if(a==b) return;
00606
00607     uint mi, ml, mul, mr;
00608     tup.getTupleSize(mi, ml, mul, mr);
00609
00610     //Swap mi
00611     unsigned long a_val = a*mi, b_val=b*mi;
00612     for(unsigned long i=0; i< mi;i++){
00613       sint t =tup.vi_rd[a_val];
00614       tup.vi_wr[a_val] = tup.vi_rd[b_val];
00615       tup.vi_wr[b_val] = t;
00616       a_val++;
00617       b_val++;
00618     }
00619     //Swap ml
00620     a_val = a*ml;
00621     b_val = b*ml;
00622     for(unsigned long i=0; i< ml;i++){
00623       slong t =tup.vl_rd[a_val];
00624       tup.vl_wr[a_val] = tup.vl_rd[b_val];
00625       tup.vl_wr[b_val] = t;
00626       a_val++;
00627       b_val++;
00628     }
00629     //Swap mul
00630     a_val = a*mul;
00631     b_val = b*mul;
00632     for(unsigned long i=0; i< mul;i++){
00633       ulong t =tup.vul_rd[a_val];
00634       tup.vul_wr[a_val] = tup.vul_rd[b_val];
00635       tup.vul_wr[b_val] = t;
00636       a_val++;
00637       b_val++;
00638     }
00639     //Swap mr
00640     a_val = a*mr;
00641     b_val = b*mr;
00642     for(unsigned long i=0; i< mr;i++){
00643       realType t =tup.vr_rd[a_val];
00644       tup.vr_wr[a_val] = tup.vr_rd[b_val];
00645       tup.vr_wr[b_val] = t;
00646       a_val++;
00647       b_val++;
00648     }
00649   }
00650
00651   //Perform the sorting of a tuple by real
00652   //To sort an entire tuple_list, call (tup,0,tup.n,epsilon)
00653   void ParallelMergeMesh::PerformRealSort(TupleList &tup,
00654                       unsigned long left,
00655                       unsigned long right,
00656                       double eps,
00657                       uint tup_mr)
00658   {
00659     //If list size is only 1 or 0 return
00660     if(left+1 >= right){
00661       return;
00662     }
00663     unsigned long swap = left, tup_l = left*tup_mr,
00664       tup_t = tup_l + tup_mr;
00665
00666     //Swap the median with the left position for a (hopefully) better split
00667     SwapTuples(tup, left, (left+right)/2);
00668
00669     //Partition the data
00670     for(unsigned long t=left+1;t<right;t++){
00671       //If the left value(pivot) is greater than t_val, swap it into swap
00672       if(TupleGreaterThan(tup,tup_l,tup_t,eps, tup_mr)){
00673     swap++;
00674     SwapTuples(tup, swap,t);
00675       }
00676       tup_t+=tup_mr;
00677     }
00678
00679     //Swap so that position swap is in the correct position
00680     SwapTuples(tup,left,swap);
00681
00682     //Sort left and right of swap
00683     PerformRealSort(tup, left  ,swap, eps, tup_mr);
00684     PerformRealSort(tup, swap+1,right,eps, tup_mr);
00685   }
00686
00687   //Note, this takes the actual tup.vr[] index (aka i*tup.mr)
00688   bool ParallelMergeMesh::TupleGreaterThan(TupleList &tup,
00689                        unsigned long vrI,
00690                        unsigned long vrJ,
00691                        double eps,
00692                        uint tup_mr){
00693     unsigned check=0;
00694     while(check < tup_mr){
00695       //If the values are the same
00696       if(fabs(tup.vr_rd[vrI+check]-tup.vr_rd[vrJ+check]) <= eps){
00697     check++;
00698     continue;
00699       }
00700       //If I greater than J
00701       else if(tup.vr_rd[vrI+check] > tup.vr_rd[vrJ+check]){
00702     return true;
00703       }
00704       //If J greater than I
00705       else{
00706     return false;
00707       }
00708     }
00709     //All Values are the same
00710     return false;
00711   }
00712
00713 }//End namespace moab