#include #include #include #include "mpi.h" #include "jacobi.h" #include "testname.h" /* * do_print controls output useful for debugging and observing, * particularly the sloooowwww convergence of this method. */ static int do_print = 0; static int max_it = 100; /* * Define the symbol NO_CONV_TEST to mimic an explicit differencing scheme */ int main( argc, argv ) int argc; char **argv; { int rank, size, i, j, itcnt; Mesh mesh; #ifndef NO_CONV_TEST double diffnorm, gdiffnorm; #else #define gdiffnorm 1.0 #endif double *swap; double *xlocalrow, *xnewrow; #ifdef _AIX #pragma disjoint (*xlocalrow, *xnewrow) #endif int maxm, maxn, lrow; int k; double t; MPI_Init( &argc, &argv ); MPI_Comm_rank( MPI_COMM_WORLD, &rank ); MPI_Comm_size( MPI_COMM_WORLD, &size ); Get_command_line( rank, argc, argv, &maxm, &maxn, &do_print, &max_it ); Setup_mesh( maxm, maxn, &mesh ); /* Run twice and take the last time (ensures that code is paged in) */ for (k=0; k<2; k++) { itcnt = 0; Init_mesh( &mesh ); ExchangeInit( &mesh ); MPI_Barrier( mesh.ring_comm ); t = MPI_Wtime(); if (do_print && rank == 0) { printf( "Starting at time %f\n", t ); fflush(stdout); } /* lrow is used enough that we make a local copy */ lrow = mesh.lrow; do { Exchange( &mesh ); /* Compute new values (but not on boundary). Note that many compilers will not generate good code for this (i.e., will not do the obvious strength reductions). The code below gets reasonable performance on some systems; additional rewriting might help others (e.g., 1+j instead of j+1). Note that without some "disjoint" xnewrow, xlocalrow, extra loads are needed in the loop. */ itcnt ++; #ifndef NO_CONV_TEST diffnorm = 0.0; #endif xnewrow = mesh.xnew + 1 * maxm; xlocalrow = mesh.xlocal + 1 * maxm; for (i=1; i<=lrow; i++) { for (j=1; j 1.0e-2 && itcnt < max_it); t = MPI_Wtime() - t; ExchangeEnd( &mesh ); } if (rank == 0) { #ifdef NO_CONV_TEST printf( "%s: %d iterations in %f secs (%f MFlops), m=%d n=%d np=%d\n", TESTNAME, itcnt, t, itcnt * (maxm-2.0)*(maxn-2)*(4)*1.0e-6/t, maxm, maxn, size ); #else /* 4 ops for new value, 3 for norm */ printf( "%s: %d iterations in %f secs (%f MFlops); diffnorm %f, m=%d n=%d np=%d\n", TESTNAME, itcnt, t, itcnt * (maxm-2.0)*(maxn-2)*(4+3)*1.0e-6/t, gdiffnorm, maxm, maxn, size ); #endif } MPI_Comm_free( &mesh.ring_comm ); MPI_Finalize( ); return 0; } #include #include #include #include "mpi.h" #include "jacobi.h" /* * Command line arguments are not defined by the MPI Standard. * We'll just get the values from process 0 and broadcast them * * maxm is the number of columns and maxn is the number of rows in the * global mesh. Because the mesh is divided into row-oriented slabs, the * resulting pieces are roughly maxm x maxn/p. See setupmesh.c for * the exact dimensions. * */ void Get_command_line( rank, argc, argv, maxm, maxn, do_print, maxit ) int rank, argc, *maxm, *maxn, *do_print, *maxit; char **argv; { int args[4]; int i; if (rank == 0) { /* Get maxn from the command line */ *maxn = DEFAULT_MAXN; *maxm = -1; for (i=1; i #include #include #include "mpi.h" #include "jacobi.h" void Setup_mesh( maxm, maxn, mesh ) int maxm, maxn; Mesh *mesh; { int false = 0; int true = 1; /* For some items, it is helpful to have local copies */ int lrow, rank, size; register double *xlocal, *xnew; MPI_Comm_size( MPI_COMM_WORLD, &size ); MPI_Cart_create( MPI_COMM_WORLD, 1, &size, &false, true, &mesh->ring_comm ); MPI_Cart_shift( mesh->ring_comm, 0, 1, &mesh->down_nbr, &mesh->up_nbr ); MPI_Comm_rank( mesh->ring_comm, &rank ); MPI_Comm_size( mesh->ring_comm, &size ); /* Number of local rows */ lrow = (maxn - 2) / size; if (rank < ((maxn - 2) % size)) lrow++; mesh->lrow = lrow; mesh->maxm = maxm; mesh->maxn = maxn; /* Allocate the local meshes */ mesh->xlocal = xlocal = (double *)malloc( maxm * ( lrow + 2 ) * sizeof(double) ); mesh->xnew = xnew = (double *)malloc( maxm * ( lrow + 2 ) * sizeof(double) ); if (!mesh->xlocal || !mesh->xnew) { fprintf( stderr, "Unable to allocate local mesh\n" ); MPI_Abort( MPI_COMM_WORLD, 1 ); } } void Init_mesh( mesh ) Mesh *mesh; { int i, j, lrow, rank, maxm; register double *xlocal, *xnew; xlocal = mesh->xlocal; xnew = mesh->xnew; lrow = mesh->lrow; maxm = mesh->maxm; MPI_Comm_rank( mesh->ring_comm, &rank ); /* Fill the data as specified */ for (i=1; i<=lrow; i++) for (j=0; j #include #include #include "mpi.h" #include "jacobi.h" void ExchangeInit( mesh ) Mesh *mesh; { double *xlocal = mesh->xlocal; int maxm = mesh->maxm; int lrow = mesh->lrow; int up_nbr = mesh->up_nbr; int down_nbr = mesh->down_nbr; MPI_Comm ring_comm = mesh->ring_comm; MPI_Irecv( xlocal, maxm, MPI_DOUBLE, down_nbr, 0, ring_comm, &mesh->rq[0] ); MPI_Irecv( xlocal + maxm * (lrow + 1), maxm, MPI_DOUBLE, up_nbr, 1, ring_comm, &mesh->rq[1] ); } void Exchange( mesh ) Mesh *mesh; { MPI_Status statuses[2]; double *xlocal = mesh->xlocal; int maxm = mesh->maxm; int lrow = mesh->lrow; int up_nbr = mesh->up_nbr; int down_nbr = mesh->down_nbr; MPI_Comm ring_comm = mesh->ring_comm; /* Send up and down, then receive */ MPI_Rsend( xlocal + maxm * lrow, maxm, MPI_DOUBLE, up_nbr, 0, ring_comm ); MPI_Rsend( xlocal + maxm, maxm, MPI_DOUBLE, down_nbr, 1, ring_comm ); MPI_Waitall( 2, mesh->rq, statuses ); } void ExchangeEnd( mesh ) Mesh *mesh; { MPI_Cancel( &mesh->rq[0] ); MPI_Cancel( &mesh->rq[1] ); }