#include "mpi.h"

double parallel_dot( const double restrict u[], 
                     const double restrict v[], int n, int ordered, 
		     MPI_Comm comm )
{
    int    rank, size, i;
    double temp, result;

    if (ordered) {
	MPI_Comm tempcomm;
	/* A sophisticated version would cache the duplicated communicator */
	MPI_Comm_dup( comm, &tempcomm );
	MPI_Comm_rank( tempcomm, &rank );
	MPI_Comm_size( tempcomm, &size );
	if (rank != 0) {
	    MPI_Recv( &temp, 1, MPI_DOUBLE, rank-1, 0, tempcomm, 
		      MPI_STATUS_NULL );
	}
	else {
	    temp = 0.0;
	}
	for (i=0; i<n; i++) {
	    temp += u[i] * v[i];
	}
	if (rank != size-1) {
	    MPI_Send( &temp, 1, MPI_DOUBLE, rank+1, 0, tempcomm );
	}
	MPI_Bcast( &temp, 1, MPI_DOUBLE, size-1, tempcomm );
	MPI_Comm_free( &tempcomm );
	result = temp;
    }
    else {
	temp = 0.0;
	for (i=0; i<n; i++) {
	    temp += u[i] * v[i];
	}
	MPI_Allreduce( &temp, &result, 1, MPI_DOUBLE, MPI_SUM, comm );
    }
    return result;
}