/**********************************************************************
 * Copyright (C) 1997-1998 Dolphin Interconnect Solutions Inc.
 * Copyright (C) 1999-2001 Etnus LLC.
 *
 * Permission is hereby granted to use, reproduce, prepare derivative
 * works, and to redistribute to others.
 *
 *				  DISCLAIMER
 *
 * Neither Dolphin Interconnect Solutions, Etnus LLC, nor any of their
 * employees, makes any warranty express or implied, or assumes any
 * legal liability or responsibility for the accuracy, completeness,
 * or usefulness of any information, apparatus, product, or process
 * disclosed, or represents that its use would not infringe privately
 * owned rights.
 *
 * This code was written by
 * James Cownie: Dolphin Interconnect Solutions. <jcownie@dolphinics.com>
 *               Etnus LLC <jcownie@etnus.com>
 **********************************************************************/

/* Update log
 *
 * Mar  6 2001 JHC: Add mqs_comm_get_group to allow a debugger to acquire
 *                  processes less eagerly.
 * Dec 13 2000 JHC: totalview/2514: Modify image_has_queues to return
 *                  a silent FALSE if none of the expected data is
 *                  present. This way you won't get complaints when
 *                  you try this on non MPICH processes.
 * Sep  8 2000 JVD: #include <string.h> to silence Linux Alpha compiler warnings.
 * Mar 21 2000 JHC: Add the new entrypoint mqs_dll_taddr_width
 * Nov 26 1998 JHC: Fix the problem that we weren't handling
 *                  MPIR_Ignore_queues properly.
 * Oct 22 1998 JHC: Fix a zero allocation problem
 * Aug 19 1998 JHC: Fix some problems in our use of target_to_host on
 *                  big endian machines.
 * May 28 1998 JHC: Use the extra information we can return to say
 *                  explicitly that sends are only showing non-blocking ops
 * May 19 1998 JHC: Changed the names of the structs and added casts
 *                  where needed to reflect the change to the way we handle
 *                  type safety across the interface.
 * Oct 27 1997 JHC: Created by exploding db_message_state_mpich.cxx
 */

/*
 * This file is an example of how to use the DLL interface to handle
 * message queue display from a debugger.  It provides all of the
 * functions required to display MPICH message queues.
 * It has been tested with TotalView.
 *
 * James Cownie <jcownie@dolphinics.com>
 */

#include <stdlib.h>	
/* 
   The following was added by William Gropp to improve the portability 
   to systems with non-ANSI C compilers 
 */
#include "mpichconf.h"
#ifdef HAVE_NO_C_CONST
#define const
#endif

#include <string.h>
#include "mpi_interface.h"
#include "mpich_dll_defs.h"	

/* Essential macros for C */
#ifndef NULL
#define NULL ((void *)0)
#endif
#ifndef TRUE
#define TRUE (0==0)
#endif
#ifndef FALSE
#define FALSE (0==1)
#endif

#ifdef OLD_STYLE_CPP_CONCAT
#define concat(a,b) a/**/b
#define stringize(a) "a"
#else
#define concat(a,b) a##b
#define stringize(a) #a
#endif

/**********************************************************************/
/* Set up the basic callbacks into the debugger, also work out 
 * one crucial piece of info about the machine we're running on.
 */
static const mqs_basic_callbacks *mqs_basic_entrypoints;
static int host_is_big_endian;

void mqs_setup_basic_callbacks (const mqs_basic_callbacks * cb)
{
  int t = 1;

  host_is_big_endian    = (*(char *)&t) != 1;
  mqs_basic_entrypoints = cb;
} /* mqs_setup_callbacks */

/**********************************************************************/
/* Macros to make it transparent that we're calling the TV functions
 * through function pointers.
 */
#define mqs_malloc           (mqs_basic_entrypoints->mqs_malloc_fp)
#define mqs_free             (mqs_basic_entrypoints->mqs_free_fp)
#define mqs_prints           (mqs_basic_entrypoints->mqs_eprints_fp)
#define mqs_put_image_info   (mqs_basic_entrypoints->mqs_put_image_info_fp)
#define mqs_get_image_info   (mqs_basic_entrypoints->mqs_get_image_info_fp)
#define mqs_put_process_info (mqs_basic_entrypoints->mqs_put_process_info_fp)
#define mqs_get_process_info (mqs_basic_entrypoints->mqs_get_process_info_fp)

/* These macros *RELY* on the function already having set up the conventional
 * local variables i_info or p_info.
 */
#define mqs_find_type        (i_info->image_callbacks->mqs_find_type_fp)
#define mqs_field_offset     (i_info->image_callbacks->mqs_field_offset_fp)
#define mqs_get_type_sizes   (i_info->image_callbacks->mqs_get_type_sizes_fp)
#define mqs_find_function    (i_info->image_callbacks->mqs_find_function_fp)
#define mqs_find_symbol      (i_info->image_callbacks->mqs_find_symbol_fp)

#define mqs_get_image        (p_info->process_callbacks->mqs_get_image_fp)
#define mqs_get_global_rank  (p_info->process_callbacks->mqs_get_global_rank_fp)
#define mqs_fetch_data       (p_info->process_callbacks->mqs_fetch_data_fp)
#define mqs_target_to_host   (p_info->process_callbacks->mqs_target_to_host_fp)

/**********************************************************************/
/* Version handling functions.
 * This one should never be changed.
 */
int mqs_version_compatibility ( void )
{
  return MQS_INTERFACE_COMPATIBILITY;
} /* mqs_version_compatibility */

/* This one can say what you like */
char *mqs_version_string ( void )
{
  return "ETNUS MPICH message queue support for MPICH 1.1, 1.2 compiled on " __DATE__;
} /* mqs_version_string */

/* So the debugger can tell what interface width the library was compiled with */
int mqs_dll_taddr_width (void)
{
  return sizeof (mqs_taddr_t);
} /* mqs_dll_taddr_width */

/**********************************************************************/
/* Additional error codes and error string conversion.
 */
enum {
  err_silent_failure  = mqs_first_user_code,

  err_no_current_communicator,
  err_bad_request,
  err_no_store,

  err_failed_qhdr,
    err_unexpected,
    err_posted,

  err_failed_queue,
    err_first,

  err_failed_qel,
    err_context_id,
    err_tag,
    err_tagmask,
    err_lsrc,
    err_srcmask,
    err_next,
    err_ptr,

  err_failed_squeue,
    err_sq_head,

  err_failed_sqel,
    err_db_shandle,
    err_db_comm,
    err_db_target,
    err_db_tag,
    err_db_data,
    err_db_byte_length,
    err_db_next,

  err_failed_rhandle,
    err_is_complete,
    err_buf,
    err_len,
    err_s,

  err_failed_status,
    err_count,
    err_MPI_SOURCE,
    err_MPI_TAG,

  err_failed_commlist,
    err_sequence_number,
    err_comm_first,

  err_failed_communicator,
    err_np,
    err_lrank_to_grank,
    err_send_context,
    err_recv_context,
    err_comm_next,
    err_comm_name,

  err_all_communicators,
  err_mpid_recvs,
  err_group_corrupt
};

/**********************************************************************/
/* Forward declarations 
 */
static mqs_taddr_t fetch_pointer (mqs_process * proc, mqs_taddr_t addr, mpich_process_info *p_info);
static mqs_tword_t fetch_int (mqs_process * proc, mqs_taddr_t addr, mpich_process_info *p_info);

/* Internal structure we hold for each communicator */
typedef struct communicator_t
{
  struct communicator_t * next;
  group_t *               group;		/* Translations */
  int                     recv_context;		/* To catch changes */
  int                     present;
  mqs_communicator        comm_info;		/* Info needed at the higher level */
} communicator_t;

/**********************************************************************/
/* Functions to handle translation groups.
 * We have a list of these on the process info, so that we can
 * share the group between multiple communicators.
 */
/**********************************************************************/
/* Translate a process number */
static int translate (group_t *this, int index) 
{ 	
  if (index == MQS_INVALID_PROCESS ||
      ((unsigned int)index) >= ((unsigned int) this->entries))
    return MQS_INVALID_PROCESS;
  else
    return this->local_to_global[index]; 
} /* translate */

/**********************************************************************/
/* Reverse translate a process number i.e. global to local*/
static int reverse_translate (group_t * this, int index) 
{ 	
  int i;
  for (i=0; i<this->entries; i++)
    if (this->local_to_global[i] == index)
      return i;

  return MQS_INVALID_PROCESS;
} /* reverse_translate */

/**********************************************************************/
/* Search the group list for this group, if not found create it.
 */
static group_t * find_or_create_group (mqs_process *proc,
				       mqs_tword_t np,
				       mqs_taddr_t table)
{
  mpich_process_info *p_info = (mpich_process_info *)mqs_get_process_info (proc);
  mqs_image * image          = mqs_get_image (proc);
  mpich_image_info *i_info   = (mpich_image_info *)mqs_get_image_info (image);
  int intsize = p_info->sizes.int_size;
  communicator_t *comm  = p_info->communicator_list;
  int *tr;
  char *trbuffer;
  int i;
  group_t *g;

  if (np <= 0)
    return 0;					/* Makes no sense ! */

  /* Iterate over each communicator seeing if we can find this group */
  for (;comm; comm = comm->next)
    {
      g = comm->group;
      if (g && g->table_base == table)
	{
	  g->ref_count++;			/* Someone else is interested */
	  return g;
	}
    }

  /* Hmm, couldn't find one, so fetch it */	
  g = (group_t *)mqs_malloc (sizeof (group_t));
  tr = (int *)mqs_malloc (np*sizeof(int));
  trbuffer = (char *)mqs_malloc (np*intsize);
  g->local_to_global = tr;

  if (mqs_ok != mqs_fetch_data (proc, table, np*intsize, trbuffer) )
    {
      mqs_free (g);
      mqs_free (tr);
      mqs_free (trbuffer);
      return NULL;
    }

  /* This code is assuming that sizeof(int) is the same on target and host...
   * that's a bit flaky, but probably actually OK.
   */
  for (i=0; i<np; i++)
    mqs_target_to_host (proc, trbuffer+intsize*i, &tr[i], intsize);

  mqs_free (trbuffer);

  g->entries = np;
  g->ref_count = 1;

  return g;
} /* find_or_create_group */

/***********************************************************************/
static void group_decref (group_t * group)
{
  if (--(group->ref_count) == 0)
    {
      mqs_free (group->local_to_global);
      mqs_free (group);
    }
} /* group_decref */

/***********************************************************************
 * Perform basic setup for the image, we just allocate and clear
 * our info.
 */
int mqs_setup_image (mqs_image *image, const mqs_image_callbacks *icb)
{
  mpich_image_info *i_info = (mpich_image_info *)mqs_malloc (sizeof (mpich_image_info));

  if (!i_info)
    return err_no_store;

  memset ((void *)i_info, 0, sizeof (mpich_image_info));
  i_info->image_callbacks = icb;		/* Before we do *ANYTHING* */

  mqs_put_image_info (image, (mqs_image_info *)i_info);
  
  return mqs_ok;
} /* mqs_setup_image */


/***********************************************************************
 * Check for all the information we require to access the MPICH message queues.
 * Stash it into our structure on the image if we're succesful.
 */

/* A macro to save a lot of typing. */
#define GETOFFSET(type, field)							\
do {										\
  i_info->concat(field,_offs) = mqs_field_offset(type, stringize(field));	\
  if (i_info->concat(field,_offs) < 0)						\
    return concat (err_,field);                                                 \
} while (0)

int mqs_image_has_queues (mqs_image *image, char **message)
{
  mpich_image_info * i_info = (mpich_image_info *)mqs_get_image_info (image);
  int have_qhdr = FALSE;
  int have_queue= FALSE;
  int have_qel  = FALSE;
  int have_sq   = FALSE;
  int have_sqel = FALSE;
  int have_rh   = FALSE;
  int have_co   = FALSE;
  int have_cl   = FALSE;

  /* Default failure message ! */
  *message = "The symbols and types in the MPICH library used by TotalView\n"
	     "to extract the message queues are not as expected in\n"
	     "the image '%s'\n"
	     "No message queue display is possible.\n"
	     "This is probably an MPICH version or configuration problem.";

  /* Force in the file containing our breakpoint function, to ensure that 
   * types have been read from there before we try to look them up.
   */
  mqs_find_function (image, "MPIR_Breakpoint", mqs_lang_c, NULL);

  /* Are we supposed to ignore this ? (e.g. it's really an HPF runtime using the
   * MPICH process acquisition, but not wanting queue display) 
   */
  if (mqs_find_symbol (image, "MPIR_Ignore_queues", NULL) == mqs_ok)
    {
      *message = NULL;				/* Fail silently */
      return err_silent_failure;
    }

  {
    mqs_type *qh_type = mqs_find_type (image, "MPID_QHDR", mqs_lang_c);
	
    if (qh_type)
      {
	have_qhdr = TRUE;
	GETOFFSET(qh_type,unexpected);
	GETOFFSET(qh_type,posted);
      }
  }

  {
    mqs_type *q_type = mqs_find_type (image,"MPID_QUEUE",mqs_lang_c);
    if (q_type)
      {
	have_queue = TRUE;
	GETOFFSET(q_type, first);
      }
  }
      
  { /* Now fill in fields from MPID_QEL */
    mqs_type * qel_type = mqs_find_type (image,"MPID_QEL",mqs_lang_c);
    if (qel_type)
      {
	have_qel = TRUE;
	GETOFFSET(qel_type, context_id);
	GETOFFSET(qel_type, tag);
	GETOFFSET(qel_type, tagmask);
	GETOFFSET(qel_type, lsrc);
	GETOFFSET(qel_type, srcmask);
	GETOFFSET(qel_type, next);
	GETOFFSET(qel_type, ptr);
      }
  }       

  { /* Fields from MPIR_SQUEUE */
    mqs_type * sq_type = mqs_find_type (image,"MPIR_SQUEUE",mqs_lang_c);
    if (sq_type)
      {
	have_sq = TRUE;
	GETOFFSET(sq_type, sq_head);
      }
  }

  { /* Fields from MPIR_SQEL */
    mqs_type * sq_type = mqs_find_type (image,"MPIR_SQEL",mqs_lang_c);
    if (sq_type)
      {
	have_sqel = TRUE;

	GETOFFSET(sq_type, db_shandle);
	GETOFFSET(sq_type, db_comm);
	GETOFFSET(sq_type, db_target);
	GETOFFSET(sq_type, db_tag);
	GETOFFSET(sq_type, db_data);
	GETOFFSET(sq_type, db_byte_length);
	GETOFFSET(sq_type, db_next);
      }
  }

  { /* Now fill in fields from MPIR_RHANDLE */
    mqs_type * rh_type = mqs_find_type (image,"MPIR_RHANDLE",mqs_lang_c);
    int status_offset;
    mqs_type *status_type;

    if (rh_type)
      {
	have_rh = TRUE;

	GETOFFSET(rh_type, is_complete);
	GETOFFSET(rh_type, buf);
	GETOFFSET(rh_type, len);

	/* Digital MPI doesn't provide this, so we handle not having it below,
	 * and don't complain about it
	 */
	i_info->start_offs = mqs_field_offset (rh_type, "start");

	/* And from the nested MPI_Status object. This is less pleasant */
	status_offset = mqs_field_offset (rh_type, "s");
	if (status_offset < 0)
	  return err_s;

	status_type = mqs_find_type (image, "MPI_Status",mqs_lang_c);
	if (!status_type)
	  return err_failed_status;
    
	/* Adjust the offsets of the embedded fields */
	GETOFFSET(status_type, count);
	i_info->count_offs += status_offset;
	GETOFFSET(status_type, MPI_SOURCE);
	i_info->MPI_SOURCE_offs += status_offset;      
	GETOFFSET(status_type, MPI_TAG);
	i_info->MPI_TAG_offs += status_offset;
      }
  }
      
  { /* Fields from the MPIR_Comm_list */
    mqs_type * cl_type = mqs_find_type (image,"MPIR_Comm_list",mqs_lang_c);
    if (cl_type)
      {
	have_cl = TRUE;
	GETOFFSET(cl_type, sequence_number);
	GETOFFSET(cl_type, comm_first);
      }
  }

  { /* Fields from the communicator */
    mqs_type * co_type = mqs_find_type (image, "MPIR_Communicator",mqs_lang_c);
    if (co_type)
      {
	have_co = TRUE;
	GETOFFSET(co_type, np);
	GETOFFSET(co_type, lrank_to_grank);
	GETOFFSET(co_type, send_context);
	GETOFFSET(co_type, recv_context);
	GETOFFSET(co_type, comm_next);
	GETOFFSET(co_type, comm_name);
      }
  }

  /* If we have none of the symbols we expect we decide that this isn't even
   * trying to be an MPICH code, and give up silently.
   */
  if (!have_qhdr && !have_queue && !have_qel  &&
      !have_sq   && !have_sqel &&  !have_rh   &&
      !have_co   && !have_cl)
    {
      *message = NULL;				/* Fail silently */
      return err_silent_failure;
    }

  /* Now check each status individually, we know at least one test
   * succeeded, so this is trying to be an MPICH code and it's worth
   * complaining vocally 
   */
  if (!have_qhdr)
    return  err_failed_qhdr;
  if (!have_queue)
    return err_failed_queue;
  if (!have_qel)
    return err_failed_qel;
  if (!have_sq)
    return err_failed_squeue;
  if (!have_sqel)
    return err_failed_sqel;
  if (!have_rh)
    return err_failed_rhandle;
  if (!have_co)
    return err_failed_communicator;
  if (!have_cl)
    return err_failed_commlist;

  *message = NULL;

  /* Also check for the sendq symbols */
  if (mqs_find_symbol (image, "MPIR_Sendq", NULL) != mqs_ok)
    *message = "The MPICH library built into the image '%s'\n"
	       "does not have the send queue symbol MPIR_Sendq in it, it has probably\n"
	       "been configured without the '-debug' flag.\n"
	       "No send queue display is possible without that.";

  return mqs_ok;
} /* mqs_image_has_queues */

/***********************************************************************
 * Setup information needed for a specific process.
 * TV assumes that this will hang something onto the process,
 * if nothing is attached to it, then TV will believe that this process
 * has no message queue information.
 */
int mqs_setup_process (mqs_process *process, const mqs_process_callbacks *pcb)
{ 
  /* Extract the addresses of the global variables we need and save them away */
  mpich_process_info *p_info = (mpich_process_info *)mqs_malloc (sizeof (mpich_process_info));

  if (p_info)
    {
      mqs_image        *image;
      mpich_image_info *i_info;

      p_info->process_callbacks = pcb;

      /* Now we can get the rest of the info ! */
      image  = mqs_get_image (process);
      i_info = (mpich_image_info *)mqs_get_image_info (image);

      /* Library starts at zero, so this ensures we go look to start with */
      p_info->communicator_sequence = -1;
      /* We have no communicators yet */
      p_info->communicator_list     = NULL;
      mqs_get_type_sizes (process, &p_info->sizes);

      mqs_put_process_info (process, (mqs_process_info *)p_info);
      
      return mqs_ok;
    }
  else
    return err_no_store;
} /* mqs_setup_process */

/***********************************************************************
 * Check the process for message queues.
 */
int mqs_process_has_queues (mqs_process *proc, char **msg)
{
  mpich_process_info *p_info = (mpich_process_info *)mqs_get_process_info (proc);
  mqs_image * image          = mqs_get_image (proc);
  mpich_image_info *i_info   = (mpich_image_info *)mqs_get_image_info (image);
  mqs_taddr_t debugged_addr;

  /* Don't bother with a pop up here, it's unlikely to be helpful */
  *msg = 0;

  if (mqs_find_symbol (image, "MPIR_All_communicators", &p_info->commlist_base) != mqs_ok)
    return err_all_communicators;
  
  if (mqs_find_symbol (image, "MPID_recvs", &p_info->queue_base) != mqs_ok)
    return err_mpid_recvs;
  
  /* Check for a send queue */
  if (mqs_ok != mqs_find_symbol (image, "MPIR_Sendq", &p_info->sendq_base) ||
      mqs_ok != mqs_find_symbol (image, "MPIR_being_debugged", &debugged_addr))
    {
      p_info->has_sendq = FALSE;
    }
  else
    {
      p_info->has_sendq = fetch_int (proc, debugged_addr, p_info) != 0;
    }
  
  return mqs_ok;
} /* mqs_setup_process_info */

/***********************************************************************
 * Check if the communicators have changed by looking at the 
 * sequence number.
 */
static int communicators_changed (mqs_process *proc)
{
  mpich_process_info *p_info = (mpich_process_info *)mqs_get_process_info (proc);
  mqs_image * image          = mqs_get_image (proc);
  mpich_image_info *i_info   = (mpich_image_info *)mqs_get_image_info (image);
  mqs_tword_t new_seq = fetch_int (proc, 
			       p_info->commlist_base+i_info->sequence_number_offs,
			       p_info);
  int  res = (new_seq != p_info->communicator_sequence);
      
  /* Save the sequence number for next time */
  p_info->communicator_sequence = new_seq;

  return res;
} /* mqs_communicators_changed */

/***********************************************************************
 * Find a matching communicator on our list. We check the recv context
 * as well as the address since the communicator structures may be
 * being re-allocated from a free list, in which case the same
 * address will be re-used a lot, which could confuse us.
 */
static communicator_t * find_communicator (mpich_process_info *p_info,
					   mqs_taddr_t comm_base, int recv_ctx)
{
  communicator_t * comm = p_info->communicator_list;

  for (; comm; comm=comm->next)
    {
      if (comm->comm_info.unique_id == comm_base &&
	  comm->recv_context == recv_ctx)
	return comm;
    }

  return NULL;
} /* find_communicator */

/***********************************************************************
 * Comparison function for sorting communicators.
 */
static int compare_comms (const void *a, const void *b)
{
  communicator_t * ca = *(communicator_t **)a;
  communicator_t * cb = *(communicator_t **)b;

  return cb->recv_context - ca->recv_context;
} /* compare_comms */

/***********************************************************************
 * Rebuild our list of communicators because something has changed 
 */
static int rebuild_communicator_list (mqs_process *proc)
{
  mpich_process_info *p_info = (mpich_process_info *)mqs_get_process_info (proc);
  mqs_image * image          = mqs_get_image (proc);
  mpich_image_info *i_info   = (mpich_image_info *)mqs_get_image_info (image);
  mqs_taddr_t comm_base = fetch_pointer (proc, 
					 p_info->commlist_base+i_info->comm_first_offs,
					 p_info);
  communicator_t **commp;
  int commcount = 0;

  /* Iterate over the list in the process comparing with the list
   * we already have saved. This is n**2, because we search for each
   * communicator on the existing list. I don't think it matters, though
   * because there aren't that many communicators to worry about, and
   * we only ever do this if something changed.
   */
  while (comm_base)
    {
      /* We do have one to look at, so extract the info */
      int recv_ctx = fetch_int (proc, comm_base+i_info->recv_context_offs, p_info);
      communicator_t *old = find_communicator (p_info, comm_base, recv_ctx);
      mqs_taddr_t namep = fetch_pointer (proc, comm_base+i_info->comm_name_offs,p_info);
      char *name = "--unnamed--";
      char namebuffer[64];

      if (namep)
	{
	  if (mqs_fetch_data (proc, namep, 64, namebuffer) == mqs_ok &&
	      namebuffer[0] != 0)
	    name = namebuffer;
	}

      if (old)
	{
	  old->present = TRUE;			/* We do want this communicator */
	  strncpy (old->comm_info.name, name, 64); /* Make sure the name is up to date,
						    * it might have changed and we can't tell.
						    */
	}
      else
	{
	  mqs_taddr_t group_base = fetch_pointer (proc, comm_base+i_info->lrank_to_grank_offs,
					      p_info);
	  int np     = fetch_int (proc, comm_base+i_info->np_offs,p_info);
	  group_t *g = find_or_create_group (proc, np, group_base);
	  communicator_t *nc;

	  if (!g)
	    return err_group_corrupt;

	  nc = (communicator_t *)mqs_malloc (sizeof (communicator_t));

	  /* Save the results */
	  nc->next = p_info->communicator_list;
	  p_info->communicator_list = nc;
	  nc->present = TRUE;
	  nc->group   = g;
	  nc->recv_context = recv_ctx;

	  strncpy (nc->comm_info.name, name, 64);
	  nc->comm_info.unique_id = comm_base;
	  nc->comm_info.size      = np;
	  nc->comm_info.local_rank= reverse_translate (g, mqs_get_global_rank (proc));
	}
      /* Step to the next communicator on the list */
      comm_base = fetch_pointer (proc, comm_base+i_info->comm_next_offs, p_info);
    }

  /* Now iterate over the list tidying up any communicators which
   * no longer exist, and cleaning the flags on any which do.
   */
  commp = &p_info->communicator_list;

  for (; *commp; commp = &(*commp)->next)
    {
      communicator_t *comm = *commp;

      if (comm->present)
	{
	  comm->present = FALSE;
	  commcount++;
	}
      else
	{ /* It needs to be deleted */
	  *commp = comm->next;			/* Remove from the list */
	  group_decref (comm->group);		/* Group is no longer referenced from here */
	  mqs_free (comm);
	}
    }

  if (commcount)
    {
      /* Sort the list so that it is displayed in some semi-sane order. */
      communicator_t ** comm_array = (communicator_t **) mqs_malloc (
	                               commcount * sizeof (communicator_t *));
      communicator_t *comm = p_info->communicator_list;
      int i;
      for (i=0; i<commcount; i++, comm=comm->next)
	comm_array [i] = comm;

      /* Do the sort */
      qsort (comm_array, commcount, sizeof (communicator_t *), compare_comms);

      /* Re build the list */
      p_info->communicator_list = NULL;
      for (i=0; i<commcount; i++)
	{
	  comm = comm_array[i];
	  comm->next = p_info->communicator_list;
	  p_info->communicator_list = comm;
	}

      mqs_free (comm_array);
    }

  return mqs_ok;
} /* rebuild_communicator_list */

/***********************************************************************
 * Update the list of communicators in the process if it has changed.
 */
int mqs_update_communicator_list (mqs_process *proc)
{
  if (communicators_changed (proc))
    return rebuild_communicator_list (proc);
  else
    return mqs_ok;
} /* mqs_update_communicator_list */

/***********************************************************************
 * Setup to iterate over communicators.
 * This is where we check whether our internal communicator list needs
 * updating and if so do it.
 */
int mqs_setup_communicator_iterator (mqs_process *proc)
{
  mpich_process_info *p_info = (mpich_process_info *)mqs_get_process_info (proc);

  /* Start at the front of the list again */
  p_info->current_communicator = p_info->communicator_list;
  /* Reset the operation iterator too */
  p_info->next_msg = 0;

  return p_info->current_communicator == NULL ? mqs_end_of_list : mqs_ok;
} /* mqs_setup_communicator_iterator */

/***********************************************************************
 * Fetch information about the current communicator.
 */
int mqs_get_communicator (mqs_process *proc, mqs_communicator *comm)
{
  mpich_process_info *p_info = (mpich_process_info *)mqs_get_process_info (proc);

  if (p_info->current_communicator)
    {
      *comm = p_info->current_communicator->comm_info;
  
      return mqs_ok;
    }
  else
    return err_no_current_communicator;
} /* mqs_get_communicator */

/***********************************************************************
 * Get the group information about the current communicator.
 */
int mqs_get_comm_group (mqs_process *proc, int *group_members)
{
  mpich_process_info *p_info = (mpich_process_info *)mqs_get_process_info (proc);
  communicator_t     *comm   = p_info->current_communicator;

  if (comm)
    {
      group_t * g = comm->group;
      int i;

      for (i=0; i<g->entries; i++)
	group_members[i] = g->local_to_global[i];

      return mqs_ok;
    }
  else
    return err_no_current_communicator;
} /* mqs_get_comm_group */

/***********************************************************************
 * Step to the next communicator.
 */
int mqs_next_communicator (mqs_process *proc)
{
  mpich_process_info *p_info = (mpich_process_info *)mqs_get_process_info (proc);

  p_info->current_communicator = p_info->current_communicator->next;
  
  return (p_info->current_communicator != NULL) ? mqs_ok : mqs_end_of_list;
} /* mqs_next_communicator */

/***********************************************************************
 * Setup to iterate over pending operations 
 */
int mqs_setup_operation_iterator (mqs_process *proc, int op)
{
  mpich_process_info *p_info = (mpich_process_info *)mqs_get_process_info (proc);
  mqs_image * image          = mqs_get_image (proc);
  mpich_image_info *i_info   = (mpich_image_info *)mqs_get_image_info (image);

  p_info->what = (mqs_op_class)op;

  switch (op)
    {
    case mqs_pending_sends:
      if (!p_info->has_sendq)
	return mqs_no_information;
      else
	{
	  p_info->next_msg = p_info->sendq_base + i_info->sq_head_offs;
	  return mqs_ok;
	}

    case mqs_pending_receives:
      p_info->next_msg = p_info->queue_base + i_info->posted_offs;
      return mqs_ok;

    case mqs_unexpected_messages:
      p_info->next_msg = p_info->queue_base + i_info->unexpected_offs;
      return mqs_ok;

    default:
      return err_bad_request;
    }
} /* mqs_setup_operation_iterator */

/***********************************************************************
 * Handle the unexpected queue and the pending receive queue.
 * They're very similar.
 */
static int fetch_receive (mqs_process *proc, mpich_process_info *p_info,
			  mqs_pending_operation *res, int look_for_user_buffer)
{
  mqs_image * image          = mqs_get_image (proc);
  mpich_image_info *i_info   = (mpich_image_info *)mqs_get_image_info (image);
  communicator_t   *comm   = p_info->current_communicator;
  mqs_tword_t wanted_context = comm->recv_context;
  mqs_taddr_t base           = fetch_pointer (proc, p_info->next_msg, p_info);

  while (base != 0)
    { /* Well, there's a queue, at least ! */
      mqs_tword_t actual_context = fetch_int (proc, base + i_info->context_id_offs, p_info);
      
      if (actual_context == wanted_context)
	{ /* Found a good one */
	  mqs_tword_t tag     = fetch_int (proc, base + i_info->tag_offs, p_info);
	  mqs_tword_t tagmask = fetch_int (proc, base + i_info->tagmask_offs, p_info);
	  mqs_tword_t lsrc    = fetch_int (proc, base + i_info->lsrc_offs, p_info);
	  mqs_tword_t srcmask = fetch_int (proc, base + i_info->srcmask_offs, p_info);
	  mqs_taddr_t ptr     = fetch_pointer (proc, base + i_info->ptr_offs, p_info);
	  
	  /* Fetch the fields from the MPIR_RHANDLE */
	  int is_complete = fetch_int (proc, ptr + i_info->is_complete_offs, p_info);
	  mqs_taddr_t buf     = fetch_pointer (proc, ptr + i_info->buf_offs, p_info);
	  mqs_tword_t len     = fetch_int (proc, ptr + i_info->len_offs, p_info);
	  mqs_tword_t count   = fetch_int (proc, ptr + i_info->count_offs, p_info);

	  /* If we don't have start, then use buf instead... */
	  mqs_taddr_t start;
	  if (i_info->start_offs < 0)
	    start = buf;
	  else
	    start = fetch_pointer (proc, ptr + i_info->start_offs, p_info);

	  /* Hurrah, we should now be able to fill in all the necessary fields in the
	   * result !
	   */
	  res->status = is_complete ? mqs_st_complete : mqs_st_pending; /* We can't discern matched */
	  if (srcmask == 0)
	    {
	      res->desired_local_rank  = -1;
	      res->desired_global_rank = -1;
	    }
	  else
	    {
	      res->desired_local_rank  = lsrc;
	      res->desired_global_rank = translate (comm->group, lsrc);
	      
	    }
	  res->tag_wild       = (tagmask == 0);
	  res->desired_tag    = tag;
	  
	  if (look_for_user_buffer)
	    {
	      res->system_buffer  = FALSE;
	      res->buffer         = buf;
	      res->desired_length = len;
	    }
	  else
	    {
	      res->system_buffer  = TRUE;
	      /* Correct an oddity. If the buffer length is zero then no buffer
	       * is allocated, but the descriptor is left with random data.
	       */
	      if (count == 0)
		start = 0;
	      
	      res->buffer         = start;
	      res->desired_length = count;
	    }

	  if (is_complete)
	    { /* Fill in the actual results, rather than what we were looking for */
	      mqs_tword_t mpi_source  = fetch_int (proc, ptr + i_info->MPI_SOURCE_offs, p_info);
	      mqs_tword_t mpi_tag  = fetch_int (proc, ptr + i_info->MPI_TAG_offs, p_info);

	      res->actual_length     = count;
	      res->actual_tag        = mpi_tag;
	      res->actual_local_rank = mpi_source;
	      res->actual_global_rank= translate (comm->group, mpi_source);
	    }

	  /* Don't forget to step the queue ! */
	  p_info->next_msg = base + i_info->next_offs;
	  return mqs_ok;
	}
      else
	{ /* Try the next one */
	  base = fetch_pointer (proc, base + i_info->next_offs, p_info);
	}
    }
  
  p_info->next_msg = 0;
  return mqs_end_of_list;
}  /* fetch_receive */

/***********************************************************************
 * Handle the send queue, somewhat different.
 */
static int fetch_send (mqs_process *proc, mpich_process_info *p_info,
		       mqs_pending_operation *res)
{
  mqs_image * image        = mqs_get_image (proc);
  mpich_image_info *i_info = (mpich_image_info *)mqs_get_image_info (image);
  communicator_t   *comm   = p_info->current_communicator;
  mqs_taddr_t base         = fetch_pointer (proc, p_info->next_msg, p_info);

  if (!p_info->has_sendq)
    return mqs_no_information;

  /* Say what operation it is. We can only see non blocking send operations
   * in MPICH. Other MPI systems may be able to show more here. 
   */
  strcpy ((char *)res->extra_text[0],"Non-blocking send");
  res->extra_text[1][0] = 0;

  while (base != 0)
    { /* Well, there's a queue, at least ! */
      /* Check if it's one we're interested in ? */
      mqs_taddr_t commp = fetch_pointer (proc, base+i_info->db_comm_offs, p_info);
      mqs_taddr_t next  = base+i_info->db_next_offs;

      if (commp == comm->comm_info.unique_id)
	{ /* Found one */
	  mqs_tword_t target = fetch_int (proc, base+i_info->db_target_offs,      p_info);
	  mqs_tword_t tag    = fetch_int (proc, base+i_info->db_tag_offs,         p_info);
	  mqs_tword_t length = fetch_int (proc, base+i_info->db_byte_length_offs, p_info);
	  mqs_taddr_t data   = fetch_pointer (proc, base+i_info->db_data_offs,    p_info);
	  mqs_taddr_t shandle= fetch_pointer (proc, base+i_info->db_shandle_offs, p_info);
	  mqs_tword_t complete=fetch_int (proc, shandle+i_info->is_complete_offs, p_info);

	  /* Ok, fill in the results */
	  res->status = complete ? mqs_st_complete : mqs_st_pending; /* We can't discern matched */
	  res->actual_local_rank = res->desired_local_rank = target;
	  res->actual_global_rank= res->desired_global_rank= translate (comm->group, target);
	  res->tag_wild   = FALSE;
	  res->actual_tag = res->desired_tag = tag;
	  res->desired_length = res->actual_length = length;
	  res->system_buffer  = FALSE;
	  res->buffer = data;

	  p_info->next_msg = next;
	  return mqs_ok;
	}
      
      base = fetch_pointer (proc, next, p_info);
    }

  p_info->next_msg = 0;
  return mqs_end_of_list;
} /* fetch_send */

/***********************************************************************
 * Fetch the next valid operation. 
 * Since MPICH only maintains a single queue of each type of operation,
 * we have to run over it and filter out the operations which
 * match the active communicator.
 */
int mqs_next_operation (mqs_process *proc, mqs_pending_operation *op)
{
  mpich_process_info *p_info = (mpich_process_info *)mqs_get_process_info (proc);

  switch (p_info->what)
    {
    case mqs_pending_receives:
      return fetch_receive (proc,p_info,op,TRUE);
    case mqs_unexpected_messages:
      return fetch_receive (proc,p_info,op,FALSE);
    case mqs_pending_sends:
      return fetch_send (proc,p_info,op);
    default: return err_bad_request;
    }
} /* mqs_next_operation */

/***********************************************************************
 * Destroy the info.
 */
void mqs_destroy_process_info (mqs_process_info *mp_info)
{
  mpich_process_info *p_info = (mpich_process_info *)mp_info;
  /* Need to handle the communicators and groups too */
  communicator_t *comm = p_info->communicator_list;

  while (comm)
    {
      communicator_t *next = comm->next;

      group_decref (comm->group);		/* Group is no longer referenced from here */
      mqs_free (comm);
      
      comm = next;
    }
  mqs_free (p_info);
} /* mqs_destroy_process_info */

/***********************************************************************
 * Free off the data we associated with an image. Since we malloced it
 * we just free it.
 */
void mqs_destroy_image_info (mqs_image_info *info)
{
  mqs_free (info);
} /* mqs_destroy_image_info */

/***********************************************************************/
static mqs_taddr_t fetch_pointer (mqs_process * proc, mqs_taddr_t addr, mpich_process_info *p_info)
{
  int asize = p_info->sizes.pointer_size;
  char data [8];				/* ASSUME a pointer fits in 8 bytes */
  mqs_taddr_t res = 0;

  if (mqs_ok == mqs_fetch_data (proc, addr, asize, data))
    mqs_target_to_host (proc, data, 
			((char *)&res) + (host_is_big_endian ? sizeof(mqs_taddr_t)-asize : 0), 
			asize);

  return res;
} /* fetch_pointer */

/***********************************************************************/
static mqs_tword_t fetch_int (mqs_process * proc, mqs_taddr_t addr, mpich_process_info *p_info)
{
  int isize = p_info->sizes.int_size;
  char buffer[8];				/* ASSUME an integer fits in 8 bytes */
  mqs_tword_t res = 0;

  if (mqs_ok == mqs_fetch_data (proc, addr, isize, buffer))
    mqs_target_to_host (proc, buffer, 
			((char *)&res) + (host_is_big_endian ? sizeof(mqs_tword_t)-isize : 0), 
			isize);
  
  return res;
} /* fetch_int */

/***********************************************************************/
/* Convert an error code into a printable string */
char * mqs_dll_error_string (int errcode)
{
  switch (errcode)
    {
    case err_silent_failure:
      return "";
    case err_no_current_communicator: 
      return "No current communicator in the communicator iterator";
    case err_bad_request:    
      return "Attempting to setup to iterate over an unknown queue of operations";
    case err_no_store: 
      return "Unable to allocate store";
    case err_failed_qhdr: 
      return "Failed to find type MPID_QHDR";
    case err_unexpected: 
      return "Failed to find field 'unexpected' in MPID_QHDR";
    case err_posted: 
      return "Failed to find field 'posted' in MPID_QHDR";
    case err_failed_queue: 
      return "Failed to find type MPID_QUEUE";
    case err_first: 
      return "Failed to find field 'first' in MPID_QUEUE";
    case err_failed_qel: 
      return "Failed to find type MPID_QEL";
    case err_context_id: 
      return "Failed to find field 'context_id' in MPID_QEL";
    case err_tag: 
      return "Failed to find field 'tag' in MPID_QEL";
    case err_tagmask: 
      return "Failed to find field 'tagmask' in MPID_QEL";
    case err_lsrc: 
      return "Failed to find field 'lsrc' in MPID_QEL";
    case err_srcmask: 
      return "Failed to find field 'srcmask' in MPID_QEL";
    case err_next: 
      return "Failed to find field 'next' in MPID_QEL";
    case err_ptr: 
      return "Failed to find field 'ptr' in MPID_QEL";
    case err_failed_squeue: 
      return "Failed to find type MPIR_SQUEUE";
    case err_sq_head: 
      return "Failed to find field 'sq_head' in MPIR_SQUEUE";
    case err_failed_sqel: 
      return "Failed to find type MPIR_SQEL";
    case err_db_shandle: 
      return "Failed to find field 'db_shandle' in MPIR_SQEL";
    case err_db_comm: 
      return "Failed to find field 'db_comm' in MPIR_SQEL";
    case err_db_target: 
      return "Failed to find field 'db_target' in MPIR_SQEL";
    case err_db_tag: 
      return "Failed to find field 'db_tag' in MPIR_SQEL";
    case err_db_data: 
      return "Failed to find field 'db_data' in MPIR_SQEL";
    case err_db_byte_length: 
      return "Failed to find field 'db_byte_length' in MPIR_SQEL";
    case err_db_next: 
      return "Failed to find field 'db_next' in MPIR_SQEL";
    case err_failed_rhandle: 
      return "Failed to find type MPIR_RHANDLE";
    case err_is_complete: 
      return "Failed to find field 'is_complete' in MPIR_RHANDLE";
    case err_buf: 
      return "Failed to find field 'buf' in MPIR_RHANDLE";
    case err_len: 
      return "Failed to find field 'len' in MPIR_RHANDLE";
    case err_s: 
      return "Failed to find field 's' in MPIR_RHANDLE";
    case err_failed_status: 
      return "Failed to find type MPI_Status";
    case err_count: 
      return "Failed to find field 'count' in MPIR_Status";
    case err_MPI_SOURCE: 
      return "Failed to find field 'MPI_SOURCE' in MPIR_Status";
    case err_MPI_TAG: 
      return "Failed to find field 'MPI_TAG' in MPIR_Status";
    case err_failed_commlist: 
      return "Failed to find type MPIR_Comm_list";
    case err_sequence_number: 
      return "Failed to find field 'sequence_number' in MPIR_Comm_list";
    case err_comm_first: 
      return "Failed to find field 'comm_first' in MPIR_Comm_list";
    case err_failed_communicator: 
      return "Failed to find type MPIR_Communicator";
    case err_np: 
      return "Failed to find field 'np' in MPIR_Communicator";
    case err_lrank_to_grank: 
      return "Failed to find field 'lrank_to_grank' in MPIR_Communicator";
    case err_send_context: 
      return "Failed to find field 'send_context' in MPIR_Communicator";
    case err_recv_context: 
      return "Failed to find field 'recv_context' in MPIR_Communicator";
    case err_comm_next: 
      return "Failed to find field 'comm_next' in MPIR_Communicator";
    case err_comm_name: 
      return "Failed to find field 'comm_name' in MPIR_Communicator";
    case err_all_communicators: 
      return "Failed to find the global symbol MPIR_All_communicators";
    case err_mpid_recvs: 
      return "Failed to find the global symbol MPID_recvs";
    case err_group_corrupt:
      return "Could not read a communicator's group from the process (probably a store corruption)";

    default: return "Unknown error code";
    }
} /* mqs_dll_error_string */