Actual source code: threadcomm.c

petsc-3.4.4 2014-03-13
  1: #include <petsc-private/threadcommimpl.h>      /*I "petscthreadcomm.h" I*/
  2: #include <petscviewer.h>
  3: #if defined(PETSC_HAVE_MALLOC_H)
  4: #include <malloc.h>
  5: #endif

  7: static PetscInt         N_CORES                          = -1; /* cached core count; -1 = not yet determined (filled in by PetscGetNCores) */
  8: PetscBool               PetscThreadCommRegisterAllCalled = PETSC_FALSE; /* one-time guard for PetscThreadCommRegisterAll() */
  9: PetscFunctionList       PetscThreadCommList              = NULL;        /* registry of available threadcomm implementations */
 10: PetscMPIInt             Petsc_ThreadComm_keyval          = MPI_KEYVAL_INVALID; /* MPI attribute key attaching a threadcomm to an MPI_Comm */
 11: PetscThreadCommJobQueue PetscJobQueue                    = NULL;        /* global circular queue of kernel jobs polled by worker threads */
 12: PetscThreadComm         PETSC_THREAD_COMM_WORLD          = NULL;        /* lazily created global thread communicator */

 14: /* Logging support */
 15: PetscLogEvent ThreadComm_RunKernel, ThreadComm_Barrier;

 17: static PetscErrorCode PetscThreadCommRunKernel0_Private(PetscThreadComm tcomm,PetscErrorCode (*func)(PetscInt,...));

 21: /*@
 22:   PetscGetNCores - Gets the number of available cores on the system

 24:   Not Collective

 26:   Level: developer

 28:   Notes:
 29:   Defaults to 1 if the available core count cannot be found

 31: @*/
 32: PetscErrorCode PetscGetNCores(PetscInt *ncores)
 33: {
 35:   if (N_CORES == -1) {
 36:     N_CORES = 1; /* Default value if number of cores cannot be found out */

 38: #if defined(PETSC_HAVE_SYS_SYSINFO_H) && (PETSC_HAVE_GET_NPROCS) /* Linux */
 39:     N_CORES = get_nprocs();
 40: #elif defined(PETSC_HAVE_SYS_SYSCTL_H) && (PETSC_HAVE_SYSCTLBYNAME) /* MacOS, BSD */
 41:     {
 42:       int            ierr; /* sysctlbyname() returns an int status, nonzero on failure */
 43:       size_t         len = sizeof(N_CORES);
 44:       ierr = sysctlbyname("hw.activecpu",&N_CORES,&len,NULL,0); /* osx prefers activecpu over ncpu */
 45:       if (ierr) { /* freebsd check ncpu */
 46:         sysctlbyname("hw.ncpu",&N_CORES,&len,NULL,0);
 47:         /* continue even if there is an error */
 48:       }
 49:     }
 50: #elif defined(PETSC_HAVE_WINDOWS_H)   /* Windows */
 51:     {
 52:       SYSTEM_INFO sysinfo;
 53:       GetSystemInfo(&sysinfo);
 54:       N_CORES = sysinfo.dwNumberOfProcessors;
 55:     }
 56: #endif
 57:   }
 58:   if (ncores) *ncores = N_CORES;
 59:   return(0);
 60: }

 62: PetscErrorCode PetscThreadCommWorldInitialize(); /* defined elsewhere; builds PETSC_THREAD_COMM_WORLD */
 65: /*
 66:   PetscGetThreadCommWorld - Gets the global thread communicator.
 67:                             Creates it if it does not exist already.

 69:   Not Collective

 71:   Output Parameters:
 72:   tcommp - pointer to the global thread communicator

 74:   Level: Intermediate
 75: */
 76: PetscErrorCode PetscGetThreadCommWorld(PetscThreadComm *tcommp)
 77: {

 81:   if (!PETSC_THREAD_COMM_WORLD) { /* lazily create the world threadcomm on first request */
 82:     PetscThreadCommWorldInitialize();
 83:   }
 84:   *tcommp = PETSC_THREAD_COMM_WORLD;
 85:   return(0);
 86: }

 90: /*@C
 91:   PetscCommGetThreadComm - Gets the thread communicator
 92:                            associated with the MPI communicator

 94:   Not Collective

 96:   Input Parameters:
 97: . comm - the MPI communicator

 99:   Output Parameters:
100: . tcommp - pointer to the thread communicator

102:   Notes: If no thread communicator is on the MPI_Comm then the global thread communicator
103:          is returned.
104:   Level: Intermediate

106: .seealso: PetscThreadCommCreate(), PetscThreadCommDestroy()
107: @*/
108: PetscErrorCode PetscCommGetThreadComm(MPI_Comm comm,PetscThreadComm *tcommp)
109: {
111:   PetscMPIInt    flg;
112:   void           *ptr;

115:   MPI_Attr_get(comm,Petsc_ThreadComm_keyval,(PetscThreadComm*)&ptr,&flg); /* look up the threadcomm cached on the MPI_Comm */
116:   if (!flg) { /* no attribute on this comm: fall back to the global world threadcomm */
117:     PetscGetThreadCommWorld(tcommp);
118:   } else *tcommp      = (PetscThreadComm)ptr;
119:   return(0);
120: }

124: /*
125:    PetscThreadCommCreate - Allocates a thread communicator object

127:    Not Collective

129:    Output Parameters:
130: .  tcomm - pointer to the thread communicator object

132:    Level: developer

134: .seealso: PetscThreadCommDestroy()
135: */
136: PetscErrorCode PetscThreadCommCreate(PetscThreadComm *tcomm)
137: {
138:   PetscErrorCode  ierr;
139:   PetscThreadComm tcommout;


144:   *tcomm = NULL;

146:   PetscNew(struct _p_PetscThreadComm,&tcommout);
147:   tcommout->refct        = 0;  /* starts unreferenced; whoever attaches/uses it bumps the count */
148:   tcommout->nworkThreads =  -1; /* -1 = thread count not set yet (see PetscThreadCommSetNThreads) */
149:   tcommout->affinities   = NULL;
150:   PetscNew(struct _PetscThreadCommOps,&tcommout->ops); /* zeroed vtable; filled in by the type's create routine */
151:   tcommout->leader       = 0;
152:   *tcomm                 = tcommout;

154:   return(0);
155: }

157: #if defined(PETSC_USE_DEBUG)

159: static PetscErrorCode PetscThreadCommStackCreate_kernel(PetscInt trank)
160: {
      /* Runs on every worker thread: give each thread other than rank 0 its own
         PetscStack so PETSc stack tracing works off the main thread. */
161:   if (trank && !PetscStackActive()) {
162:     PetscStack *petscstack_in;
163:     petscstack_in = (PetscStack*)malloc(sizeof(PetscStack)); /* NOTE(review): raw malloc, presumably to bypass PETSc memory logging on worker threads — confirm */
164:     petscstack_in->currentsize = 0;
165:     PetscThreadLocalSetValue((PetscThreadKey*)&petscstack,petscstack_in);
166:   }
167:   return 0;
168: }

170: /* Creates stack frames for threads other than the main thread */
173: PetscErrorCode  PetscThreadCommStackCreate(void)
174: {
176:   PetscThreadCommRunKernel0(PETSC_COMM_SELF,(PetscThreadKernel)PetscThreadCommStackCreate_kernel);
177:   PetscThreadCommBarrier(PETSC_COMM_SELF); /* wait until every thread has built its stack before returning */
178:   return 0;
179: }

181: static PetscErrorCode PetscThreadCommStackDestroy_kernel(PetscInt trank)
182: {
      /* Runs on every worker thread: free the per-thread PetscStack allocated by
         PetscThreadCommStackCreate_kernel (main thread, rank 0, keeps its own). */
183:   if (trank && PetscStackActive()) {
184:     PetscStack *petscstack_in;
185:     petscstack_in = (PetscStack*)PetscThreadLocalGetValue(petscstack);
186:     free(petscstack_in);
187:     PetscThreadLocalSetValue((PetscThreadKey*)&petscstack,(PetscStack*)0); /* clear TLS so PetscStackActive() is false afterwards */
188:   }
189:   return 0;
190: }

194: /* Destroy stack frames for threads other than main thread
195:  *
196:  * The keyval may have been destroyed by the time this function is called, thus we must call
197:  * PetscThreadCommRunKernel0_Private so that we never reference an MPI_Comm.
198:  */
199: PetscErrorCode  PetscThreadCommStackDestroy(void)
200: {
203:   PetscThreadCommRunKernel0_Private(PETSC_THREAD_COMM_WORLD,(PetscThreadKernel)PetscThreadCommStackDestroy_kernel);
204:   PETSC_THREAD_COMM_WORLD = NULL; /* world threadcomm is being torn down; drop the global reference */
205:   return(0);
207: }
208: #else
211: PetscErrorCode  PetscThreadCommStackCreate(void)
212: {
      /* Non-debug build: no per-thread PetscStack is kept, so nothing to create. */
214:   return(0);
215: }

219: PetscErrorCode  PetscThreadCommStackDestroy(void)
220: {
      /* Non-debug build: only reset the global world threadcomm pointer. */
222:   PETSC_THREAD_COMM_WORLD = NULL;
223:   return(0);
224: }

226: #endif

230: /*
231:   PetscThreadCommDestroy - Frees a thread communicator object

233:   Not Collective

235:   Input Parameters:
236: . tcomm - the PetscThreadComm object

238:   Level: developer

240: .seealso: PetscThreadCommCreate()
241: */
242: PetscErrorCode PetscThreadCommDestroy(PetscThreadComm *tcomm)
243: {

247:   if (!*tcomm) return(0);
248:   if (!--(*tcomm)->refct) { /* last reference gone: release threads, vtable and the global job queue */
249:     PetscThreadCommStackDestroy();
250:     /* Destroy the implementation specific data struct */
251:     if ((*tcomm)->ops->destroy) (*(*tcomm)->ops->destroy)(*tcomm);

253:     PetscFree((*tcomm)->affinities);
254:     PetscFree((*tcomm)->ops);
255:     PetscFree(PetscJobQueue->jobs[0].job_status); /* NOTE(review): PetscJobQueue is dereferenced without a NULL check here — confirm it is always allocated before destroy */
256:     PetscFree(PetscJobQueue->jobs);
257:     PetscFree(PetscJobQueue);
258:     PetscThreadCommReductionDestroy((*tcomm)->red);
259:     PetscFree((*tcomm));
260:   }
261:   *tcomm = NULL;
262:   return(0);
263: }

267: /*@C
268:    PetscThreadCommView - view a thread communicator

270:    Collective on comm

272:    Input Parameters:
273: +  comm - MPI communicator
274: -  viewer - viewer to display, for example PETSC_VIEWER_STDOUT_WORLD

276:    Level: developer

278: .seealso: PetscThreadCommCreate()
279: @*/
280: PetscErrorCode PetscThreadCommView(MPI_Comm comm,PetscViewer viewer)
281: {
282:   PetscErrorCode  ierr;
283:   PetscBool       iascii;
284:   PetscThreadComm tcomm=0;

287:   PetscCommGetThreadComm(comm,&tcomm);
288:   if (!viewer) {PetscViewerASCIIGetStdout(comm,&viewer);} /* default to stdout when no viewer given */
289:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
290:   if (iascii) { /* only ASCII viewers are supported here */
291:     PetscViewerASCIIPrintf(viewer,"Thread Communicator\n");
292:     PetscViewerASCIIPushTab(viewer);
293:     PetscViewerASCIIPrintf(viewer,"Number of threads = %D\n",tcomm->nworkThreads);
294:     PetscViewerASCIIPrintf(viewer,"Type = %s\n",tcomm->type);
295:     PetscViewerASCIIPopTab(viewer);
296:     if (tcomm->ops->view) { /* let the implementation print type-specific details */
297:       PetscViewerASCIIPushTab(viewer);
298:       (*tcomm->ops->view)(tcomm,viewer);
299:       PetscViewerASCIIPopTab(viewer);
300:     }
301:   }
302:   return(0);
303: }

307: /*
308:    PetscThreadCommSetNThreads - Set the thread count for the thread communicator

310:    Not collective

312:    Input Parameters:
313: +  tcomm - the thread communicator
314: -  nthreads - Number of threads

316:    Options Database keys:
317:    -threadcomm_nthreads <nthreads> Number of threads to use

319:    Level: developer

321:    Notes:
322:    Defaults to using 1 thread.

324:    Use nthreads = PETSC_DECIDE or -threadcomm_nthreads PETSC_DECIDE for PETSc to decide the number of threads.


327: .seealso: PetscThreadCommGetNThreads()
328: */
329: PetscErrorCode PetscThreadCommSetNThreads(PetscThreadComm tcomm,PetscInt nthreads)
330: {
332:   PetscBool      flg;
333:   PetscInt       nthr;

336:   if (nthreads == PETSC_DECIDE) { /* consult the options database; default to 1 thread */
337:     tcomm->nworkThreads = 1;
338:     PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Thread comm - setting number of threads",NULL);
339:     PetscOptionsInt("-threadcomm_nthreads","number of threads to use in the thread communicator","PetscThreadCommSetNThreads",1,&nthr,&flg);
340:     PetscOptionsEnd();
341:     if (flg) {
342:       if (nthr == PETSC_DECIDE) tcomm->nworkThreads = N_CORES; /* one thread per core; assumes PetscGetNCores() already ran — TODO confirm caller guarantees this */
343:       else tcomm->nworkThreads = nthr;
344:     }
345:   } else tcomm->nworkThreads = nthreads;
346:   return(0);
347: }

351: /*@C
352:    PetscThreadCommGetNThreads - Gets the thread count from the thread communicator
353:                                 associated with the MPI communicator

355:    Not collective

357:    Input Parameters:
358: .  comm - the MPI communicator

360:    Output Parameters:
361: .  nthreads - number of threads

363:    Level: developer

365: .seealso: PetscThreadCommSetNThreads()
366: @*/
367: PetscErrorCode PetscThreadCommGetNThreads(MPI_Comm comm,PetscInt *nthreads)
368: {
369:   PetscErrorCode  ierr;
370:   PetscThreadComm tcomm=0;

373:   PetscCommGetThreadComm(comm,&tcomm); /* falls back to the world threadcomm if comm has none attached */
374:   *nthreads = tcomm->nworkThreads;
375:   return(0);
376: }

380: /*
381:    PetscThreadCommSetAffinities - Sets the core affinity for threads
382:                                   (which threads run on which cores)

384:    Not collective

386:    Input Parameters:
387: +  tcomm - the thread communicator
388: -  affinities - array of core affinity for threads

390:    Options Database keys:
391: .  -threadcomm_affinities <list of thread affinities>

393:    Level: developer

395:    Notes:
396:    Use affinities = NULL for PETSc to decide the affinities.
397:    If PETSc decides affinities, then each thread has affinity to
398:    a unique core with the main thread on Core 0, thread0 on core 1,
399:    and so on. If the thread count is more the number of available
400:    cores then multiple threads share a core.

402:    The first value is the affinity for the main thread

404:    The affinity list can be passed as
405:    a comma seperated list:                                 0,1,2,3,4,5,6,7
406:    a range (start-end+1):                                  0-8
407:    a range with given increment (start-end+1:inc):         0-7:2
408:    a combination of values and ranges seperated by commas: 0,1-8,8-15:2

410:    There must be no intervening spaces between the values.

412: .seealso: PetscThreadCommGetAffinities(), PetscThreadCommSetNThreads()
413: */
414: PetscErrorCode PetscThreadCommSetAffinities(PetscThreadComm tcomm,const PetscInt affinities[])
415: {
417:   PetscBool      flg;
418:   PetscInt       nmax=tcomm->nworkThreads; /* expected length of any user-supplied affinity list */

421:   /* Free if affinities set already */
422:   PetscFree(tcomm->affinities);
423:   PetscMalloc(tcomm->nworkThreads*sizeof(PetscInt),&tcomm->affinities);

425:   if (!affinities) {
426:     /* Check if option is present in the options database */
427:     PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Thread comm - setting thread affinities",NULL);
428:     PetscOptionsIntArray("-threadcomm_affinities","Set core affinities of threads","PetscThreadCommSetAffinities",tcomm->affinities,&nmax,&flg);
429:     PetscOptionsEnd();
430:     if (flg) {
431:       if (nmax != tcomm->nworkThreads) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Must set affinities for all threads, Threads = %D, Core affinities set = %D",tcomm->nworkThreads,nmax);
432:     } else {
433:       /* PETSc default affinities */
434:       PetscInt i;
435:       for (i=0; i<tcomm->nworkThreads; i++) tcomm->affinities[i] = i%N_CORES; /* round-robin threads over the available cores */
436:     }
437:   } else {
438:     PetscMemcpy(tcomm->affinities,affinities,tcomm->nworkThreads*sizeof(PetscInt)); /* caller-supplied list: copy verbatim */
439:   }
440:   return(0);
441: }

445: /*@C
446:    PetscThreadCommGetAffinities - Returns the core affinities set for the
447:                                   thread communicator associated with the MPI_Comm

449:     Not collective

451:     Input Parameters:
452: .   comm - MPI communicator

454:     Output Parameters:
455: .   affinities - thread affinities

457:     Level: developer

459:     Notes:
460:     The user must allocate space (nthreads PetscInts) for the
461:     affinities. Must call PetscThreadCommSetAffinities before.

463: */
464: PetscErrorCode PetscThreadCommGetAffinities(MPI_Comm comm,PetscInt affinities[])
465: {
466:   PetscErrorCode  ierr;
467:   PetscThreadComm tcomm=0;

470:   PetscCommGetThreadComm(comm,&tcomm);
472:   PetscMemcpy(affinities,tcomm->affinities,tcomm->nworkThreads*sizeof(PetscInt)); /* copy out; caller owns the destination buffer */
473:   return(0);
474: }

478: /*
479:    PetscThreadCommSetType - Sets the threading model for the thread communicator

481:    Logically collective

483:    Input Parameters:
484: +  tcomm - the thread communicator
485: -  type  - the type of thread model needed


488:    Options Database keys:
489:    -threadcomm_type <type>

491:    Available types
492:    See "petsc/include/petscthreadcomm.h" for available types

494: */
495: PetscErrorCode PetscThreadCommSetType(PetscThreadComm tcomm,PetscThreadCommType type)
496: {
497:   PetscErrorCode ierr,(*r)(PetscThreadComm);
498:   char           ttype[256];
499:   PetscBool      flg;

503:   if (!PetscThreadCommRegisterAllCalled) { PetscThreadCommRegisterAll();} /* make sure the type registry is populated */

505:   PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Thread comm - setting threading model",NULL);
506:   PetscOptionsList("-threadcomm_type","Thread communicator model","PetscThreadCommSetType",PetscThreadCommList,type,ttype,256,&flg);
507:   PetscOptionsEnd();
508:   if (!flg) { /* no command-line override: use the type passed in */
509:     PetscStrcpy(ttype,type);
510:   }
511:   PetscFunctionListFind(PetscThreadCommList,ttype,&r);
512:   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscThreadComm type %s",ttype);
513:   (*r)(tcomm); /* invoke the type's create routine to fill in tcomm->ops */
514:   PetscStrcmp(NOTHREAD,tcomm->type,&tcomm->isnothread); /* cache whether this is the serial "nothread" type */
515:   return(0);
516: }

520: /*  PetscThreadCommBarrier - Apply a barrier on the thread communicator
521:                              associated with the MPI communicator

523:     Not collective

525:     Input Parameters:
526: .   comm - the MPI communicator

528:     Level: developer

530:     Notes:
531:     This routine provides an interface to put an explicit barrier between
532:     successive kernel calls to ensure that the first kernel is executed
533:     by all the threads before calling the next one.

535:     Called by the main thread only.

537:     May not be applicable to all types.
538: */
539: PetscErrorCode PetscThreadCommBarrier(MPI_Comm comm)
540: {
541:   PetscErrorCode  ierr;
542:   PetscThreadComm tcomm=0;

545:   PetscLogEventBegin(ThreadComm_Barrier,0,0,0,0);
546:   PetscCommGetThreadComm(comm,&tcomm);
547:   if (tcomm->ops->barrier) { /* barrier is optional; a no-op for types that don't provide one */
548:     (*tcomm->ops->barrier)(tcomm);
549:   }
550:   PetscLogEventEnd(ThreadComm_Barrier,0,0,0,0);
551:   return(0);
552: }

556: /*@C
557:   PetscThreadCommRegister - Registers a thread communicator implementation under the
558:                             given name so it can be selected with -threadcomm_type

559:   Level: advanced
560: @*/
561: PetscErrorCode  PetscThreadCommRegister(const char sname[],PetscErrorCode (*function)(PetscThreadComm))
562: {

566:   PetscFunctionListAdd(&PetscThreadCommList,sname,function); /* add the create routine to the type registry */
567:   return(0);
568: }

572: /*@C
573:    PetscThreadCommGetScalars - Gets pointers to locations for storing three PetscScalars that may be passed
574:                                to PetscThreadCommRunKernel to ensure that the scalar values remain valid
575:                                even after the main thread exits the calling function.

577:    Input Parameters:
578: +  comm - the MPI communicator having the thread communicator
579: .  val1 - pointer to store the first scalar value
580: .  val2 - pointer to store the second scalar value
581: -  val3 - pointer to store the third scalar value

583:    Level: developer

585:    Notes:
586:    This is a utility function to ensure that any scalars passed to PetscThreadCommRunKernel remain
587:    valid even after the main thread exits the calling function. If any scalars need to passed to
588:    PetscThreadCommRunKernel then these should be first stored in the locations provided by PetscThreadCommGetScalars()

590:    Pass NULL if any pointers are not needed.

592:    Called by the main thread only, not from within kernels

594:    Typical usage:

596:    PetscScalar *valptr;
597:    PetscThreadCommGetScalars(comm,&valptr,NULL,NULL);
598:    *valptr = alpha;   (alpha is the scalar you wish to pass in PetscThreadCommRunKernel)

600:    PetscThreadCommRunKernel(comm,(PetscThreadKernel)kernel_func,3,x,y,valptr);

602: .seealso: PetscThreadCommRunKernel()
603: @*/
604: PetscErrorCode PetscThreadCommGetScalars(MPI_Comm comm,PetscScalar **val1, PetscScalar **val2, PetscScalar **val3)
605: {
606:   PetscErrorCode        ierr;
607:   PetscThreadComm       tcomm;
608:   PetscThreadCommJobCtx job;
609:   PetscInt              job_num;

612:   PetscCommGetThreadComm(comm,&tcomm);
613:   job_num = PetscJobQueue->ctr%tcomm->nkernels; /* slot the NEXT kernel launch will use in the circular job queue */
614:   job     = &PetscJobQueue->jobs[job_num];
615:   if (val1) *val1 = &job->scalars[0];
616:   if (val2) *val2 = &job->scalars[1];
617:   if (val3) *val3 = &job->scalars[2];
618:   return(0);
619: }

623: /*@C
624:    PetscThreadCommGetInts - Gets pointers to locations for storing three PetscInts that may be passed
625:                                to PetscThreadCommRunKernel to ensure that the scalar values remain valid
626:                                even after the main thread exits the calling function.

628:    Input Parameters:
629: +  comm - the MPI communicator having the thread communicator
630: .  val1 - pointer to store the first integer value
631: .  val2 - pointer to store the second integer value
632: -  val3 - pointer to store the third integer value

634:    Level: developer

636:    Notes:
637:    This is a utility function to ensure that any scalars passed to PetscThreadCommRunKernel remain
638:    valid even after the main thread exits the calling function. If any scalars need to passed to
639:    PetscThreadCommRunKernel then these should be first stored in the locations provided by PetscThreadCommGetInts()

641:    Pass NULL if any pointers are not needed.

643:    Called by the main thread only, not from within kernels

645:    Typical usage:

647:    PetscInt *valptr;
648:    PetscThreadCommGetInts(comm,&valptr,NULL,NULL);
649:    *valptr = alpha;   (alpha is the integer you wish to pass in PetscThreadCommRunKernel)

651:    PetscThreadCommRunKernel(comm,(PetscThreadKernel)kernel_func,3,x,y,valptr);

653: .seealso: PetscThreadCommRunKernel()
654: @*/
655: PetscErrorCode PetscThreadCommGetInts(MPI_Comm comm,PetscInt **val1, PetscInt **val2, PetscInt **val3)
656: {
657:   PetscErrorCode        ierr;
658:   PetscThreadComm       tcomm;
659:   PetscThreadCommJobCtx job;
660:   PetscInt              job_num;

663:   PetscCommGetThreadComm(comm,&tcomm);
664:   job_num = PetscJobQueue->ctr%tcomm->nkernels; /* slot the NEXT kernel launch will use in the circular job queue */
665:   job     = &PetscJobQueue->jobs[job_num];
666:   if (val1) *val1 = &job->ints[0];
667:   if (val2) *val2 = &job->ints[1];
668:   if (val3) *val3 = &job->ints[2];
669:   return(0);
670: }

674: /*@C
675:    PetscThreadCommRunKernel - Runs the kernel using the thread communicator
676:                               associated with the MPI communicator

678:    Not Collective

680:    Input Parameters:
681: +  comm  - the MPI communicator
682: .  func  - the kernel (needs to be cast to PetscThreadKernel)
683: .  nargs - Number of input arguments for the kernel
684: -  ...   - variable list of input arguments

686:    Level: developer

688:    Notes:
689:    All input arguments to the kernel must be passed by reference, Petsc objects are
690:    inherrently passed by reference so you don't need to additionally & them.

692:    Example usage - PetscThreadCommRunKernel(comm,(PetscThreadKernel)kernel_func,3,x,y,z);
693:    with kernel_func declared as
694:    PetscErrorCode kernel_func(PetscInt thread_id,PetscInt* x, PetscScalar* y, PetscReal* z)

696:    The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
697:    by PETSc.

699: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
700: @*/
701: PetscErrorCode PetscThreadCommRunKernel(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),PetscInt nargs,...)
702: {
703:   PetscErrorCode        ierr;
704:   va_list               argptr;
705:   PetscInt              i;
706:   PetscThreadComm       tcomm=0;
707:   PetscThreadCommJobCtx job;

710:   if (nargs > PETSC_KERNEL_NARGS_MAX) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Requested %D input arguments for kernel, max. limit %D",nargs,PETSC_KERNEL_NARGS_MAX);
711:   PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
712:   PetscCommGetThreadComm(comm,&tcomm);
713:   job  = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
714:   if (job->job_status[0] != THREAD_JOB_NONE) { /* slot was used before: spin until every thread finished the previous job in it */
715:     for (i=0; i<tcomm->nworkThreads; i++) {
716:       while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
717:     }
718:   }

720:   job->tcomm          = tcomm;
721:   job->tcomm->job_ctr = PetscJobQueue->ctr;
722:   job->nargs          = nargs;
723:   job->pfunc          = (PetscThreadKernel)func;
724:   va_start(argptr,nargs);
725:   for (i=0; i < nargs; i++) job->args[i] = va_arg(argptr,void*); /* copy the variadic args into the job so they outlive this call */
726:   va_end(argptr);
727:   for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED; /* publish: workers poll job_status for posted work */

729:   PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
730:   PetscJobQueue->kernel_ctr++;
731:   if (tcomm->isnothread) { /* serial "nothread" type: run the kernel inline on the main thread */
732:     PetscRunKernel(0,job->nargs,job);
733:     job->job_status[0] = THREAD_JOB_COMPLETED;
734:   } else {
735:     (*tcomm->ops->runkernel)(tcomm,job);
736:   }
737:   PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
738:   return(0);
739: }

743: /* The zero-argument kernel needs to be callable with an unwrapped PetscThreadComm after Petsc_ThreadComm_keyval has been freed. */
744: static PetscErrorCode PetscThreadCommRunKernel0_Private(PetscThreadComm tcomm,PetscErrorCode (*func)(PetscInt,...))
745: {
746:   PetscErrorCode        ierr;
747:   PetscInt              i;
748:   PetscThreadCommJobCtx job;

751:   if (tcomm->isnothread) { /* serial type: call directly as thread rank 0, no job queue needed */
752:     (*func)(0);
753:     return(0);
754:   }

756:   if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
757:   job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
758:   if (job->job_status[0] != THREAD_JOB_NONE) { /* slot in use: wait for all threads to finish the previous job in it */
759:     for (i=0; i<tcomm->nworkThreads; i++) {
760:       while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
761:     }
762:   }

764:   job->tcomm          = tcomm;
765:   job->tcomm->job_ctr = PetscJobQueue->ctr;
766:   job->nargs          = 1;
767:   job->pfunc          = (PetscThreadKernel)func;

769:   for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED; /* publish to the workers */

771:   PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
772:   PetscJobQueue->kernel_ctr++;

774:   (*tcomm->ops->runkernel)(tcomm,job);
775:   return(0);
776: }

780: /*@C
781:    PetscThreadCommRunKernel0 - PetscThreadCommRunKernel version for kernels with no
782:                                input arguments

784:    Input Parameters:
785: +  comm  - the MPI communicator
786: -  func  - the kernel (needs to be cast to PetscThreadKernel)

788:    Level: developer

790:    Notes:
791:    All input arguments to the kernel must be passed by reference, Petsc objects are
792:    inherrently passed by reference so you don't need to additionally & them.

794:    Example usage - PetscThreadCommRunKernel0(comm,(PetscThreadKernel)kernel_func);
795:    with kernel_func declared as
796:    PetscErrorCode kernel_func(PetscInt thread_id)

798:    The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
799:    by PETSc.

801: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
802: @*/
803: PetscErrorCode PetscThreadCommRunKernel0(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...))
804: {
805:   PetscErrorCode        ierr;
806:   PetscThreadComm       tcomm=0;

809:   PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
810:   PetscCommGetThreadComm(comm,&tcomm);
811:   PetscThreadCommRunKernel0_Private(tcomm,func); /* thin wrapper: resolve comm -> tcomm, then delegate */
812:   PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
813:   return(0);
814: }

818: /*@C
819:    PetscThreadCommRunKernel1 - PetscThreadCommRunKernel version for kernels with 1
820:                                input argument

822:    Input Parameters:
823: +  comm  - the MPI communicator
824: .  func  - the kernel (needs to be cast to PetscThreadKernel)
825: -  in1   - input argument for the kernel

827:    Level: developer

829:    Notes:
830:    All input arguments to the kernel must be passed by reference, Petsc objects are
831:    inherrently passed by reference so you don't need to additionally & them.

833:    Example usage - PetscThreadCommRunKernel1(comm,(PetscThreadKernel)kernel_func,x);
834:    with kernel_func declared as
835:    PetscErrorCode kernel_func(PetscInt thread_id,PetscInt* x)

837:    The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
838:    by PETSc.

840: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
841: @*/
842: PetscErrorCode PetscThreadCommRunKernel1(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1)
843: {
844:   PetscErrorCode        ierr;
845:   PetscInt              i;
846:   PetscThreadComm       tcomm=0;
847:   PetscThreadCommJobCtx job;

850:   PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
851:   PetscCommGetThreadComm(comm,&tcomm);
852:   if (tcomm->isnothread) { /* serial type: run inline as thread rank 0 */
853:     (*func)(0,in1);
854:     PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
855:     return(0);
856:   }

858:   if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
859:   job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
860:   if (job->job_status[0] != THREAD_JOB_NONE) { /* slot in use: wait for all threads to finish its previous job */
861:     for (i=0; i<tcomm->nworkThreads; i++) {
862:       while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
863:     }
864:   }

866:   job->tcomm          = tcomm;
867:   job->tcomm->job_ctr = PetscJobQueue->ctr;
868:   job->nargs          = 1;
869:   job->pfunc          = (PetscThreadKernel)func;
870:   job->args[0]        = in1;

872:   for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED; /* publish to the workers */

874:   PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
875:   PetscJobQueue->kernel_ctr++;

877:   (*tcomm->ops->runkernel)(tcomm,job);

879:   PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
880:   return(0);
881: }

885: /*@C
886:    PetscThreadCommRunKernel2 - PetscThreadCommRunKernel version for kernels with 2
887:                                input arguments

889:    Input Parameters:
890: +  comm  - the MPI communicator
891: .  func  - the kernel (needs to be cast to PetscThreadKernel)
892: .  in1   - 1st input argument for the kernel
893: -  in2   - 2nd input argument for the kernel

895:    Level: developer

897:    Notes:
898:    All input arguments to the kernel must be passed by reference, Petsc objects are
899:    inherrently passed by reference so you don't need to additionally & them.

901:    Example usage - PetscThreadCommRunKernel2(comm,(PetscThreadKernel)kernel_func,x,y);
902:    with kernel_func declared as
903:    PetscErrorCode kernel_func(PetscInt thread_id,PetscInt *x,PetscInt *y)

905:    The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
906:    by PETSc.

908: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
909: @*/
910: PetscErrorCode PetscThreadCommRunKernel2(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1,void *in2)
911: {
912:   PetscErrorCode        ierr;
913:   PetscInt              i;
914:   PetscThreadComm       tcomm=0;
915:   PetscThreadCommJobCtx job;

918:   PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
919:   PetscCommGetThreadComm(comm,&tcomm);
920:   if (tcomm->isnothread) { /* serial type: run inline as thread rank 0 */
921:     (*func)(0,in1,in2);
922:     PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
923:     return(0);
924:   }

926:   if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
927:   job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
928:   if (job->job_status[0] != THREAD_JOB_NONE) { /* slot in use: wait for all threads to finish its previous job */
929:     for (i=0; i<tcomm->nworkThreads; i++) {
930:       while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
931:     }
932:   }

934:   job->tcomm          = tcomm;
935:   job->tcomm->job_ctr = PetscJobQueue->ctr;
936:   job->nargs          = 2;
937:   job->pfunc          = (PetscThreadKernel)func;
938:   job->args[0]        = in1;
939:   job->args[1]        = in2;

941:   for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED; /* publish to the workers */

943:   PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
944:   PetscJobQueue->kernel_ctr++;

946:   (*tcomm->ops->runkernel)(tcomm,job);

948:   PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
949:   return(0);
950: }

954: /*@C
955:    PetscThreadCommRunKernel3 - PetscThreadCommRunKernel version for kernels with 3
956:                                input arguments

958:    Input Parameters:
959: +  comm  - the MPI communicator
960: .  func  - the kernel (needs to be cast to PetscThreadKernel)
961: .  in1   - first input argument for the kernel
962: .  in2   - second input argument for the kernel
963: -  in3   - third input argument for the kernel

965:    Level: developer

967:    Notes:
968:    All input arguments to the kernel must be passed by reference, Petsc objects are
969:    inherrently passed by reference so you don't need to additionally & them.

971:    Example usage - PetscThreadCommRunKernel3(comm,(PetscThreadKernel)kernel_func,x,y,z);
972:    with kernel_func declared as
973:    PetscErrorCode kernel_func(PetscInt thread_id,PetscInt *x,PetscInt *y,PetscInt *z)

975:    The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
976:    by PETSc.

978: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
979: @*/
980: PetscErrorCode PetscThreadCommRunKernel3(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1,void *in2,void *in3)
981: {
982:   PetscErrorCode        ierr;
983:   PetscInt              i;
984:   PetscThreadComm       tcomm=0;
985:   PetscThreadCommJobCtx job;

988:   PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
989:   PetscCommGetThreadComm(comm,&tcomm);
990:   if (tcomm->isnothread) { /* serial type: run inline as thread rank 0 */
991:     (*func)(0,in1,in2,in3);
992:     PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
993:     return(0);
994:   }

996:   if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
997:   job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
998:   if (job->job_status[0] != THREAD_JOB_NONE) { /* slot in use: wait for all threads to finish its previous job */
999:     for (i=0; i<tcomm->nworkThreads; i++) {
1000:       while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
1001:     }
1002:   }

1004:   job->tcomm          = tcomm;
1005:   job->tcomm->job_ctr = PetscJobQueue->ctr;
1006:   job->nargs          = 3;
1007:   job->pfunc          = (PetscThreadKernel)func;
1008:   job->args[0]        = in1;
1009:   job->args[1]        = in2;
1010:   job->args[2]        = in3;

1012:   for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED; /* publish to the workers */

1014:   PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
1015:   PetscJobQueue->kernel_ctr++;

1017:   (*tcomm->ops->runkernel)(tcomm,job);

1019:   PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
1020:   return(0);
1021: }

/*@C
   PetscThreadCommRunKernel4 - PetscThreadCommRunKernel version for kernels with 4
                               input arguments

   Input Parameters:
+  comm  - the MPI communicator
.  func  - the kernel (needs to be cast to PetscThreadKernel)
.  in1   - first input argument for the kernel
.  in2   - second input argument for the kernel
.  in3   - third input argument for the kernel
-  in4   - fourth input argument for the kernel

   Level: developer

   Notes:
   All input arguments to the kernel must be passed by reference, Petsc objects are
   inherently passed by reference so you don't need to additionally & them.

   Example usage - PetscThreadCommRunKernel4(comm,(PetscThreadKernel)kernel_func,w,x,y,z);
   with kernel_func declared as
   PetscErrorCode kernel_func(PetscInt thread_id,PetscInt *w,PetscInt *x,PetscInt *y,PetscInt *z)

   The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
   by PETSc.

.seealso: PetscThreadCommCreate(), PetscThreadCommGetNThreads()
@*/
PetscErrorCode PetscThreadCommRunKernel4(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1,void *in2,void *in3,void *in4)
{
  PetscErrorCode        ierr;
  PetscInt              i;
  PetscThreadComm       tcomm=0;
  PetscThreadCommJobCtx job;

  PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
  PetscCommGetThreadComm(comm,&tcomm);
  if (tcomm->isnothread) {
    /* Non-threaded communicator: run the kernel inline as thread rank 0 */
    (*func)(0,in1,in2,in3,in4);
    PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
    return(0);
  }

  if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
  job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
  /* If this queue slot was used before, spin until every worker has finished
     the previous job stored in it; otherwise we would overwrite a live job */
  if (job->job_status[0] != THREAD_JOB_NONE) {
    for (i=0; i<tcomm->nworkThreads; i++) {
      while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
    }
  }

  /* Fill in the job context; this must happen BEFORE the status flags below
     are set to THREAD_JOB_POSTED, which releases the job to the workers */
  job->tcomm          = tcomm;
  job->tcomm->job_ctr = PetscJobQueue->ctr;
  job->nargs          = 4;
  job->pfunc          = (PetscThreadKernel)func;
  job->args[0]        = in1;
  job->args[1]        = in2;
  job->args[2]        = in3;
  job->args[3]        = in4;

  for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;

  PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
  PetscJobQueue->kernel_ctr++;

  /* Hand the job to the active threading backend (pthread, OpenMP, ...) */
  (*tcomm->ops->runkernel)(tcomm,job);

  PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
  return(0);
}

/*@C
   PetscThreadCommRunKernel6 - PetscThreadCommRunKernel version for kernels with 6
                               input arguments

   Input Parameters:
+  comm  - the MPI communicator
.  func  - the kernel (needs to be cast to PetscThreadKernel)
.  in1   - first input argument for the kernel
.  in2   - second input argument for the kernel
.  in3   - third input argument for the kernel
.  in4   - fourth input argument for the kernel
.  in5   - fifth input argument for the kernel
-  in6   - sixth input argument for the kernel

   Level: developer

   Notes:
   All input arguments to the kernel must be passed by reference, Petsc objects are
   inherently passed by reference so you don't need to additionally & them.

   Example usage - PetscThreadCommRunKernel6(comm,(PetscThreadKernel)kernel_func,a,b,c,d,e,f);
   with kernel_func declared as
   PetscErrorCode kernel_func(PetscInt thread_id,PetscInt *a,PetscInt *b,PetscInt *c,PetscInt *d,PetscInt *e,PetscInt *f)

   The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
   by PETSc.

.seealso: PetscThreadCommCreate(), PetscThreadCommGetNThreads()
@*/
PetscErrorCode PetscThreadCommRunKernel6(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1,void *in2,void *in3,void *in4,void *in5,void *in6)
{
  PetscErrorCode        ierr;
  PetscInt              i;
  PetscThreadComm       tcomm=0;
  PetscThreadCommJobCtx job;

  PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
  PetscCommGetThreadComm(comm,&tcomm);
  if (tcomm->isnothread) {
    /* Non-threaded communicator: run the kernel inline as thread rank 0 */
    (*func)(0,in1,in2,in3,in4,in5,in6);
    PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
    return(0);
  }

  if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
  job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
  /* If this queue slot was used before, spin until every worker has finished
     the previous job stored in it; otherwise we would overwrite a live job */
  if (job->job_status[0] != THREAD_JOB_NONE) {
    for (i=0; i<tcomm->nworkThreads; i++) {
      while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
    }
  }

  /* Fill in the job context; this must happen BEFORE the status flags below
     are set to THREAD_JOB_POSTED, which releases the job to the workers */
  job->tcomm          = tcomm;
  job->tcomm->job_ctr = PetscJobQueue->ctr;
  job->nargs          = 6;
  job->pfunc          = (PetscThreadKernel)func;
  job->args[0]        = in1;
  job->args[1]        = in2;
  job->args[2]        = in3;
  job->args[3]        = in4;
  job->args[4]        = in5;
  job->args[5]        = in6;


  for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;

  PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
  PetscJobQueue->kernel_ctr++;

  /* Hand the job to the active threading backend (pthread, OpenMP, ...) */
  (*tcomm->ops->runkernel)(tcomm,job);

  PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
  return(0);
}

1174: /*
1175:    Detaches the thread communicator from the MPI communicator if it exists
1176: */
1179: PetscErrorCode PetscThreadCommDetach(MPI_Comm comm)
1180: {
1182:   PetscMPIInt    flg;
1183:   void           *ptr;

1186:   MPI_Attr_get(comm,Petsc_ThreadComm_keyval,&ptr,&flg);
1187:   if (flg) {
1188:     MPI_Attr_delete(comm,Petsc_ThreadComm_keyval);
1189:   }
1190:   return(0);
1191: }

1193: /*
1194:    This routine attaches the thread communicator to the MPI communicator if it does not
1195:    exist already.
1196: */
1199: PetscErrorCode PetscThreadCommAttach(MPI_Comm comm,PetscThreadComm tcomm)
1200: {
1202:   PetscMPIInt    flg;
1203:   void           *ptr;

1206:   MPI_Attr_get(comm,Petsc_ThreadComm_keyval,&ptr,&flg);
1207:   if (!flg) {
1208:     tcomm->refct++;
1209:     MPI_Attr_put(comm,Petsc_ThreadComm_keyval,tcomm);
1210:   }
1211:   return(0);
1212: }

1216: /*
1217:   PetscThreadCommWorldInitialize - Initializes the global thread communicator object

1219:   PetscThreadCommWorldInitialize() defaults to using the nonthreaded communicator.
1220: */
1221: PetscErrorCode PetscThreadCommWorldInitialize(void)
1222: {
1223:   PetscErrorCode  ierr;
1224:   PetscThreadComm tcomm;
1225:   PetscInt        i,j;

1228:   PetscThreadCommCreate(&PETSC_THREAD_COMM_WORLD);
1229:   tcomm = PETSC_THREAD_COMM_WORLD;
1230:   PetscThreadCommSetNThreads(tcomm,PETSC_DECIDE);
1231:   PetscThreadCommSetAffinities(tcomm,NULL);
1232:   PetscNew(struct _p_PetscThreadCommJobQueue,&PetscJobQueue);

1234:   tcomm->nkernels = 16;

1236:   PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Thread comm - setting number of kernels",NULL);
1237:   PetscOptionsInt("-threadcomm_nkernels","number of kernels that can be launched simultaneously","",16,&tcomm->nkernels,NULL);
1238:   PetscOptionsEnd();

1240:   PetscMalloc(tcomm->nkernels*sizeof(struct _p_PetscThreadCommJobCtx),&PetscJobQueue->jobs);
1241:   PetscMalloc(tcomm->nworkThreads*tcomm->nkernels*sizeof(PetscInt),&PetscJobQueue->jobs[0].job_status);
1242:   for (i=0; i<tcomm->nkernels; i++) {
1243:     PetscJobQueue->jobs[i].job_status = PetscJobQueue->jobs[0].job_status + i*tcomm->nworkThreads;
1244:     for (j=0; j<tcomm->nworkThreads; j++) PetscJobQueue->jobs[i].job_status[j] = THREAD_JOB_NONE;
1245:   }
1246:   PetscJobQueue->ctr        = 0;
1247:   PetscJobQueue->kernel_ctr = 0;
1248:   tcomm->job_ctr            = 0;

1250:   PetscThreadCommSetType(tcomm,NOTHREAD);
1251:   PetscThreadCommReductionCreate(tcomm,&tcomm->red);
1252:   PetscThreadCommStackCreate();
1253:   tcomm->refct++;
1254:   return(0);
1255: }

1259: /*
1260:    PetscThreadCommGetOwnershipRanges - Given the global size of an array, computes the local sizes and sets
1261:                                        the starting array indices

1263:    Input Parameters:
1264: +  comm - the MPI communicator which holds the thread communicator
1265: -  N    - the global size of the array

1267:    Output Parameters:
1268: .  trstarts - The starting array indices for each thread. the size of trstarts is nthreads+1

1270:    Notes:
1271:    trstarts is malloced in this routine
1272: */
1273: PetscErrorCode PetscThreadCommGetOwnershipRanges(MPI_Comm comm,PetscInt N,PetscInt *trstarts[])
1274: {
1275:   PetscErrorCode  ierr;
1276:   PetscInt        Q,R;
1277:   PetscBool       S;
1278:   PetscThreadComm tcomm = NULL;
1279:   PetscInt        *trstarts_out,nloc,i;

1282:   PetscCommGetThreadComm(comm,&tcomm);

1284:   PetscMalloc((tcomm->nworkThreads+1)*sizeof(PetscInt),&trstarts_out);
1285:   trstarts_out[0] = 0;
1286:   Q               = N/tcomm->nworkThreads;
1287:   R               = N - Q*tcomm->nworkThreads;
1288:   for (i=0; i<tcomm->nworkThreads; i++) {
1289:     S                 = (PetscBool)(i < R);
1290:     nloc              = S ? Q+1 : Q;
1291:     trstarts_out[i+1] = trstarts_out[i] + nloc;
1292:   }

1294:   *trstarts = trstarts_out;
1295:   return(0);
1296: }

1300: /*
1301:    PetscThreadCommGetRank - Gets the rank of the calling thread

1303:    Input Parameters:
1304: .  tcomm - the thread communicator

1306:    Output Parameters:
1307: .  trank - The rank of the calling thread

1309: */
1310: PetscErrorCode PetscThreadCommGetRank(PetscThreadComm tcomm,PetscInt *trank)
1311: {
1313:   PetscInt       rank = 0;

1316:   if (tcomm->ops->getrank) {
1317:     (*tcomm->ops->getrank)(&rank);
1318:   }
1319:   *trank = rank;
1320:   return(0);
1321: }