Actual source code: pthread.c

petsc-3.3-p5 2012-12-01
  1: /* Define feature test macros to make sure CPU_SET and other functions are available
  2:  */
  3: #define PETSC_DESIRE_FEATURE_TEST_MACROS

  5: #include <petscsys.h>        /*I  "petscsys.h"   I*/
  6: #include <../src/sys/objects/pthread/pthreadimpl.h>

  8: /* Initialize global variables and function pointers */
  9: PetscBool   PetscThreadGo = PETSC_TRUE;
 10: PetscMPIInt PetscMaxThreads = -1;
 11: pthread_t*  PetscThreadPoint=NULL;
 12: PetscInt*   PetscThreadsCoreAffinities=NULL;
 13: PetscInt    PetscMainThreadShareWork = 1;
 14: PetscInt    PetscMainThreadCoreAffinity = 0;
 15: PetscBool   PetscThreadsInitializeCalled = PETSC_FALSE;
 16: #if defined(PETSC_PTHREAD_LOCAL)
 17: PETSC_PTHREAD_LOCAL PetscInt PetscThreadRank;
 18: #else
 19: pthread_key_t PetscThreadsRankkey;
 20: #endif

 22: PetscInt*   PetscThreadRanks;

 24: void*          (*PetscThreadFunc)(void*) = NULL;
 25: PetscErrorCode (*PetscThreadsSynchronizationInitialize)(PetscInt) = NULL;
 26: PetscErrorCode (*PetscThreadsSynchronizationFinalize)(void) = NULL;
 27: void*          (*PetscThreadsWait)(void*) = NULL;
 28: PetscErrorCode (*PetscThreadsRunKernel)(PetscErrorCode (*pFunc)(void*),void**,PetscInt,PetscInt*)=NULL;

 30: static const char *const PetscThreadsSynchronizationTypes[] = {"NOPOOL","MAINPOOL","TRUEPOOL","CHAINPOOL","TREEPOOL","LOCKFREE","PetscThreadsSynchronizationType","THREADSYNC_",0};
 31: static const char *const PetscThreadsAffinityPolicyTypes[] = {"ALL","ONECORE","NONE","ThreadAffinityPolicyType","THREADAFFINITYPOLICY_",0};

 33: static PetscThreadsAffinityPolicyType thread_aff_policy=THREADAFFINITYPOLICY_ONECORE;

 35: static PetscInt     N_CORES;

 37: PetscErrorCode PetscThreadsFinish(void* arg) {
 38:   PetscThreadGo = PETSC_FALSE;
 39:   return(0);
 40: }

 42: PETSC_STATIC_INLINE PetscInt PetscGetThreadRank()
 43: {
 44: #if defined(PETSC_PTHREAD_LOCAL)
 45:   return PetscThreadRank;
 46: #else
 47:   return *((PetscInt*)pthread_getspecific(PetscThreadsRankkey));
 48: #endif
 49: }

 51: #if defined(PETSC_HAVE_SCHED_CPU_SET_T)
 52: /* Set CPU affinity for the main thread, only called by main thread */
 53: void PetscSetMainThreadAffinity(PetscInt icorr)
 54: {
 55:   cpu_set_t mset;

 57:   CPU_ZERO(&mset);
 58:   CPU_SET(icorr%N_CORES,&mset);
 59:   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
 60: }

 62: /* Only called by spawned threads */
 63: void PetscThreadsDoCoreAffinity(void)
 64: {
 65:   PetscInt  i,icorr=0;
 66:   cpu_set_t mset;
 67:   PetscInt  myrank=PetscGetThreadRank();
 68: 
 69:   switch(thread_aff_policy) {
 70:   case THREADAFFINITYPOLICY_ONECORE:
 71:     icorr = PetscThreadsCoreAffinities[myrank];
 72:     CPU_ZERO(&mset);
 73:     CPU_SET(icorr%N_CORES,&mset);
 74:     pthread_setaffinity_np(pthread_self(),sizeof(cpu_set_t),&mset);
 75:     break;
 76:   case THREADAFFINITYPOLICY_ALL:
 77:     CPU_ZERO(&mset);
 78:     for(i=0;i<N_CORES;i++) CPU_SET(i,&mset);
 79:     pthread_setaffinity_np(pthread_self(),sizeof(cpu_set_t),&mset);
 80:     break;
 81:   case THREADAFFINITYPOLICY_NONE:
 82:     break;
 83:   }
 84: }
 85: #endif

 87: /* Sets the CPU affinities for threads */
 90: PetscErrorCode PetscThreadsSetAffinities(PetscInt affinities[])
 91: {
 93:   PetscInt       nworkThreads=PetscMaxThreads+PetscMainThreadShareWork;
 94:   PetscInt       nmax=nworkThreads;
 95:   PetscBool      flg;


 99:   PetscMalloc(nworkThreads*sizeof(PetscInt),&PetscThreadsCoreAffinities);

101:   if(affinities == PETSC_NULL) {
102:     /* PETSc decides affinities */
103:     /* Check if the run-time option is set */
104:     PetscOptionsIntArray("-thread_affinities","Set CPU affinities of threads","PetscThreadsSetAffinities",PetscThreadsCoreAffinities,&nmax,&flg);
105:     if(flg) {
106:       if(nmax != nworkThreads) {
107:         SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Must set affinities for all threads, Threads = %D, CPU affinities set = %D",nworkThreads,nmax);
108:       }
109:     } else {
110:       /* PETSc default affinities */
111:       PetscInt i;
112:       if(PetscMainThreadShareWork) {
113:         PetscThreadsCoreAffinities[0] = PetscMainThreadCoreAffinity;
114:         for(i=1; i< nworkThreads; i++) PetscThreadsCoreAffinities[i] = i%N_CORES;
115:       } else {
116:         for(i=0;i < nworkThreads;i++) PetscThreadsCoreAffinities[i] = (i+1)%N_CORES;
117:       }
118:     }
119:   } else {
120:     /* Set user provided affinities */
121:     PetscMemcpy(PetscThreadsCoreAffinities,affinities,PetscMaxThreads*sizeof(PetscInt));
122:   }
123:     return(0);
124:   }
125: 
128: /*
129:   PetscThreadsInitialize - Initializes the thread synchronization scheme with given
130:   of threads.

132:   Input Parameters:
133: . nthreads - Number of threads to create

135:   Level: beginner

137: .seealso: PetscThreadsFinalize()
138: */
139: PetscErrorCode PetscThreadsInitialize(PetscInt nthreads)
140: {
142:   PetscInt       i;
143:   PetscInt       nworkThreads=PetscMaxThreads+PetscMainThreadShareWork;

146:   if(PetscThreadsInitializeCalled) return(0);

148:   /* Set thread ranks */
149:   PetscMalloc(nworkThreads*sizeof(PetscInt),&PetscThreadRanks);
150:   for(i=0;i< nworkThreads;i++) PetscThreadRanks[i] = i;
151: #if defined(PETSC_PTHREAD_LOCAL)
152:   if(PetscMainThreadShareWork) PetscThreadRank=0; /* Main thread rank */
153: #else
154:   pthread_key_create(&PetscThreadsRankkey,NULL);
155:   if(PetscMainThreadShareWork) {
156:     pthread_setspecific(PetscThreadsRankkey,&PetscThreadRanks[0]);
157:   }
158: #endif
159:   /* Set thread affinities */
160:   PetscThreadsSetAffinities(PETSC_NULL);
161:   /* Initialize thread pool */
162:   if(PetscThreadsSynchronizationInitialize) {
163:     (*PetscThreadsSynchronizationInitialize)(nthreads);
164:   }
165:   PetscThreadsInitializeCalled = PETSC_TRUE;
166:   return(0);
167: }

171: /*
172:   PetscThreadsFinalize - Terminates the thread synchronization scheme initiated
173:   by PetscThreadsInitialize()

175:   Level: beginner

177: .seealso: PetscThreadsInitialize()
178: */
179: PetscErrorCode PetscThreadsFinalize(void)
180: {

184:   if(!PetscThreadsInitializeCalled) return(0);

186:   if (PetscThreadsSynchronizationFinalize) {
187:     (*PetscThreadsSynchronizationFinalize)();
188:   }

190:   PetscFree(PetscThreadsCoreAffinities);
191:   PetscFree(PetscThreadRanks);
192:   PetscThreadsInitializeCalled = PETSC_FALSE;
193:   return(0);
194: }

198: /*
199:    PetscSetMaxPThreads - Sets the number of pthreads to be used.

201:    Not collective
202:   
203:    Input Parameters:
204: .  nthreads - # of pthreads.

206:    Options Database Keys:
207:    -nthreads <nthreads> Number of pthreads to be used.

209:    Level: developer
210:  
211:    Notes:
212:    Use nthreads = PETSC_DECIDE for PETSc to calculate the maximum number of pthreads to be used.
213:    If nthreads = PETSC_DECIDE, PETSc will create (ncpus - 1) threads where ncpus is the number of 
214:    available processing cores. 
215:    
216:    By default, the main execution thread is also considered as a work thread.
217:    
218:    
219: .seealso: PetscGetMaxPThreads()
220: */
221: PetscErrorCode PetscSetMaxPThreads(PetscInt nthreads)
222: {
224:   PetscBool      flg=PETSC_FALSE;
225:   PetscInt       nworkThreads;


229:   N_CORES=1; /* Default value if N_CORES cannot be found out */
230:   /* Find the number of cores */
231: #if defined(PETSC_HAVE_SCHED_CPU_SET_T) /* Linux */
232:   N_CORES = get_nprocs();
233: #elif defined(PETSC_HAVE_SYS_SYSCTL_H) /* MacOS, BSD */
234:   {
235:     size_t   len = sizeof(N_CORES);
236:     sysctlbyname("hw.activecpu",&N_CORES,&len,NULL,0);
237:   }
238: #elif defined(PETSC_HAVE_WINDOWS_H)   /* Windows */
239:   {
240:     SYSTEM_INFO sysinfo;
241:     GetSystemInfo( &sysinfo );
242:     N_CORES = sysinfo.dwNumberOfProcessors;
243:   }
244: #endif
245:   PetscMaxThreads=N_CORES-1;
246:   if(nthreads == PETSC_DECIDE) {
247:     /* Check if run-time option is given */
248:     PetscOptionsInt("-nthreads","Set number of threads to be used for the thread pool","PetscSetMaxPThreads",N_CORES,&nworkThreads,&flg);
249:     if(flg) PetscMaxThreads = nworkThreads-1;
250:   } else PetscMaxThreads = nthreads;
251:   return(0);
252: }

256: /*
257:    PetscGetMaxPThreads - Returns the number of pthreads used in the thread pool.

259:    Not collective
260:   
261:    Output Parameters:
262: .  nthreads - Number of pthreads in the the thread pool.

264:    Level: beginner
265:  
266:    Notes:
267:    Must call PetscSetMaxPThreads() before
268:    
269: .seealso: PetscSetMaxPThreads()
270: */
271: PetscErrorCode PetscGetMaxPThreads(PetscInt *nthreads)
272: {
274:   if(PetscMaxThreads < 0) {
275:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call PetscSetMaxPThreads() first");
276:   } else {
277:     *nthreads = PetscMaxThreads+PetscMainThreadShareWork;
278:   }
279:   return(0);
280: }

284: PetscErrorCode PetscOptionsCheckInitial_Private_Pthread(void)
285: {
286:   PetscErrorCode                  ierr;
287:   PetscBool                       flg1=PETSC_FALSE;
288:   PetscThreadsSynchronizationType thread_sync_type=THREADSYNC_LOCKFREE;


292:   PetscOptionsBegin(PETSC_COMM_WORLD,PETSC_NULL,"PThread Options","Sys");

294:   /* Set nthreads */
295:   PetscSetMaxPThreads(PETSC_DECIDE);

297:   /* Check to see if the user wants the main thread not to share work with the other threads */
298:   PetscOptionsInt("-mainthread_is_worker","Main thread is also a work thread",PETSC_NULL,PetscMainThreadShareWork,&PetscMainThreadShareWork,&flg1);
299:   PetscOptionsInt("-mainthread_affinity","CPU affinity of main thread","PetscSetMainThreadAffinity",PetscMainThreadCoreAffinity,&PetscMainThreadCoreAffinity,PETSC_NULL);
300: #if defined(PETSC_HAVE_SCHED_CPU_SET_T)
301:   PetscSetMainThreadAffinity(PetscMainThreadCoreAffinity);
302: #endif
303: 
304:   /* Get thread affinity policy */
305:   PetscOptionsEnum("-thread_aff_policy","Type of thread affinity policy"," ",PetscThreadsAffinityPolicyTypes,(PetscEnum)thread_aff_policy,(PetscEnum*)&thread_aff_policy,&flg1);
306:   /* Get thread synchronization scheme */
307:   PetscOptionsEnum("-thread_sync_type","Type of thread synchronization algorithm"," ",PetscThreadsSynchronizationTypes,(PetscEnum)thread_sync_type,(PetscEnum*)&thread_sync_type,&flg1);
308:   PetscOptionsEnd();
309: 
310:   switch(thread_sync_type) {
311: #if 0 /* I'm tired of modifying each thread pool whenever there is a common change in any one. Hence, i'm disabling
312:          all the thread pools except lockfree for now. Will activate them once all the other development work
313:          is done.
314:       */
315:     case THREADSYNC_TREEPOOL:
316:       PetscThreadFunc       = &PetscThreadFunc_Tree;
317:       PetscThreadsSynchronizationInitialize = &PetscThreadsSynchronizationInitialize_Tree;
318:       PetscThreadsSynchronizationFinalize   = &PetscThreadsSynchronizationFinalize_Tree;
319:       PetscThreadsWait      = &PetscThreadsWait_Tree;
320:       PetscThreadsRunKernel = &PetscThreadsRunKernel_Tree;
321:       PetscInfo1(PETSC_NULL,"Using tree thread pool with %d threads\n",PetscMaxThreads);
322:       break;
323:     case THREADSYNC_MAINPOOL:
324:       PetscThreadFunc       = &PetscThreadFunc_Main;
325:       PetscThreadsSynchronizationInitialize = &PetscThreadsSynchronizationInitialize_Main;
326:       PetscThreadsSynchronizationFinalize   = &PetscThreadsSynchronizationFinalize_Main;
327:     PetscThreadsWait      = &PetscThreadsWait_Main;
328:     PetscThreadsRunKernel = &PetscThreadsRunKernel_Main;
329:     PetscInfo1(PETSC_NULL,"Using main thread pool with %d threads\n",PetscMaxThreads);
330:     break;
331:     case THREADSYNC_CHAINPOOL:
332:       PetscThreadFunc       = &PetscThreadFunc_Chain;
333:       PetscThreadsSynchronizationInitialize = &PetscThreadsSynchronizationInitialize_Chain;
334:       PetscThreadsSynchronizationFinalize   = &PetscThreadsSynchronizationFinalize_Chain;
335:       PetscThreadsWait      = &PetscThreadsWait_Chain;
336:       PetscThreadsRunKernel = &PetscThreadsRunKernel_Chain;
337:       PetscInfo1(PETSC_NULL,"Using chain thread pool with %d threads\n",PetscMaxThreads);
338:       break;
339:     case THREADSYNC_TRUEPOOL:
340: #if defined(PETSC_HAVE_PTHREAD_BARRIER_T)
341:       PetscThreadFunc       = &PetscThreadFunc_True;
342:       PetscThreadsSynchronizationInitialize = &PetscThreadsSynchronizationInitialize_True;
343:       PetscThreadsSynchronizationFinalize   = &PetscThreadsSynchronizationFinalize_True;
344:       PetscThreadsWait      = &PetscThreadsWait_True;
345:       PetscThreadsRunKernel = &PetscThreadsRunKernel_True;
346:       PetscInfo1(PETSC_NULL,"Using true thread pool with %d threads\n",PetscMaxThreads);
347: #else
348:       PetscThreadFunc       = &PetscThreadFunc_Main;
349:       PetscThreadsSynchronizationInitialize = &PetscThreadsSynchronizationInitialize_Main;
350:       PetscThreadsSynchronizationFinalize   = &PetscThreadsSynchronizationFinalize_Main;
351:       PetscThreadsWait      = &PetscThreadsWait_Main;
352:       PetscThreadsRunKernel = &PetscThreadsRunKernel_Main;
353:       PetscInfo1(PETSC_NULL,"Cannot use true thread pool since pthread_barrier_t is not defined, creating main thread pool instead with %d threads\n",PetscMaxThreads);
354: #endif
355:       break;
356:     case THREADSYNC_NOPOOL:
357:       PetscThreadsSynchronizationInitialize = PETSC_NULL;
358:       PetscThreadsSynchronizationFinalize   = PETSC_NULL;
359:       PetscThreadFunc       = &PetscThreadFunc_None;
360:       PetscThreadsWait      = &PetscThreadsWait_None;
361:       PetscThreadsRunKernel = &PetscThreadsRunKernel_None;
362:       PetscInfo1(PETSC_NULL,"Using No thread pool with %d threads\n",PetscMaxThreads);
363:       break;
364: #endif
365:   case THREADSYNC_LOCKFREE:
366:     PetscThreadFunc       = &PetscThreadFunc_LockFree;
367:     PetscThreadsSynchronizationInitialize = &PetscThreadsSynchronizationInitialize_LockFree;
368:     PetscThreadsSynchronizationFinalize   = &PetscThreadsSynchronizationFinalize_LockFree;
369:     PetscThreadsWait      = &PetscThreadsWait_LockFree;
370:     PetscThreadsRunKernel = &PetscThreadsRunKernel_LockFree;
371:     PetscInfo1(PETSC_NULL,"Using lock-free thread synchronization with %d threads\n",PetscMaxThreads+PetscMainThreadShareWork);
372:     break;
373:   default:
374:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only Lock-free synchronization scheme supported currently");
375:   }
376:   return(0);
377: }