int main(int argc,char **argv) { PetscErrorCode ierr; PetscInt nthreads,i; PetscInt *ranks; PetscScalar *values; PetscInitialize(&argc,&argv,(char *)0,help); ierr = PetscThreadCommView(PETSC_COMM_WORLD,0);CHKERRQ(ierr); ierr = PetscThreadCommGetNThreads(PETSC_COMM_WORLD,&nthreads);CHKERRQ(ierr); ierr = PetscMalloc(nthreads*sizeof(PetscInt),&ranks);CHKERRQ(ierr); ierr = PetscMalloc(nthreads*sizeof(PetscScalar),&values);CHKERRQ(ierr); for(i=0;i < nthreads;i++) { ranks[i] = i; values[i] = i; } ierr = PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)kernel_func1,2,ranks,values);CHKERRQ(ierr); ierr = PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)kernel_func2,2,ranks,values);CHKERRQ(ierr); ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD);CHKERRQ(ierr); ierr = PetscFree(ranks);CHKERRQ(ierr); ierr = PetscFree(values);CHKERRQ(ierr); PetscFinalize(); return 0; }
int main(int argc,char **argv) { PetscErrorCode ierr; PetscScalar dot=0.0,v; Vec x,y; PetscInt N=8; PetscScalar one=1.0,two=2.0,alpha=2.0; PetscInitialize(&argc,&argv,(char *)0,help); #if defined(PETSC_THREADCOMM_ACTIVE) ierr = PetscThreadCommView(PETSC_COMM_WORLD,PETSC_VIEWER_STDOUT_WORLD); CHKERRQ(ierr); #endif ierr = PetscOptionsGetInt(PETSC_NULL,"-N",&N,PETSC_NULL); CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&x); CHKERRQ(ierr); ierr = VecSetSizes(x,PETSC_DECIDE,N); CHKERRQ(ierr); ierr = VecSetFromOptions(x); CHKERRQ(ierr); ierr = VecSet(x,one); CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"x = %lf\n",one); CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&y); CHKERRQ(ierr); ierr = VecSetSizes(y,PETSC_DECIDE,N); CHKERRQ(ierr); ierr = VecSetFromOptions(y); CHKERRQ(ierr); ierr = VecSet(y,two); CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"y = %lf\n",two); CHKERRQ(ierr); ierr = VecAXPY(y,alpha,x); CHKERRQ(ierr); v = two+alpha*one; ierr = PetscPrintf(PETSC_COMM_WORLD,"x+%lfy = %lf\n",alpha,v); CHKERRQ(ierr); ierr = VecDot(x,y,&dot); CHKERRQ(ierr); #if defined(PETSC_THREADCOMM_ACTIVE) ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD); CHKERRQ(ierr); #endif ierr = PetscPrintf(PETSC_COMM_WORLD,"Dot product %d*(%lf*%lf) is %lf\n",N,one,v,dot); CHKERRQ(ierr); ierr = VecDestroy(&x); CHKERRQ(ierr); ierr = VecDestroy(&y); CHKERRQ(ierr); PetscFinalize(); return 0; }
int main(int argc,char **argv) { PetscErrorCode ierr; PetscInt i,j,k,N=100,**counters,tsize; PetscInitialize(&argc,&argv,(char *)0,help); ierr = PetscThreadCommView(PETSC_COMM_WORLD,PETSC_VIEWER_STDOUT_WORLD); CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-N",&N,PETSC_NULL); CHKERRQ(ierr); ierr = PetscThreadCommGetNThreads(PETSC_COMM_WORLD,&tsize); CHKERRQ(ierr); ierr = PetscMalloc(tsize*sizeof(*counters),&counters); CHKERRQ(ierr); ierr = PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterInit_kernel,1,counters); CHKERRQ(ierr); for (i=0; i<10; i++) { PetscReal t0,t1; ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD); CHKERRQ(ierr); ierr = PetscGetTime(&t0); CHKERRQ(ierr); for (j=0; j<N; j++) { /* ierr = PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterIncrement_kernel,1,counters);CHKERRQ(ierr); */ ierr = PetscThreadCommRunKernel1(PETSC_COMM_WORLD,(PetscThreadKernel)CounterIncrement_kernel,counters); CHKERRQ(ierr); } ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD); CHKERRQ(ierr); ierr = PetscGetTime(&t1); CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"Time per kernel: %g us\n",1e6*(t1-t0)/N); CHKERRQ(ierr); } for (i=0; i<10; i++) { PetscReal t0,t1; ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD); CHKERRQ(ierr); ierr = PetscGetTime(&t0); CHKERRQ(ierr); for (j=0; j<N; j++) { #pragma omp parallel num_threads(tsize) { PetscInt trank = omp_get_thread_num(); CounterIncrement_kernel(trank,counters); } } ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD); CHKERRQ(ierr); ierr = PetscGetTime(&t1); CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"OpenMP inline time per kernel: %g us\n",1e6*(t1-t0)/N); CHKERRQ(ierr); } for (i=0; i<10; i++) { PetscReal t0,t1; ierr = PetscGetTime(&t0); CHKERRQ(ierr); for (j=0; j<N; j++) { CounterIncrement_kernel(0,counters); } ierr = PetscGetTime(&t1); CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"Serial inline time per single kernel: %g us\n",1e6*(t1-t0)/N); CHKERRQ(ierr); } for (i=0; i<10; i++) { PetscReal t0,t1; ierr = PetscGetTime(&t0); CHKERRQ(ierr); for (j=0; j<N; j++) { for (k=0; k<tsize; k++) CounterIncrement_kernel(k,counters); } ierr = PetscGetTime(&t1); CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"Serial inline time per kernel: %g us\n",1e6*(t1-t0)/N); CHKERRQ(ierr); } ierr = PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterFree_kernel,1,counters); CHKERRQ(ierr); ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD); CHKERRQ(ierr); ierr = PetscFree(counters); CHKERRQ(ierr); PetscFinalize(); return 0; }