Exemple #1
0
/*
 * Example driver: builds one rank entry and one scalar entry per thread of
 * the PETSC_COMM_WORLD thread communicator and hands both arrays to
 * kernel_func1 and then kernel_func2.
 *
 * Returns 0 on success, otherwise the first nonzero PETSc error code.
 */
int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscInt       nthreads,i;
  PetscInt       *ranks;
  PetscScalar    *values;

  /* If start-up fails nothing below is safe to call, so bail out at once. */
  ierr = PetscInitialize(&argc,&argv,(char *)0,help);
  if (ierr) return ierr;

  ierr = PetscThreadCommView(PETSC_COMM_WORLD,0);CHKERRQ(ierr);
  ierr = PetscThreadCommGetNThreads(PETSC_COMM_WORLD,&nthreads);CHKERRQ(ierr);

  /* One slot per thread in each array. */
  ierr = PetscMalloc(nthreads*sizeof(PetscInt),&ranks);CHKERRQ(ierr);
  ierr = PetscMalloc(nthreads*sizeof(PetscScalar),&values);CHKERRQ(ierr);

  /* Slot i carries the index i both as a rank and as a scalar value. */
  for (i=0; i<nthreads; i++) {
    ranks[i]  = i;
    values[i] = i;
  }

  /* The literal 2 matches the two trailing arguments (ranks, values). */
  ierr = PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)kernel_func1,2,ranks,values);CHKERRQ(ierr);
  ierr = PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)kernel_func2,2,ranks,values);CHKERRQ(ierr);

  /* Barrier first, then release the arrays that were handed to the kernels. */
  ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD);CHKERRQ(ierr);
  ierr = PetscFree(ranks);CHKERRQ(ierr);
  ierr = PetscFree(values);CHKERRQ(ierr);

  /* Propagate a shutdown failure instead of unconditionally reporting success. */
  ierr = PetscFinalize();
  return ierr;
}
Exemple #2
0
/*
 * Example driver: creates two vectors of global size N (default 8, override
 * with -N), fills x with 1 and y with 2, applies VecAXPY(y,alpha,x), takes
 * the dot product of x and y, and prints the expected values along the way.
 *
 * Returns 0 on success, otherwise the first nonzero PETSc error code.
 *
 * NOTE(review): the %lf conversions below receive PetscScalar arguments,
 * which presumes PetscScalar is a real double in this build -- confirm
 * before using with a complex-scalar configuration.
 */
int main(int argc,char **argv)
{
    PetscErrorCode ierr;
    PetscScalar    dot=0.0,v;
    Vec            x,y;
    PetscInt       N=8;
    PetscScalar    one=1.0,two=2.0,alpha=2.0;

    /* If start-up fails nothing below is safe to call, so bail out at once. */
    ierr = PetscInitialize(&argc,&argv,(char *)0,help);
    if (ierr) return ierr;

#if defined(PETSC_THREADCOMM_ACTIVE)
    ierr = PetscThreadCommView(PETSC_COMM_WORLD,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
#endif
    ierr = PetscOptionsGetInt(PETSC_NULL,"-N",&N,PETSC_NULL);CHKERRQ(ierr);

    /* x: every entry set to one. */
    ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr);
    ierr = VecSetSizes(x,PETSC_DECIDE,N);CHKERRQ(ierr);
    ierr = VecSetFromOptions(x);CHKERRQ(ierr);
    ierr = VecSet(x,one);CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_WORLD,"x = %lf\n",one);CHKERRQ(ierr);

    /* y: every entry set to two. */
    ierr = VecCreate(PETSC_COMM_WORLD,&y);CHKERRQ(ierr);
    ierr = VecSetSizes(y,PETSC_DECIDE,N);CHKERRQ(ierr);
    ierr = VecSetFromOptions(y);CHKERRQ(ierr);
    ierr = VecSet(y,two);CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_WORLD,"y = %lf\n",two);CHKERRQ(ierr);

    /*
     * v is the per-entry value expected in y after the update: the old y
     * entry (two) plus alpha times the x entry (one).  The label therefore
     * reads y+alpha*x, matching the arithmetic on the next line.
     */
    ierr = VecAXPY(y,alpha,x);CHKERRQ(ierr);
    v = two+alpha*one;
    ierr = PetscPrintf(PETSC_COMM_WORLD,"y+%lf*x = %lf\n",alpha,v);CHKERRQ(ierr);

    ierr = VecDot(x,y,&dot);CHKERRQ(ierr);

#if defined(PETSC_THREADCOMM_ACTIVE)
    ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD);CHKERRQ(ierr);
#endif

    /* PetscInt may be wider than int, so print N through long long. */
    ierr = PetscPrintf(PETSC_COMM_WORLD,"Dot product %lld*(%lf*%lf) is %lf\n",(long long)N,one,v,dot);CHKERRQ(ierr);
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = VecDestroy(&y);CHKERRQ(ierr);

    /* Propagate a shutdown failure instead of unconditionally reporting success. */
    ierr = PetscFinalize();
    return ierr;
}
Exemple #3
0
/*
 * Micro-benchmark driver for CounterIncrement_kernel.  Four phases, each
 * repeated 10 times, time N (default 100, override with -N) rounds of:
 *   1. launching the kernel through PetscThreadCommRunKernel1,
 *   2. calling it from an OpenMP parallel region with tsize threads,
 *   3. calling it once, serially, for thread index 0,
 *   4. calling it serially for every thread index 0..tsize-1.
 * Each repetition prints 1e6*(elapsed)/N, labelled as microseconds.
 *
 * Returns 0 on success, otherwise the first nonzero PETSc error code.
 *
 * NOTE(review): t0/t1 are PetscReal but are passed to PetscGetTime; confirm
 * that matches the pointer type PetscGetTime expects in this build.
 * NOTE(review): the OpenMP phase is not guarded by any preprocessor check;
 * presumably the file is always built with OpenMP enabled -- confirm.
 */
int main(int argc,char **argv)
{
    PetscErrorCode ierr;
    PetscInt       i,j,k,N=100,**counters,tsize;

    /* If start-up fails nothing below is safe to call, so bail out at once. */
    ierr = PetscInitialize(&argc,&argv,(char *)0,help);
    if (ierr) return ierr;

    ierr = PetscThreadCommView(PETSC_COMM_WORLD,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
    ierr = PetscOptionsGetInt(PETSC_NULL,"-N",&N,PETSC_NULL);CHKERRQ(ierr);

    /* One counter pointer per thread; the init kernel fills the slots in. */
    ierr = PetscThreadCommGetNThreads(PETSC_COMM_WORLD,&tsize);CHKERRQ(ierr);
    ierr = PetscMalloc(tsize*sizeof(*counters),&counters);CHKERRQ(ierr);
    ierr = PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterInit_kernel,1,counters);CHKERRQ(ierr);

    /* Phase 1: kernel launched through the thread communicator. */
    for (i=0; i<10; i++) {
        PetscReal t0,t1;
        ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD);CHKERRQ(ierr);
        ierr = PetscGetTime(&t0);CHKERRQ(ierr);
        for (j=0; j<N; j++) {
            /*
             * Uses the one-argument launcher; the generic equivalent would be
             * PetscThreadCommRunKernel(...,CounterIncrement_kernel,1,counters).
             */
            ierr = PetscThreadCommRunKernel1(PETSC_COMM_WORLD,(PetscThreadKernel)CounterIncrement_kernel,counters);CHKERRQ(ierr);
        }
        /* Barrier before reading the clock so the timed region is bracketed. */
        ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD);CHKERRQ(ierr);
        ierr = PetscGetTime(&t1);CHKERRQ(ierr);
        /* Cast keeps the %g argument a double whatever PetscReal is. */
        ierr = PetscPrintf(PETSC_COMM_WORLD,"Time per kernel: %g us\n",(double)(1e6*(t1-t0)/N));CHKERRQ(ierr);
    }

    /* Phase 2: kernel called directly from an OpenMP parallel region. */
    for (i=0; i<10; i++) {
        PetscReal t0,t1;
        ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD);CHKERRQ(ierr);
        ierr = PetscGetTime(&t0);CHKERRQ(ierr);
        for (j=0; j<N; j++) {
            #pragma omp parallel num_threads(tsize)
            {
                PetscInt trank = omp_get_thread_num();
                CounterIncrement_kernel(trank,counters);
            }
        }
        ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD);CHKERRQ(ierr);
        ierr = PetscGetTime(&t1);CHKERRQ(ierr);
        ierr = PetscPrintf(PETSC_COMM_WORLD,"OpenMP inline time per kernel: %g us\n",(double)(1e6*(t1-t0)/N));CHKERRQ(ierr);
    }

    /* Phase 3: a single serial call per round, always for thread index 0. */
    for (i=0; i<10; i++) {
        PetscReal t0,t1;
        ierr = PetscGetTime(&t0);CHKERRQ(ierr);
        for (j=0; j<N; j++) {
            CounterIncrement_kernel(0,counters);
        }
        ierr = PetscGetTime(&t1);CHKERRQ(ierr);
        ierr = PetscPrintf(PETSC_COMM_WORLD,"Serial inline time per single kernel: %g us\n",(double)(1e6*(t1-t0)/N));CHKERRQ(ierr);
    }

    /* Phase 4: serial calls for every thread index in each round. */
    for (i=0; i<10; i++) {
        PetscReal t0,t1;
        ierr = PetscGetTime(&t0);CHKERRQ(ierr);
        for (j=0; j<N; j++) {
            for (k=0; k<tsize; k++) {
                CounterIncrement_kernel(k,counters);
            }
        }
        ierr = PetscGetTime(&t1);CHKERRQ(ierr);
        ierr = PetscPrintf(PETSC_COMM_WORLD,"Serial inline time per kernel: %g us\n",(double)(1e6*(t1-t0)/N));CHKERRQ(ierr);
    }

    /* Let the free kernel run to the barrier before dropping the pointer array. */
    ierr = PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterFree_kernel,1,counters);CHKERRQ(ierr);
    ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD);CHKERRQ(ierr);
    ierr = PetscFree(counters);CHKERRQ(ierr);

    /* Propagate a shutdown failure instead of unconditionally reporting success. */
    ierr = PetscFinalize();
    return ierr;
}