Example #1
0
/*
S^{-1} = ( G^T G )^{-1} G^T K G ( G^T G )^{-1}
       = A C A
S^{-T} = A^T (A C)^T
       = A^T C^T A^T, but A = G^T G which is symmetric
       = A C^T A
       = A G^T ( G^T K )^T A
       = A G^T K^T G A

*/
PetscErrorCode BSSCR_PCApplyTranspose_GtKG( PC pc, Vec x, Vec y )
{
	
	PC_GtKG ctx = (PC_GtKG)pc->data;
	
	KSP ksp;
	Mat K, G;
	Vec s,t,X;
	PetscLogDouble t0,t1;
	
	ksp = ctx->ksp;
	K = ctx->K;
	G = ctx->G;
	s = ctx->s;
	t = ctx->t;
	X = ctx->X;
	
	if (ctx->monitor_rhs_consistency) {		BSSCRBSSCR_Lp_monitor_check_rhs_consistency(ksp,x,1);		}
	PetscGetTime(&t0);
	KSPSolve( ksp, x, t ); /* t <- GtG_inv x */
	PetscGetTime(&t1);
	if (ctx->monitor_activated) {	BSSCR_PCBFBTSubKSPMonitor(ksp,1,(t1-t0));		}
	
	MatMult( G, t, s ); /* s <- G t */
	MatMultTranspose( K, s, X ); /* X <- K^T s */
	MatMultTranspose( G, X, t ); /* t <- Gt X */
	
	if (ctx->monitor_rhs_consistency) {		BSSCRBSSCR_Lp_monitor_check_rhs_consistency(ksp,t,2);		}
	PetscGetTime(&t0);
	KSPSolve( ksp, t, y ); /* y <- GtG_inv t */
	PetscGetTime(&t1);
	if (ctx->monitor_activated) {	BSSCR_PCBFBTSubKSPMonitor(ksp,2,(t1-t0));		}
	
	PetscFunctionReturn(0);
}
Example #2
0
PetscErrorCode RunTest(int nx, int ny, int nz, int loops, double *wt)
{
  Vec            x,f;
  TS             ts;
  AppCtx         _app,*app=&_app;
  double         t1,t2;
  PetscErrorCode ierr;
  PetscFunctionBegin;

  app->nx = nx; app->h[0] = 1./(nx-1);
  app->ny = ny; app->h[1] = 1./(ny-1);
  app->nz = nz; app->h[2] = 1./(nz-1);

  ierr = VecCreate(PETSC_COMM_SELF,&x);CHKERRQ(ierr);
  ierr = VecSetSizes(x,nx*ny*nz,nx*ny*nz);CHKERRQ(ierr);
  ierr = VecSetUp(x);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&f);CHKERRQ(ierr);

  ierr = TSCreate(PETSC_COMM_SELF,&ts);CHKERRQ(ierr);
  ierr = TSSetProblemType(ts,TS_NONLINEAR);CHKERRQ(ierr);
  ierr = TSSetType(ts,TSTHETA);CHKERRQ(ierr);
  ierr = TSThetaSetTheta(ts,1.0);CHKERRQ(ierr);
  ierr = TSSetTimeStep(ts,0.01);CHKERRQ(ierr);
  ierr = TSSetTime(ts,0.0);CHKERRQ(ierr);
  ierr = TSSetDuration(ts,10,1.0);CHKERRQ(ierr);

  ierr = TSSetSolution(ts,x);CHKERRQ(ierr);
  ierr = TSSetIFunction(ts,f,FormFunction,app);CHKERRQ(ierr);
  ierr = PetscOptionsSetValue("-snes_mf","1");CHKERRQ(ierr);
  {
    SNES snes;
    KSP  ksp;
    ierr = TSGetSNES(ts,&snes);CHKERRQ(ierr);
    ierr = SNESGetKSP(snes,&ksp);CHKERRQ(ierr);
    ierr = KSPSetType(ksp,KSPCG);CHKERRQ(ierr);
  }
  ierr = TSSetFromOptions(ts);CHKERRQ(ierr);
  ierr = TSSetUp(ts);CHKERRQ(ierr);

  *wt = 1e300;
  while (loops-- > 0) {
    ierr = FormInitial(0.0,x,app);CHKERRQ(ierr);
    ierr = PetscGetTime(&t1);CHKERRQ(ierr);
    ierr = TSSolve(ts,x,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscGetTime(&t2);CHKERRQ(ierr);
    *wt = PetscMin(*wt,t2-t1);
  }

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&f);CHKERRQ(ierr);
  ierr = TSDestroy(&ts);CHKERRQ(ierr);
  
  PetscFunctionReturn(0);
}
Example #3
0
int main(int argc,char **argv)
{
  PetscLogDouble x,y,z;
  PetscScalar    A[10000];
  int            ierr;

  PetscInitialize(&argc,&argv,0,0);
  /* To take care of paging effects */
  ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr);
  ierr = PetscGetTime(&x);CHKERRQ(ierr);

  ierr = PetscGetTime(&x);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr);
  ierr = PetscGetTime(&y);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr);
  ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr);
  ierr = PetscGetTime(&z);CHKERRQ(ierr);

  fprintf(stdout,"%s : \n","PetscMemzero");
  fprintf(stdout,"    %-15s : %e sec\n","Latency",(z-y)/10.0);
  fprintf(stdout,"    %-15s : %e sec\n","Per PetscScalar",(2*y-x-z)/100000.0);

  ierr = PetscFinalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Example #4
0
PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A,Mat R,Mat C)
{
  PetscErrorCode        ierr;
  Mat_RARt              *rart;
  PetscContainer        container;
  MatTransposeColoring  matcoloring;
  Mat                   Rt,RARt;
  PetscLogDouble        Mult_sp_den=0.0,app1=0.0,app2=0.0,t0,tf;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)C,"Mat_RARt",(PetscObject *)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
  ierr  = PetscContainerGetPointer(container,(void **)&rart);CHKERRQ(ierr);

  /* Get dense Rt by Apply MatTransposeColoring to R */
  matcoloring = rart->matcoloring;
  Rt          = rart->Rt;
  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  ierr = MatTransColoringApplySpToDen(matcoloring,R,Rt);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  app1 += tf - t0;

  /* Get dense RARt = R*A*Rt */
  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  RARt = rart->RARt;
  ierr = MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R,A,Rt,RARt,rart->work);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  Mult_sp_den += tf - t0;

  /* Recover C from C_dense */
  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  ierr = MatTransColoringApplyDenToSp(matcoloring,RARt,C);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  app2 += tf - t0;

#if defined(PETSC_USE_INFO)
  ierr = PetscInfo4(C,"Num = ColorApp %g + %g + Mult_sp_den %g  = %g\n",app1,app2,Mult_sp_den,app1+app2+Mult_sp_den);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
Example #5
0
int test1(void)
{
  PetscLogDouble  t1,t2;
  double      value;
  int         i,ierr,*z,*zi,intval;
  PetscScalar *x,*y;
  PetscRandom r;

  ierr = PetscRandomCreate(PETSC_COMM_SELF,&r);CHKERRQ(ierr);
  ierr = PetscRandomSetFromOptions(r);CHKERRQ(ierr);
  ierr = PetscMalloc(20000*sizeof(PetscScalar),&x);CHKERRQ(ierr);
  ierr = PetscMalloc(20000*sizeof(PetscScalar),&y);CHKERRQ(ierr);

  ierr = PetscMalloc(2000*sizeof(int),&z);CHKERRQ(ierr); 
  ierr = PetscMalloc(2000*sizeof(int),&zi);CHKERRQ(ierr); 



  /* Take care of paging effects */
  ierr = PetscGetTime(&t1);CHKERRQ(ierr);
  
   /* Form the random set of integers */
  for (i=0; i<2000; i++) {
    ierr   = PetscRandomGetValue(r,&value);CHKERRQ(ierr);
    intval = (int)(value*20000.0);
    z[i]   = intval;
  }

  for (i=0; i<2000; i++) {
    ierr    = PetscRandomGetValue(r,&value);CHKERRQ(ierr);
    intval  = (int)(value*20000.0);
    zi[i]   = intval;
  }
  /* fprintf(stdout,"Done setup\n"); */

  ierr = BlastCache();CHKERRQ(ierr);

  ierr = PetscGetTime(&t1);CHKERRQ(ierr);
  for (i=0; i<2000; i++) {  x[i] = y[i]; }
  ierr = PetscGetTime(&t2);CHKERRQ(ierr);
  fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0);

  ierr = BlastCache();CHKERRQ(ierr);

  ierr = PetscGetTime(&t1);CHKERRQ(ierr);
  for (i=0; i<500; i+=4) {  
    x[i]   = y[z[i]];
    x[1+i] = y[z[1+i]];
    x[2+i] = y[z[2+i]];
    x[3+i] = y[z[3+i]];
  }
  ierr = PetscGetTime(&t2);CHKERRQ(ierr);
  fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 4",(t2-t1)/2000.0);

  ierr = BlastCache();CHKERRQ(ierr);

  ierr = PetscGetTime(&t1);CHKERRQ(ierr)
  for (i=0; i<2000; i++) {  x[i] = y[z[i]]; }
  ierr = PetscGetTime(&t2);CHKERRQ(ierr);
  fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0);

  ierr = BlastCache();CHKERRQ(ierr);

  ierr = PetscGetTime(&t1);CHKERRQ(ierr);
  for (i=0; i<1000; i+=2) {  x[i] = y[z[i]];  x[1+i] = y[z[1+i]]; }
  ierr = PetscGetTime(&t2);CHKERRQ(ierr);
  fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 2",(t2-t1)/2000.0);

  ierr = BlastCache();CHKERRQ(ierr);

  ierr = PetscGetTime(&t1);CHKERRQ(ierr);
  for (i=0; i<2000; i++) {  x[z[i]] = y[i]; }
  ierr = PetscGetTime(&t2);CHKERRQ(ierr);
  fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0);

  ierr = BlastCache();CHKERRQ(ierr);

  ierr = PetscGetTime(&t1);CHKERRQ(ierr);
  for (i=0; i<2000; i++) {  x[z[i]] = y[zi[i]]; }
  ierr = PetscGetTime(&t2);CHKERRQ(ierr);
  fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0);
  
  ierr = PetscMemcpy(x,y,10);CHKERRQ(ierr);
  ierr = PetscMemcpy(z,zi,10);CHKERRQ(ierr);
  ierr = PetscFree(z);CHKERRQ(ierr);
  ierr = PetscFree(zi);CHKERRQ(ierr);
  ierr = PetscFree(x);CHKERRQ(ierr);
  ierr = PetscFree(y);CHKERRQ(ierr);
  PetscRandomDestroy(r);
  PetscFunctionReturn(0);
}
Example #6
0
int test2(void)
{
  PetscLogDouble   t1,t2;
  double       value;
  int          i,ierr,z[20000],zi[20000],intval,tmp;
  PetscScalar  x[20000],y[20000];
  PetscRandom  r;

  ierr = PetscRandomCreate(PETSC_COMM_SELF,&r);CHKERRQ(ierr);
  ierr = PetscRandomSetFromOptions(r);CHKERRQ(ierr);

  /* Take care of paging effects */
  ierr = PetscGetTime(&t1);CHKERRQ(ierr);
 
  for (i=0; i<20000; i++) {
    x[i]  = i;
    y[i]  = i;
    z[i]  = i;
    zi[i] = i;
  }

   /* Form the random set of integers */
  for (i=0; i<20000; i++) {
    ierr   = PetscRandomGetValue(r,&value);CHKERRQ(ierr);
    intval = (int)(value*20000.0);
    tmp    = z[i];
    z[i]   = z[intval];
    z[intval] = tmp;
  }

  for (i=0; i<20000; i++) {
    ierr   = PetscRandomGetValue(r,&value);CHKERRQ(ierr);
    intval = (int)(value*20000.0);
    tmp    = zi[i];
    zi[i]  = zi[intval];
    zi[intval] = tmp;
  }
  /* fprintf(stdout,"Done setup\n"); */

  /* ierr = BlastCache();CHKERRQ(ierr); */

  ierr = PetscGetTime(&t1);CHKERRQ(ierr);
  for (i=0; i<2000; i++) {  x[i] = y[i]; }
  ierr = PetscGetTime(&t2);CHKERRQ(ierr);
  fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0);

  /* ierr = BlastCache();CHKERRQ(ierr); */

  ierr = PetscGetTime(&t1);CHKERRQ(ierr);
  for (i=0; i<2000; i++) {  y[i] = x[z[i]]; }
  ierr = PetscGetTime(&t2);CHKERRQ(ierr);
  fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0);

  /* ierr = BlastCache();CHKERRQ(ierr); */

  ierr = PetscGetTime(&t1);CHKERRQ(ierr);
  for (i=0; i<2000; i++) {  x[z[i]] = y[i]; }
  ierr = PetscGetTime(&t2);CHKERRQ(ierr);
  fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0);

  /* ierr = BlastCache();CHKERRQ(ierr); */

  ierr = PetscGetTime(&t1);CHKERRQ(ierr);
  for (i=0; i<2000; i++) {  y[z[i]] = x[zi[i]]; }
  ierr = PetscGetTime(&t2);CHKERRQ(ierr);
  fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0);


  PetscRandomDestroy(r);
  PetscFunctionReturn(0);
}
Example #7
0
int main(int argc,char **args)
{
  PetscInt       rank,size,npt;  
  PetscScalar    dx,dy,cx,cy;
  PetscErrorCode ierr;
  Vec            x,x0,tempvec, *vinda,*vindb,*vindc;
  PetscInt       i,j,k,n,n2,pmax,puse,Istart,Iend,localsize,niter;
  PetscScalar    **x0array, **aarray,**barray;
  PetscInt       *cacheInt;
  PetscScalar    *cacheScalar;  
  DA             myDA;

  PetscScalar    *Mixnorm;
  PetscInt       iter,*iterind,*nind;
  FILE           *fidoutput, *fidtimelog;   
  char           fname[50],ftimelog[50];
  PetscViewer    socketviewer; 
  PetscInt       withMatlab, doFFT, doSmoothing;
  PetscTruth     Matlabflag, FFTflag, Smoothingflag;
  PetscInt       timelogcount;  
  MPI_Status     status;

  PetscLogDouble v1,v2,elapsed_time;
  timelogcount = 0;
 
 
  PetscInitialize(&argc,&args,(char *)0,help);
  MPI_Comm_size(PETSC_COMM_WORLD,&size);
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);

  ierr = PetscPrintf(PETSC_COMM_WORLD,"\nPETSC: Petsc Initializes successfully! \n");
  ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: comm_size is %d \n", size);
 
  ierr = PetscOptionsGetInt(PETSC_NULL,"-withMatlab",&withMatlab,&Matlabflag);CHKERRQ(ierr);  
  if (Matlabflag == PETSC_FALSE){withMatlab = 0;}else{withMatlab = 1;}
  ierr = PetscOptionsGetInt(PETSC_NULL,"-doFFT",&doFFT,&FFTflag);CHKERRQ(ierr);  
  if (FFTflag == PETSC_FALSE){doFFT = 0;}else{doFFT = 1;}

  ierr = PetscOptionsGetInt(PETSC_NULL,"-doSmoothing",&doSmoothing,&Smoothingflag);CHKERRQ(ierr);  
  if (Smoothingflag == PETSC_FALSE){doSmoothing = 0;}else{doSmoothing = 1;}

  if(withMatlab==1){
  // Rank 0 connects to socket, use default socket
  PetscViewerSocketOpen(PETSC_COMM_WORLD,0,PETSC_DEFAULT,&socketviewer);  
  ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: socket opened! \n");CHKERRQ(ierr); 

  // Receive n from Matlab
  IntReceive(socketviewer, &nind);
  n = *nind;
  //  Receive iter from Matlab
  IntReceive(socketviewer, &iterind);
  iter = *iterind;
 
  }else{
  ierr = PetscOptionsGetInt(PETSC_NULL,"-ngrid",&n,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetInt(PETSC_NULL,"-niter",&iter,PETSC_NULL);CHKERRQ(ierr);
  }

  
 
/////////////////////////////////////////////////////////////////////////////////////

  ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: number of grid is %d \n", n);
  ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: number of iteration is %d \n", iter);


  Mixnorm    = malloc(iter*sizeof(PetscScalar));

  dx         = 1.0/n;
  dy         = 1.0/n;
  n2         = (PetscInt)(n*0.5);
  npt        = 5;
  pmax       = 5e5;
  puse       = pmax;


  PetscInt      logmax = 1000;
  PetscScalar   Timelog[logmax];
  PetscLogDouble t1,t2;

  ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: estimated buffer size (per processer) %f Mbytes \n", pmax*1.0/1e6*8*17 );
  ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: estimated variable size %f Mbytes\n", 1.0*n*n/1e6*8*1);

/////////////////////////////////////////////////////////////////////////////////////
  
 // ierr  = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE ,n,&tempvec);CHKERRQ(ierr);
 // ierr  = VecGetOwnershipRange(tempvec,&Istart,&Iend);CHKERRQ(ierr); 
 // localsize = Iend-Istart;
 // ierr = VecDestroy(tempvec);CHKERRQ(ierr);


/////////////////////////////////////////////////////////////////////////////////////

if(doSmoothing==1){
      ierr = PetscPrintf(PETSC_COMM_WORLD,"\n\n\n\n\nPETSC: Now Do  DACreate2d \n\n\n\n" );
      ierr = PetscPrintf(PETSC_COMM_WORLD,"\n\n\n\n\nPETSC: %d  %d  %d\n\n\n\n",n2,n,size);
      DACreate2d(MPI_COMM_WORLD,DA_XYPERIODIC,DA_STENCIL_BOX,n2,n,1,size,1,2,PETSC_NULL,PETSC_NULL,&myDA);
      DACreateGlobalVector(myDA,&x0);
      DAGetCorners(myDA,PETSC_NULL,&Istart,PETSC_NULL,PETSC_NULL,&localsize,PETSC_NULL);
      Iend = Istart+localsize;
}else{
      ierr  = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE ,n,&tempvec);CHKERRQ(ierr);
      ierr  = VecGetOwnershipRange(tempvec,&Istart,&Iend);CHKERRQ(ierr); 
      localsize = Iend-Istart;
      ierr = VecDestroy(tempvec);CHKERRQ(ierr);
      VecCreateMPI(PETSC_COMM_WORLD,localsize*n2,PETSC_DETERMINE ,&x0);
}


//ierr = PetscPrintf(PETSC_COMM_WORLD,"\n\n\n\n\nPETSC: So far so good\n\n\n\n");



VecGetArray2d(x0,n2,localsize,0,0,&x0array);

// Create initial vector

   
   for(j=0;j<localsize;j++){
         for(i=0;i<n2;i++){
             cx = (Istart+j+0.5)*dx;  
             x0array[i][j] = cos(2*M_PI*cx);
        }
    }

    
VecRestoreArray2d(x0,n2,localsize,0,0,&x0array);


    ierr = VecDuplicate(x0,&x);CHKERRQ(ierr); 
    ierr =  VecNorm(x0,NORM_2,Mixnorm); CHKERRQ(ierr);  
    PetscPrintf(PETSC_COMM_WORLD,"PETSC: initial norm= %f \n",*(Mixnorm+0)/n ); 
    vinda = &x0;
    vindb = &x;

    sprintf(fname, "mixnorm_%d_%d",n,iter);
    ierr =PetscPrintf(PETSC_COMM_WORLD,"\n iter     norm      time      unit time\n");CHKERRQ(ierr);
    ierr =PetscFOpen(PETSC_COMM_WORLD,fname,"w",&fidoutput);CHKERRQ(ierr);

///////////////////////////////////////////////////////////////////////////////////////////////////
// Memory allocation for the iteration scheme 

 //   cacheInt    = malloc(1*pmax*sizeof(PetscInt));
 //   cacheScalar = malloc(2*pmax*sizeof(PetscScalar));

   cacheInt    = malloc(2*pmax*sizeof(PetscInt));
   cacheScalar = malloc(2*pmax*sizeof(PetscScalar));
///////////////////////////////////////////////////////////////////////////////////////////////////
// Iteration here!

for(niter=0;niter<iter;niter++){

  ierr = PetscGetTime(&v1);CHKERRQ(ierr);
 // BackwardAverage(vinda, vindb, cacheInt, cacheScalar, n,  npt, pmax, Istart,Iend);
 // BackwardAverageR(vinda, vindb, cacheInt, cacheScalar, n,  npt, pmax, Istart,Iend);
   BackwardAverageRL(vinda, vindb, cacheInt, cacheScalar, n,  npt, pmax, Istart,Iend);
   vindc = vindb;
   vindb = vinda;
   vinda = vindc;   
// if(doSmoothing==1){Smoothing(vinda, vindb,n, myDA, Istart,Iend);}
  ierr = PetscGetTime(&v2);CHKERRQ(ierr);
   //vindc = vindb;
   //vindb = vinda;
   //vinda = vindc;   
  
   ierr =  VecNorm(*vinda,NORM_2,Mixnorm+niter); CHKERRQ(ierr); 
   *(Mixnorm+niter) = *(Mixnorm+niter)/n; 
      
   elapsed_time = v2 - v1; 
   PetscPrintf(PETSC_COMM_WORLD,"     %d   %f   %f  %f \n",niter,*(Mixnorm+niter),elapsed_time,elapsed_time/n/n*1e6 );
   PetscFPrintf(PETSC_COMM_WORLD,fidoutput,"    %d   %f   %f  %f\n"
                ,niter,*(Mixnorm+niter),elapsed_time,elapsed_time/n/n*1e6 ); 
}

////////////////////////////////////////////////////////////////////////////////////////////////////
//Change oremtation of vector
VecGetArray2d(*vinda,n2,localsize,0,0,&aarray);
VecGetArray2d(*vindb,localsize,n2,0,0,&barray);
  for(j=0;j<localsize;j++){
       for(i=0;i<n2;i++){
             barray[j][i] = aarray[i][j];       
        }
    }  
VecRestoreArray2d(*vinda,n2,localsize,0,0,&aarray);
VecRestoreArray2d(*vindb,localsize,n2,0,0,&barray);
   vindc = vindb;
   vindb = vinda;
   vinda = vindc;  

////////////////////////////////////////////////////////////////////////////////////////////////////
// FFT part
if(doFFT==1){FFT2D(*vinda,*vindb, localsize, n, Istart,Iend, iter,doSmoothing);}
////////////////////////////////////////////////////////////////////////////////////////////////////
/*
   if(rank==0){
   sprintf(ftimelog, "timelog_%d_%d",n,iter);
   fidtimelog = fopen(ftimelog,"w");
   for(i=0;i<timelogcount;i++){  fprintf(fidtimelog,"%f ",Timelog[i]); }  
   fprintf(fidtimelog,"\n ");  

   for(j = 1;j<size;j++){
     MPI_Recv(Timelog,timelogcount,MPI_DOUBLE,j,j,PETSC_COMM_WORLD,&status);
      for(i=0;i<timelogcount;i++){  fprintf(fidtimelog,"%f ",Timelog[i]); }
      fprintf(fidtimelog,"\n ");
     }
   fclose(fidtimelog);
   }else{ 
   MPI_Send(Timelog ,timelogcount,MPI_DOUBLE,0,rank,PETSC_COMM_WORLD);
   } 
   PetscFClose(PETSC_COMM_WORLD,fidoutput);
*/ 
///////////////////////////////////////////////////////////////////////////

    if(withMatlab==1){
     VecView(*vinda,socketviewer);
     PetscScalarView(iter,Mixnorm,socketviewer);
    }
 

  // free(x0array); 
   free(Mixnorm);
   free(cacheInt);
   free(cacheScalar);
 
   ierr = VecDestroy(x0);CHKERRQ(ierr);
   ierr = VecDestroy(x);CHKERRQ(ierr);
   
 
  PetscPrintf(PETSC_COMM_WORLD,"Done!");
  
//////////////////////////////////////////////////////////////////////////////////////
  ierr = PetscFinalize();CHKERRQ(ierr);
Example #8
0
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy(Mat A,Mat P,Mat C) 
{
  PetscErrorCode    ierr;
  Mat_SeqAIJ        *a  = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ        *p  = (Mat_SeqAIJ *) P->data; 
  Mat_SeqAIJ        *c  = (Mat_SeqAIJ *) C->data;
  const PetscInt    *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*ci=c->i,*cj=c->j;
  const PetscScalar *aa=a->a,*pa=p->a,*pval;
  const PetscInt    *apj,*pcol,*cjj;
  const PetscInt    am=A->rmap->N,cm=C->rmap->N;
  PetscInt          i,j,k,anz,apnz,pnz,prow,crow,apcol,nextap;
  PetscScalar       *apa,*ca=c->a,*caj,pvalj;
  Mat_PtAP          *ptap = c->ptap;
#if defined(PROFILE_MatPtAPNumeric)
  PetscLogDouble    t0,tf,time_Cseq0=0.0,time_Cseq1=0.0;
  PetscInt          flops0=0,flops1=0;
#endif

  PetscFunctionBegin;
  /* Get temporary array for storage of one row of A*P */
  apa = ptap->apa;
  
  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of AP[i,:] = A[i,:]*P */
#if defined(PROFILE_MatPtAPNumeric)
    ierr = PetscGetTime(&t0);CHKERRQ(ierr);
#endif
    anz  = ai[i+1] - ai[i];
    apnz = 0;
    for (j=0; j<anz; j++) {
      prow = aj[j];
      pnz  = pi[prow+1] - pi[prow];
      pcol = pj + pi[prow];
      pval = pa + pi[prow];
      for (k=0; k<pnz; k++) {
        apa[pcol[k]] += aa[j]*pval[k];
      }
      ierr = PetscLogFlops(2.0*pnz);CHKERRQ(ierr);
#if defined(PROFILE_MatPtAPNumeric)
      flops0 += 2.0*pnz;
#endif
    }
    aj += anz; aa += anz;
#if defined(PROFILE_MatPtAPNumeric)
    ierr = PetscGetTime(&tf);CHKERRQ(ierr);
    time_Cseq0 += tf - t0;
#endif
    
    /* Compute P^T*A*P using outer product P[i,:]^T*AP[i,:]. */
#if defined(PROFILE_MatPtAPNumeric)
    ierr = PetscGetTime(&t0);CHKERRQ(ierr);
#endif
    apj  = ptap->apj + ptap->api[i]; 
    apnz = ptap->api[i+1] - ptap->api[i];
    pnz  = pi[i+1] - pi[i];
    pcol = pj + pi[i];
    pval = pa + pi[i];
   
    /* Perform sparse axpy */
    for (j=0; j<pnz; j++) {
      crow   = pcol[j]; 
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      pvalj  = pval[j];
      nextap = 1;
      apcol  = apj[nextap];
      for (k=0; nextap<apnz; k++) {
        if (cjj[k] == apcol) {
          caj[k] += pvalj*apa[apcol];
          apcol   = apj[nextap++];
        }
      }
      ierr = PetscLogFlops(2.0*apnz);CHKERRQ(ierr);
#if defined(PROFILE_MatPtAPNumeric)
      flops1 += 2.0*apnz;
#endif
    }
#if defined(PROFILE_MatPtAPNumeric)
    ierr = PetscGetTime(&tf);CHKERRQ(ierr);
    time_Cseq1 += tf - t0;
#endif
    
    /* Zero the current row info for A*P */
    for (j=0; j<apnz; j++) {
      apcol      = apj[j];
      apa[apcol] = 0.;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#if defined(PROFILE_MatPtAPNumeric)
  printf("MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy time %g + %g, flops %d %d\n",time_Cseq0,time_Cseq1,flops0,flops1);
#endif
  PetscFunctionReturn(0);
}
Example #9
0
int main ( int argc, char* argv[] )
{
	/* parse command line arguments */
	std::string anArg;
	std::string modelRoot;
	#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || \
		defined(USE_MPI_GEMS) || defined(USE_MPI_KRC)
		int nb_ddc=0; //number of cores for DDC related processes
	#endif

	for( int i = 1; i < argc; i++ )
	{
		anArg = std::string( argv[i] );
		if( anArg == "--help" || anArg == "-h")
		{
			std::cout << "Usage: ogs [MODEL_ROOT] [OPTIONS]\n"
			          << "Where OPTIONS are:\n"
			          << "  -h [--help]               print this message and exit\n"
			          << "  -b [--build-info]         print build info and exit\n"
			          << "  --output-directory DIR    put output files into DIR\n"
			          << "  --version                 print ogs version and exit" << "\n";
			continue;
		}
		if( anArg == "--build-info" || anArg == "-b" )
		{
			std::cout << "ogs version: " << BuildInfo::OGS_VERSION << "\n"
			          << "ogs date: " << BuildInfo::OGS_DATE << "\n";
			std::cout << "git commit info: " << BuildInfo::GIT_COMMIT_INFO << "\n";
			std::cout << "build timestamp: " << BuildInfo::BUILD_TIMESTAMP << "\n";
			continue;
		}
		if( anArg == "--version" )
		{
			std::cout << BuildInfo::OGS_VERSION << "\n";
			continue;
		}
		if( anArg == "--model-root" || anArg == "-m" )
		{
			if (i+1 >= argc) {
				std::cerr << "Error: Parameter " << anArg << " needs an additional argument" << std::endl;
				std::exit(EXIT_FAILURE);
			}
			modelRoot = std::string( argv[++i] );
			continue;
		}
		if (anArg == "--output-directory")
		{
			if (i+1 >= argc) {
				std::cerr << "Error: Parameter " << anArg << " needs an additional argument" << std::endl;
				std::exit(EXIT_FAILURE);
			}
			std::string path = argv[++i];

			if (! path.empty()) defaultOutputPath = path;
			continue;
		}
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || \
	defined(USE_MPI_GEMS) || defined(USE_MPI_KRC)
		std::string decompositions;
		if( anArg == "--domain-decomposition" || anArg == "-ddc" )
		{
			decompositions = std::string( argv[++i] );
			nb_ddc = atoi(decompositions.c_str());
			continue;
		}
#endif
		// anything left over must be the model root, unless already found
		if ( modelRoot == "" )
			modelRoot = std::string( argv[i] );
	} // end of parse argc loop

	if( argc > 1 && modelRoot == "" ) // non-interactive mode and no model given
		exit(0);             // e.g. just wanted the build info

	std::string solver_pkg_name = BuildInfo::SOLVER_PACKAGE_NAME;
	// No default linear solver package is in use.
	if(solver_pkg_name.find("Default") == std::string::npos)
	{
		std::cout << "\nWarning: " << solver_pkg_name
		<< " other than the OGS default one is in use." <<std::endl;
		std::cout << "         The solver setting may need to be adjusted for the solution accuracy!" << std::endl;
	}

	char* dateiname(NULL);
#ifdef SUPERCOMPUTER
// *********************************************************************
// buffered output ... important for performance on cray
// (unbuffered output is limited to 10 bytes per second)
// [email protected] 11.10.2007

	char buf[1024 * 1024];
	int bsize;

	bsize = 1024 * 1024; // question: what happens if buffer is full?
	                     // according to documentation the buffer is flushed when full.
	                     // If we have a lot of output, increasing buffer is usefull.
	if(bsize > 0)
//        bufstd = malloc(bsize);
		setvbuf(stdout, buf, _IOFBF, bsize);
	//**********************************************************************
#endif
/*---------- MPI Initialization ----------------------------------*/
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || \
	defined(USE_MPI_GEMS) || defined(USE_MPI_KRC)
	printf("Before MPI_Init\n");
#if defined(USE_MPI_GEMS)
	int prov;
	MPI_Init_thread(&argc,&argv,MPI_THREAD_FUNNELED, &prov);
#else
	MPI_Init(&argc,&argv);
#endif
	MPI_Barrier (MPI_COMM_WORLD); // 12.09.2007 WW
	elapsed_time_mpi = -MPI_Wtime(); // 12.09.2007 WW
	bool splitcomm_flag;
	int np;
	MPI_Comm_size(MPI_COMM_WORLD, &np);
	splitcomm_flag = SplitMPI_Communicator::CreateCommunicator(MPI_COMM_WORLD, np, nb_ddc);
	time_ele_paral = 0.0;
#endif
/*---------- MPI Initialization ----------------------------------*/


#ifdef USE_PETSC
	int rank, r_size;
	PetscLogDouble v1,v2;
	char help[] = "OGS with PETSc \n";
	//PetscInitialize(argc, argv, help);
	PetscInitialize(&argc,&argv,(char *)0,help);
	//kg44 quick fix to compile PETSC with version PETSCV3.4
#ifdef USEPETSC34
       PetscTime(&v1);
#else
       PetscGetTime(&v1);
#endif
	MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
	MPI_Comm_size(PETSC_COMM_WORLD, &r_size);
	PetscSynchronizedPrintf(PETSC_COMM_WORLD, "===\nUse PETSc solver");
	PetscSynchronizedPrintf(PETSC_COMM_WORLD, "Number of CPUs: %d, rank: %d\n", r_size, rank);
#endif

/*---------- LIS solver -----------------------------------------*/
#ifdef LIS
	//Initialization of the lis solver.
	lis_initialize(&argc, &argv);
#endif
/*========================================================================*/
/* Kommunikation mit Betriebssystem */
	/* Timer fuer Gesamtzeit starten */
#ifdef TESTTIME
	TStartTimer(0);
#endif
	/* Intro ausgeben */
#if defined(USE_MPI) //WW
	if(myrank == 0)
#endif
#ifdef USE_PETSC
        if(rank == 0 )
#endif

	DisplayStartMsg();
	/* Speicherverwaltung initialisieren */
	if (!InitMemoryTest())
	{
		DisplayErrorMsg("Fehler: Speicherprotokoll kann nicht erstellt werden!");
		DisplayErrorMsg("        Programm vorzeitig beendet!");
		return 1; // LB changed from 0 to 1 because 0 is indicating success
	}
	if( argc == 1 )               // interactive mode

		dateiname = ReadString();
	else                         // non-interactive mode
	{
		if ( argc == 2 )     // a model root was supplied
		{
			dateiname = (char*) Malloc((int)strlen(argv[1]) + 1);
			dateiname = strcpy(dateiname,argv[1]);
		}
		else                // several args supplied
		if( modelRoot != "")
		{
			dateiname = (char*) Malloc( (int) modelRoot.size() + 1 );
			dateiname = strcpy( dateiname, modelRoot.c_str() );
		}
		DisplayMsgLn(dateiname);
	}
	//WW  DisplayMsgLn("");
	//WW  DisplayMsgLn("");
	// ----------23.02.2009. WW-----------------

	// LB Check if file exists
	std::string tmpFilename = dateiname;
	tmpFilename.append(".pcs");
	if(!IsFileExisting(tmpFilename))
	{
		std::cout << " Error: Cannot find file " << dateiname << "\n";
		return 1;
	}

	// If no option is given, output files are placed in the same directory as the input files
	if (defaultOutputPath.empty()) defaultOutputPath = pathDirname(std::string(dateiname));

	FileName = dateiname;
	size_t indexChWin, indexChLinux;
	indexChWin = indexChLinux = 0;
	indexChWin = FileName.find_last_of('\\');
	indexChLinux = FileName.find_last_of('/');
	//
	if(indexChWin != std::string::npos)
		FilePath = FileName.substr(0,indexChWin) + "\\";
	else if(indexChLinux != std::string::npos)
		FilePath = FileName.substr(0,indexChLinux) + "/";
	// ---------------------------WW
	Problem* aproblem = new Problem(dateiname);
#ifdef USE_PETSC
	aproblem->setRankandSize(rank, r_size);
#endif
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || defined(USE_MPI_GEMS)  || defined(USE_MPI_KRC)
	aproblem->setRankandSize(myrank, mysize);

	if (myrank != MPI_UNDEFINED)
	{
#endif
	aproblem->Euler_TimeDiscretize();
	delete aproblem;
	aproblem = NULL;
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || defined(USE_MPI_GEMS)  || defined(USE_MPI_KRC)
	  }

	//sending killing signals to ranks of group_IPQC, only when the group exists
	if (splitcomm_flag == true){
		int signal = -1, rank_IPQC, mysize_IPQC = np - nb_ddc;
		for (int i=0; i< mysize_IPQC; i++){
			rank_IPQC = mysize + i;
			MPI_Send(&signal, 1, MPI_INT, rank_IPQC, 0, MPI_COMM_WORLD);
		}
 	  }

#endif


	if(ClockTimeVec.size()>0)
		ClockTimeVec[0]->PrintTimes();  //CB time
	DestroyClockTime();
#ifdef TESTTIME
#if defined(USE_MPI)
     if(myrank == 0)
#endif
#if defined(USE_PETSC)
     if(rank == 0)
#endif
	std::cout << "Simulation time: " << TGetTimer(0) << "s" << "\n";
#endif
	/* Abspann ausgeben */
/*--------- MPI Finalize ------------------*/
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || defined(USE_MPI_KRC)
	elapsed_time_mpi += MPI_Wtime(); // 12.09.2007 WW
	std::cout << "\n *** Total CPU time of parallel modeling: " << elapsed_time_mpi <<
	"\n";                                                                          //WW
	// Count CPU time of post time loop WW
	MPI_Finalize();
#endif
/*--------- MPI Finalize ------------------*/
/*--------- LIS Finalize ------------------*/
#ifdef LIS
	lis_finalize();
#endif
/*--------- LIS Finalize ------------------*/

	free(dateiname);

#ifdef USE_PETSC
	//kg44 quick fix to compile PETSC with version PETSCV3.4
#ifdef USEPETSC34
       PetscTime(&v2);
#else
       PetscGetTime(&v2);
#endif


   PetscPrintf(PETSC_COMM_WORLD,"\t\n>>Total elapsed time by using PETSC:%f s\n",v2-v1);

   PetscFinalize();
#endif

	return 0;
}
Example #10
0
void PETScLinearSolver::Solver()
{
  
   //TEST
#ifdef TEST_MEM_PETSC
   PetscLogDouble mem1, mem2;
   PetscMemoryGetCurrentUsage(&mem1);
#endif
 
  /* 
  //TEST
  PetscViewer viewer;
  PetscViewerASCIIOpen(PETSC_COMM_WORLD, "x.txt", &viewer);
  PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
  PetscObjectSetName((PetscObject)x,"Solution");
  VecView(x, viewer);   
  */


   int its; 
   PetscLogDouble v1,v2;
   KSPConvergedReason reason;

   // #define PETSC34
   //kg44 quick fix to compile PETSC with version PETSCV3.4
#ifdef USEPETSC34
   PetscTime(&v1);
#else
   PetscGetTime(&v1);
#endif

#if (PETSC_VERSION_MAJOR == 3) && (PETSC_VERSION_MINOR > 4)
   KSPSetOperators(lsolver, A, A);
#else
   KSPSetOperators(lsolver, A, A, DIFFERENT_NONZERO_PATTERN);
#endif

   KSPSolve(lsolver, b, x);
  
   KSPGetConvergedReason(lsolver,&reason); //CHKERRQ(ierr);
   if (reason==KSP_DIVERGED_INDEFINITE_PC)
   {
     PetscPrintf(PETSC_COMM_WORLD,"\nDivergence because of indefinite preconditioner;\n");
     PetscPrintf(PETSC_COMM_WORLD,"Run the executable again but with -pc_factor_shift_positive_definite option.\n");
   }
   else if (reason<0)
   {
     PetscPrintf(PETSC_COMM_WORLD,"\nOther kind of divergence: this should not happen.\n");
   }
   else 
   {
     const char *slv_type;
     const char *prc_type;
     KSPGetType(lsolver, &slv_type);
     PCGetType(prec, &prc_type);

      PetscPrintf(PETSC_COMM_WORLD,"\n================================================");         
      PetscPrintf(PETSC_COMM_WORLD, "\nLinear solver %s with %s preconditioner",
                                    slv_type, prc_type);         
      KSPGetIterationNumber(lsolver,&its); //CHKERRQ(ierr);
      PetscPrintf(PETSC_COMM_WORLD,"\nConvergence in %d iterations.\n",(int)its);
      PetscPrintf(PETSC_COMM_WORLD,"\n================================================");           
   }
   PetscPrintf(PETSC_COMM_WORLD,"\n");

   //VecAssemblyBegin(x);
   //VecAssemblyEnd(x);

   //kg44 quick fix to compile PETSC with version PETSCV3.4
#ifdef USEPETSC34
   PetscTime(&v2);
#else
   PetscGetTime(&v2);
#endif

   time_elapsed += v2-v1;

   
#define aTEST_OUT
#ifdef TEST_OUT
  //TEST
   PetscViewer viewer;
   PetscViewerASCIIOpen(PETSC_COMM_WORLD, "x2.txt", &viewer);
   PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
   PetscObjectSetName((PetscObject)A,"Matrix");
   MatView(A, viewer);
   PetscObjectSetName((PetscObject)x,"Solution");
   VecView(x, viewer);
   PetscObjectSetName((PetscObject)b,"RHS");
   VecView(b, viewer);   
    VecDestroy(&b);
  VecDestroy(&x);
  MatDestroy(&A);
  if(lsolver) KSPDestroy(&lsolver);
  // if(prec) PCDestroy(&prec);
  if(global_x0)
    delete []  global_x0;
  if(global_x1)
    delete []  global_x1;
   PetscFinalize();
   exit(0);
#endif


#ifdef TEST_MEM_PETSC
  //TEST
   PetscMemoryGetCurrentUsage(&mem2);
   PetscPrintf(PETSC_COMM_WORLD, "###Memory usage by solver. Before :%f After:%f Increase:%d\n", mem1, mem2, (int)(mem2 - mem1));
#endif
}
Example #11
0
/* This function returns the suitable solver for pressure. linear system */
KSP Solver_get_pressure_solver(Mat lhs, Parameters *params) {

    KSP solver;
    PC pc;
    double rtol;
    int ierr;
    PetscLogDouble T1, T2;

    rtol = params->resmax;
    ierr = KSPCreate(PETSC_COMM_WORLD, &solver); PETScErrAct(ierr);

    short int hasnullspace = NO;
    if (hasnullspace) {

        MatNullSpace nullsp;
        MatNullSpaceCreate(PETSC_COMM_WORLD, PETSC_TRUE, 0, PETSC_NULL, &nullsp);
        KSPSetNullSpace(solver, nullsp);
        //MatNullSpaceDestroy(nullsp);
    }

    ierr = KSPSetOperators(solver, lhs, lhs, SAME_PRECONDITIONER); PETScErrAct(ierr);
    ierr = KSPSetType(solver, KSPGMRES); PETScErrAct(ierr);
    ierr = KSPGMRESSetRestart(solver, 20); PETScErrAct(ierr);
    ierr = KSPSetTolerances(solver, rtol, PETSC_DEFAULT, PETSC_DEFAULT, 300); PETScErrAct(ierr);

    ierr = KSPGetPC(solver, &pc); PETScErrAct(ierr);
    //PCSetType(pc, PCNONE);
    //PCSetType(pc, PCASM);

    PCSetType(pc, PCHYPRE);
    PCHYPRESetType(pc,"boomeramg");

    ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_max_levels", "25"); PETScErrAct(ierr);
    ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_strong_threshold", "0.0"); PETScErrAct(ierr);
    ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_relax_type_all", "SOR/Jacobi"); PETScErrAct(ierr);

    //ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_cycle_type", ""); PETScErrAct(ierr);
    //ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_cycle_type", "V"); PETScErrAct(ierr);
    //ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_coarsen_type", "PMIS"); PETScErrAct(ierr);
    //ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_truncfactor", "0.9"); PETScErrAct(ierr);

    /*******************************************************************************************************/
    /*******************************************************************************************************/
    /*******************************************************************************************************/
    /* Hypre-Petsc Interface.
The most important parameters to be set are
1- Strong Threshold
2- Truncation Factor
3- Coarsennig Type
*/ 

    /* Between 0 to 1 */
    /* "0 "gives better convergence rate (in 3D). */
    /* Suggested values (By Hypre manual): 0.25 for 2D, 0.5 for 3D

 ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_strong_threshold", "0.0"); PETScErrAct(ierr);
*/
    /*******************************************************************************************************/

    /* Available Options:
 "CLJP","Ruge-Stueben","modifiedRuge-Stueben","Falgout", "PMIS", "HMIS"
 Falgout is usually the best.
 ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_coarsen_type", "Falgout"); PETScErrAct(ierr);
*/
    /*******************************************************************************************************/

    /* Availble options:
 "local", "global"

 ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_measure_type", "local"); PETScErrAct(ierr);
*/
    /*******************************************************************************************************/

    /* Availble options:
 Jacobi,sequential-Gauss-Seidel,
 SOR/Jacobi,backward-SOR/Jacobi,symmetric-SOR/Jacobi,Gaussian-elimination

 Important: If you are using a symmetric KSP solver (like CG), you should use a symmetric smoother
 here.

 ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_relax_type_all", "symmetric-SOR/Jacobi"); PETScErrAct(ierr);
*/
    /*******************************************************************************************************/

    /* Available options:
 "V", "W"

 ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_cycle_type", "V"); PETScErrAct(ierr);
*/
    /*******************************************************************************************************/

    /* Availble options:
 "classical", "", "", "direct", "multipass", "multipass-wts", "ext+i", "ext+i-cc", "standard", "standard-wts", "", "", "FF", "FF1"

 ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_interp_type", ""); PETScErrAct(ierr);
*/
    /*******************************************************************************************************/
    /* Available options:
Greater than zero.
Use zero for the best convergence. However, if you have memory problems, use greate than zero to save some
memory.

 ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_truncfactor", "0.0"); PETScErrAct(ierr);
 */




    /*
Preconditioner Generation Options
PCSetType(pc,PCHYPRE) or -pc_type hypre
-pc_hypre_boomeramg_max_levels nmax
-pc_hypre_boomeramg_truncfactor
-pc_hypre_boomeramg_strong_threshold
-pc_hypre_boomeramg_max_row_sum
-pc_hypre_boomeramg_no_CF
-pc_hypre_boomeramg_coarsen_type CLJP,Ruge-Stueben,modifiedRuge-Stueben,
-pc_hypre_boomeramg_measure_type local,global


Preconditioner Iteration Options
-pc_hypre_boomeramg_relax_type_all Jacobi,sequential-Gauss-Seidel,
   SOR/Jacobi,backward-SOR/Jacobi,symmetric-SOR/Jacobi,Gaussian-eliminat
-pc_hypre_boomeramg_relax_type_fine
-pc_hypre_boomeramg_relax_type_down
-pc_hypre_boomeramg_relax_type_up
-pc_hypre_boomeramg_relax_weight_all r
-pc_hypre_boomeramg_outer_relax_weight_all r
-pc_hypre_boomeramg_grid_sweeps_down n
-pc_hypre_boomeramg_grid_sweeps_up n
-pc_hypre_boomeramg_grid_sweeps_coarse n
-pc_hypre_boomeramg_tol tol
-pc_hypre_boomeramg_max_iter it

*/



    /*
 //ierr = PCSetType(pc, PCASM); PETScErrAct(ierr);
 ierr = PCSetType(pc, PCNONE); PETScErrAct(ierr);

 //ierr = PCSetType(pc, PCILU); PETScErrAct(ierr);

 //ierr = PCSetType(pc, PCBJACOBI); PETScErrAct(ierr);
 //ierr = PCSetType(pc, PCLU); PETScErrAct(ierr);
 //ierr = PCSetType(pc, PCEISENSTAT); PETScErrAct(ierr);
 //ierr = PCSetType(pc, PCSOR); PETScErrAct(ierr);
 //ierr = PCSetType(pc, PCJACOBI); PETScErrAct(ierr);
 //ierr = PCSetType(pc, PCNONE); PETScErrAct(ierr);

*/
    /*
 ierr = KSPGetPC(solver, &pc); PETScErrAct(ierr);
 ierr = PCSetType(pc, PCILU); PETScErrAct(ierr);
 ierr = PCFactorSetLevels(pc, 3); PETScErrAct(ierr);

 //ierr = PCFactorSetUseDropTolerance(pc, 1e-3, .1, 50); PETScErrAct(ierr);
 //ierr = PCFactorSetFill(pc, 30.7648); PETScErrAct(ierr);

 ierr = PCFactorSetReuseOrdering(pc, PETSC_TRUE); PETScErrAct(ierr);
 ierr = PCFactorSetReuseFill(pc, PETSC_TRUE); PETScErrAct(ierr);
 ierr = PCFactorSetAllowDiagonalFill(pc); PETScErrAct(ierr);

 //ierr = PCFactorSetUseInPlace(pc); PETScErrAct(ierr);
*/
    ierr = KSPSetInitialGuessNonzero(solver, PETSC_TRUE); PETScErrAct(ierr);
    ierr = PetscGetTime(&T1);PETScErrAct(ierr);
    ierr = KSPSetFromOptions(solver); PETScErrAct(ierr);

    ierr = PetscGetTime(&T2);PETScErrAct(ierr);
    PetscPrintf(PCW, "Setup time for the Pressure solver was:%f\n", (T2 - T1));
    ierr = KSPView(solver, PETSC_VIEWER_STDOUT_WORLD); PETScErrAct(ierr);
    return solver;

}
Example #12
0
int main ( int argc, char* argv[] )
{
	/* parse command line arguments */
	std::string anArg;
	std::string modelRoot;
	for( int i = 1; i < argc; i++ )
	{
		anArg = std::string( argv[i] );
		if( anArg == "--help" || anArg == "-h")
		{
			std::cout << "Usage: ogs [MODEL_ROOT] [OPTIONS]\n"
			          << "Where OPTIONS are:\n"
			          << "  -h [--help]       print this message and exit\n"
			          << "  -b [--build-info] print build info and exit\n"
			          << "  --version         print ogs version and exit" << "\n";
			continue;
		}
		if( anArg == "--build-info" || anArg == "-b" )
		{
			std::cout << "ogs version: " << OGS_VERSION << "\n"
			          << "ogs date: " << OGS_DATE << "\n";
#ifdef CMAKE_CMD_ARGS
			std::cout << "cmake command line arguments: " << CMAKE_CMD_ARGS << "\n";
#endif // CMAKE_CMD_ARGS
#ifdef GIT_COMMIT_INFO
			std::cout << "git commit info: " << GIT_COMMIT_INFO << "\n";
#endif // GIT_COMMIT_INFO
#ifdef SVN_REVISION
			std::cout << "subversion info: " << SVN_REVISION << "\n";
#endif // SVN_REVISION
#ifdef BUILD_TIMESTAMP
			std::cout << "build timestamp: " << BUILD_TIMESTAMP << "\n";
#endif // BUILD_TIMESTAMP
			continue;
		}
		if( anArg == "--version" )
		{
			std::cout << OGS_VERSION << "\n";
			continue;
		}
		if( anArg == "--model-root" || anArg == "-m" )
		{
			modelRoot = std::string( argv[++i] );
			continue;
		}
		// anything left over must be the model root, unless already found
		if ( modelRoot == "" )
			modelRoot = std::string( argv[i] );
	} // end of parse argc loop

	if( argc > 1 && modelRoot == "" ) // non-interactive mode and no model given
		exit(0);             // e.g. just wanted the build info

	char* dateiname(NULL);
#ifdef SUPERCOMPUTER
// *********************************************************************
// buffered output ... important for performance on cray
// (unbuffered output is limited to 10 bytes per second)
// [email protected] 11.10.2007

	char buf[1024 * 1024];
	int bsize;

	bsize = 1024 * 1024; // question: what happens if buffer is full?
	                     // according to documentation the buffer is flushed when full.
	                     // If we have a lot of output, increasing buffer is usefull.
	if(bsize > 0)
//        bufstd = malloc(bsize);
		setvbuf(stdout, buf, _IOFBF, bsize);
	//**********************************************************************
#endif
/*---------- MPI Initialization ----------------------------------*/
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || \
	defined(USE_MPI_GEMS) || defined(USE_MPI_KRC) 
	printf("Before MPI_Init\n");
#if defined(USE_MPI_GEMS)
	int prov;
	MPI_Init_thread(&argc,&argv,MPI_THREAD_FUNNELED, &prov);
#else
	MPI_Init(&argc,&argv);
#endif
	MPI_Barrier (MPI_COMM_WORLD); // 12.09.2007 WW
	elapsed_time_mpi = -MPI_Wtime(); // 12.09.2007 WW
	MPI_Comm_size(MPI_COMM_WORLD,&mysize);
	MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
	std::cout << "After MPI_Init myrank = " << myrank << '\n';
	time_ele_paral = 0.0;
#endif
/*---------- MPI Initialization ----------------------------------*/


#ifdef USE_PETSC
	int rank, r_size;
	PetscLogDouble v1,v2;
	char help[] = "OGS with PETSc \n";
	//PetscInitialize(argc, argv, help);
	PetscInitialize(&argc,&argv,(char *)0,help);
	//kg44 quick fix to compile PETSC with version PETSCV3.4
#ifdef USEPETSC34
       PetscTime(&v1);
#else
       PetscGetTime(&v1);
#endif
	MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
	MPI_Comm_size(PETSC_COMM_WORLD, &r_size);
	PetscSynchronizedPrintf(PETSC_COMM_WORLD, "===\nUse PETSc solver");
	PetscSynchronizedPrintf(PETSC_COMM_WORLD, "Number of CPUs: %d, rank: %d\n", r_size, rank);
#endif




/*---------- LIS solver -----------------------------------------*/
#ifdef LIS
	//Initialization of the lis solver.
	lis_initialize(&argc, &argv);
#endif
/*========================================================================*/
/* Kommunikation mit Betriebssystem */
	/* Timer fuer Gesamtzeit starten */
#ifdef TESTTIME
	TStartTimer(0);
#endif
	/* Intro ausgeben */
#if defined(USE_MPI) //WW
	if(myrank == 0)
#endif
#ifdef USE_PETSC
        if(rank == 0 )
#endif

	DisplayStartMsg();
	/* Speicherverwaltung initialisieren */
	if (!InitMemoryTest())
	{
		DisplayErrorMsg("Fehler: Speicherprotokoll kann nicht erstellt werden!");
		DisplayErrorMsg("        Programm vorzeitig beendet!");
		return 1; // LB changed from 0 to 1 because 0 is indicating success
	}
	if( argc == 1 )               // interactive mode

		dateiname = ReadString();
	else                         // non-interactive mode
	{
		if ( argc == 2 )     // a model root was supplied
		{
			dateiname = (char*) Malloc((int)strlen(argv[1]) + 1);
			dateiname = strcpy(dateiname,argv[1]);
		}
		else                // several args supplied
		if( modelRoot != "")
		{
			dateiname = (char*) Malloc( (int) modelRoot.size() + 1 );
			dateiname = strcpy( dateiname, modelRoot.c_str() );
		}
		DisplayMsgLn(dateiname);
	}
	//WW  DisplayMsgLn("");
	//WW  DisplayMsgLn("");
	// ----------23.02.2009. WW-----------------

	// LB Check if file exists
	std::string tmpFilename = dateiname;
	tmpFilename.append(".pcs");
	if(!IsFileExisting(tmpFilename))
	{
		std::cout << " Error: Cannot find file " << dateiname << "\n";
		return 1;
	}

	FileName = dateiname;
	size_t indexChWin, indexChLinux;
	indexChWin = indexChLinux = 0;
	indexChWin = FileName.find_last_of('\\');
	indexChLinux = FileName.find_last_of('/');
	//
	if(indexChWin != std::string::npos)
		FilePath = FileName.substr(0,indexChWin) + "\\";
	else if(indexChLinux != std::string::npos)
		FilePath = FileName.substr(0,indexChLinux) + "/";
	// ---------------------------WW
	Problem* aproblem = new Problem(dateiname);
#ifdef USE_PETSC
	aproblem->setRankandSize(rank, r_size);
#endif
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || defined(USE_MPI_GEMS)  || defined(USE_MPI_KRC)
	aproblem->setRankandSize(myrank, mysize);
#endif
	
	aproblem->Euler_TimeDiscretize();
	delete aproblem;
	aproblem = NULL;
	if(ClockTimeVec.size()>0)
		ClockTimeVec[0]->PrintTimes();  //CB time
	DestroyClockTime();
#ifdef TESTTIME
#if defined(USE_MPI)
     if(myrank == 0)
#endif
#if defined(USE_PETSC) 
     if(rank == 0)
#endif
	std::cout << "Simulation time: " << TGetTimer(0) << "s" << "\n";
#endif
	/* Abspann ausgeben */
/*--------- MPI Finalize ------------------*/
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || defined(USE_MPI_KRC)
	elapsed_time_mpi += MPI_Wtime(); // 12.09.2007 WW
	std::cout << "\n *** Total CPU time of parallel modeling: " << elapsed_time_mpi <<
	"\n";                                                                          //WW
	// Count CPU time of post time loop WW
	MPI_Finalize();
#endif
/*--------- MPI Finalize ------------------*/
/*--------- LIS Finalize ------------------*/
#ifdef LIS
	lis_finalize();
#endif
/*--------- LIS Finalize ------------------*/

	free(dateiname);

#ifdef USE_PETSC
	//kg44 quick fix to compile PETSC with version PETSCV3.4
#ifdef USEPETSC34
       PetscTime(&v2);
#else
       PetscGetTime(&v2);
#endif


   PetscPrintf(PETSC_COMM_WORLD,"\t\n>>Total elapsed time by using PETSC:%f s\n",v2-v1);

   PetscFinalize();
#endif

	return 0;
}
Example #13
0
int main(int argc,char **args)
{
   Vec            u;
   Mat            A;

   PetscErrorCode ierr;

   mv_MultiVectorPtr          eigenvectors;
   PetscScalar  *             eigs;
   PetscScalar  *             eigs_hist;
   double *                   resid;
   double *                   resid_hist;
   int                        iterations;
   PetscMPIInt                rank;
   int                        n_eigs = 1;
   int                         seed = 1;
   int                         i,j;
   PetscLogDouble              t1,t2,elapsed_time;
   DA                          da;
   double                      tol=1e-08;
   PetscTruth                  option_present;
   PetscTruth                  freepart=PETSC_FALSE;
   PetscTruth                  full_output=PETSC_FALSE;
   PetscInt                    m,n,p;
   KSP                        ksp;
   lobpcg_Tolerance           lobpcg_tol;
   int                        maxIt = 100;
   mv_InterfaceInterpreter    ii;
   lobpcg_BLASLAPACKFunctions blap_fn;
   aux_data_struct            aux_data;
/*
   PetscViewer                viewer;
*/
   PetscInt                   tmp_int;
   mv_TempMultiVector * xe;
   PetscInt  N;
   PetscScalar * xx;

   PetscInitialize(&argc,&args,(char *)0,help);
   ierr = PetscOptionsGetInt(PETSC_NULL,"-n_eigs",&tmp_int,&option_present);CHKERRQ(ierr);
   if (option_present)
      n_eigs = tmp_int;
   ierr = PetscOptionsGetReal(PETSC_NULL,"-tol", &tol,PETSC_NULL); CHKERRQ(ierr);
   ierr = PetscOptionsHasName(PETSC_NULL,"-freepart",&freepart); CHKERRQ(ierr);
   ierr = PetscOptionsHasName(PETSC_NULL,"-full_out",&full_output); CHKERRQ(ierr);
   ierr = PetscOptionsGetInt(PETSC_NULL,"-seed",&tmp_int,&option_present);CHKERRQ(ierr);
   if (option_present)
      seed = tmp_int;
   ierr = PetscOptionsGetInt(PETSC_NULL,"-itr",&tmp_int,&option_present);CHKERRQ(ierr);
   if (option_present)
      maxIt = tmp_int;

   if (seed<1)
    seed=1;

  /* we actually run our code twice: first time we solve small problem just to make sure
    that all program code is actually loaded into memory; then we solve the problem
    we are interested in; this trick is done for accurate timing
  */
  PreLoadBegin(PETSC_TRUE,"grid and matrix assembly");

  /* "create" the grid and stencil data; on first run we form small problem */
  if (PreLoadIt==0)
  {
      /* small problem */
      ierr=DACreate3d(PETSC_COMM_WORLD,DA_NONPERIODIC,DA_STENCIL_STAR,10,10,10,
            1,PETSC_DECIDE,1,1,1,0,0,0,&da); CHKERRQ(ierr);
  }
  else
  {
     /* actual problem */
      if (freepart)     /* petsc determines partitioning */
      {
        ierr=DACreate3d(PETSC_COMM_WORLD,DA_NONPERIODIC,DA_STENCIL_STAR,-10,-10,-10,
            PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,1,1,0,0,0,&da); CHKERRQ(ierr);
      }
      else             /* (1,NP,1) partitioning */
      {
        ierr=DACreate3d(PETSC_COMM_WORLD,DA_NONPERIODIC,DA_STENCIL_STAR,-10,-10,-10,
            1,PETSC_DECIDE,1,1,1,0,0,0,&da); CHKERRQ(ierr);
      }

      /* now we print what partitioning is chosen */
      ierr=DAGetInfo(da,PETSC_NULL,PETSC_NULL,PETSC_NULL,PETSC_NULL,&m,
                      &n,&p,PETSC_NULL,PETSC_NULL,PETSC_NULL,PETSC_NULL); CHKERRQ(ierr);
      PetscPrintf(PETSC_COMM_WORLD,"Partitioning: %u %u %u\n",m,n,p);
  }

  /* create matrix, whose nonzero structure and probably partitioning corresponds to
  grid and stencil structure */
  ierr=DAGetMatrix(da,MATMPIAIJ,&A); CHKERRQ(ierr);

  /* fill the matrix with values. I intend to build 7-pt Laplas */
  /* this procedure includes matrix assembly */
  ierr=FillMatrix(da,A); CHKERRQ(ierr);

  /*
  PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);
  MatView(A,PETSC_VIEWER_STDOUT_WORLD);
  PetscViewerDestroy(viewer);
  */

  /*
     Create parallel vectors.
      - We form 1 vector from scratch and then duplicate as needed.
  */

  ierr = DACreateGlobalVector(da,&u); CHKERRQ(ierr);
  /* ierr = VecSetFromOptions(u);CHKERRQ(ierr); */

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                Create the linear solver and set various options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Here we START measuring time for preconditioner setup */
  PreLoadStage("preconditioner setup");
  ierr = PetscGetTime(&t1);CHKERRQ(ierr);

  /*
     Create linear solver context
  */
  ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  ierr = KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

  /*
    Set runtime options, e.g.,
        -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol>
    These options will override those specified above as long as
    KSPSetFromOptions() is called _after_ any other customization
    routines.
  */
   ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);

   /* probably this call actually builds the preconditioner */
   ierr = KSPSetUp(ksp);CHKERRQ(ierr);

/* Here we STOP measuring time for preconditioner setup */
   PreLoadStage("solution");

   ierr = PetscGetTime(&t2);CHKERRQ(ierr);
   elapsed_time=t2-t1;
   if (PreLoadIt==1)
    PetscPrintf(PETSC_COMM_WORLD,"Preconditioner setup, seconds: %f\n",elapsed_time);

   /* request memory for eig-vals */
   ierr = PetscMalloc(sizeof(PetscScalar)*n_eigs,&eigs); CHKERRQ(ierr);

   /* request memory for eig-vals history */
   ierr = PetscMalloc(sizeof(PetscScalar)*n_eigs*(maxIt+1),&eigs_hist); CHKERRQ(ierr);

   /* request memory for resid. norms */
   ierr = PetscMalloc(sizeof(double)*n_eigs,&resid); CHKERRQ(ierr);

   /* request memory for resid. norms hist. */
   ierr = PetscMalloc(sizeof(double)*n_eigs*(maxIt+1),&resid_hist); CHKERRQ(ierr);

   LOBPCG_InitRandomContext();

   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);


   PETSCSetupInterpreter( &ii );
   eigenvectors = mv_MultiVectorCreateFromSampleVector(&ii, n_eigs,u);


   xe = (mv_TempMultiVector *) mv_MultiVectorGetData( eigenvectors );
   /*
   VecView( (Vec)xe->vector[0],PETSC_VIEWER_STDOUT_WORLD);
   */

   for (i=0; i<seed; i++) /* this cycle is to imitate changing random seed */
      mv_MultiVectorSetRandom (eigenvectors, 1234);
   /*
   VecView( (Vec)xe->vector[0],PETSC_VIEWER_STDOUT_WORLD);
   */

   VecGetSize( (Vec)xe->vector[0], &N );
   N=mv_TempMultiVectorHeight( xe );
   VecGetArray( (Vec)xe->vector[0],&xx);

   lobpcg_tol.absolute = tol;
   lobpcg_tol.relative = 1e-50;

   #ifdef PETSC_USE_COMPLEX
      blap_fn.zpotrf = PETSC_zpotrf_interface;
      blap_fn.zhegv = PETSC_zsygv_interface;
   #else
      blap_fn.dpotrf = PETSC_dpotrf_interface;
      blap_fn.dsygv = PETSC_dsygv_interface;
   #endif

   aux_data.A = A;
   aux_data.ksp = ksp;
   aux_data.ii = ii;

/* Here we START measuring time for solution process */
   ierr = PetscGetTime(&t1);CHKERRQ(ierr);

  #ifdef PETSC_USE_COMPLEX
   lobpcg_solve_complex(
              eigenvectors,             /*input-initial guess of e-vectors */
              &aux_data,                /*input-matrix A */
              OperatorAMultiVector,     /*input-operator A */
              NULL,                     /*input-matrix B */
              NULL,                     /*input-operator B */
              &aux_data,                /*input-matrix T */
              Precond_FnMultiVector,    /*input-operator T */
              NULL,                     /*input-matrix Y */
              blap_fn,                  /*input-lapack functions */
              lobpcg_tol,               /*input-tolerances */
              PreLoadIt? maxIt:1,       /*input-max iterations */
              !rank && PreLoadIt,       /*input-verbosity level */

              &iterations,              /*output-actual iterations */
              (komplex *) eigs,                     /*output-eigenvalues */
              (komplex *) eigs_hist,                /*output-eigenvalues history */
              n_eigs,                   /*output-history global height */
              resid,                    /*output-residual norms */
              resid_hist ,              /*output-residual norms history */
              n_eigs                    /*output-history global height  */
    );

   #else

   lobpcg_solve_double( eigenvectors,
                 &aux_data,
              OperatorAMultiVector,
              NULL,
              NULL,
              &aux_data,
              Precond_FnMultiVector,
              NULL,
              blap_fn,
              lobpcg_tol,
              PreLoadIt? maxIt:1,
              !rank && PreLoadIt,
              &iterations,

          eigs,                    /* eigenvalues; "lambda_values" should point to array
                                       containing <blocksize> doubles where <blocksize> is the
                                       width of multivector "blockVectorX" */

          eigs_hist,
                                      /* eigenvalues history; a pointer to the entries of the
                                         <blocksize>-by-(<maxIterations>+1) matrix stored in
                                         fortran-style. (i.e. column-wise) The matrix may be
                                         a submatrix of a larger matrix, see next argument */

              n_eigs,                  /* global height of the matrix (stored in fotran-style)
                                         specified by previous argument */

          resid,
                                      /* residual norms; argument should point to
                                         array of <blocksize> doubles */

          resid_hist ,
                                      /* residual norms history; a pointer to the entries of the
                                         <blocksize>-by-(<maxIterations>+1) matrix stored in
                                         fortran-style. (i.e. column-wise) The matrix may be
                                         a submatrix of a larger matrix, see next argument */
              n_eigs
                                      /* global height of the matrix (stored in fotran-style)
                                         specified by previous argument */
   );

   #endif

/* Here we STOP measuring time for solution process */
  ierr = PetscGetTime(&t2);CHKERRQ(ierr);
  elapsed_time=t2-t1;
  if (PreLoadIt)
   PetscPrintf(PETSC_COMM_WORLD,"Solution process, seconds: %e\n",elapsed_time);

  if (PreLoadIt && full_output)
  {
      PetscPrintf(PETSC_COMM_WORLD,"Output from LOBPCG, eigenvalues:\n");
      for (i=0;i<n_eigs;i++)
    {
        ierr = PetscPrintf(PETSC_COMM_WORLD,"%e\n",PetscRealPart(eigs[i]));
        CHKERRQ(ierr);
    }

      PetscPrintf(PETSC_COMM_WORLD,"Output from LOBPCG, eigenvalues history:\n");
      for (j=0; j<iterations+1; j++)
         for (i=0;i<n_eigs;i++)
         {
            ierr = PetscPrintf(PETSC_COMM_WORLD,"%e\n",PetscRealPart(*(eigs_hist+j*n_eigs+i)));
            CHKERRQ(ierr);
     }
      PetscPrintf(PETSC_COMM_WORLD,"Output from LOBPCG, residual norms:\n");
      for (i=0;i<n_eigs;i++)
    {
        ierr = PetscPrintf(PETSC_COMM_WORLD,"%e\n",resid[i]);
        CHKERRQ(ierr);
    }

      PetscPrintf(PETSC_COMM_WORLD,"Output from LOBPCG, residual norms history:\n");
      for (j=0; j<iterations+1; j++)
         for (i=0;i<n_eigs;i++)
         {
            ierr = PetscPrintf(PETSC_COMM_WORLD,"%e\n",*(resid_hist+j*n_eigs+i));
            CHKERRQ(ierr);
     }
   }
  /*
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
  */
   ierr = VecDestroy(u);CHKERRQ(ierr);
   ierr = MatDestroy(A);CHKERRQ(ierr);
   ierr = KSPDestroy(ksp);CHKERRQ(ierr);
   ierr = DADestroy(da); CHKERRQ(ierr);

   LOBPCG_DestroyRandomContext();
   mv_MultiVectorDestroy(eigenvectors);

   /* free memory used for eig-vals */
   ierr = PetscFree(eigs); CHKERRQ(ierr);
   ierr = PetscFree(eigs_hist); CHKERRQ(ierr);
   ierr = PetscFree(resid); CHKERRQ(ierr);
   ierr = PetscFree(resid_hist); CHKERRQ(ierr);

  /*
     Always call PetscFinalize() before exiting a program.  This routine
       - finalizes the PETSc libraries as well as MPI
       - provides summary and diagnostic information if certain runtime
         options are chosen (e.g., -log_summary).
  */

  PreLoadEnd();
  ierr = PetscFinalize();CHKERRQ(ierr);
  return 0;
}
Example #14
0
PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat R,PetscReal fill,Mat *C)
{
  PetscErrorCode      ierr;
  Mat                 P;
  PetscInt            *rti,*rtj;
  Mat_RARt            *rart;
  PetscContainer      container;
  MatTransposeColoring matcoloring;
  ISColoring           iscoloring;
  Mat                  Rt_dense,RARt_dense;
  PetscLogDouble       GColor=0.0,MCCreate=0.0,MDenCreate=0.0,t0,tf,etime=0.0;
  Mat_SeqAIJ           *c;

  PetscFunctionBegin;
  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  /* create symbolic P=Rt */
  ierr = MatGetSymbolicTranspose_SeqAIJ(R,&rti,&rtj);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,R->cmap->n,R->rmap->n,rti,rtj,PETSC_NULL,&P);CHKERRQ(ierr);

  /* get symbolic C=Pt*A*P */
  ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
  (*C)->rmap->bs = R->rmap->bs;
  (*C)->cmap->bs = R->rmap->bs;

  /* create a supporting struct */
  ierr = PetscNew(Mat_RARt,&rart);CHKERRQ(ierr);

  /* attach the supporting struct to C */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,rart);CHKERRQ(ierr);
  ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_RARt);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)(*C),"Mat_RARt",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  etime += tf - t0;

  /* Create MatTransposeColoring from symbolic C=R*A*R^T */
  c=(Mat_SeqAIJ*)(*C)->data;
  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  GColor += tf - t0;

  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr);
  rart->matcoloring = matcoloring;
  ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  MCCreate += tf - t0;

  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  /* Create Rt_dense */
  ierr = MatCreate(PETSC_COMM_SELF,&Rt_dense);CHKERRQ(ierr);
  ierr = MatSetSizes(Rt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr);
  ierr = MatSetType(Rt_dense,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(Rt_dense,PETSC_NULL);CHKERRQ(ierr);
  Rt_dense->assembled = PETSC_TRUE;
  rart->Rt            = Rt_dense;

  /* Create RARt_dense = R*A*Rt_dense */
  ierr = MatCreate(PETSC_COMM_SELF,&RARt_dense);CHKERRQ(ierr);
  ierr = MatSetSizes(RARt_dense,(*C)->rmap->n,matcoloring->ncolors,(*C)->rmap->n,matcoloring->ncolors);CHKERRQ(ierr);
  ierr = MatSetType(RARt_dense,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(RARt_dense,PETSC_NULL);CHKERRQ(ierr);
  rart->RARt = RARt_dense;

  /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
  ierr = PetscMalloc(A->rmap->n*4*sizeof(PetscScalar),&rart->work);CHKERRQ(ierr);

  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  MDenCreate += tf - t0;

  rart->destroy = (*C)->ops->destroy;
  (*C)->ops->destroy = MatDestroy_SeqAIJ_RARt;

  /* clean up */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(R,&rti,&rtj);CHKERRQ(ierr);
  ierr = MatDestroy(&P);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  {
  PetscReal density= (PetscReal)(c->nz)/(RARt_dense->rmap->n*RARt_dense->cmap->n);
  ierr = PetscInfo6(*C,"RARt_den %D %D; Rt_den %D %D, (RARt->nz %D)/(m*ncolors)=%g\n",RARt_dense->rmap->n,RARt_dense->cmap->n,Rt_dense->rmap->n,Rt_dense->cmap->n,c->nz,density);CHKERRQ(ierr);
  ierr = PetscInfo5(*C,"Sym = GetColor %g + MColorCreate %g + MDenCreate %g + other %g = %g\n",GColor,MCCreate,MDenCreate,etime,GColor+MCCreate+MDenCreate+etime);CHKERRQ(ierr);
  }
#endif
  PetscFunctionReturn(0);
}
/*
S^{-1} = ( G^T G )^{-1} G^T K G ( G^T G )^{-1}
       = A C A
S^{-T} = A^T (A C)^T
       = A^T C^T A^T, but A = G^T G which is symmetric
       = A C^T A
       = A G^T ( G^T K )^T A
       = A G^T K^T G A

*/
PetscErrorCode BSSCR_PCApplyTranspose_ScGtKG( PC pc, Vec x, Vec y )
{
	PC_SC_GtKG ctx = (PC_SC_GtKG)pc->data;
	
	KSP ksp;
	Mat F, Bt;
	Vec s,t,X;
	PetscLogDouble t0,t1;
	
	
	ksp = ctx->ksp_BBt;
	F = ctx->F;
	Bt = ctx->Bt;
	s = ctx->s;
	t = ctx->t;
	X = ctx->X;
	
	
	
	/* Apply scaled Poisson operator */
	/* scale x */ 
	/* ======================================================== 
	     NOTE:
	       I THINK TO OMIT THESE AS WE WANT TO UNSCALE THE 
	        PRECONDITIONER AS S IN THIS CASE IS NOT SCALED 
	======================================================== */
//	VecPointwiseDivide( x, x, ctx->X2 ); /* x <- x/X2 */  /* NEED TO BE SURE */
	
	if( ctx->BBt_has_cnst_nullspace == PETSC_TRUE ) {
		BSSCR_VecRemoveConstNullspace( x, PETSC_NULL );
	}
	PetscGetTime(&t0);
	KSPSolveTranspose( ksp, x, t ); /* t <- GtG_inv x */
	PetscGetTime(&t1);
	if (ctx->monitor_activated) {
		BSSCR_PCScBFBTSubKSPMonitor(ksp,1,(t1-t0));
	}
	
	/* Apply Bt */
	MatMult( Bt, t, s ); /* s <- G t */
	VecPointwiseMult( s, s, ctx->X1 ); /* s <- s * X1 */
	
	
	/* Apply F */
	VecPointwiseMult( s, s, ctx->Y1 ); /* s <- s * Y1 */
	MatMultTranspose( F, s, X ); /* X <- K s */
	VecPointwiseMult( X, X, ctx->X1 ); /* X <- X * X1 */
	
	/* Apply B */
	VecPointwiseMult( X, X, ctx->Y1 ); /* X <- X * Y1 */
	MatMultTranspose( Bt, X, t ); /* t <- Gt X */
	
	if( ctx->BBt_has_cnst_nullspace == PETSC_TRUE ) {
		BSSCR_VecRemoveConstNullspace( t, PETSC_NULL );
	}
	PetscGetTime(&t0);
	KSPSolveTranspose( ksp, t, y ); /* y <- GtG_inv t */
	PetscGetTime(&t1);
	if (ctx->monitor_activated) {
		BSSCR_PCScBFBTSubKSPMonitor(ksp,2,(t1-t0));
	}
	
	VecPointwiseMult( y, y, ctx->Y2 ); /* y <- y/Y2 */ 
	
	
	/* undo modification made to x on entry */
//	VecPointwiseMult( x, x, ctx->X2 ); /* x <- x/X2 */ /* NEED TO BE SURE */
	
	
	PetscFunctionReturn(0);
}
int main(int argc,char **args)
{

  /*PETSc Mat Object */
  Mat         pMat;
  /* Input matrix market file and output PETSc binary file */
  char        inputFile[128],outputFile[128],buf[128];

  /* number rows, columns, non zeros etc */
  int         i, m,n,nnz,ierr,col,row;

  /*We compute no of nozeros per row for PETSc Mat object pre-allocation*/  
  int *nnzPtr;
  /*Maximum nonzero in nay row */
  int maxNNZperRow=0;
  /*Row number containing max non zero elements */
  int maxRowNum = 0;
  /*Just no of comments that will be ignore during successive read of file */
  int numComments=0;

  /* This is  variable of type double */
  PetscScalar val;

  /*File handle for read and write*/
  FILE*       file;
  /*File handle for writing nonzero elements distribution per row */
  FILE 	      *fileRowDist;

   /*PETSc Viewer is used for writing PETSc Mat object in binary format */
   PetscViewer view;
  /*Just record time required for conversion */
  PetscLogDouble t1,t2,elapsed_time;

  /*Initialise PETSc lib */
  PetscInitialize(&argc,&args,(char *)0,PETSC_NULL);

  /* Just record time */
  ierr = PetscGetTime(&t1); CHKERRQ(ierr);

  /*Get name of matrix market file from command line options and Open file*/
  ierr = PetscOptionsGetString(PETSC_NULL,"-fin",inputFile,127,PETSC_NULL);
  ierr = PetscFOpen(PETSC_COMM_SELF,inputFile,"r",&file);

  /* Just count the comment lines in the file */
  while(1)
  {
  	fgets(buf,128,file);
        /*If line starts with %, its a comment */
        if(buf[0] == '%')
	{
	   printf("\n IGNORING COMMENT LINE : IGNORING....");
	   numComments++; 
	}
	else
	{
	   /*Set Pointer to Start of File */
	   fseek(file, 0, SEEK_SET );
           int num = numComments;

	   /* and just move pointer to the entry in the file which indicates row nums, col nums and non zero elements */
	   while(num--)
	   	fgets(buf,128,file);
	   break;
	}
  }

  /*Reads size of sparse matrix from matrix market file */
  fscanf(file,"%d %d %d\n",&m,&n,&nnz);
  printf ("ROWS = %d, COLUMNS = %d, NO OF NON-ZEROS = %d\n",m,n,nnz);

  /*Now we will calculate non zero elelments distribution per row */
  nnzPtr = (int *) calloc (sizeof(int),  m);

  /*This is similar to calculate histogram or frequency of elements in the array */
  for (i=0; !feof(file); i++) 
  {
  	  fscanf(file,"%d %d %le\n",&row,&col,&val);
	  row = row-1; col = col-1 ;
	  nnzPtr[row]++;
  }

  printf("\n ROW DISTRIBUTION CALCULATED....WRITING TO THE FILE..!");
  fflush(stdout);

  /*Write row distribution to the file ROW_STR.dat */
  fileRowDist =  fopen ("ROW_DISTR.dat", "w");
  for (i=0; i< m; i++)
  {
     fprintf(fileRowDist, "%d\t %d\n", i, nnzPtr[i]);
     /*Find max num of of nonzero for any row of the matrix and that row number */
     if( maxNNZperRow < nnzPtr[i] )
     {	  /*store max nonzero for any row*/
	  maxNNZperRow =  nnzPtr[i];
	  /*row that contains max non zero elements*/
          maxRowNum = i; 
          
     }
  }
  /*Close File */
  fclose(fileRowDist);

  printf("\n MAX NONZERO FOR ANY ROW ARE : %d & ROW NUM IS : %d", maxNNZperRow, maxRowNum );
  
  /* Again set the file pointer the fist data record in matrix market file*
   * Note that we can directly move ponts with fseek, but as this is text file 
   * we are simple reading line by line
   */
  fseek(file, 0, SEEK_SET );
  numComments++;
  while(numComments--)
	fgets(buf,128,file);


  /* Its important to pre-allocate memory by passing max non zero for any row in the matrix */
  ierr = MatCreateSeqAIJ(PETSC_COMM_WORLD,m,n,maxNNZperRow,PETSC_NULL,&pMat);
  /* OR we can also pass row distribution of nozero elements for every row */
  /* ierr = MatCreateSeqAIJ(PETSC_COMM_WORLD,m,n,0,nnzPtr,&pMat);*/

  /*Now Set matrix elements values form matrix market file */
  for (i=0; i<nnz; i++) 
  {
	    /*Read matrix element from matrix market file*/
	    fscanf(file,"%d %d %le\n",&row,&col,&val);
            /*In matrix market format, rows and columns starts from 1 */
	    row = row-1; col = col-1 ;
	    /* For every non zero element,insert that value at row,col position */	
	    ierr = MatSetValues(pMat,1,&row,1,&col,&val,INSERT_VALUES);
  }
  fclose(file);
  /*Matrix Read Complete */
  ierr = PetscPrintf(PETSC_COMM_SELF,"\n MATRIX READ...DONE!");

  /*Now assemeble the matrix */
  ierr = MatAssemblyBegin(pMat,MAT_FINAL_ASSEMBLY);
  ierr = MatAssemblyEnd(pMat,MAT_FINAL_ASSEMBLY);

  /* Now open output file for writing into PETSc Binary FOrmat*/
  ierr = PetscOptionsGetString(PETSC_NULL,"-fout",outputFile,127,PETSC_NULL);CHKERRQ(ierr);
  /*With the PETSc Viewer write output to File*/
  ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,outputFile,FILE_MODE_WRITE,&view);CHKERRQ(ierr);
  /*Matview will dump the Mat object to binary file */
  ierr = MatView(pMat,view);CHKERRQ(ierr);

  /* Destroy the data structure */
  ierr = PetscViewerDestroy(&view);CHKERRQ(ierr);
  ierr = MatDestroy(&pMat);CHKERRQ(ierr);

  /*Just for statistics*/
  ierr = PetscGetTime(&t2);CHKERRQ(ierr);
  elapsed_time = t2 - t1;     
  ierr = PetscPrintf(PETSC_COMM_SELF,"ELAPSE TIME: %g\n",elapsed_time);CHKERRQ(ierr);

  ierr = PetscFinalize();CHKERRQ(ierr);
  return 0;
}
Example #17
0
int main(int argc,char **argv)
{
    PetscLogDouble  x,y,z;
    int             i,ierr;
    PetscScalar     *A,*B;

    PetscInitialize(&argc,&argv,0,0);

    ierr = PetscMalloc(8000000*sizeof(PetscScalar),&A);
    CHKERRQ(ierr);
    ierr = PetscMalloc(8000000*sizeof(PetscScalar),&B);
    CHKERRQ(ierr);

    for (i=0; i<8000000; i++) {
        A[i] = i%61897;
        B[i] = i%61897;
    }
    /* To take care of paging effects */
    ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0);
    CHKERRQ(ierr);
    ierr = PetscGetTime(&x);
    CHKERRQ(ierr);

    ierr = PetscGetTime(&x);
    CHKERRQ(ierr);
    /*
    PetscMemcpy(A,B,sizeof(PetscScalar)*8000000);
    PetscMemcpy(A,B,sizeof(PetscScalar)*8000000);
    PetscMemcpy(A,B,sizeof(PetscScalar)*8000000);
    PetscMemcpy(A,B,sizeof(PetscScalar)*8000000);
    PetscMemcpy(A,B,sizeof(PetscScalar)*8000000);
    PetscMemcpy(A,B,sizeof(PetscScalar)*8000000);
    PetscMemcpy(A,B,sizeof(PetscScalar)*8000000);
    PetscMemcpy(A,B,sizeof(PetscScalar)*8000000);
    PetscMemcpy(A,B,sizeof(PetscScalar)*8000000);
    PetscMemcpy(A,B,sizeof(PetscScalar)*8000000); */
    {   int j;
        for (j = 0; j<10; j++) {
            for (i=0; i<8000000; i++) {
                B[i] = A[i];
            }
        }
    }

    ierr = PetscGetTime(&y);
    CHKERRQ(ierr);
    ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0);
    CHKERRQ(ierr);
    ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0);
    CHKERRQ(ierr);
    ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0);
    CHKERRQ(ierr);
    ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0);
    CHKERRQ(ierr);
    ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0);
    CHKERRQ(ierr);
    ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0);
    CHKERRQ(ierr);
    ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0);
    CHKERRQ(ierr);
    ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0);
    CHKERRQ(ierr);
    ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0);
    CHKERRQ(ierr);
    ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0);
    CHKERRQ(ierr);
    ierr = PetscGetTime(&z);
    CHKERRQ(ierr);

    fprintf(stdout,"%s : \n","PetscMemcpy");
    fprintf(stdout,"    %-15s : %e MB/s\n","Bandwidth",10.0*8*8/(y-x));
    fprintf(stdout,"    %-15s : %e sec\n","Latency",(z-y)/10.0);
    fprintf(stdout,"    %-15s : %e sec\n","Per PetscScalar",(2*y-x-z)/8000000.0);

    ierr = PetscFree(A);
    CHKERRQ(ierr);
    ierr = PetscFree(B);
    CHKERRQ(ierr);
    ierr = PetscFinalize();
    CHKERRQ(ierr);
    PetscFunctionReturn(0);
}
Example #18
0
void PETSC_STDCALL  petscgettime_(PetscLogDouble *t, int *__ierr ){
*__ierr = PetscGetTime(t);
}
Example #19
0
PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat              *tseq,A_seq = PETSC_NULL;
  Mat_SeqAIJ       *aa,*bb;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
  PetscErrorCode   ierr;
  PetscInt         M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
                   m=A->rmap->n, colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
  int              sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
  PetscMPIInt      size;
  SuperLUStat_t    stat;
  double           *berr=0;
  IS               isrow;
  PetscLogDouble   time0,time,time_min,time_max; 
  Mat              F_diag=PETSC_NULL;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex    *av, *bv; 
#else
  double           *av, *bv; 
#endif

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  
  if (lu->options.PrintStat) { /* collect time for mat conversion */
    ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr);
    ierr = PetscGetTime(&time0);CHKERRQ(ierr);  
  }

  if (lu->MatInputMode == GLOBAL) { /* global mat input */
    if (size > 1) { /* convert mpi A to seq mat A */
      ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);  
      ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
      ierr = ISDestroy(&isrow);CHKERRQ(ierr);
   
      A_seq = *tseq;
      ierr = PetscFree(tseq);CHKERRQ(ierr);
      aa =  (Mat_SeqAIJ*)A_seq->data;
    } else {
      PetscBool  flg;
      ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
      if (flg) {
        Mat_MPIAIJ *At = (Mat_MPIAIJ*)A->data;
        A = At->A;
      }
      aa =  (Mat_SeqAIJ*)A->data;
    }

    /* Convert Petsc NR matrix to SuperLU_DIST NC. 
       Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
    if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
      Destroy_CompCol_Matrix_dist(&lu->A_sup);
      if (lu->FactPattern == SamePattern_SameRowPerm){
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else { /* lu->FactPattern == SamePattern */
        Destroy_LU(N, &lu->grid, &lu->LUstruct); 
        lu->options.Fact = SamePattern;    
      }
    }
#if defined(PETSC_USE_COMPLEX)
    zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
#else
    dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
#endif

    /* Create compressed column matrix A_sup. */
#if defined(PETSC_USE_COMPLEX)
    zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
#else
    dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);  
#endif
  } else { /* distributed mat input */
    Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;  
    aa=(Mat_SeqAIJ*)(mat->A)->data;
    bb=(Mat_SeqAIJ*)(mat->B)->data;
    ai=aa->i; aj=aa->j; 
    bi=bb->i; bj=bb->j; 
#if defined(PETSC_USE_COMPLEX)
    av=(doublecomplex*)aa->a;   
    bv=(doublecomplex*)bb->a;
#else
    av=aa->a;
    bv=bb->a;
#endif
    rstart = A->rmap->rstart;
    nz     = aa->nz + bb->nz;
    garray = mat->garray;
   
    if (lu->options.Fact == DOFACT) {/* first numeric factorization */
#if defined(PETSC_USE_COMPLEX)
      zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
#else
      dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
#endif
    } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
      /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* this leads to crash! However, see SuperLU_DIST_2.5/EXAMPLE/pzdrive2.c */
      if (lu->FactPattern == SamePattern_SameRowPerm){
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
        lu->options.Fact = SamePattern;
      }
    }
    nz = 0;
    for ( i=0; i<m; i++ ) {
      lu->row[i] = nz;
      countA = ai[i+1] - ai[i];
      countB = bi[i+1] - bi[i];
      ajj = aj + ai[i];  /* ptr to the beginning of this row */
      bjj = bj + bi[i];  

      /* B part, smaller col index */   
      colA_start = rstart + ajj[0]; /* the smallest global col index of A */  
      jB = 0;
      for (j=0; j<countB; j++){
        jcol = garray[bjj[j]];
        if (jcol > colA_start) {
          jB = j;
          break;
        }
        lu->col[nz] = jcol; 
        lu->val[nz++] = *bv++;
        if (j==countB-1) jB = countB; 
      }

      /* A part */
      for (j=0; j<countA; j++){
        lu->col[nz] = rstart + ajj[j]; 
        lu->val[nz++] = *av++;
      }

      /* B part, larger col index */      
      for (j=jB; j<countB; j++){
        lu->col[nz] = garray[bjj[j]];
        lu->val[nz++] = *bv++;
      }
    } 
    lu->row[m] = nz;
#if defined(PETSC_USE_COMPLEX)
    zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
#else
    dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
#endif
  }
  if (lu->options.PrintStat) {
    ierr = PetscGetTime(&time);CHKERRQ(ierr);  
    time0 = time - time0;
  }

  /* Factor the matrix. */
  PStatInit(&stat);   /* Initialize the statistics variables. */
  if (lu->MatInputMode == GLOBAL) { /* global mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,&lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
#else
    pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,&lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
#endif 
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,&lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
#else
    pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,&lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
#endif
  }

  if (lu->MatInputMode == GLOBAL && size > 1){
    ierr = MatDestroy(&A_seq);CHKERRQ(ierr);
  }

  if (lu->options.PrintStat) {
    ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
    ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
    ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
    time = time/size; /* average time */
    ierr = PetscPrintf(((PetscObject)A)->comm, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr);
    PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
  }
  PStatFree(&stat);  
  if (size > 1){
    F_diag = ((Mat_MPIAIJ *)(F)->data)->A;
    F_diag->assembled = PETSC_TRUE; 
  }
  (F)->assembled    = PETSC_TRUE;
  (F)->preallocated = PETSC_TRUE;
  lu->options.Fact  = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
  PetscFunctionReturn(0);
}
Example #20
0
//int BenchmarkBoundaryChecks( int n, PetscLogDouble t_bulk, PetscLogDouble )
int PetscMain()
{
  int i,j,k, n=128;
  PetscLogDouble t1,t2,s1,s2;
  Vec U,V,W,DIV1,DIV2;
  PetscReal ***u,***v,***w,***div1, ***div2;
  DA da;
  DALocalInfo info;
  PetscErrorCode ierr;
  ierr = DACreate3d(PETSC_COMM_SELF,//MPI Communicator   
      DA_NONPERIODIC,   //DA_NONPERIODIC, DA_XPERIODIC, DA_YPERIODIC, DA_XYPERIODIC
      DA_STENCIL_STAR, //DA_STENCIL_BOX or DA_STENCIL_STAR
      n,n,n, //Global array dimension
      1,1,1,//Number procs per dim
      1,    //Number of chemical species
      1,           //stencil width
      0,0,0,       //specific array of nodes
      &da); CHKERRQ(ierr);
  DACreateGlobalVector(da,&U);
  DACreateGlobalVector(da,&V);
  DACreateGlobalVector(da,&W);
  DACreateGlobalVector(da,&DIV1);
  DACreateGlobalVector(da,&DIV2);
  VecSet(DIV1,0);
  VecSet(DIV2,0);
  DAVecGetArray(da,U,&u);
  DAVecGetArray(da,V,&v);
  DAVecGetArray(da,W,&w);
  DAVecGetArray(da,DIV1,&div1);
  DAVecGetArray(da,DIV2,&div2);
  DAGetLocalInfo(da,&info);
  PetscBarrier(0);
  for( k = 0; k < n; ++k)
  {
    for( j = 0; j < n; ++j)
    {
      for( i = 0; i < n; ++i)
      {
        u[k][j][i] = i * (i-n) * j * (j-n) * k * (k-n);
        v[k][j][i] = 1 - u[k][j][i];
        w[k][j][i] = u[k][j][i] * v[k][j][i];
      }
    }
  }
  PetscBarrier(0);
  PetscGetTime(&t1);
  for( k = 1; k < n-1; ++k)
  {
    for( j = 1; j < n-1; ++j)
    {
      for( i = 1; i < n-1; ++i)
      {
        div1[k][j][i] = u[k][j][i+1] - u[k][j][i-1] +
                        v[k][j+1][i] - v[k][j-1][i] +
                        w[k+1][j][i] - w[k-1][j][i];
        div1[k][j][i] /= 2;
      }
    }
  }
  PetscGetTime(&t2);
  PetscReal uE,uW,vN,vS,wF,wB;
  PetscReal hx,hy,hz;
  PetscBarrier(0);
  PetscGetTime(&s1);
  for( k = 1; k < n-1; ++k)
  {
    for( j = 1; j < n-1; ++j)
    {
      for( i = 1; i < n-1; ++i)
      {
/*        uE = i == info.mx-1 ? u[k][j][i] : u[k][j][i+1]; 
        uW = i == 0         ? u[k][j][i] : u[k][j][i-1];
        vN = j == info.my-1 ? v[k][j][i] : v[k][j+1][i];
        vS = j == 0         ? v[k][j][i] : v[k][j-1][i];
        wB = k == info.mz-1 ? w[k][j][i] : w[k+1][j][i];
        wF = k == 0         ? w[k][j][i] : w[k-1][j][i];
  */      
if( i == info.mx-1 ) { uE = u[k][j][i]; hx= 1;  }else{ uE = u[k][j][i+1];hx= 2;}
if( i == info.mx-1 ) { uE = u[k][j][i]; hx= 1;  }else{ uE = u[k][j][i+1];hx= 2;}
if( j == info.mx-1 ) { uE = u[k][j][i]; hy= 1;  }else{ uE = u[k][j][i+1];hy= 2;}
if( j == info.mx-1 ) { uE = u[k][j][i]; hy= 1;  }else{ uE = u[k][j][i+1];hy= 2;}
if( k == info.mx-1 ) { uE = u[k][j][i]; hz= 1;  }else{ uE = u[k][j][i+1];hz= 2;}
if( k == info.mx-1 ) { uE = u[k][j][i]; hz= 1;  }else{ uE = u[k][j][i+1];hz= 2;}

        div2[k][j][i] = uE - uW + vN - vS + wB - wF;
        div2[k][j][i] /= 2;
//        printf("%f\t%f\t%f\n",div1[k][j][i], div2[k][j][i], div2[k][j][i] - div1[k][j][i]);        
      }
    }
  }
  PetscGetTime(&s2);
  DAVecRestoreArray(da,DIV1,&div1);
  DAVecRestoreArray(da,DIV2,&div2);
  VecAXPY(DIV1,-1,DIV2);
  PetscReal norm;
  VecNorm(DIV1,NORM_2,&norm);
  
  printf("BULK: %f\nIF's: %f\nDIFF:\t%f\nRATIO:\t%f\nnorm: %f\n", (t2-t1), (s2-s1), (s2-s1)-(t2-t1),(s2-s1)/(t2-t1),norm);
}
int main(int argc,char **args)
{
  PetscInt       rank,size,npt;  
  PetscErrorCode ierr;
  Vec            x,y0,tempvec, *vinda,*vindb,*vindc;
  PetscInt       i,j,k,l,n,p,m,m2,pmax,puse,Istart,Iend,localsize,niter;

  PetscScalar    dx,dy,dx2,dy2;  
  PetscScalar    *Mixnorm;
  PetscInt       iter,*iterind,*nind;
  FILE           *fidoutput;   
  char           fname[50];
  PetscViewer    socketviewer; 
  PetscInt       withMatlab;
  PetscTruth     Matlabflag;


  PetscLogDouble v1,v2,elapsed_time;


  PetscInitialize(&argc,&args,(char *)0,help);
  MPI_Comm_size(PETSC_COMM_WORLD,&size);
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);

  ierr = PetscPrintf(PETSC_COMM_WORLD,"\nPETSC: Petsc Initializes successfully! \n");
  ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: comm_size is %d \n", size);
 
  ierr = PetscOptionsGetInt(PETSC_NULL,"-withMatlab",&withMatlab,&Matlabflag);CHKERRQ(ierr);  
  if (Matlabflag == PETSC_FALSE){withMatlab = 0;}else{withMatlab = 1;}


  if(withMatlab==1){
  // Rank 0 connects to socket, use default socket
  PetscViewerSocketOpen(PETSC_COMM_WORLD,0,PETSC_DEFAULT,&socketviewer);  
  ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: socket opened! \n");CHKERRQ(ierr); 

  // Receive n from Matlab
  IntReceive(socketviewer, &nind);
  n = *nind;
  //  Receive iter from Matlab
  IntReceive(socketviewer, &iterind);
  iter = *iterind;
 
  }else{
  ierr = PetscOptionsGetInt(PETSC_NULL,"-ngrid",&n,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetInt(PETSC_NULL,"-niter",&iter,PETSC_NULL);CHKERRQ(ierr);
  }

  
 
/////////////////////////////////////////////////////////////////////////////////////




  ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: number of grid is %d \n", n);
  ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: number of iteration is %d \n", iter);



  Mixnorm    = malloc(iter*sizeof(PetscScalar));
  dx         = 1.0/n;
  dy         = 1.0/n;
  dx2        = dx/2-dx/1e6;
  dy2        = dy/2-dy/1e6;
  npt        = 5;
  pmax       = 4e6;
  puse       = pmax;

  ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: estimated buffer size (per processer) %f Mbytes \n", pmax*1.0/1e6*8*16 );
  ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: estimated variable size %f Mbytes\n", 1.0*n*n/1e6*8*2);

/////////////////////////////////////////////////////////////////////////////////////
  
  ierr  = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE ,n,&tempvec);CHKERRQ(ierr);
  ierr  = VecGetOwnershipRange(tempvec,&Istart,&Iend);CHKERRQ(ierr); 
  localsize = Iend-Istart;
  ierr = VecDestroy(tempvec);CHKERRQ(ierr);
/////////////////////////////////////////////////////////////////////////////////////
// Create initial vector
    Vec         x0;
    PetscScalar *x0array;
    x0array = malloc((localsize)*n*sizeof(PetscScalar));
    k = 0;
    for(i=Istart;i<Iend;i++){
        for(j=0;j<n;j++){
            *(x0array+k) = cos(2*M_PI*(dx/2+i*dx));
            //*(x0array+k) = cos(2*M_PI*(dy/2+j*dy));       
            k++;

        }
    }

  

    ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n*localsize,PETSC_DECIDE,x0array,&x0);CHKERRQ(ierr);
    ierr = VecDuplicate(x0,&x);CHKERRQ(ierr); 
    ierr = VecCreateSeq(PETSC_COMM_SELF,pmax*npt,&y0);CHKERRQ(ierr);
     
    ierr =  VecNorm(x0,NORM_2,Mixnorm); CHKERRQ(ierr);  
    PetscPrintf(PETSC_COMM_WORLD,"PETSC: initial norm= %f \n",*(Mixnorm+0)/n ); 

///////////////////////////////////////////////////////////////////////////
// Map Center Points
  PetscInt     *NzindJ,*idx,*idy,*idp;
  PetscScalar  *CenterX,*CenterY,*VecVal,*pty;
  PetscScalar  *ShiftX,*ShiftY,CX,CY, *yarray;
  IS           isx,isy;
  VecScatter   ctx;

  CenterX      = malloc(npt*sizeof(PetscScalar));
  CenterY      = malloc(npt*sizeof(PetscScalar));
  ShiftX       = malloc(npt*sizeof(PetscScalar));
  ShiftY       = malloc(npt*sizeof(PetscScalar));
  VecVal       = malloc(npt*sizeof(PetscScalar));
  yarray       = malloc(pmax*sizeof(PetscScalar)); 

  NzindJ       = malloc(pmax*npt*sizeof(PetscInt));
  idx          = malloc(pmax*npt*sizeof(PetscInt));
  idy          = malloc(pmax*npt*sizeof(PetscInt)); 
  idp          = malloc(pmax*sizeof(PetscInt)); 
 

  *(ShiftX+0) = 0;
  *(ShiftY+0) = 0;
  *(ShiftX+1) = -dx2;
  *(ShiftY+1) = -dy2;
  *(ShiftX+2) =  dx2;
  *(ShiftY+2) = -dy2;
  *(ShiftX+3) = -dx2;
  *(ShiftY+3) =  dy2;
  *(ShiftX+4) =  dy2;
  *(ShiftY+4) =  dx2;


  //*(ShiftX+5) = 0;
  //*(ShiftY+5) = -dy2;
  //*(ShiftX+6) = -dx2;
  //*(ShiftY+6) = 0;
  //*(ShiftX+7) =  dx2;
  //*(ShiftY+7) = 0;
  //*(ShiftX+8) = 0;
  //*(ShiftY+9) =  dy2;

  for(i=0;i<npt*pmax;i++){ *(idy+i)=i; }
  ISCreateGeneralWithArray(PETSC_COMM_SELF,npt*pmax,idy,&isy);
  vinda = &x0;
  vindb = &x;


   sprintf(fname, "mixnorm_%d_%d",n,iter);
   ierr =PetscPrintf(PETSC_COMM_WORLD,"\n iter     norm      time      unit time\n");CHKERRQ(ierr);
   ierr =PetscFOpen(PETSC_COMM_WORLD,fname,"w",&fidoutput);CHKERRQ(ierr);


for(niter=0;niter<iter;niter++){
   
 ierr = PetscGetTime(&v1);CHKERRQ(ierr);
 l = 0; p = 0;

 if (n*localsize-l<=pmax){puse = n*localsize-l;}else{puse=pmax;}     
     for(i=Istart;i<Iend;i++){
         for(j=0;j<n;j++){
              CX = dx2+i*dx;
              CY = dy2+j*dy;              
              for(k=0;k<npt;k++){ 
                   *(CenterX+k) = CX + *(ShiftX+k);
                   *(CenterY+k) = CY + *(ShiftY+k);    
                   InverseStandardMap((CenterX+k),(CenterY+k));
                   *(NzindJ+p*npt +k) =  floor(*(CenterX+k)*n)*n +  floor(*(CenterY+k)*n);      
              }          
              *(idp+p) = Istart*n+ l;
      
             if(p>=puse-1){ 
          
                 ierr =  ISCreateGeneralWithArray(PETSC_COMM_WORLD,npt*puse,NzindJ,&isx);CHKERRQ(ierr);
                 for(m=0;m<npt*puse;m++){ *(idy+m)=m; }
                 ierr =  ISCreateGeneralWithArray(PETSC_COMM_SELF,npt*puse,idy,&isy);CHKERRQ(ierr);
                 ierr =  VecScatterCreate(*vinda,isx,y0,isy,&ctx);CHKERRQ(ierr);
                 ierr =  VecScatterBegin(*vinda,y0,INSERT_VALUES,SCATTER_FORWARD,ctx);CHKERRQ(ierr);
                 ierr =  VecScatterEnd(*vinda,y0,INSERT_VALUES,SCATTER_FORWARD,ctx);CHKERRQ(ierr);
                 ierr =  VecScatterDestroy(ctx);
                 ierr =  VecGetArray(y0,&pty);CHKERRQ(ierr);
              
                 for(m=0;m<puse;m++){
                     for(m2=0;m2<npt;m2++){
                        *(yarray+m) = *(yarray+m)+*(pty+m*npt+m2);
                     }  
                     *(yarray+m) = *(yarray+m)/npt;
                 } 
                 VecRestoreArray(y0,&pty);
                 VecSetValues(*vindb,puse,idp,yarray,INSERT_VALUES);       

 

                 for(m=0;m<pmax;m++){*(yarray+m) = 0; } 
                 p = 0;

                 if (n*localsize-l<=pmax){puse = n*localsize-l-1;}else{puse=pmax;}            
             }else{p++;}

             l++;
         
        }
    }


   VecAssemblyBegin(*vindb);
   VecAssemblyEnd(*vindb);

   vindc = vindb;
   vindb = vinda;
   vinda = vindc;   


   //ierr =  VecCopy(x,x0);CHKERRQ(ierr);
   ierr =  VecNorm(*vinda,NORM_2,Mixnorm+niter); CHKERRQ(ierr); 
   *(Mixnorm+niter) = *(Mixnorm+niter)/n; 

        
   ierr = PetscGetTime(&v2);CHKERRQ(ierr);
   elapsed_time = v2 - v1; 
   PetscPrintf(PETSC_COMM_WORLD,"     %d   %f   %f  %f \n",niter,*(Mixnorm+niter),elapsed_time,elapsed_time/n/n*1e6 );
   PetscFPrintf(PETSC_COMM_WORLD,fidoutput,"    %d   %f   %f  %f\n"
                ,niter,*(Mixnorm+niter),elapsed_time,elapsed_time/n/n*1e6 );
}



 PetscFClose(PETSC_COMM_WORLD,fidoutput);

///////////////////////////////////////////////////////////////////////////

    if(withMatlab==1){
     VecView(x0,socketviewer);
     PetscScalarView(iter,Mixnorm,socketviewer);
    }
 
  free(CenterX);
  free(CenterY);
  free(ShiftX);
  free(ShiftY);
  

  free(x0array);
  free(idx);
  free(idy);
  free(idp);
  free(yarray);

 

  free(NzindJ);

  free(Mixnorm);

 
   ierr = VecDestroy(x0);CHKERRQ(ierr);
   ierr = VecDestroy(x);CHKERRQ(ierr);
   ierr = VecDestroy(y0);CHKERRQ(ierr);
 
  PetscPrintf(PETSC_COMM_WORLD,"Done!");
  
//////////////////////////////////////////////////////////////////////////////////////
  ierr = PetscFinalize();CHKERRQ(ierr);
  return 0;
}
Example #22
0
int main(int argc,char **argv)
{
    PetscErrorCode ierr;
    PetscInt       i,j,k,N=100,**counters,tsize;

    PetscInitialize(&argc,&argv,(char *)0,help);

    ierr = PetscThreadCommView(PETSC_COMM_WORLD,PETSC_VIEWER_STDOUT_WORLD);
    CHKERRQ(ierr);
    ierr = PetscOptionsGetInt(PETSC_NULL,"-N",&N,PETSC_NULL);
    CHKERRQ(ierr);

    ierr = PetscThreadCommGetNThreads(PETSC_COMM_WORLD,&tsize);
    CHKERRQ(ierr);
    ierr = PetscMalloc(tsize*sizeof(*counters),&counters);
    CHKERRQ(ierr);
    ierr = PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterInit_kernel,1,counters);
    CHKERRQ(ierr);

    for (i=0; i<10; i++) {
        PetscReal t0,t1;
        ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD);
        CHKERRQ(ierr);
        ierr = PetscGetTime(&t0);
        CHKERRQ(ierr);
        for (j=0; j<N; j++) {
            /*      ierr = PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterIncrement_kernel,1,counters);CHKERRQ(ierr); */
            ierr = PetscThreadCommRunKernel1(PETSC_COMM_WORLD,(PetscThreadKernel)CounterIncrement_kernel,counters);
            CHKERRQ(ierr);
        }
        ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD);
        CHKERRQ(ierr);
        ierr = PetscGetTime(&t1);
        CHKERRQ(ierr);
        ierr = PetscPrintf(PETSC_COMM_WORLD,"Time per kernel: %g us\n",1e6*(t1-t0)/N);
        CHKERRQ(ierr);
    }

    for (i=0; i<10; i++) {
        PetscReal t0,t1;
        ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD);
        CHKERRQ(ierr);
        ierr = PetscGetTime(&t0);
        CHKERRQ(ierr);
        for (j=0; j<N; j++) {
            #pragma omp parallel num_threads(tsize)
            {
                PetscInt trank = omp_get_thread_num();
                CounterIncrement_kernel(trank,counters);
            }
        }
        ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD);
        CHKERRQ(ierr);
        ierr = PetscGetTime(&t1);
        CHKERRQ(ierr);
        ierr = PetscPrintf(PETSC_COMM_WORLD,"OpenMP inline time per kernel: %g us\n",1e6*(t1-t0)/N);
        CHKERRQ(ierr);
    }

    for (i=0; i<10; i++) {
        PetscReal t0,t1;
        ierr = PetscGetTime(&t0);
        CHKERRQ(ierr);
        for (j=0; j<N; j++) {
            CounterIncrement_kernel(0,counters);
        }
        ierr = PetscGetTime(&t1);
        CHKERRQ(ierr);
        ierr = PetscPrintf(PETSC_COMM_WORLD,"Serial inline time per single kernel: %g us\n",1e6*(t1-t0)/N);
        CHKERRQ(ierr);
    }

    for (i=0; i<10; i++) {
        PetscReal t0,t1;
        ierr = PetscGetTime(&t0);
        CHKERRQ(ierr);
        for (j=0; j<N; j++) {
            for (k=0; k<tsize; k++) CounterIncrement_kernel(k,counters);
        }
        ierr = PetscGetTime(&t1);
        CHKERRQ(ierr);
        ierr = PetscPrintf(PETSC_COMM_WORLD,"Serial inline time per kernel: %g us\n",1e6*(t1-t0)/N);
        CHKERRQ(ierr);
    }

    ierr = PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterFree_kernel,1,counters);
    CHKERRQ(ierr);
    ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD);
    CHKERRQ(ierr);
    ierr = PetscFree(counters);
    CHKERRQ(ierr);
    PetscFinalize();
    return 0;
}