/* S^{-1} = ( G^T G )^{-1} G^T K G ( G^T G )^{-1} = A C A
   S^{-T} = A^T (A C)^T = A^T C^T A^T, but A = G^T G which is symmetric
          = A C^T A
          = A G^T ( G^T K )^T A
          = A G^T K^T G A */
/* Apply the transpose of the GtKG (BFBT-style) preconditioner:
   y <- (GtG)^{-1} G^T K^T G (GtG)^{-1} x, where each (GtG)^{-1} application
   is an inner KSP solve with ctx->ksp.  Optionally checks RHS consistency
   before each inner solve and reports the per-solve wall-clock time. */
PetscErrorCode BSSCR_PCApplyTranspose_GtKG( PC pc, Vec x, Vec y )
{
  PC_GtKG ctx = (PC_GtKG)pc->data;  /* NOTE(review): PC_GtKG appears to be a pointer typedef -- confirm */
  KSP ksp;
  Mat K, G;
  Vec s,t,X;
  PetscLogDouble t0,t1;

  /* Pull cached operators and work vectors out of the PC context */
  ksp = ctx->ksp;
  K = ctx->K; G = ctx->G;
  s = ctx->s; t = ctx->t; X = ctx->X;

  if (ctx->monitor_rhs_consistency) { BSSCRBSSCR_Lp_monitor_check_rhs_consistency(ksp,x,1); }

  /* First inner solve, timed for the optional sub-KSP monitor */
  PetscGetTime(&t0);
  KSPSolve( ksp, x, t ); /* t <- GtG_inv x */
  PetscGetTime(&t1);
  if (ctx->monitor_activated) { BSSCR_PCBFBTSubKSPMonitor(ksp,1,(t1-t0)); }

  MatMult( G, t, s ); /* s <- G t */
  MatMultTranspose( K, s, X ); /* X <- K^T s */
  MatMultTranspose( G, X, t ); /* t <- Gt X */

  if (ctx->monitor_rhs_consistency) { BSSCRBSSCR_Lp_monitor_check_rhs_consistency(ksp,t,2); }

  /* Second inner solve */
  PetscGetTime(&t0);
  KSPSolve( ksp, t, y ); /* y <- GtG_inv t */
  PetscGetTime(&t1);
  if (ctx->monitor_activated) { BSSCR_PCBFBTSubKSPMonitor(ksp,2,(t1-t0)); }

  PetscFunctionReturn(0);
}
PetscErrorCode RunTest(int nx, int ny, int nz, int loops, double *wt) { Vec x,f; TS ts; AppCtx _app,*app=&_app; double t1,t2; PetscErrorCode ierr; PetscFunctionBegin; app->nx = nx; app->h[0] = 1./(nx-1); app->ny = ny; app->h[1] = 1./(ny-1); app->nz = nz; app->h[2] = 1./(nz-1); ierr = VecCreate(PETSC_COMM_SELF,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,nx*ny*nz,nx*ny*nz);CHKERRQ(ierr); ierr = VecSetUp(x);CHKERRQ(ierr); ierr = VecDuplicate(x,&f);CHKERRQ(ierr); ierr = TSCreate(PETSC_COMM_SELF,&ts);CHKERRQ(ierr); ierr = TSSetProblemType(ts,TS_NONLINEAR);CHKERRQ(ierr); ierr = TSSetType(ts,TSTHETA);CHKERRQ(ierr); ierr = TSThetaSetTheta(ts,1.0);CHKERRQ(ierr); ierr = TSSetTimeStep(ts,0.01);CHKERRQ(ierr); ierr = TSSetTime(ts,0.0);CHKERRQ(ierr); ierr = TSSetDuration(ts,10,1.0);CHKERRQ(ierr); ierr = TSSetSolution(ts,x);CHKERRQ(ierr); ierr = TSSetIFunction(ts,f,FormFunction,app);CHKERRQ(ierr); ierr = PetscOptionsSetValue("-snes_mf","1");CHKERRQ(ierr); { SNES snes; KSP ksp; ierr = TSGetSNES(ts,&snes);CHKERRQ(ierr); ierr = SNESGetKSP(snes,&ksp);CHKERRQ(ierr); ierr = KSPSetType(ksp,KSPCG);CHKERRQ(ierr); } ierr = TSSetFromOptions(ts);CHKERRQ(ierr); ierr = TSSetUp(ts);CHKERRQ(ierr); *wt = 1e300; while (loops-- > 0) { ierr = FormInitial(0.0,x,app);CHKERRQ(ierr); ierr = PetscGetTime(&t1);CHKERRQ(ierr); ierr = TSSolve(ts,x,PETSC_NULL);CHKERRQ(ierr); ierr = PetscGetTime(&t2);CHKERRQ(ierr); *wt = PetscMin(*wt,t2-t1); } ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&f);CHKERRQ(ierr); ierr = TSDestroy(&ts);CHKERRQ(ierr); PetscFunctionReturn(0); }
int main(int argc,char **argv) { PetscLogDouble x,y,z; PetscScalar A[10000]; int ierr; PetscInitialize(&argc,&argv,0,0); /* To take care of paging effects */ ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr); ierr = PetscGetTime(&x);CHKERRQ(ierr); ierr = PetscGetTime(&x);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*10000);CHKERRQ(ierr); ierr = PetscGetTime(&y);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr); ierr = PetscMemzero(A,sizeof(PetscScalar)*0);CHKERRQ(ierr); ierr = PetscGetTime(&z);CHKERRQ(ierr); fprintf(stdout,"%s : \n","PetscMemzero"); fprintf(stdout," %-15s : %e sec\n","Latency",(z-y)/10.0); fprintf(stdout," %-15s : %e sec\n","Per PetscScalar",(2*y-x-z)/100000.0); ierr = PetscFinalize();CHKERRQ(ierr); PetscFunctionReturn(0); }
/* MatRARtNumeric_SeqAIJ_SeqAIJ - numeric phase of C = R*A*R^T for SeqAIJ
 * matrices, via a MatTransposeColoring:
 *   1) expand sparse R into the dense compressed Rt,
 *   2) form dense RARt = R*A*Rt,
 *   3) scatter the dense result back into the sparse pattern of C.
 * The Mat_RARt work context is retrieved from a PetscContainer attached to C
 * by the symbolic phase; raises PETSC_ERR_PLIB if it is missing.
 * Per-stage wall-clock times are reported through PetscInfo when enabled.
 */
PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A,Mat R,Mat C)
{
  PetscErrorCode       ierr;
  Mat_RARt             *rart;
  PetscContainer       container;
  MatTransposeColoring matcoloring;
  Mat                  Rt,RARt;
  PetscLogDouble       Mult_sp_den=0.0,app1=0.0,app2=0.0,t0,tf;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)C,"Mat_RARt",(PetscObject *)&container);CHKERRQ(ierr);
  /* BUG FIX: error message read "does not exit" */
  if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
  ierr = PetscContainerGetPointer(container,(void **)&rart);CHKERRQ(ierr);

  /* Get dense Rt by Apply MatTransposeColoring to R */
  matcoloring = rart->matcoloring;
  Rt          = rart->Rt;
  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  ierr = MatTransColoringApplySpToDen(matcoloring,R,Rt);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  app1 += tf - t0;

  /* Get dense RARt = R*A*Rt */
  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  RARt = rart->RARt;
  ierr = MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R,A,Rt,RARt,rart->work);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  Mult_sp_den += tf - t0;

  /* Recover C from C_dense */
  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  ierr = MatTransColoringApplyDenToSp(matcoloring,RARt,C);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  app2 += tf - t0;

#if defined(PETSC_USE_INFO)
  ierr = PetscInfo4(C,"Num = ColorApp %g + %g + Mult_sp_den %g = %g\n",app1,app2,Mult_sp_den,app1+app2+Mult_sp_den);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
int test1(void) { PetscLogDouble t1,t2; double value; int i,ierr,*z,*zi,intval; PetscScalar *x,*y; PetscRandom r; ierr = PetscRandomCreate(PETSC_COMM_SELF,&r);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(r);CHKERRQ(ierr); ierr = PetscMalloc(20000*sizeof(PetscScalar),&x);CHKERRQ(ierr); ierr = PetscMalloc(20000*sizeof(PetscScalar),&y);CHKERRQ(ierr); ierr = PetscMalloc(2000*sizeof(int),&z);CHKERRQ(ierr); ierr = PetscMalloc(2000*sizeof(int),&zi);CHKERRQ(ierr); /* Take care of paging effects */ ierr = PetscGetTime(&t1);CHKERRQ(ierr); /* Form the random set of integers */ for (i=0; i<2000; i++) { ierr = PetscRandomGetValue(r,&value);CHKERRQ(ierr); intval = (int)(value*20000.0); z[i] = intval; } for (i=0; i<2000; i++) { ierr = PetscRandomGetValue(r,&value);CHKERRQ(ierr); intval = (int)(value*20000.0); zi[i] = intval; } /* fprintf(stdout,"Done setup\n"); */ ierr = BlastCache();CHKERRQ(ierr); ierr = PetscGetTime(&t1);CHKERRQ(ierr); for (i=0; i<2000; i++) { x[i] = y[i]; } ierr = PetscGetTime(&t2);CHKERRQ(ierr); fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0); ierr = BlastCache();CHKERRQ(ierr); ierr = PetscGetTime(&t1);CHKERRQ(ierr); for (i=0; i<500; i+=4) { x[i] = y[z[i]]; x[1+i] = y[z[1+i]]; x[2+i] = y[z[2+i]]; x[3+i] = y[z[3+i]]; } ierr = PetscGetTime(&t2);CHKERRQ(ierr); fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 4",(t2-t1)/2000.0); ierr = BlastCache();CHKERRQ(ierr); ierr = PetscGetTime(&t1);CHKERRQ(ierr) for (i=0; i<2000; i++) { x[i] = y[z[i]]; } ierr = PetscGetTime(&t2);CHKERRQ(ierr); fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0); ierr = BlastCache();CHKERRQ(ierr); ierr = PetscGetTime(&t1);CHKERRQ(ierr); for (i=0; i<1000; i+=2) { x[i] = y[z[i]]; x[1+i] = y[z[1+i]]; } ierr = PetscGetTime(&t2);CHKERRQ(ierr); fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 2",(t2-t1)/2000.0); ierr = BlastCache();CHKERRQ(ierr); ierr = PetscGetTime(&t1);CHKERRQ(ierr); for (i=0; i<2000; i++) { x[z[i]] = y[i]; } ierr = 
PetscGetTime(&t2);CHKERRQ(ierr); fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0); ierr = BlastCache();CHKERRQ(ierr); ierr = PetscGetTime(&t1);CHKERRQ(ierr); for (i=0; i<2000; i++) { x[z[i]] = y[zi[i]]; } ierr = PetscGetTime(&t2);CHKERRQ(ierr); fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0); ierr = PetscMemcpy(x,y,10);CHKERRQ(ierr); ierr = PetscMemcpy(z,zi,10);CHKERRQ(ierr); ierr = PetscFree(z);CHKERRQ(ierr); ierr = PetscFree(zi);CHKERRQ(ierr); ierr = PetscFree(x);CHKERRQ(ierr); ierr = PetscFree(y);CHKERRQ(ierr); PetscRandomDestroy(r); PetscFunctionReturn(0); }
int test2(void) { PetscLogDouble t1,t2; double value; int i,ierr,z[20000],zi[20000],intval,tmp; PetscScalar x[20000],y[20000]; PetscRandom r; ierr = PetscRandomCreate(PETSC_COMM_SELF,&r);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(r);CHKERRQ(ierr); /* Take care of paging effects */ ierr = PetscGetTime(&t1);CHKERRQ(ierr); for (i=0; i<20000; i++) { x[i] = i; y[i] = i; z[i] = i; zi[i] = i; } /* Form the random set of integers */ for (i=0; i<20000; i++) { ierr = PetscRandomGetValue(r,&value);CHKERRQ(ierr); intval = (int)(value*20000.0); tmp = z[i]; z[i] = z[intval]; z[intval] = tmp; } for (i=0; i<20000; i++) { ierr = PetscRandomGetValue(r,&value);CHKERRQ(ierr); intval = (int)(value*20000.0); tmp = zi[i]; zi[i] = zi[intval]; zi[intval] = tmp; } /* fprintf(stdout,"Done setup\n"); */ /* ierr = BlastCache();CHKERRQ(ierr); */ ierr = PetscGetTime(&t1);CHKERRQ(ierr); for (i=0; i<2000; i++) { x[i] = y[i]; } ierr = PetscGetTime(&t2);CHKERRQ(ierr); fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0); /* ierr = BlastCache();CHKERRQ(ierr); */ ierr = PetscGetTime(&t1);CHKERRQ(ierr); for (i=0; i<2000; i++) { y[i] = x[z[i]]; } ierr = PetscGetTime(&t2);CHKERRQ(ierr); fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0); /* ierr = BlastCache();CHKERRQ(ierr); */ ierr = PetscGetTime(&t1);CHKERRQ(ierr); for (i=0; i<2000; i++) { x[z[i]] = y[i]; } ierr = PetscGetTime(&t2);CHKERRQ(ierr); fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0); /* ierr = BlastCache();CHKERRQ(ierr); */ ierr = PetscGetTime(&t1);CHKERRQ(ierr); for (i=0; i<2000; i++) { y[z[i]] = x[zi[i]]; } ierr = PetscGetTime(&t2);CHKERRQ(ierr); fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0); PetscRandomDestroy(r); PetscFunctionReturn(0); }
int main(int argc,char **args) { PetscInt rank,size,npt; PetscScalar dx,dy,cx,cy; PetscErrorCode ierr; Vec x,x0,tempvec, *vinda,*vindb,*vindc; PetscInt i,j,k,n,n2,pmax,puse,Istart,Iend,localsize,niter; PetscScalar **x0array, **aarray,**barray; PetscInt *cacheInt; PetscScalar *cacheScalar; DA myDA; PetscScalar *Mixnorm; PetscInt iter,*iterind,*nind; FILE *fidoutput, *fidtimelog; char fname[50],ftimelog[50]; PetscViewer socketviewer; PetscInt withMatlab, doFFT, doSmoothing; PetscTruth Matlabflag, FFTflag, Smoothingflag; PetscInt timelogcount; MPI_Status status; PetscLogDouble v1,v2,elapsed_time; timelogcount = 0; PetscInitialize(&argc,&args,(char *)0,help); MPI_Comm_size(PETSC_COMM_WORLD,&size); MPI_Comm_rank(PETSC_COMM_WORLD,&rank); ierr = PetscPrintf(PETSC_COMM_WORLD,"\nPETSC: Petsc Initializes successfully! \n"); ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: comm_size is %d \n", size); ierr = PetscOptionsGetInt(PETSC_NULL,"-withMatlab",&withMatlab,&Matlabflag);CHKERRQ(ierr); if (Matlabflag == PETSC_FALSE){withMatlab = 0;}else{withMatlab = 1;} ierr = PetscOptionsGetInt(PETSC_NULL,"-doFFT",&doFFT,&FFTflag);CHKERRQ(ierr); if (FFTflag == PETSC_FALSE){doFFT = 0;}else{doFFT = 1;} ierr = PetscOptionsGetInt(PETSC_NULL,"-doSmoothing",&doSmoothing,&Smoothingflag);CHKERRQ(ierr); if (Smoothingflag == PETSC_FALSE){doSmoothing = 0;}else{doSmoothing = 1;} if(withMatlab==1){ // Rank 0 connects to socket, use default socket PetscViewerSocketOpen(PETSC_COMM_WORLD,0,PETSC_DEFAULT,&socketviewer); ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: socket opened! 
\n");CHKERRQ(ierr); // Receive n from Matlab IntReceive(socketviewer, &nind); n = *nind; // Receive iter from Matlab IntReceive(socketviewer, &iterind); iter = *iterind; }else{ ierr = PetscOptionsGetInt(PETSC_NULL,"-ngrid",&n,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-niter",&iter,PETSC_NULL);CHKERRQ(ierr); } ///////////////////////////////////////////////////////////////////////////////////// ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: number of grid is %d \n", n); ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: number of iteration is %d \n", iter); Mixnorm = malloc(iter*sizeof(PetscScalar)); dx = 1.0/n; dy = 1.0/n; n2 = (PetscInt)(n*0.5); npt = 5; pmax = 5e5; puse = pmax; PetscInt logmax = 1000; PetscScalar Timelog[logmax]; PetscLogDouble t1,t2; ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: estimated buffer size (per processer) %f Mbytes \n", pmax*1.0/1e6*8*17 ); ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: estimated variable size %f Mbytes\n", 1.0*n*n/1e6*8*1); ///////////////////////////////////////////////////////////////////////////////////// // ierr = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE ,n,&tempvec);CHKERRQ(ierr); // ierr = VecGetOwnershipRange(tempvec,&Istart,&Iend);CHKERRQ(ierr); // localsize = Iend-Istart; // ierr = VecDestroy(tempvec);CHKERRQ(ierr); ///////////////////////////////////////////////////////////////////////////////////// if(doSmoothing==1){ ierr = PetscPrintf(PETSC_COMM_WORLD,"\n\n\n\n\nPETSC: Now Do DACreate2d \n\n\n\n" ); ierr = PetscPrintf(PETSC_COMM_WORLD,"\n\n\n\n\nPETSC: %d %d %d\n\n\n\n",n2,n,size); DACreate2d(MPI_COMM_WORLD,DA_XYPERIODIC,DA_STENCIL_BOX,n2,n,1,size,1,2,PETSC_NULL,PETSC_NULL,&myDA); DACreateGlobalVector(myDA,&x0); DAGetCorners(myDA,PETSC_NULL,&Istart,PETSC_NULL,PETSC_NULL,&localsize,PETSC_NULL); Iend = Istart+localsize; }else{ ierr = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE ,n,&tempvec);CHKERRQ(ierr); ierr = VecGetOwnershipRange(tempvec,&Istart,&Iend);CHKERRQ(ierr); localsize = Iend-Istart; 
ierr = VecDestroy(tempvec);CHKERRQ(ierr); VecCreateMPI(PETSC_COMM_WORLD,localsize*n2,PETSC_DETERMINE ,&x0); } //ierr = PetscPrintf(PETSC_COMM_WORLD,"\n\n\n\n\nPETSC: So far so good\n\n\n\n"); VecGetArray2d(x0,n2,localsize,0,0,&x0array); // Create initial vector for(j=0;j<localsize;j++){ for(i=0;i<n2;i++){ cx = (Istart+j+0.5)*dx; x0array[i][j] = cos(2*M_PI*cx); } } VecRestoreArray2d(x0,n2,localsize,0,0,&x0array); ierr = VecDuplicate(x0,&x);CHKERRQ(ierr); ierr = VecNorm(x0,NORM_2,Mixnorm); CHKERRQ(ierr); PetscPrintf(PETSC_COMM_WORLD,"PETSC: initial norm= %f \n",*(Mixnorm+0)/n ); vinda = &x0; vindb = &x; sprintf(fname, "mixnorm_%d_%d",n,iter); ierr =PetscPrintf(PETSC_COMM_WORLD,"\n iter norm time unit time\n");CHKERRQ(ierr); ierr =PetscFOpen(PETSC_COMM_WORLD,fname,"w",&fidoutput);CHKERRQ(ierr); /////////////////////////////////////////////////////////////////////////////////////////////////// // Memory allocation for the iteration scheme // cacheInt = malloc(1*pmax*sizeof(PetscInt)); // cacheScalar = malloc(2*pmax*sizeof(PetscScalar)); cacheInt = malloc(2*pmax*sizeof(PetscInt)); cacheScalar = malloc(2*pmax*sizeof(PetscScalar)); /////////////////////////////////////////////////////////////////////////////////////////////////// // Iteration here! 
for(niter=0;niter<iter;niter++){ ierr = PetscGetTime(&v1);CHKERRQ(ierr); // BackwardAverage(vinda, vindb, cacheInt, cacheScalar, n, npt, pmax, Istart,Iend); // BackwardAverageR(vinda, vindb, cacheInt, cacheScalar, n, npt, pmax, Istart,Iend); BackwardAverageRL(vinda, vindb, cacheInt, cacheScalar, n, npt, pmax, Istart,Iend); vindc = vindb; vindb = vinda; vinda = vindc; // if(doSmoothing==1){Smoothing(vinda, vindb,n, myDA, Istart,Iend);} ierr = PetscGetTime(&v2);CHKERRQ(ierr); //vindc = vindb; //vindb = vinda; //vinda = vindc; ierr = VecNorm(*vinda,NORM_2,Mixnorm+niter); CHKERRQ(ierr); *(Mixnorm+niter) = *(Mixnorm+niter)/n; elapsed_time = v2 - v1; PetscPrintf(PETSC_COMM_WORLD," %d %f %f %f \n",niter,*(Mixnorm+niter),elapsed_time,elapsed_time/n/n*1e6 ); PetscFPrintf(PETSC_COMM_WORLD,fidoutput," %d %f %f %f\n" ,niter,*(Mixnorm+niter),elapsed_time,elapsed_time/n/n*1e6 ); } //////////////////////////////////////////////////////////////////////////////////////////////////// //Change oremtation of vector VecGetArray2d(*vinda,n2,localsize,0,0,&aarray); VecGetArray2d(*vindb,localsize,n2,0,0,&barray); for(j=0;j<localsize;j++){ for(i=0;i<n2;i++){ barray[j][i] = aarray[i][j]; } } VecRestoreArray2d(*vinda,n2,localsize,0,0,&aarray); VecRestoreArray2d(*vindb,localsize,n2,0,0,&barray); vindc = vindb; vindb = vinda; vinda = vindc; //////////////////////////////////////////////////////////////////////////////////////////////////// // FFT part if(doFFT==1){FFT2D(*vinda,*vindb, localsize, n, Istart,Iend, iter,doSmoothing);} //////////////////////////////////////////////////////////////////////////////////////////////////// /* if(rank==0){ sprintf(ftimelog, "timelog_%d_%d",n,iter); fidtimelog = fopen(ftimelog,"w"); for(i=0;i<timelogcount;i++){ fprintf(fidtimelog,"%f ",Timelog[i]); } fprintf(fidtimelog,"\n "); for(j = 1;j<size;j++){ MPI_Recv(Timelog,timelogcount,MPI_DOUBLE,j,j,PETSC_COMM_WORLD,&status); for(i=0;i<timelogcount;i++){ fprintf(fidtimelog,"%f ",Timelog[i]); } 
fprintf(fidtimelog,"\n "); } fclose(fidtimelog); }else{ MPI_Send(Timelog ,timelogcount,MPI_DOUBLE,0,rank,PETSC_COMM_WORLD); } PetscFClose(PETSC_COMM_WORLD,fidoutput); */ /////////////////////////////////////////////////////////////////////////// if(withMatlab==1){ VecView(*vinda,socketviewer); PetscScalarView(iter,Mixnorm,socketviewer); } // free(x0array); free(Mixnorm); free(cacheInt); free(cacheScalar); ierr = VecDestroy(x0);CHKERRQ(ierr); ierr = VecDestroy(x);CHKERRQ(ierr); PetscPrintf(PETSC_COMM_WORLD,"Done!"); ////////////////////////////////////////////////////////////////////////////////////// ierr = PetscFinalize();CHKERRQ(ierr);
/* MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy - numeric phase of C = P^T * A * P
 * for SeqAIJ matrices.  For each row i of A it forms the sparse row
 * AP[i,:] = A[i,:]*P in the dense scratch array ptap->apa, then accumulates
 * the outer product P[i,:]^T * AP[i,:] into C with a sparse axpy per
 * nonzero of P[i,:].
 *
 * BUG FIX: the sparse-axpy merge previously started at apj[1] (silently
 * skipping the first AP column) and advanced with the post-increment
 * `apcol = apj[nextap++]`, which re-read the column just consumed; since a
 * sorted row has distinct columns, that value could never match again and
 * the scan ran past the end of the C row.  Rewritten as a standard
 * two-pointer merge over the sorted column lists, starting at apj[0].
 */
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy(Mat A,Mat P,Mat C)
{
  PetscErrorCode    ierr;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ        *p = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ        *c = (Mat_SeqAIJ *) C->data;
  const PetscInt    *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*ci=c->i,*cj=c->j;
  const PetscScalar *aa=a->a,*pa=p->a,*pval;
  const PetscInt    *apj,*pcol,*cjj;
  const PetscInt    am=A->rmap->N,cm=C->rmap->N;
  PetscInt          i,j,k,anz,apnz,pnz,prow,crow,apcol,nextap;
  PetscScalar       *apa,*ca=c->a,*caj,pvalj;
  Mat_PtAP          *ptap = c->ptap;
#if defined(PROFILE_MatPtAPNumeric)
  PetscLogDouble    t0,tf,time_Cseq0=0.0,time_Cseq1=0.0;
  PetscInt          flops0=0,flops1=0;
#endif

  PetscFunctionBegin;
  /* Get temporary array for storage of one row of A*P */
  apa = ptap->apa;
  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0; i<am; i++) {
    /* Form sparse row of AP[i,:] = A[i,:]*P in the dense scratch apa */
#if defined(PROFILE_MatPtAPNumeric)
    ierr = PetscGetTime(&t0);CHKERRQ(ierr);
#endif
    anz = ai[i+1] - ai[i];
    for (j=0; j<anz; j++) {
      prow = aj[j];
      pnz  = pi[prow+1] - pi[prow];
      pcol = pj + pi[prow];
      pval = pa + pi[prow];
      for (k=0; k<pnz; k++) {
        apa[pcol[k]] += aa[j]*pval[k];
      }
      ierr = PetscLogFlops(2.0*pnz);CHKERRQ(ierr);
#if defined(PROFILE_MatPtAPNumeric)
      flops0 += 2.0*pnz;
#endif
    }
    aj += anz; aa += anz; /* advance to the next row of A */
#if defined(PROFILE_MatPtAPNumeric)
    ierr = PetscGetTime(&tf);CHKERRQ(ierr);
    time_Cseq0 += tf - t0;
#endif

    /* Compute P^T*A*P using outer product P[i,:]^T*AP[i,:]. */
#if defined(PROFILE_MatPtAPNumeric)
    ierr = PetscGetTime(&t0);CHKERRQ(ierr);
#endif
    apj  = ptap->apj + ptap->api[i];
    apnz = ptap->api[i+1] - ptap->api[i];
    pnz  = pi[i+1] - pi[i];
    pcol = pj + pi[i];
    pval = pa + pi[i];

    for (j=0; j<pnz; j++) {
      crow  = pcol[j];
      cjj   = cj + ci[crow];
      caj   = ca + ci[crow];
      pvalj = pval[j];

      /* Sparse axpy C[crow,:] += pvalj * AP[i,:]: merge the sorted column
         lists apj[] and cjj[].  The symbolic phase guarantees every AP
         column appears in row crow of C, so nextap reaches apnz. */
      nextap = 0;
      for (k=0; nextap<apnz; k++) {
        apcol = apj[nextap];
        if (cjj[k] == apcol) {
          caj[k] += pvalj*apa[apcol];
          nextap++;
        }
      }
      ierr = PetscLogFlops(2.0*apnz);CHKERRQ(ierr);
#if defined(PROFILE_MatPtAPNumeric)
      flops1 += 2.0*apnz;
#endif
    }
#if defined(PROFILE_MatPtAPNumeric)
    ierr = PetscGetTime(&tf);CHKERRQ(ierr);
    time_Cseq1 += tf - t0;
#endif

    /* Zero the current row info for A*P (only the columns just used) */
    for (j=0; j<apnz; j++) {
      apcol      = apj[j];
      apa[apcol] = 0.;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#if defined(PROFILE_MatPtAPNumeric)
  printf("MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy time %g + %g, flops %d %d\n",time_Cseq0,time_Cseq1,flops0,flops1);
#endif
  PetscFunctionReturn(0);
}
/* OGS (OpenGeoSys) program entry point.
 * Parses the command line (help/build-info/version/model-root/output-directory,
 * plus -ddc under MPI builds), initializes the configured parallel/solver
 * back end (MPI, PETSc, LIS), runs the time-discretized Problem, then
 * finalizes everything and reports timings. */
int main ( int argc, char* argv[] )
{
	/* parse command line arguments */
	std::string anArg;
	std::string modelRoot;
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || \
	defined(USE_MPI_GEMS) || defined(USE_MPI_KRC)
	int nb_ddc=0; //number of cores for DDC related processes
#endif

	for( int i = 1; i < argc; i++ )
	{
		anArg = std::string( argv[i] );
		if( anArg == "--help" || anArg == "-h")
		{
			std::cout << "Usage: ogs [MODEL_ROOT] [OPTIONS]\n"
			          << "Where OPTIONS are:\n"
			          << " -h [--help] print this message and exit\n"
			          << " -b [--build-info] print build info and exit\n"
			          << " --output-directory DIR put output files into DIR\n"
			          << " --version print ogs version and exit"
			          << "\n";
			continue;
		}
		if( anArg == "--build-info" || anArg == "-b" )
		{
			std::cout << "ogs version: " << BuildInfo::OGS_VERSION << "\n"
			          << "ogs date: " << BuildInfo::OGS_DATE << "\n";
			std::cout << "git commit info: " << BuildInfo::GIT_COMMIT_INFO << "\n";
			std::cout << "build timestamp: " << BuildInfo::BUILD_TIMESTAMP << "\n";
			continue;
		}
		if( anArg == "--version" )
		{
			std::cout << BuildInfo::OGS_VERSION << "\n";
			continue;
		}
		if( anArg == "--model-root" || anArg == "-m" )
		{
			if (i+1 >= argc)
			{
				std::cerr << "Error: Parameter " << anArg << " needs an additional argument" << std::endl;
				std::exit(EXIT_FAILURE);
			}
			modelRoot = std::string( argv[++i] );
			continue;
		}
		if (anArg == "--output-directory")
		{
			if (i+1 >= argc)
			{
				std::cerr << "Error: Parameter " << anArg << " needs an additional argument" << std::endl;
				std::exit(EXIT_FAILURE);
			}
			std::string path = argv[++i];
			if (! path.empty()) defaultOutputPath = path;
			continue;
		}
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || \
	defined(USE_MPI_GEMS) || defined(USE_MPI_KRC)
		std::string decompositions;
		/* NOTE(review): unlike the options above, -ddc does not check i+1 < argc
		   before reading argv[++i] -- confirm */
		if( anArg == "--domain-decomposition" || anArg == "-ddc" )
		{
			decompositions = std::string( argv[++i] );
			nb_ddc = atoi(decompositions.c_str());
			continue;
		}
#endif
		// anything left over must be the model root, unless already found
		if ( modelRoot == "" )
			modelRoot = std::string( argv[i] );
	} // end of parse argc loop

	if( argc > 1 && modelRoot == "" ) // non-interactive mode and no model given
		exit(0); // e.g. just wanted the build info

	std::string solver_pkg_name = BuildInfo::SOLVER_PACKAGE_NAME;
	// No default linear solver package is in use.
	if(solver_pkg_name.find("Default") == std::string::npos)
	{
		std::cout << "\nWarning: " << solver_pkg_name << " other than the OGS default one is in use." <<std::endl;
		std::cout << " The solver setting may need to be adjusted for the solution accuracy!" << std::endl;
	}

	char* dateiname(NULL);
#ifdef SUPERCOMPUTER
	// *********************************************************************
	// buffered output ... important for performance on cray
	// (unbuffered output is limited to 10 bytes per second)
	// [email protected] 11.10.2007
	char buf[1024 * 1024];
	int bsize;
	bsize = 1024 * 1024;
	// question: what happens if buffer is full?
	// according to documentation the buffer is flushed when full.
	// If we have a lot of output, increasing buffer is usefull.
	if(bsize > 0)
		// bufstd = malloc(bsize);
		setvbuf(stdout, buf, _IOFBF, bsize);
	//**********************************************************************
#endif

	/*---------- MPI Initialization ----------------------------------*/
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || \
	defined(USE_MPI_GEMS) || defined(USE_MPI_KRC)
	printf("Before MPI_Init\n");
#if defined(USE_MPI_GEMS)
	int prov;
	MPI_Init_thread(&argc,&argv,MPI_THREAD_FUNNELED, &prov);
#else
	MPI_Init(&argc,&argv);
#endif
	MPI_Barrier (MPI_COMM_WORLD); // 12.09.2007 WW
	elapsed_time_mpi = -MPI_Wtime(); // 12.09.2007 WW
	bool splitcomm_flag;
	int np;
	MPI_Comm_size(MPI_COMM_WORLD, &np);
	/* split off the IPhreeqc worker ranks from the DDC ranks */
	splitcomm_flag = SplitMPI_Communicator::CreateCommunicator(MPI_COMM_WORLD, np, nb_ddc);
	time_ele_paral = 0.0;
#endif
	/*---------- MPI Initialization ----------------------------------*/

#ifdef USE_PETSC
	int rank, r_size;
	PetscLogDouble v1,v2;
	char help[] = "OGS with PETSc \n";
	//PetscInitialize(argc, argv, help);
	PetscInitialize(&argc,&argv,(char *)0,help);
	//kg44 quick fix to compile PETSC with version PETSCV3.4
#ifdef USEPETSC34
	PetscTime(&v1);
#else
	PetscGetTime(&v1);
#endif
	MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
	MPI_Comm_size(PETSC_COMM_WORLD, &r_size);
	PetscSynchronizedPrintf(PETSC_COMM_WORLD, "===\nUse PETSc solver");
	PetscSynchronizedPrintf(PETSC_COMM_WORLD, "Number of CPUs: %d, rank: %d\n", r_size, rank);
#endif

	/*---------- LIS solver -----------------------------------------*/
#ifdef LIS
	//Initialization of the lis solver.
	lis_initialize(&argc, &argv);
#endif
	/*========================================================================*/
	/* Kommunikation mit Betriebssystem */
	/* Timer fuer Gesamtzeit starten */
#ifdef TESTTIME
	TStartTimer(0);
#endif
	/* Intro ausgeben */
#if defined(USE_MPI) //WW
	if(myrank == 0)
#endif
#ifdef USE_PETSC
	if(rank == 0 )
#endif
	DisplayStartMsg();
	/* Speicherverwaltung initialisieren */
	if (!InitMemoryTest())
	{
		DisplayErrorMsg("Fehler: Speicherprotokoll kann nicht erstellt werden!");
		DisplayErrorMsg(" Programm vorzeitig beendet!");
		return 1; // LB changed from 0 to 1 because 0 is indicating success
	}
	if( argc == 1 ) // interactive mode
		dateiname = ReadString();
	else // non-interactive mode
	{
		if ( argc == 2 ) // a model root was supplied
		{
			dateiname = (char*) Malloc((int)strlen(argv[1]) + 1);
			dateiname = strcpy(dateiname,argv[1]);
		}
		else // several args supplied
		if( modelRoot != "")
		{
			dateiname = (char*) Malloc( (int) modelRoot.size() + 1 );
			dateiname = strcpy( dateiname, modelRoot.c_str() );
		}
		DisplayMsgLn(dateiname);
	}
	//WW DisplayMsgLn("");
	//WW DisplayMsgLn("");
	// ----------23.02.2009. WW-----------------
	// LB Check if file exists
	std::string tmpFilename = dateiname;
	tmpFilename.append(".pcs");
	if(!IsFileExisting(tmpFilename))
	{
		std::cout << " Error: Cannot find file " << dateiname << "\n";
		return 1;
	}

	// If no option is given, output files are placed in the same directory as the input files
	if (defaultOutputPath.empty())
		defaultOutputPath = pathDirname(std::string(dateiname));

	FileName = dateiname;
	size_t indexChWin, indexChLinux;
	indexChWin = indexChLinux = 0;
	indexChWin = FileName.find_last_of('\\');
	indexChLinux = FileName.find_last_of('/');
	//
	if(indexChWin != std::string::npos)
		FilePath = FileName.substr(0,indexChWin) + "\\";
	else if(indexChLinux != std::string::npos)
		FilePath = FileName.substr(0,indexChLinux) + "/";
	// ---------------------------WW
	Problem* aproblem = new Problem(dateiname);
#ifdef USE_PETSC
	aproblem->setRankandSize(rank, r_size);
#endif
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || defined(USE_MPI_GEMS) || defined(USE_MPI_KRC)
	aproblem->setRankandSize(myrank, mysize);
	if (myrank != MPI_UNDEFINED)
	{
#endif
	/* run the actual simulation */
	aproblem->Euler_TimeDiscretize();
	delete aproblem;
	aproblem = NULL;
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || defined(USE_MPI_GEMS) || defined(USE_MPI_KRC)
	}
	//sending killing signals to ranks of group_IPQC, only when the group exists
	if (splitcomm_flag == true){
		int signal = -1, rank_IPQC, mysize_IPQC = np - nb_ddc;
		for (int i=0; i< mysize_IPQC; i++){
			rank_IPQC = mysize + i;
			MPI_Send(&signal, 1, MPI_INT, rank_IPQC, 0, MPI_COMM_WORLD);
		}
	}
#endif

	if(ClockTimeVec.size()>0)
		ClockTimeVec[0]->PrintTimes(); //CB time
	DestroyClockTime();
#ifdef TESTTIME
#if defined(USE_MPI)
	if(myrank == 0)
#endif
#if defined(USE_PETSC)
	if(rank == 0)
#endif
	std::cout << "Simulation time: " << TGetTimer(0) << "s" << "\n";
#endif
	/* Abspann ausgeben */
	/*--------- MPI Finalize ------------------*/
	/* NOTE(review): this guard omits USE_MPI_GEMS and USE_MPI_PARPROC-style
	   symmetry with the init guard above (no USE_MPI_GEMS here), so
	   MPI_Finalize is not reached in a GEMS-only build -- confirm intent */
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || defined(USE_MPI_KRC)
	elapsed_time_mpi += MPI_Wtime(); // 12.09.2007 WW
	std::cout << "\n *** Total CPU time of parallel modeling: " << elapsed_time_mpi << "\n"; //WW
	// Count CPU time of post time loop WW
	MPI_Finalize();
#endif
	/*--------- MPI Finalize ------------------*/
	/*--------- LIS Finalize ------------------*/
#ifdef LIS
	lis_finalize();
#endif
	/*--------- LIS Finalize ------------------*/

	free(dateiname);

#ifdef USE_PETSC
	//kg44 quick fix to compile PETSC with version PETSCV3.4
#ifdef USEPETSC34
	PetscTime(&v2);
#else
	PetscGetTime(&v2);
#endif
	PetscPrintf(PETSC_COMM_WORLD,"\t\n>>Total elapsed time by using PETSC:%f s\n",v2-v1);
	PetscFinalize();
#endif
	return 0;
}
/// Solve the already-assembled linear system A x = b with the KSP object
/// held by this solver (lsolver), report the convergence outcome on stdout,
/// and accumulate the wall-clock time of the solve into time_elapsed.
///
/// NOTE(review): assumes the members A (system matrix), b (RHS), x (solution
/// vector), lsolver (KSP) and prec (PC) were fully set up before this call --
/// confirm against the class's initialization code (not visible here).
void PETScLinearSolver::Solver()
{
   //TEST
#ifdef TEST_MEM_PETSC
   // Optional diagnostics: snapshot process memory before and after the solve.
   PetscLogDouble mem1, mem2;
   PetscMemoryGetCurrentUsage(&mem1);
#endif

  /*
  //TEST
  PetscViewer viewer;
  PetscViewerASCIIOpen(PETSC_COMM_WORLD, "x.txt", &viewer);
  PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
  PetscObjectSetName((PetscObject)x,"Solution");
  VecView(x, viewer);
  */

   int its;                       // iteration count reported by KSP
   PetscLogDouble v1,v2;          // wall-clock stamps around the solve
   KSPConvergedReason reason;     // why KSP stopped (converged/diverged)

   // #define PETSC34
   // kg44: quick fix to compile against PETSc 3.4, where PetscGetTime()
   // was renamed to PetscTime().
#ifdef USEPETSC34
   PetscTime(&v1);
#else
   PetscGetTime(&v1);
#endif

   // KSPSetOperators lost its MatStructure argument in PETSc 3.5;
   // keep both call forms selected by the PETSc version macros.
#if (PETSC_VERSION_MAJOR == 3) && (PETSC_VERSION_MINOR > 4)
   KSPSetOperators(lsolver, A, A);
#else
   KSPSetOperators(lsolver, A, A, DIFFERENT_NONZERO_PATTERN);
#endif

   KSPSolve(lsolver, b, x);

   // Classify the outcome: indefinite preconditioner gets a dedicated hint,
   // any other negative reason is reported generically, otherwise print
   // solver/preconditioner names and the iteration count.
   KSPGetConvergedReason(lsolver,&reason); //CHKERRQ(ierr);
   if (reason==KSP_DIVERGED_INDEFINITE_PC)
   {
      PetscPrintf(PETSC_COMM_WORLD,"\nDivergence because of indefinite preconditioner;\n");
      PetscPrintf(PETSC_COMM_WORLD,"Run the executable again but with -pc_factor_shift_positive_definite option.\n");
   }
   else if (reason<0)
   {
      PetscPrintf(PETSC_COMM_WORLD,"\nOther kind of divergence: this should not happen.\n");
   }
   else
   {
      const char *slv_type;
      const char *prc_type;
      KSPGetType(lsolver, &slv_type);
      PCGetType(prec, &prc_type);
      PetscPrintf(PETSC_COMM_WORLD,"\n================================================");
      PetscPrintf(PETSC_COMM_WORLD, "\nLinear solver %s with %s preconditioner", slv_type, prc_type);
      KSPGetIterationNumber(lsolver,&its); //CHKERRQ(ierr);
      PetscPrintf(PETSC_COMM_WORLD,"\nConvergence in %d iterations.\n",(int)its);
      PetscPrintf(PETSC_COMM_WORLD,"\n================================================");
   }
   PetscPrintf(PETSC_COMM_WORLD,"\n");

   //VecAssemblyBegin(x);
   //VecAssemblyEnd(x);

   // kg44: same PETSc 3.4 timing-API fix as above.
#ifdef USEPETSC34
   PetscTime(&v2);
#else
   PetscGetTime(&v2);
#endif

   time_elapsed += v2-v1;

   // The 'a' prefix deliberately disables the TEST_OUT debug block below;
   // rename to TEST_OUT to dump the system, free everything and exit.
#define aTEST_OUT
#ifdef TEST_OUT
   //TEST: write matrix, solution and RHS in MATLAB format, then tear down
   // all PETSc objects and terminate the whole program.
   PetscViewer viewer;
   PetscViewerASCIIOpen(PETSC_COMM_WORLD, "x2.txt", &viewer);
   PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
   PetscObjectSetName((PetscObject)A,"Matrix");
   MatView(A, viewer);
   PetscObjectSetName((PetscObject)x,"Solution");
   VecView(x, viewer);
   PetscObjectSetName((PetscObject)b,"RHS");
   VecView(b, viewer);

   VecDestroy(&b);
   VecDestroy(&x);
   MatDestroy(&A);
   if(lsolver) KSPDestroy(&lsolver);
   // if(prec) PCDestroy(&prec);
   if(global_x0)
      delete [] global_x0;
   if(global_x1)
      delete [] global_x1;
   PetscFinalize();
   exit(0);
#endif

#ifdef TEST_MEM_PETSC
   //TEST: report memory growth caused by the solve.
   PetscMemoryGetCurrentUsage(&mem2);
   PetscPrintf(PETSC_COMM_WORLD, "###Memory usage by solver. Before :%f After:%f Increase:%d\n", mem1, mem2, (int)(mem2 - mem1));
#endif
}
/* This function returns the suitable solver for pressure. linear system */
/*
 * Build and return a KSP configured for the pressure Poisson system:
 * GMRES(20) preconditioned by Hypre BoomerAMG, with relative tolerance
 * taken from params->resmax and a cap of 300 iterations.
 *
 * Parameters:
 *   lhs    - assembled pressure-system matrix; used as both operator and
 *            preconditioning matrix (SAME_PRECONDITIONER: PC built once).
 *   params - run parameters; only resmax (relative tolerance) is read here.
 *
 * Returns: the created KSP. Ownership passes to the caller (KSPDestroy).
 *
 * NOTE(review): BoomerAMG options are injected via the global PETSc options
 * database (PetscOptionsSetValue) and therefore affect every hypre PC in
 * the process, not just this solver -- confirm this is intended.
 */
KSP Solver_get_pressure_solver(Mat lhs, Parameters *params)
{
	KSP solver;
	PC pc;
	double rtol;
	int ierr;
	PetscLogDouble T1, T2;

	rtol = params->resmax;

	ierr = KSPCreate(PETSC_COMM_WORLD, &solver); PETScErrAct(ierr);

	/* Dead branch kept for experimentation: attach a constant null space
	   when the pressure system is singular (pure Neumann BCs).
	   NOTE(review): if enabled, the commented MatNullSpaceDestroy() leaks
	   the null space object. */
	short int hasnullspace = NO;
	if (hasnullspace) {
		MatNullSpace nullsp;
		MatNullSpaceCreate(PETSC_COMM_WORLD, PETSC_TRUE, 0, PETSC_NULL, &nullsp);
		KSPSetNullSpace(solver, nullsp);
		//MatNullSpaceDestroy(nullsp);
	}

	ierr = KSPSetOperators(solver, lhs, lhs, SAME_PRECONDITIONER); PETScErrAct(ierr);
	ierr = KSPSetType(solver, KSPGMRES); PETScErrAct(ierr);
	/* restart length 20 bounds GMRES memory at the cost of convergence */
	ierr = KSPGMRESSetRestart(solver, 20); PETScErrAct(ierr);
	ierr = KSPSetTolerances(solver, rtol, PETSC_DEFAULT, PETSC_DEFAULT, 300); PETScErrAct(ierr);

	ierr = KSPGetPC(solver, &pc); PETScErrAct(ierr);
	//PCSetType(pc, PCNONE);
	//PCSetType(pc, PCASM);
	PCSetType(pc, PCHYPRE);
	PCHYPRESetType(pc,"boomeramg");
	/* BoomerAMG tuning; picked up later by KSPSetFromOptions() below. */
	ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_max_levels", "25"); PETScErrAct(ierr);
	ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_strong_threshold", "0.0"); PETScErrAct(ierr);
	ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_relax_type_all", "SOR/Jacobi"); PETScErrAct(ierr);
	//ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_cycle_type", ""); PETScErrAct(ierr);
	//ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_cycle_type", "V"); PETScErrAct(ierr);
	//ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_coarsen_type", "PMIS"); PETScErrAct(ierr);
	//ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_truncfactor", "0.9"); PETScErrAct(ierr);

	/*******************************************************************************************************/
	/* Hypre-PETSc interface notes.
	   The most important parameters to be set are
	   1- Strong Threshold
	   2- Truncation Factor
	   3- Coarsening Type
	*/
	/* Between 0 to 1 */
	/* "0" gives better convergence rate (in 3D). */
	/* Suggested values (by the Hypre manual): 0.25 for 2D, 0.5 for 3D
	   ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_strong_threshold", "0.0"); PETScErrAct(ierr);
	*/
	/*******************************************************************************************************/
	/* Available Options:
	   "CLJP","Ruge-Stueben","modifiedRuge-Stueben","Falgout", "PMIS", "HMIS"
	   Falgout is usually the best.
	   ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_coarsen_type", "Falgout"); PETScErrAct(ierr);
	*/
	/*******************************************************************************************************/
	/* Available options: "local", "global"
	   ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_measure_type", "local"); PETScErrAct(ierr);
	*/
	/*******************************************************************************************************/
	/* Available options:
	   Jacobi,sequential-Gauss-Seidel, SOR/Jacobi,backward-SOR/Jacobi,symmetric-SOR/Jacobi,Gaussian-elimination
	   Important: If you are using a symmetric KSP solver (like CG), you should use a symmetric smoother here.
	   ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_relax_type_all", "symmetric-SOR/Jacobi"); PETScErrAct(ierr);
	*/
	/*******************************************************************************************************/
	/* Available options: "V", "W"
	   ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_cycle_type", "V"); PETScErrAct(ierr);
	*/
	/*******************************************************************************************************/
	/* Available options: "classical", "", "", "direct", "multipass", "multipass-wts", "ext+i",
	   "ext+i-cc", "standard", "standard-wts", "", "", "FF", "FF1"
	   ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_interp_type", ""); PETScErrAct(ierr);
	*/
	/*******************************************************************************************************/
	/* Available options: Greater than zero.
	   Use zero for the best convergence. However, if you have memory problems, use greater than zero to save some memory.
	   ierr = PetscOptionsSetValue("-pc_hypre_boomeramg_truncfactor", "0.0"); PETScErrAct(ierr);
	*/
	/* Preconditioner Generation Options
	   PCSetType(pc,PCHYPRE) or -pc_type hypre
	   -pc_hypre_boomeramg_max_levels nmax
	   -pc_hypre_boomeramg_truncfactor
	   -pc_hypre_boomeramg_strong_threshold
	   -pc_hypre_boomeramg_max_row_sum
	   -pc_hypre_boomeramg_no_CF
	   -pc_hypre_boomeramg_coarsen_type CLJP,Ruge-Stueben,modifiedRuge-Stueben,
	   -pc_hypre_boomeramg_measure_type local,global

	   Preconditioner Iteration Options
	   -pc_hypre_boomeramg_relax_type_all Jacobi,sequential-Gauss-Seidel,
	    SOR/Jacobi,backward-SOR/Jacobi,symmetric-SOR/Jacobi,Gaussian-eliminat
	   -pc_hypre_boomeramg_relax_type_fine
	   -pc_hypre_boomeramg_relax_type_down
	   -pc_hypre_boomeramg_relax_type_up
	   -pc_hypre_boomeramg_relax_weight_all r
	   -pc_hypre_boomeramg_outer_relax_weight_all r
	   -pc_hypre_boomeramg_grid_sweeps_down n
	   -pc_hypre_boomeramg_grid_sweeps_up n
	   -pc_hypre_boomeramg_grid_sweeps_coarse n
	   -pc_hypre_boomeramg_tol tol
	   -pc_hypre_boomeramg_max_iter it
	*/
	/*
	   //ierr = PCSetType(pc, PCASM); PETScErrAct(ierr);
	   ierr = PCSetType(pc, PCNONE); PETScErrAct(ierr);
	   //ierr = PCSetType(pc, PCILU); PETScErrAct(ierr);
	   //ierr = PCSetType(pc, PCBJACOBI); PETScErrAct(ierr);
	   //ierr = PCSetType(pc, PCLU); PETScErrAct(ierr);
	   //ierr = PCSetType(pc, PCEISENSTAT); PETScErrAct(ierr);
	   //ierr = PCSetType(pc, PCSOR); PETScErrAct(ierr);
	   //ierr = PCSetType(pc, PCJACOBI); PETScErrAct(ierr);
	   //ierr = PCSetType(pc, PCNONE); PETScErrAct(ierr);
	*/
	/*
	   ierr = KSPGetPC(solver, &pc); PETScErrAct(ierr);
	   ierr = PCSetType(pc, PCILU); PETScErrAct(ierr);
	   ierr = PCFactorSetLevels(pc, 3); PETScErrAct(ierr);
	   //ierr = PCFactorSetUseDropTolerance(pc, 1e-3, .1, 50); PETScErrAct(ierr);
	   //ierr = PCFactorSetFill(pc, 30.7648); PETScErrAct(ierr);
	   ierr = PCFactorSetReuseOrdering(pc, PETSC_TRUE); PETScErrAct(ierr);
	   ierr = PCFactorSetReuseFill(pc, PETSC_TRUE); PETScErrAct(ierr);
	   ierr = PCFactorSetAllowDiagonalFill(pc); PETScErrAct(ierr);
	   //ierr = PCFactorSetUseInPlace(pc); PETScErrAct(ierr);
	*/

	/* reuse the previous pressure field as the initial guess */
	ierr = KSPSetInitialGuessNonzero(solver, PETSC_TRUE); PETScErrAct(ierr);

	/* time the option processing / setup and report it */
	ierr = PetscGetTime(&T1);PETScErrAct(ierr);
	ierr = KSPSetFromOptions(solver); PETScErrAct(ierr);
	ierr = PetscGetTime(&T2);PETScErrAct(ierr);
	PetscPrintf(PCW, "Setup time for the Pressure solver was:%f\n", (T2 - T1));
	ierr = KSPView(solver, PETSC_VIEWER_STDOUT_WORLD); PETScErrAct(ierr);
	return solver;
}
/*
 * OpenGeoSys entry point.
 *
 * Flow: parse command-line options -> initialize the parallel runtime
 * (MPI / PETSc / LIS, depending on build flags) -> determine the model
 * root filename -> construct a Problem and run its Euler time loop ->
 * print timing info and finalize all runtimes.
 *
 * Returns 0 on success, 1 on setup failure (missing input file or
 * memory-protocol failure).
 */
int main ( int argc, char* argv[] )
{
	/* parse command line arguments */
	std::string anArg;
	std::string modelRoot;

	for( int i = 1; i < argc; i++ )
	{
		anArg = std::string( argv[i] );
		if( anArg == "--help" || anArg == "-h")
		{
			std::cout << "Usage: ogs [MODEL_ROOT] [OPTIONS]\n"
			          << "Where OPTIONS are:\n"
			          << "  -h [--help]       print this message and exit\n"
			          << "  -b [--build-info] print build info and exit\n"
			          << "  --version         print ogs version and exit" << "\n";
			continue;
		}
		if( anArg == "--build-info" || anArg == "-b" )
		{
			std::cout << "ogs version: " << OGS_VERSION << "\n"
			          << "ogs date: " << OGS_DATE << "\n";
#ifdef CMAKE_CMD_ARGS
			std::cout << "cmake command line arguments: " << CMAKE_CMD_ARGS << "\n";
#endif // CMAKE_CMD_ARGS
#ifdef GIT_COMMIT_INFO
			std::cout << "git commit info: " << GIT_COMMIT_INFO << "\n";
#endif // GIT_COMMIT_INFO
#ifdef SVN_REVISION
			std::cout << "subversion info: " << SVN_REVISION << "\n";
#endif // SVN_REVISION
#ifdef BUILD_TIMESTAMP
			std::cout << "build timestamp: " << BUILD_TIMESTAMP << "\n";
#endif // BUILD_TIMESTAMP
			continue;
		}
		if( anArg == "--version" )
		{
			std::cout << OGS_VERSION << "\n";
			continue;
		}
		if( anArg == "--model-root" || anArg == "-m" )
		{
			// NOTE(review): ++i consumes the next token as the value; if
			// "-m" is the last argument this reads argv[argc] -- confirm
			// callers never pass a bare "-m".
			modelRoot = std::string( argv[++i] );
			continue;
		}
		// anything left over must be the model root, unless already found
		if ( modelRoot == "" )
			modelRoot = std::string( argv[i] );
	} // end of parse argc loop

	if( argc > 1 && modelRoot == "" ) // non-interactive mode and no model given
		exit(0);             // e.g. just wanted the build info

	char* dateiname(NULL);   // model root filename ("dateiname" = German for filename)
#ifdef SUPERCOMPUTER
	// *********************************************************************
	// buffered output ... important for performance on cray
	// (unbuffered output is limited to 10 bytes per second)
	// [email protected] 11.10.2007
	char buf[1024 * 1024];
	int bsize;
	bsize = 1024 * 1024; // question: what happens if buffer is full?
	                     // according to documentation the buffer is flushed when full.
	                     // If we have a lot of output, increasing buffer is usefull.
	if(bsize > 0)
		// bufstd = malloc(bsize);
		setvbuf(stdout, buf, _IOFBF, bsize);
	//**********************************************************************
#endif
	/*---------- MPI Initialization ----------------------------------*/
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || \
	defined(USE_MPI_GEMS) || defined(USE_MPI_KRC)
	printf("Before MPI_Init\n");
#if defined(USE_MPI_GEMS)
	// GEMS uses threads internally, so request funneled thread support.
	int prov;
	MPI_Init_thread(&argc,&argv,MPI_THREAD_FUNNELED, &prov);
#else
	MPI_Init(&argc,&argv);
#endif
	MPI_Barrier (MPI_COMM_WORLD); // 12.09.2007 WW
	// start the parallel wall-clock: negative now, add MPI_Wtime() at the end
	elapsed_time_mpi = -MPI_Wtime(); // 12.09.2007 WW
	MPI_Comm_size(MPI_COMM_WORLD,&mysize);
	MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
	std::cout << "After MPI_Init myrank = " << myrank << '\n';
	time_ele_paral = 0.0;
#endif
	/*---------- MPI Initialization ----------------------------------*/

#ifdef USE_PETSC
	int rank, r_size;
	PetscLogDouble v1,v2;
	char help[] = "OGS with PETSc \n";
	//PetscInitialize(argc, argv, help);
	PetscInitialize(&argc,&argv,(char *)0,help);
	// kg44: quick fix to compile against PETSc 3.4 (PetscGetTime was renamed)
#ifdef USEPETSC34
	PetscTime(&v1);
#else
	PetscGetTime(&v1);
#endif
	MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
	MPI_Comm_size(PETSC_COMM_WORLD, &r_size);
	PetscSynchronizedPrintf(PETSC_COMM_WORLD, "===\nUse PETSc solver");
	PetscSynchronizedPrintf(PETSC_COMM_WORLD, "Number of CPUs: %d, rank: %d\n", r_size, rank);
#endif

	/*---------- LIS solver -----------------------------------------*/
#ifdef LIS
	//Initialization of the lis solver.
	lis_initialize(&argc, &argv);
#endif
	/*========================================================================*/
	/* Communication with the operating system */
	/* Start the timer for total runtime */
#ifdef TESTTIME
	TStartTimer(0);
#endif
	/* Print the start banner (rank 0 only in parallel builds) */
#if defined(USE_MPI) //WW
	if(myrank == 0)
#endif
#ifdef USE_PETSC
	if(rank == 0 )
#endif
	DisplayStartMsg();
	/* Initialize memory management / memory protocol */
	if (!InitMemoryTest())
	{
		DisplayErrorMsg("Fehler: Speicherprotokoll kann nicht erstellt werden!");
		DisplayErrorMsg("        Programm vorzeitig beendet!");
		return 1; // LB changed from 0 to 1 because 0 is indicating success
	}
	// Determine the model root filename: prompt for it interactively, or
	// copy it out of argv / the parsed --model-root option.
	if( argc == 1 )               // interactive mode
		dateiname = ReadString();
	else                          // non-interactive mode
	{
		if ( argc == 2 )      // a model root was supplied
		{
			dateiname = (char*) Malloc((int)strlen(argv[1]) + 1);
			dateiname = strcpy(dateiname,argv[1]);
		}
		else                  // several args supplied
			if( modelRoot != "")
			{
				dateiname = (char*) Malloc( (int) modelRoot.size() + 1 );
				dateiname = strcpy( dateiname, modelRoot.c_str() );
			}
		DisplayMsgLn(dateiname);
	}
	//WW  DisplayMsgLn("");
	//WW  DisplayMsgLn("");
	// ----------23.02.2009. WW-----------------
	// LB Check if file exists: every model must have a <root>.pcs file
	std::string tmpFilename = dateiname;
	tmpFilename.append(".pcs");
	if(!IsFileExisting(tmpFilename))
	{
		std::cout << " Error: Cannot find file " << dateiname << "\n";
		return 1;
	}
	// Split the model root into global FileName / FilePath, handling both
	// Windows ('\\') and POSIX ('/') path separators.
	FileName = dateiname;
	size_t indexChWin, indexChLinux;
	indexChWin = indexChLinux = 0;
	indexChWin = FileName.find_last_of('\\');
	indexChLinux = FileName.find_last_of('/');
	//
	if(indexChWin != std::string::npos)
		FilePath = FileName.substr(0,indexChWin) + "\\";
	else if(indexChLinux != std::string::npos)
		FilePath = FileName.substr(0,indexChLinux) + "/";
	// ---------------------------WW
	// Build the problem from the input files and run the full time loop.
	Problem* aproblem = new Problem(dateiname);
#ifdef USE_PETSC
	aproblem->setRankandSize(rank, r_size);
#endif
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || defined(USE_MPI_GEMS)  || defined(USE_MPI_KRC)
	aproblem->setRankandSize(myrank, mysize);
#endif
	aproblem->Euler_TimeDiscretize();
	delete aproblem;
	aproblem = NULL;
	if(ClockTimeVec.size()>0)
		ClockTimeVec[0]->PrintTimes();  //CB time
	DestroyClockTime();
#ifdef TESTTIME
#if defined(USE_MPI)
	if(myrank == 0)
#endif
#if defined(USE_PETSC)
	if(rank == 0)
#endif
	std::cout << "Simulation time: " << TGetTimer(0) << "s" << "\n";
#endif
	/* Print closing message */
	/*--------- MPI Finalize ------------------*/
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || defined(USE_MPI_KRC)
	elapsed_time_mpi += MPI_Wtime(); // 12.09.2007 WW
	std::cout << "\n *** Total CPU time of parallel modeling: " << elapsed_time_mpi << "\n"; //WW
	// Count CPU time of post time loop WW
	MPI_Finalize();
#endif
	/*--------- MPI Finalize ------------------*/
	/*--------- LIS Finalize ------------------*/
#ifdef LIS
	lis_finalize();
#endif
	/*--------- LIS Finalize ------------------*/

	free(dateiname);

#ifdef USE_PETSC
	// kg44: quick fix to compile against PETSc 3.4 (PetscGetTime was renamed)
#ifdef USEPETSC34
	PetscTime(&v2);
#else
	PetscGetTime(&v2);
#endif
	PetscPrintf(PETSC_COMM_WORLD,"\t\n>>Total elapsed time by using PETSC:%f s\n",v2-v1);
	PetscFinalize();
#endif
	return 0;
}
/*
 * Driver: compute the n_eigs smallest eigenpairs of a 7-point Laplacian on a
 * 3-D DA grid with BLOPEX/LOBPCG, preconditioned by a PETSc KSP.
 *
 * Options: -n_eigs (block size), -tol (absolute tolerance), -freepart
 * (let PETSc choose the DA partitioning), -full_out (print eigenvalue and
 * residual histories), -seed (re-randomize the initial block), -itr (max
 * LOBPCG iterations).
 *
 * Uses the PreLoad* macros to run the whole pipeline twice (tiny problem
 * first) so the reported timings exclude code-paging effects.
 */
int main(int argc,char **args)
{
   Vec            u;                         /* sample vector defining the layout */
   Mat            A;                         /* 7-pt Laplacian operator */
   PetscErrorCode ierr;
   mv_MultiVectorPtr          eigenvectors;  /* BLOPEX block of eigenvector iterates */
   PetscScalar  *             eigs;          /* converged eigenvalues */
   PetscScalar  *             eigs_hist;     /* eigenvalue history, column-wise */
   double *                   resid;         /* final residual norms */
   double *                   resid_hist;    /* residual-norm history, column-wise */
   int                        iterations;
   PetscMPIInt                rank;
   int                        n_eigs = 1;
   int                        seed = 1;
   int                        i,j;
   PetscLogDouble             t1,t2,elapsed_time;
   DA                         da;
   double                     tol=1e-08;
   PetscTruth                 option_present;
   PetscTruth                 freepart=PETSC_FALSE;
   PetscTruth                 full_output=PETSC_FALSE;
   PetscInt                   m,n,p;
   KSP                        ksp;
   lobpcg_Tolerance           lobpcg_tol;
   int                        maxIt = 100;
   mv_InterfaceInterpreter    ii;
   lobpcg_BLASLAPACKFunctions blap_fn;
   aux_data_struct            aux_data;
/* PetscViewer                viewer; */
   PetscInt                   tmp_int;
   mv_TempMultiVector *       xe;
   PetscInt                   N;
   PetscScalar *              xx;

   PetscInitialize(&argc,&args,(char *)0,help);

   /* read runtime options (defaults above are kept when absent) */
   ierr = PetscOptionsGetInt(PETSC_NULL,"-n_eigs",&tmp_int,&option_present);CHKERRQ(ierr);
   if (option_present)
      n_eigs = tmp_int;
   ierr = PetscOptionsGetReal(PETSC_NULL,"-tol", &tol,PETSC_NULL); CHKERRQ(ierr);
   ierr = PetscOptionsHasName(PETSC_NULL,"-freepart",&freepart); CHKERRQ(ierr);
   ierr = PetscOptionsHasName(PETSC_NULL,"-full_out",&full_output); CHKERRQ(ierr);
   ierr = PetscOptionsGetInt(PETSC_NULL,"-seed",&tmp_int,&option_present);CHKERRQ(ierr);
   if (option_present)
      seed = tmp_int;
   ierr = PetscOptionsGetInt(PETSC_NULL,"-itr",&tmp_int,&option_present);CHKERRQ(ierr);
   if (option_present)
      maxIt = tmp_int;

   if (seed<1)
      seed=1;

   /* we actually run our code twice: first time we solve small problem just to make sure
      that all program code is actually loaded into memory; then we solve the problem
      we are interested in; this trick is done for accurate timing */
   PreLoadBegin(PETSC_TRUE,"grid and matrix assembly");

   /* "create" the grid and stencil data; on first run we form small problem */
   if (PreLoadIt==0)
   {
      /* small problem */
      ierr=DACreate3d(PETSC_COMM_WORLD,DA_NONPERIODIC,DA_STENCIL_STAR,10,10,10, 1,PETSC_DECIDE,1,1,1,0,0,0,&da); CHKERRQ(ierr);
   }
   else
   {
      /* actual problem; negative grid sizes let -da_grid_x etc. override them */
      if (freepart)     /* petsc determines partitioning */
      {
         ierr=DACreate3d(PETSC_COMM_WORLD,DA_NONPERIODIC,DA_STENCIL_STAR,-10,-10,-10, PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,1,1,0,0,0,&da); CHKERRQ(ierr);
      }
      else             /* (1,NP,1) partitioning */
      {
         ierr=DACreate3d(PETSC_COMM_WORLD,DA_NONPERIODIC,DA_STENCIL_STAR,-10,-10,-10, 1,PETSC_DECIDE,1,1,1,0,0,0,&da); CHKERRQ(ierr);
      }
      /* now we print what partitioning is chosen */
      ierr=DAGetInfo(da,PETSC_NULL,PETSC_NULL,PETSC_NULL,PETSC_NULL,&m, &n,&p,PETSC_NULL,PETSC_NULL,PETSC_NULL,PETSC_NULL); CHKERRQ(ierr);
      PetscPrintf(PETSC_COMM_WORLD,"Partitioning: %u %u %u\n",m,n,p);
   }

   /* create matrix, whose nonzero structure and probably partitioning corresponds to
      grid and stencil structure */
   ierr=DAGetMatrix(da,MATMPIAIJ,&A); CHKERRQ(ierr);

   /* fill the matrix with values. I intend to build 7-pt Laplas */
   /* this procedure includes matrix assembly */
   ierr=FillMatrix(da,A); CHKERRQ(ierr);

   /*
   PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);
   MatView(A,PETSC_VIEWER_STDOUT_WORLD);
   PetscViewerDestroy(viewer);
   */

   /*
      Create parallel vectors.
       - We form 1 vector from scratch and then duplicate as needed.
   */
   ierr = DACreateGlobalVector(da,&u); CHKERRQ(ierr);
   /* ierr = VecSetFromOptions(u);CHKERRQ(ierr); */

   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                 Create the linear solver and set various options
      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

   /* Here we START measuring time for preconditioner setup */
   PreLoadStage("preconditioner setup");
   ierr = PetscGetTime(&t1);CHKERRQ(ierr);

   /* Create linear solver context */
   ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);

   /* Set operators. Here the matrix that defines the linear system
      also serves as the preconditioning matrix. */
   ierr = KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

   /* Set runtime options, e.g.,
          -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol>
      These options will override those specified above as long as
      KSPSetFromOptions() is called _after_ any other customization routines. */
   ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);

   /* probably this call actually builds the preconditioner */
   ierr = KSPSetUp(ksp);CHKERRQ(ierr);

   /* Here we STOP measuring time for preconditioner setup */
   PreLoadStage("solution");
   ierr = PetscGetTime(&t2);CHKERRQ(ierr);
   elapsed_time=t2-t1;
   if (PreLoadIt==1)
      PetscPrintf(PETSC_COMM_WORLD,"Preconditioner setup, seconds: %f\n",elapsed_time);

   /* request memory for eig-vals */
   ierr = PetscMalloc(sizeof(PetscScalar)*n_eigs,&eigs); CHKERRQ(ierr);

   /* request memory for eig-vals history */
   ierr = PetscMalloc(sizeof(PetscScalar)*n_eigs*(maxIt+1),&eigs_hist); CHKERRQ(ierr);

   /* request memory for resid. norms */
   ierr = PetscMalloc(sizeof(double)*n_eigs,&resid); CHKERRQ(ierr);

   /* request memory for resid. norms hist. */
   ierr = PetscMalloc(sizeof(double)*n_eigs*(maxIt+1),&resid_hist); CHKERRQ(ierr);

   LOBPCG_InitRandomContext();
   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
   PETSCSetupInterpreter( &ii );
   eigenvectors = mv_MultiVectorCreateFromSampleVector(&ii, n_eigs,u);

   xe = (mv_TempMultiVector *) mv_MultiVectorGetData( eigenvectors );
   /* VecView( (Vec)xe->vector[0],PETSC_VIEWER_STDOUT_WORLD); */

   for (i=0; i<seed; i++) /* this cycle is to imitate changing random seed */
      mv_MultiVectorSetRandom (eigenvectors, 1234);
   /* VecView( (Vec)xe->vector[0],PETSC_VIEWER_STDOUT_WORLD); */

   VecGetSize( (Vec)xe->vector[0], &N );
   N=mv_TempMultiVectorHeight( xe );
   VecGetArray( (Vec)xe->vector[0],&xx);

   lobpcg_tol.absolute = tol;
   lobpcg_tol.relative = 1e-50;

   /* select the LAPACK kernels matching PETSc's scalar type */
#ifdef PETSC_USE_COMPLEX
   blap_fn.zpotrf = PETSC_zpotrf_interface;
   blap_fn.zhegv = PETSC_zsygv_interface;
#else
   blap_fn.dpotrf = PETSC_dpotrf_interface;
   blap_fn.dsygv = PETSC_dsygv_interface;
#endif

   /* package operator + preconditioner context for the BLOPEX callbacks */
   aux_data.A = A;
   aux_data.ksp = ksp;
   aux_data.ii = ii;

   /* Here we START measuring time for solution process */
   ierr = PetscGetTime(&t1);CHKERRQ(ierr);

#ifdef PETSC_USE_COMPLEX
   lobpcg_solve_complex( eigenvectors,             /*input-initial guess of e-vectors */
                         &aux_data,                /*input-matrix A */
                         OperatorAMultiVector,     /*input-operator A */
                         NULL,                     /*input-matrix B */
                         NULL,                     /*input-operator B */
                         &aux_data,                /*input-matrix T */
                         Precond_FnMultiVector,    /*input-operator T */
                         NULL,                     /*input-matrix Y */
                         blap_fn,                  /*input-lapack functions */
                         lobpcg_tol,               /*input-tolerances */
                         PreLoadIt? maxIt:1,       /*input-max iterations */
                         !rank && PreLoadIt,       /*input-verbosity level */
                         &iterations,              /*output-actual iterations */
                         (komplex *) eigs,         /*output-eigenvalues */
                         (komplex *) eigs_hist,    /*output-eigenvalues history */
                         n_eigs,                   /*output-history global height */
                         resid,                    /*output-residual norms */
                         resid_hist ,              /*output-residual norms history */
                         n_eigs                    /*output-history global height */
   );
#else
   lobpcg_solve_double( eigenvectors,
                        &aux_data,
                        OperatorAMultiVector,
                        NULL,
                        NULL,
                        &aux_data,
                        Precond_FnMultiVector,
                        NULL,
                        blap_fn,
                        lobpcg_tol,
                        PreLoadIt? maxIt:1,
                        !rank && PreLoadIt,
                        &iterations,
                        eigs,        /* eigenvalues; "lambda_values" should point to array
                                        containing <blocksize> doubles where <blocksize> is the
                                        width of multivector "blockVectorX" */
                        eigs_hist,   /* eigenvalues history; a pointer to the entries of the
                                        <blocksize>-by-(<maxIterations>+1) matrix stored in
                                        fortran-style. (i.e. column-wise) The matrix may be a
                                        submatrix of a larger matrix, see next argument */
                        n_eigs,      /* global height of the matrix (stored in fotran-style)
                                        specified by previous argument */
                        resid,       /* residual norms; argument should point to array of
                                        <blocksize> doubles */
                        resid_hist , /* residual norms history; a pointer to the entries of the
                                        <blocksize>-by-(<maxIterations>+1) matrix stored in
                                        fortran-style. (i.e. column-wise) The matrix may be a
                                        submatrix of a larger matrix, see next argument */
                        n_eigs       /* global height of the matrix (stored in fotran-style)
                                        specified by previous argument */
   );
#endif

   /* Here we STOP measuring time for solution process */
   ierr = PetscGetTime(&t2);CHKERRQ(ierr);
   elapsed_time=t2-t1;
   if (PreLoadIt)
      PetscPrintf(PETSC_COMM_WORLD,"Solution process, seconds: %e\n",elapsed_time);

   /* only report results from the real (second) run, when requested */
   if (PreLoadIt && full_output)
   {
      PetscPrintf(PETSC_COMM_WORLD,"Output from LOBPCG, eigenvalues:\n");
      for (i=0;i<n_eigs;i++)
      {
         ierr = PetscPrintf(PETSC_COMM_WORLD,"%e\n",PetscRealPart(eigs[i])); CHKERRQ(ierr);
      }

      PetscPrintf(PETSC_COMM_WORLD,"Output from LOBPCG, eigenvalues history:\n");
      for (j=0; j<iterations+1; j++)
         for (i=0;i<n_eigs;i++)
         {
            ierr = PetscPrintf(PETSC_COMM_WORLD,"%e\n",PetscRealPart(*(eigs_hist+j*n_eigs+i))); CHKERRQ(ierr);
         }

      PetscPrintf(PETSC_COMM_WORLD,"Output from LOBPCG, residual norms:\n");
      for (i=0;i<n_eigs;i++)
      {
         ierr = PetscPrintf(PETSC_COMM_WORLD,"%e\n",resid[i]); CHKERRQ(ierr);
      }

      PetscPrintf(PETSC_COMM_WORLD,"Output from LOBPCG, residual norms history:\n");
      for (j=0; j<iterations+1; j++)
         for (i=0;i<n_eigs;i++)
         {
            ierr = PetscPrintf(PETSC_COMM_WORLD,"%e\n",*(resid_hist+j*n_eigs+i)); CHKERRQ(ierr);
         }
   }

   /*
      Free work space.  All PETSc objects should be destroyed when they
      are no longer needed.
   */
   ierr = VecDestroy(u);CHKERRQ(ierr);
   ierr = MatDestroy(A);CHKERRQ(ierr);
   ierr = KSPDestroy(ksp);CHKERRQ(ierr);
   ierr = DADestroy(da); CHKERRQ(ierr);
   LOBPCG_DestroyRandomContext();
   mv_MultiVectorDestroy(eigenvectors);

   /* free memory used for eig-vals */
   ierr = PetscFree(eigs); CHKERRQ(ierr);
   ierr = PetscFree(eigs_hist); CHKERRQ(ierr);
   ierr = PetscFree(resid); CHKERRQ(ierr);
   ierr = PetscFree(resid_hist); CHKERRQ(ierr);

   /*
      Always call PetscFinalize() before exiting a program.  This routine
        - finalizes the PETSc libraries as well as MPI
        - provides summary and diagnostic information if certain runtime
          options are chosen (e.g., -log_summary).
   */
   PreLoadEnd();

   ierr = PetscFinalize();CHKERRQ(ierr);
   return 0;
}
/*
 * Symbolic phase of C = R * A * R^T for SeqAIJ matrices.
 *
 * Builds the sparsity pattern of C by reusing the PtAP symbolic kernel with
 * P = R^T, then prepares the coloring-based machinery used by the numeric
 * phase: a MatTransposeColoring of C, dense work matrices Rt_dense and
 * RARt_dense (one column per color), and a scalar work array. All of this is
 * stashed in a Mat_RARt struct attached to C via a PetscContainer so the
 * numeric phase and C's destroy routine can find it.
 *
 * Input:  A (n x n SeqAIJ), R (m x n SeqAIJ), fill (expected fill ratio).
 * Output: *C, the symbolic m x m product with ops->destroy redirected to
 *         MatDestroy_SeqAIJ_RARt.
 *
 * Timing of the sub-phases (coloring, coloring-create, dense-create, other)
 * is accumulated and reported through PetscInfo when PETSC_USE_INFO is set.
 */
PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat R,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat                  P;             /* symbolic R^T, shares rti/rtj index arrays */
  PetscInt             *rti,*rtj;     /* symbolic-transpose row/col indices of R */
  Mat_RARt             *rart;         /* support struct handed to the numeric phase */
  PetscContainer       container;
  MatTransposeColoring matcoloring;
  ISColoring           iscoloring;
  Mat                  Rt_dense,RARt_dense;
  PetscLogDouble       GColor=0.0,MCCreate=0.0,MDenCreate=0.0,t0,tf,etime=0.0;
  Mat_SeqAIJ           *c;

  PetscFunctionBegin;
  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  /* create symbolic P=Rt */
  ierr = MatGetSymbolicTranspose_SeqAIJ(R,&rti,&rtj);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,R->cmap->n,R->rmap->n,rti,rtj,PETSC_NULL,&P);CHKERRQ(ierr);

  /* get symbolic C=Pt*A*P, i.e. R*A*R^T */
  ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
  (*C)->rmap->bs = R->rmap->bs;
  (*C)->cmap->bs = R->rmap->bs;

  /* create a supporting struct */
  ierr = PetscNew(Mat_RARt,&rart);CHKERRQ(ierr);

  /* attach the supporting struct to C; the container's user-destroy frees it */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,rart);CHKERRQ(ierr);
  ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_RARt);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)(*C),"Mat_RARt",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  etime += tf - t0;

  /* Create MatTransposeColoring from symbolic C=R*A*R^T */
  c=(Mat_SeqAIJ*)(*C)->data;
  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  GColor += tf - t0;

  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr);
  rart->matcoloring = matcoloring;
  ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  MCCreate += tf - t0;

  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  /* Create Rt_dense: compressed R^T with one column per color.
     Marked assembled by hand since it is filled directly by the numeric phase. */
  ierr = MatCreate(PETSC_COMM_SELF,&Rt_dense);CHKERRQ(ierr);
  ierr = MatSetSizes(Rt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr);
  ierr = MatSetType(Rt_dense,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(Rt_dense,PETSC_NULL);CHKERRQ(ierr);
  Rt_dense->assembled = PETSC_TRUE;
  rart->Rt            = Rt_dense;

  /* Create RARt_dense = R*A*Rt_dense */
  ierr = MatCreate(PETSC_COMM_SELF,&RARt_dense);CHKERRQ(ierr);
  ierr = MatSetSizes(RARt_dense,(*C)->rmap->n,matcoloring->ncolors,(*C)->rmap->n,matcoloring->ncolors);CHKERRQ(ierr);
  ierr = MatSetType(RARt_dense,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(RARt_dense,PETSC_NULL);CHKERRQ(ierr);
  rart->RARt = RARt_dense;

  /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
  ierr = PetscMalloc(A->rmap->n*4*sizeof(PetscScalar),&rart->work);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  MDenCreate += tf - t0;

  /* hook our destroy routine in front of the stock SeqAIJ one */
  rart->destroy      = (*C)->ops->destroy;
  (*C)->ops->destroy = MatDestroy_SeqAIJ_RARt;

  /* clean up: release the symbolic transpose arrays and the wrapper matrix P */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(R,&rti,&rtj);CHKERRQ(ierr);
  ierr = MatDestroy(&P);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  {
    PetscReal density= (PetscReal)(c->nz)/(RARt_dense->rmap->n*RARt_dense->cmap->n);
    ierr = PetscInfo6(*C,"RARt_den %D %D; Rt_den %D %D, (RARt->nz %D)/(m*ncolors)=%g\n",RARt_dense->rmap->n,RARt_dense->cmap->n,Rt_dense->rmap->n,Rt_dense->cmap->n,c->nz,density);CHKERRQ(ierr);
    ierr = PetscInfo5(*C,"Sym = GetColor %g + MColorCreate %g + MDenCreate %g + other %g = %g\n",GColor,MCCreate,MDenCreate,etime,GColor+MCCreate+MDenCreate+etime);CHKERRQ(ierr);
  }
#endif
  PetscFunctionReturn(0);
}
/*
S^{-1} = ( G^T G )^{-1} G^T K G ( G^T G )^{-1}
       = A C A
S^{-T} = A^T (A C)^T
       = A^T C^T A^T, but A = G^T G which is symmetric
       = A C^T A
       = A G^T ( G^T K )^T A
       = A G^T K^T G A
*/
/* Transpose application of the scaled BFBt (pressure Schur complement)
   preconditioner: y = S^{-T} x, realized as two transposed BBt solves
   around a scaled K^T spmv. Scaling vectors X1/Y1/Y2 come from the context;
   the X2 scaling of x is deliberately left commented out (S is unscaled here). */
PetscErrorCode BSSCR_PCApplyTranspose_ScGtKG( PC pc, Vec x, Vec y )
{
	PC_SC_GtKG ctx = (PC_SC_GtKG)pc->data;
	PetscLogDouble time_start, time_end;

	/* unpack the pieces held in the preconditioner context */
	KSP bbt_ksp = ctx->ksp_BBt;
	Mat op_F    = ctx->F;
	Mat op_Bt   = ctx->Bt;
	Vec vec_s   = ctx->s;
	Vec vec_t   = ctx->t;
	Vec vec_X   = ctx->X;

	/* Apply scaled Poisson operator */
	/* scale x */
	/* ========================================================
	         NOTE: I THINK TO OMIT THESE AS WE WANT TO UNSCALE
	             THE PRECONDITIONER AS S IN THIS CASE IS NOT SCALED
	   ======================================================== */
	// VecPointwiseDivide( x, x, ctx->X2 ); /* x <- x/X2 */  /* NEED TO BE SURE */

	/* project out the constant null space of B B^T before the first solve */
	if ( ctx->BBt_has_cnst_nullspace == PETSC_TRUE ) {
		BSSCR_VecRemoveConstNullspace( x, PETSC_NULL );
	}

	/* first transposed solve, timed for the optional monitor */
	PetscGetTime( &time_start );
	KSPSolveTranspose( bbt_ksp, x, vec_t ); /* t <- GtG_inv x */
	PetscGetTime( &time_end );
	if ( ctx->monitor_activated ) {
		BSSCR_PCScBFBTSubKSPMonitor( bbt_ksp, 1, (time_end - time_start) );
	}

	/* Apply Bt */
	MatMult( op_Bt, vec_t, vec_s );          /* s <- G t */
	VecPointwiseMult( vec_s, vec_s, ctx->X1 ); /* s <- s * X1 */

	/* Apply F (transposed), with the row/column scalings folded in */
	VecPointwiseMult( vec_s, vec_s, ctx->Y1 ); /* s <- s * Y1 */
	MatMultTranspose( op_F, vec_s, vec_X );    /* X <- K s */
	VecPointwiseMult( vec_X, vec_X, ctx->X1 ); /* X <- X * X1 */

	/* Apply B */
	VecPointwiseMult( vec_X, vec_X, ctx->Y1 ); /* X <- X * Y1 */
	MatMultTranspose( op_Bt, vec_X, vec_t );   /* t <- Gt X */

	/* again remove the constant null space before the second solve */
	if ( ctx->BBt_has_cnst_nullspace == PETSC_TRUE ) {
		BSSCR_VecRemoveConstNullspace( vec_t, PETSC_NULL );
	}

	/* second transposed solve, timed for the optional monitor */
	PetscGetTime( &time_start );
	KSPSolveTranspose( bbt_ksp, vec_t, y ); /* y <- GtG_inv t */
	PetscGetTime( &time_end );
	if ( ctx->monitor_activated ) {
		BSSCR_PCScBFBTSubKSPMonitor( bbt_ksp, 2, (time_end - time_start) );
	}

	VecPointwiseMult( y, y, ctx->Y2 ); /* y <- y/Y2 */

	/* undo modification made to x on entry */
	// VecPointwiseMult( x, x, ctx->X2 ); /* x <- x/X2 */  /* NEED TO BE SURE */

	PetscFunctionReturn(0);
}
int main(int argc,char **args) { /*PETSc Mat Object */ Mat pMat; /* Input matrix market file and output PETSc binary file */ char inputFile[128],outputFile[128],buf[128]; /* number rows, columns, non zeros etc */ int i, m,n,nnz,ierr,col,row; /*We compute no of nozeros per row for PETSc Mat object pre-allocation*/ int *nnzPtr; /*Maximum nonzero in nay row */ int maxNNZperRow=0; /*Row number containing max non zero elements */ int maxRowNum = 0; /*Just no of comments that will be ignore during successive read of file */ int numComments=0; /* This is variable of type double */ PetscScalar val; /*File handle for read and write*/ FILE* file; /*File handle for writing nonzero elements distribution per row */ FILE *fileRowDist; /*PETSc Viewer is used for writing PETSc Mat object in binary format */ PetscViewer view; /*Just record time required for conversion */ PetscLogDouble t1,t2,elapsed_time; /*Initialise PETSc lib */ PetscInitialize(&argc,&args,(char *)0,PETSC_NULL); /* Just record time */ ierr = PetscGetTime(&t1); CHKERRQ(ierr); /*Get name of matrix market file from command line options and Open file*/ ierr = PetscOptionsGetString(PETSC_NULL,"-fin",inputFile,127,PETSC_NULL); ierr = PetscFOpen(PETSC_COMM_SELF,inputFile,"r",&file); /* Just count the comment lines in the file */ while(1) { fgets(buf,128,file); /*If line starts with %, its a comment */ if(buf[0] == '%') { printf("\n IGNORING COMMENT LINE : IGNORING...."); numComments++; } else { /*Set Pointer to Start of File */ fseek(file, 0, SEEK_SET ); int num = numComments; /* and just move pointer to the entry in the file which indicates row nums, col nums and non zero elements */ while(num--) fgets(buf,128,file); break; } } /*Reads size of sparse matrix from matrix market file */ fscanf(file,"%d %d %d\n",&m,&n,&nnz); printf ("ROWS = %d, COLUMNS = %d, NO OF NON-ZEROS = %d\n",m,n,nnz); /*Now we will calculate non zero elelments distribution per row */ nnzPtr = (int *) calloc (sizeof(int), m); /*This is similar to 
calculate histogram or frequency of elements in the array */ for (i=0; !feof(file); i++) { fscanf(file,"%d %d %le\n",&row,&col,&val); row = row-1; col = col-1 ; nnzPtr[row]++; } printf("\n ROW DISTRIBUTION CALCULATED....WRITING TO THE FILE..!"); fflush(stdout); /*Write row distribution to the file ROW_STR.dat */ fileRowDist = fopen ("ROW_DISTR.dat", "w"); for (i=0; i< m; i++) { fprintf(fileRowDist, "%d\t %d\n", i, nnzPtr[i]); /*Find max num of of nonzero for any row of the matrix and that row number */ if( maxNNZperRow < nnzPtr[i] ) { /*store max nonzero for any row*/ maxNNZperRow = nnzPtr[i]; /*row that contains max non zero elements*/ maxRowNum = i; } } /*Close File */ fclose(fileRowDist); printf("\n MAX NONZERO FOR ANY ROW ARE : %d & ROW NUM IS : %d", maxNNZperRow, maxRowNum ); /* Again set the file pointer the fist data record in matrix market file* * Note that we can directly move ponts with fseek, but as this is text file * we are simple reading line by line */ fseek(file, 0, SEEK_SET ); numComments++; while(numComments--) fgets(buf,128,file); /* Its important to pre-allocate memory by passing max non zero for any row in the matrix */ ierr = MatCreateSeqAIJ(PETSC_COMM_WORLD,m,n,maxNNZperRow,PETSC_NULL,&pMat); /* OR we can also pass row distribution of nozero elements for every row */ /* ierr = MatCreateSeqAIJ(PETSC_COMM_WORLD,m,n,0,nnzPtr,&pMat);*/ /*Now Set matrix elements values form matrix market file */ for (i=0; i<nnz; i++) { /*Read matrix element from matrix market file*/ fscanf(file,"%d %d %le\n",&row,&col,&val); /*In matrix market format, rows and columns starts from 1 */ row = row-1; col = col-1 ; /* For every non zero element,insert that value at row,col position */ ierr = MatSetValues(pMat,1,&row,1,&col,&val,INSERT_VALUES); } fclose(file); /*Matrix Read Complete */ ierr = PetscPrintf(PETSC_COMM_SELF,"\n MATRIX READ...DONE!"); /*Now assemeble the matrix */ ierr = MatAssemblyBegin(pMat,MAT_FINAL_ASSEMBLY); ierr = 
MatAssemblyEnd(pMat,MAT_FINAL_ASSEMBLY); /* Now open output file for writing into PETSc Binary FOrmat*/ ierr = PetscOptionsGetString(PETSC_NULL,"-fout",outputFile,127,PETSC_NULL);CHKERRQ(ierr); /*With the PETSc Viewer write output to File*/ ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,outputFile,FILE_MODE_WRITE,&view);CHKERRQ(ierr); /*Matview will dump the Mat object to binary file */ ierr = MatView(pMat,view);CHKERRQ(ierr); /* Destroy the data structure */ ierr = PetscViewerDestroy(&view);CHKERRQ(ierr); ierr = MatDestroy(&pMat);CHKERRQ(ierr); /*Just for statistics*/ ierr = PetscGetTime(&t2);CHKERRQ(ierr); elapsed_time = t2 - t1; ierr = PetscPrintf(PETSC_COMM_SELF,"ELAPSE TIME: %g\n",elapsed_time);CHKERRQ(ierr); ierr = PetscFinalize();CHKERRQ(ierr); return 0; }
/* Micro-benchmark for memory-copy throughput: times ten explicit copies of an
   8,000,000-element PetscScalar array and ten zero-length PetscMemcpy calls,
   then reports bandwidth, per-call latency, and per-scalar cost. */
int main(int argc,char **argv)
{
  PetscLogDouble x,y,z;   /* timestamps: start, after copies, after latency calls */
  int i,ierr;
  PetscScalar *A,*B;

  PetscInitialize(&argc,&argv,0,0);
  ierr = PetscMalloc(8000000*sizeof(PetscScalar),&A); CHKERRQ(ierr);
  ierr = PetscMalloc(8000000*sizeof(PetscScalar),&B); CHKERRQ(ierr);
  /* touch every page of both buffers so the timed region sees warm memory */
  for (i=0; i<8000000; i++) {
    A[i] = i%61897;
    B[i] = i%61897;
  }
  /* To take care of paging effects */
  ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0); CHKERRQ(ierr);
  /* call PetscGetTime twice so the second read is not paying first-call cost */
  ierr = PetscGetTime(&x); CHKERRQ(ierr);
  ierr = PetscGetTime(&x); CHKERRQ(ierr);
  /* Alternative under test (kept for reference): ten calls of
     PetscMemcpy(A,B,sizeof(PetscScalar)*8000000) in place of the explicit
     element-by-element copy loop below. */
  {
    int j;
    for (j = 0; j<10; j++) {
      for (i=0; i<8000000; i++) {
        B[i] = A[i];
      }
    }
  }
  ierr = PetscGetTime(&y); CHKERRQ(ierr);
  /* ten zero-length copies measure pure call overhead (latency) */
  ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0); CHKERRQ(ierr);
  ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0); CHKERRQ(ierr);
  ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0); CHKERRQ(ierr);
  ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0); CHKERRQ(ierr);
  ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0); CHKERRQ(ierr);
  ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0); CHKERRQ(ierr);
  ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0); CHKERRQ(ierr);
  ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0); CHKERRQ(ierr);
  ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0); CHKERRQ(ierr);
  ierr = PetscMemcpy(A,B,sizeof(PetscScalar)*0); CHKERRQ(ierr);
  ierr = PetscGetTime(&z); CHKERRQ(ierr);

  fprintf(stdout,"%s : \n","PetscMemcpy");
  /* 10 copies * 8e6 elements * 8 bytes = 640 MB moved (one direction counted);
     assumes sizeof(PetscScalar) == 8 -- TODO confirm for this build */
  fprintf(stdout,"    %-15s : %e MB/s\n","Bandwidth",10.0*8*8/(y-x));
  fprintf(stdout,"    %-15s : %e sec\n","Latency",(z-y)/10.0);
  fprintf(stdout,"    %-15s : %e sec\n","Per PetscScalar",(2*y-x-z)/8000000.0);

  ierr = PetscFree(A); CHKERRQ(ierr);
  ierr = PetscFree(B); CHKERRQ(ierr);
  ierr = PetscFinalize(); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* Fortran binding stub for PetscGetTime(): stores the current wall-clock
   time in *t and the PETSc error code in *__ierr. */
void PETSC_STDCALL petscgettime_(PetscLogDouble *t, int *__ierr )
{
  *__ierr = PetscGetTime(t);
}
/* Numeric LU factorization of A via SuperLU_DIST, storing the factors in F.
   Two input paths: GLOBAL gathers A onto every process as a sequential
   matrix and converts to SuperLU's compressed-column (NC) format;
   distributed mode merges the local diagonal (A) and off-diagonal (B)
   MPIAIJ blocks into SuperLU's local compressed-row (NR_loc) format.
   On repeat factorizations the sparsity pattern (and optionally the row
   permutation) is reused according to lu->FactPattern. */
PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat *tseq,A_seq = PETSC_NULL;
  Mat_SeqAIJ *aa,*bb;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, m=A->rmap->n, colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
  int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
  PetscMPIInt size;
  SuperLUStat_t stat;
  double *berr=0;
  IS isrow;
  PetscLogDouble time0,time,time_min,time_max;
  Mat F_diag=PETSC_NULL;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex *av, *bv;
#else
  double *av, *bv;
#endif

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (lu->options.PrintStat) { /* collect time for mat conversion */
    ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr);
    ierr = PetscGetTime(&time0);CHKERRQ(ierr);
  }

  if (lu->MatInputMode == GLOBAL) { /* global mat input */
    if (size > 1) { /* convert mpi A to seq mat A */
      ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
      ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
      ierr = ISDestroy(&isrow);CHKERRQ(ierr);
      A_seq = *tseq;
      ierr = PetscFree(tseq);CHKERRQ(ierr);
      aa = (Mat_SeqAIJ*)A_seq->data;
    } else {
      /* uniprocessor: an MPIAIJ matrix holds everything in its diagonal block */
      PetscBool flg;
      ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
      if (flg) {
        Mat_MPIAIJ *At = (Mat_MPIAIJ*)A->data;
        A = At->A;
      }
      aa = (Mat_SeqAIJ*)A->data;
    }

    /* Convert Petsc NR matrix to SuperLU_DIST NC.
       Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
    if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
      Destroy_CompCol_Matrix_dist(&lu->A_sup);
      if (lu->FactPattern == SamePattern_SameRowPerm){
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else { /* lu->FactPattern == SamePattern */
        Destroy_LU(N, &lu->grid, &lu->LUstruct);
        lu->options.Fact = SamePattern;
      }
    }
#if defined(PETSC_USE_COMPLEX)
    zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
#else
    dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
#endif

    /* Create compressed column matrix A_sup. */
#if defined(PETSC_USE_COMPLEX)
    zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
#else
    dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
#endif
  } else { /* distributed mat input */
    Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
    aa=(Mat_SeqAIJ*)(mat->A)->data;  /* local diagonal block */
    bb=(Mat_SeqAIJ*)(mat->B)->data;  /* local off-diagonal block */
    ai=aa->i; aj=aa->j;
    bi=bb->i; bj=bb->j;
#if defined(PETSC_USE_COMPLEX)
    av=(doublecomplex*)aa->a;
    bv=(doublecomplex*)bb->a;
#else
    av=aa->a;
    bv=bb->a;
#endif
    rstart = A->rmap->rstart;
    nz = aa->nz + bb->nz;
    garray = mat->garray; /* maps B's compressed column indices to global columns */

    if (lu->options.Fact == DOFACT) {/* first numeric factorization */
#if defined(PETSC_USE_COMPLEX)
      zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
#else
      dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
#endif
    } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
      /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* this leads to crash! However, see SuperLU_DIST_2.5/EXAMPLE/pzdrive2.c */
      if (lu->FactPattern == SamePattern_SameRowPerm){
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
        lu->options.Fact = SamePattern;
      }
    }

    /* Merge A- and B-part rows into one globally-indexed, column-sorted row:
       B entries with column < colA_start, then the A part, then the rest of B */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      lu->row[i] = nz;
      countA = ai[i+1] - ai[i];
      countB = bi[i+1] - bi[i];
      ajj = aj + ai[i]; /* ptr to the beginning of this row */
      bjj = bj + bi[i];

      /* B part, smaller col index */
      colA_start = rstart + ajj[0]; /* the smallest global col index of A */
      jB = 0;
      for (j=0; j<countB; j++){
        jcol = garray[bjj[j]];
        if (jcol > colA_start) {
          jB = j; /* remaining B entries belong after the A part */
          break;
        }
        lu->col[nz] = jcol;
        lu->val[nz++] = *bv++;
        if (j==countB-1) jB = countB; /* all of B consumed before A */
      }

      /* A part */
      for (j=0; j<countA; j++){
        lu->col[nz] = rstart + ajj[j];
        lu->val[nz++] = *av++;
      }

      /* B part, larger col index */
      for (j=jB; j<countB; j++){
        lu->col[nz] = garray[bjj[j]];
        lu->val[nz++] = *bv++;
      }
    }
    lu->row[m] = nz;
#if defined(PETSC_USE_COMPLEX)
    zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
#else
    dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
#endif
  }
  if (lu->options.PrintStat) {
    ierr = PetscGetTime(&time);CHKERRQ(ierr);
    time0 = time - time0; /* conversion time on this process */
  }

  /* Factor the matrix. */
  PStatInit(&stat); /* Initialize the statistics variables. */
  if (lu->MatInputMode == GLOBAL) { /* global mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,&lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
#else
    pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,&lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,&lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
#else
    pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,&lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
#endif
  }

  /* the gathered sequential copy is only needed during conversion */
  if (lu->MatInputMode == GLOBAL && size > 1){
    ierr = MatDestroy(&A_seq);CHKERRQ(ierr);
  }

  if (lu->options.PrintStat) {
    /* NOTE(review): these MPI_Reduce ierr values are assigned but unchecked */
    ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
    ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
    ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
    time = time/size; /* average time */
    ierr = PetscPrintf(((PetscObject)A)->comm, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr);
    PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
  }
  PStatFree(&stat);
  if (size > 1){
    /* mark the diagonal sub-matrix of the parallel factor assembled too */
    F_diag = ((Mat_MPIAIJ *)(F)->data)->A;
    F_diag->assembled = PETSC_TRUE;
  }
  (F)->assembled = PETSC_TRUE;
  (F)->preallocated = PETSC_TRUE;
  lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
  PetscFunctionReturn(0);
}
//int BenchmarkBoundaryChecks( int n, PetscLogDouble t_bulk, PetscLogDouble ) int PetscMain() { int i,j,k, n=128; PetscLogDouble t1,t2,s1,s2; Vec U,V,W,DIV1,DIV2; PetscReal ***u,***v,***w,***div1, ***div2; DA da; DALocalInfo info; PetscErrorCode ierr; ierr = DACreate3d(PETSC_COMM_SELF,//MPI Communicator DA_NONPERIODIC, //DA_NONPERIODIC, DA_XPERIODIC, DA_YPERIODIC, DA_XYPERIODIC DA_STENCIL_STAR, //DA_STENCIL_BOX or DA_STENCIL_STAR n,n,n, //Global array dimension 1,1,1,//Number procs per dim 1, //Number of chemical species 1, //stencil width 0,0,0, //specific array of nodes &da); CHKERRQ(ierr); DACreateGlobalVector(da,&U); DACreateGlobalVector(da,&V); DACreateGlobalVector(da,&W); DACreateGlobalVector(da,&DIV1); DACreateGlobalVector(da,&DIV2); VecSet(DIV1,0); VecSet(DIV2,0); DAVecGetArray(da,U,&u); DAVecGetArray(da,V,&v); DAVecGetArray(da,W,&w); DAVecGetArray(da,DIV1,&div1); DAVecGetArray(da,DIV2,&div2); DAGetLocalInfo(da,&info); PetscBarrier(0); for( k = 0; k < n; ++k) { for( j = 0; j < n; ++j) { for( i = 0; i < n; ++i) { u[k][j][i] = i * (i-n) * j * (j-n) * k * (k-n); v[k][j][i] = 1 - u[k][j][i]; w[k][j][i] = u[k][j][i] * v[k][j][i]; } } } PetscBarrier(0); PetscGetTime(&t1); for( k = 1; k < n-1; ++k) { for( j = 1; j < n-1; ++j) { for( i = 1; i < n-1; ++i) { div1[k][j][i] = u[k][j][i+1] - u[k][j][i-1] + v[k][j+1][i] - v[k][j-1][i] + w[k+1][j][i] - w[k-1][j][i]; div1[k][j][i] /= 2; } } } PetscGetTime(&t2); PetscReal uE,uW,vN,vS,wF,wB; PetscReal hx,hy,hz; PetscBarrier(0); PetscGetTime(&s1); for( k = 1; k < n-1; ++k) { for( j = 1; j < n-1; ++j) { for( i = 1; i < n-1; ++i) { /* uE = i == info.mx-1 ? u[k][j][i] : u[k][j][i+1]; uW = i == 0 ? u[k][j][i] : u[k][j][i-1]; vN = j == info.my-1 ? v[k][j][i] : v[k][j+1][i]; vS = j == 0 ? v[k][j][i] : v[k][j-1][i]; wB = k == info.mz-1 ? w[k][j][i] : w[k+1][j][i]; wF = k == 0 ? 
w[k][j][i] : w[k-1][j][i]; */ if( i == info.mx-1 ) { uE = u[k][j][i]; hx= 1; }else{ uE = u[k][j][i+1];hx= 2;} if( i == info.mx-1 ) { uE = u[k][j][i]; hx= 1; }else{ uE = u[k][j][i+1];hx= 2;} if( j == info.mx-1 ) { uE = u[k][j][i]; hy= 1; }else{ uE = u[k][j][i+1];hy= 2;} if( j == info.mx-1 ) { uE = u[k][j][i]; hy= 1; }else{ uE = u[k][j][i+1];hy= 2;} if( k == info.mx-1 ) { uE = u[k][j][i]; hz= 1; }else{ uE = u[k][j][i+1];hz= 2;} if( k == info.mx-1 ) { uE = u[k][j][i]; hz= 1; }else{ uE = u[k][j][i+1];hz= 2;} div2[k][j][i] = uE - uW + vN - vS + wB - wF; div2[k][j][i] /= 2; // printf("%f\t%f\t%f\n",div1[k][j][i], div2[k][j][i], div2[k][j][i] - div1[k][j][i]); } } } PetscGetTime(&s2); DAVecRestoreArray(da,DIV1,&div1); DAVecRestoreArray(da,DIV2,&div2); VecAXPY(DIV1,-1,DIV2); PetscReal norm; VecNorm(DIV1,NORM_2,&norm); printf("BULK: %f\nIF's: %f\nDIFF:\t%f\nRATIO:\t%f\nnorm: %f\n", (t2-t1), (s2-s1), (s2-s1)-(t2-t1),(s2-s1)/(t2-t1),norm); }
/* Driver that repeatedly applies the inverse standard map to a scalar field
   on an n-by-n periodic grid and records the decay of its 2-norm ("mixing
   norm") per iteration. Grid values are advected by scattering, for batches
   of up to pmax points, the npt-point stencil of mapped sample locations
   from the current field vector and averaging them. Optionally talks to
   MATLAB over a socket viewer (-withMatlab) to receive n/iter and return
   results; otherwise n and iter come from -ngrid/-niter. */
int main(int argc,char **args)
{
  PetscInt rank,size,npt;
  PetscErrorCode ierr;
  Vec x,y0,tempvec, *vinda,*vindb,*vindc;  /* vinda/vindb: ping-pong src/dst */
  PetscInt i,j,k,l,n,p,m,m2,pmax,puse,Istart,Iend,localsize,niter;
  PetscScalar dx,dy,dx2,dy2;
  PetscScalar *Mixnorm;   /* per-iteration mixing norm */
  PetscInt iter,*iterind,*nind;
  FILE *fidoutput;
  char fname[50];
  PetscViewer socketviewer;
  PetscInt withMatlab;
  PetscTruth Matlabflag;
  PetscLogDouble v1,v2,elapsed_time;

  PetscInitialize(&argc,&args,(char *)0,help);
  MPI_Comm_size(PETSC_COMM_WORLD,&size);
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);

  ierr = PetscPrintf(PETSC_COMM_WORLD,"\nPETSC: Petsc Initializes successfully! \n");
  ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: comm_size is %d \n", size);

  ierr = PetscOptionsGetInt(PETSC_NULL,"-withMatlab",&withMatlab,&Matlabflag);CHKERRQ(ierr);
  if (Matlabflag == PETSC_FALSE){withMatlab = 0;}else{withMatlab = 1;}

  if(withMatlab==1){
    // Rank 0 connects to socket, use default socket
    PetscViewerSocketOpen(PETSC_COMM_WORLD,0,PETSC_DEFAULT,&socketviewer);
    ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: socket opened! \n");CHKERRQ(ierr);

    // Receive n from Matlab
    IntReceive(socketviewer, &nind);
    n = *nind;
    // Receive iter from Matlab
    IntReceive(socketviewer, &iterind);
    iter = *iterind;
  }else{
    ierr = PetscOptionsGetInt(PETSC_NULL,"-ngrid",&n,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsGetInt(PETSC_NULL,"-niter",&iter,PETSC_NULL);CHKERRQ(ierr);
  }

  /////////////////////////////////////////////////////////////////////////////////////
  ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: number of grid is %d \n", n);
  ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: number of iteration is %d \n", iter);

  Mixnorm = malloc(iter*sizeof(PetscScalar));
  dx = 1.0/n;
  dy = 1.0/n;
  /* half-cell offsets, nudged inward so floor() below never lands on a seam */
  dx2 = dx/2-dx/1e6;
  dy2 = dy/2-dy/1e6;
  npt = 5;       /* samples per grid point (center + 4 shifted) */
  pmax = 4e6;    /* max grid points per scatter batch */
  puse = pmax;
  ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: estimated buffer size (per processer) %f Mbytes \n", pmax*1.0/1e6*8*16 );
  ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: estimated variable size %f Mbytes\n", 1.0*n*n/1e6*8*2);

  /////////////////////////////////////////////////////////////////////////////////////
  /* use a throwaway length-n vector only to learn this rank's row range */
  ierr = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE ,n,&tempvec);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(tempvec,&Istart,&Iend);CHKERRQ(ierr);
  localsize = Iend-Istart;
  ierr = VecDestroy(tempvec);CHKERRQ(ierr);

  /////////////////////////////////////////////////////////////////////////////////////
  // Create initial vector
  Vec x0;
  PetscScalar *x0array;
  x0array = malloc((localsize)*n*sizeof(PetscScalar));
  k = 0;
  /* initial condition: cosine varying in the x-direction only */
  for(i=Istart;i<Iend;i++){
    for(j=0;j<n;j++){
      *(x0array+k) = cos(2*M_PI*(dx/2+i*dx));
      //*(x0array+k) = cos(2*M_PI*(dy/2+j*dy));
      k++;
    }
  }

  ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n*localsize,PETSC_DECIDE,x0array,&x0);CHKERRQ(ierr);
  ierr = VecDuplicate(x0,&x);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,pmax*npt,&y0);CHKERRQ(ierr);

  ierr = VecNorm(x0,NORM_2,Mixnorm); CHKERRQ(ierr);
  PetscPrintf(PETSC_COMM_WORLD,"PETSC: initial norm= %f \n",*(Mixnorm+0)/n );

  ///////////////////////////////////////////////////////////////////////////
  // Map Center Points
  PetscInt *NzindJ,*idx,*idy,*idp;
  PetscScalar *CenterX,*CenterY,*VecVal,*pty;
  PetscScalar *ShiftX,*ShiftY,CX,CY, *yarray;
  IS isx,isy;
  VecScatter ctx;

  CenterX = malloc(npt*sizeof(PetscScalar));
  CenterY = malloc(npt*sizeof(PetscScalar));
  ShiftX = malloc(npt*sizeof(PetscScalar));
  ShiftY = malloc(npt*sizeof(PetscScalar));
  VecVal = malloc(npt*sizeof(PetscScalar));
  yarray = malloc(pmax*sizeof(PetscScalar));

  NzindJ = malloc(pmax*npt*sizeof(PetscInt));
  idx = malloc(pmax*npt*sizeof(PetscInt));
  idy = malloc(pmax*npt*sizeof(PetscInt));
  idp = malloc(pmax*sizeof(PetscInt));

  /* stencil of sample offsets: the center plus four corner shifts.
     NOTE(review): entry 4 mixes dy2 into ShiftX and dx2 into ShiftY, unlike
     entries 1-3 -- possibly intentional only when dx2 == dy2; confirm. */
  *(ShiftX+0) = 0;    *(ShiftY+0) = 0;
  *(ShiftX+1) = -dx2; *(ShiftY+1) = -dy2;
  *(ShiftX+2) = dx2;  *(ShiftY+2) = -dy2;
  *(ShiftX+3) = -dx2; *(ShiftY+3) = dy2;
  *(ShiftX+4) = dy2;  *(ShiftY+4) = dx2;

  //*(ShiftX+5) = 0;
  //*(ShiftY+5) = -dy2;
  //*(ShiftX+6) = -dx2;
  //*(ShiftY+6) = 0;
  //*(ShiftX+7) = dx2;
  //*(ShiftY+7) = 0;
  //*(ShiftX+8) = 0;
  //*(ShiftY+9) = dy2;

  for(i=0;i<npt*pmax;i++){ *(idy+i)=i; }
  ISCreateGeneralWithArray(PETSC_COMM_SELF,npt*pmax,idy,&isy);

  vinda = &x0;  /* current field (scatter source) */
  vindb = &x;   /* next field (assembled destination) */

  sprintf(fname, "mixnorm_%d_%d",n,iter);
  ierr =PetscPrintf(PETSC_COMM_WORLD,"\n iter norm time unit time\n");CHKERRQ(ierr);
  ierr =PetscFOpen(PETSC_COMM_WORLD,fname,"w",&fidoutput);CHKERRQ(ierr);

  for(niter=0;niter<iter;niter++){
    ierr = PetscGetTime(&v1);CHKERRQ(ierr);
    l = 0; p = 0;
    if (n*localsize-l<=pmax){puse = n*localsize-l;}else{puse=pmax;}
    /* walk every locally-owned grid point, batching up to puse of them */
    for(i=Istart;i<Iend;i++){
      for(j=0;j<n;j++){
        CX = dx2+i*dx;
        CY = dy2+j*dy;
        /* pull each stencil sample back through the inverse map and record
           the flattened global index of the cell it came from */
        for(k=0;k<npt;k++){
          *(CenterX+k) = CX + *(ShiftX+k);
          *(CenterY+k) = CY + *(ShiftY+k);
          InverseStandardMap((CenterX+k),(CenterY+k));
          *(NzindJ+p*npt +k) = floor(*(CenterX+k)*n)*n + floor(*(CenterY+k)*n);
        }
        *(idp+p) = Istart*n+ l;

        if(p>=puse-1){
          /* batch full: scatter the sampled entries into y0, average each
             npt-group, and queue the results into the destination vector */
          ierr = ISCreateGeneralWithArray(PETSC_COMM_WORLD,npt*puse,NzindJ,&isx);CHKERRQ(ierr);
          for(m=0;m<npt*puse;m++){ *(idy+m)=m; }
          ierr = ISCreateGeneralWithArray(PETSC_COMM_SELF,npt*puse,idy,&isy);CHKERRQ(ierr);
          ierr = VecScatterCreate(*vinda,isx,y0,isy,&ctx);CHKERRQ(ierr);
          ierr = VecScatterBegin(*vinda,y0,INSERT_VALUES,SCATTER_FORWARD,ctx);CHKERRQ(ierr);
          ierr = VecScatterEnd(*vinda,y0,INSERT_VALUES,SCATTER_FORWARD,ctx);CHKERRQ(ierr);
          ierr = VecScatterDestroy(ctx);
          ierr = VecGetArray(y0,&pty);CHKERRQ(ierr);
          for(m=0;m<puse;m++){
            for(m2=0;m2<npt;m2++){ *(yarray+m) = *(yarray+m)+*(pty+m*npt+m2); }
            *(yarray+m) = *(yarray+m)/npt;
          }
          VecRestoreArray(y0,&pty);
          VecSetValues(*vindb,puse,idp,yarray,INSERT_VALUES);
          for(m=0;m<pmax;m++){*(yarray+m) = 0; }
          p = 0;
          if (n*localsize-l<=pmax){puse = n*localsize-l-1;}else{puse=pmax;}
        }else{p++;}
        l++;
      }
    }
    VecAssemblyBegin(*vindb);
    VecAssemblyEnd(*vindb);
    /* swap source and destination for the next iteration */
    vindc = vindb;
    vindb = vinda;
    vinda = vindc;
    //ierr = VecCopy(x,x0);CHKERRQ(ierr);
    ierr = VecNorm(*vinda,NORM_2,Mixnorm+niter); CHKERRQ(ierr);
    *(Mixnorm+niter) = *(Mixnorm+niter)/n;

    ierr = PetscGetTime(&v2);CHKERRQ(ierr);
    elapsed_time = v2 - v1;
    PetscPrintf(PETSC_COMM_WORLD," %d %f %f %f \n",niter,*(Mixnorm+niter),elapsed_time,elapsed_time/n/n*1e6 );
    PetscFPrintf(PETSC_COMM_WORLD,fidoutput," %d %f %f %f\n" ,niter,*(Mixnorm+niter),elapsed_time,elapsed_time/n/n*1e6 );
  }
  PetscFClose(PETSC_COMM_WORLD,fidoutput);

  ///////////////////////////////////////////////////////////////////////////
  if(withMatlab==1){
    VecView(x0,socketviewer);
    PetscScalarView(iter,Mixnorm,socketviewer);
  }

  /* NOTE(review): VecVal is allocated above but never freed here */
  free(CenterX);
  free(CenterY);
  free(ShiftX);
  free(ShiftY);
  free(x0array);
  free(idx);
  free(idy);
  free(idp);
  free(yarray);
  free(NzindJ);
  free(Mixnorm);

  ierr = VecDestroy(x0);CHKERRQ(ierr);
  ierr = VecDestroy(x);CHKERRQ(ierr);
  ierr = VecDestroy(y0);CHKERRQ(ierr);

  PetscPrintf(PETSC_COMM_WORLD,"Done!");

  //////////////////////////////////////////////////////////////////////////////////////
  ierr = PetscFinalize();CHKERRQ(ierr);
  return 0;
}
/* Benchmarks the per-launch overhead of PETSc threadcomm kernels against an
   inline OpenMP parallel region and plain serial calls. Runs N launches of
   CounterIncrement_kernel per timing sample, 10 samples per variant, and
   prints the average time per kernel launch for each variant. */
int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscInt i,j,k,N=100,**counters,tsize;

  PetscInitialize(&argc,&argv,(char *)0,help);
  ierr = PetscThreadCommView(PETSC_COMM_WORLD,PETSC_VIEWER_STDOUT_WORLD); CHKERRQ(ierr);
  ierr = PetscOptionsGetInt(PETSC_NULL,"-N",&N,PETSC_NULL); CHKERRQ(ierr);
  ierr = PetscThreadCommGetNThreads(PETSC_COMM_WORLD,&tsize); CHKERRQ(ierr);
  /* one counter slot per thread; initialized by the kernel below */
  ierr = PetscMalloc(tsize*sizeof(*counters),&counters); CHKERRQ(ierr);
  ierr = PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterInit_kernel,1,counters); CHKERRQ(ierr);

  /* Variant 1: threadcomm kernel launches (barriers bracket the timed region
     so queued kernels are drained before each timestamp) */
  for (i=0; i<10; i++) {
    PetscReal t0,t1;
    ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD); CHKERRQ(ierr);
    ierr = PetscGetTime(&t0); CHKERRQ(ierr);
    for (j=0; j<N; j++) {
      /* ierr = PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterIncrement_kernel,1,counters);CHKERRQ(ierr); */
      ierr = PetscThreadCommRunKernel1(PETSC_COMM_WORLD,(PetscThreadKernel)CounterIncrement_kernel,counters); CHKERRQ(ierr);
    }
    ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD); CHKERRQ(ierr);
    ierr = PetscGetTime(&t1); CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_WORLD,"Time per kernel: %g us\n",1e6*(t1-t0)/N); CHKERRQ(ierr);
  }

  /* Variant 2: the same work dispatched via an inline OpenMP parallel region */
  for (i=0; i<10; i++) {
    PetscReal t0,t1;
    ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD); CHKERRQ(ierr);
    ierr = PetscGetTime(&t0); CHKERRQ(ierr);
    for (j=0; j<N; j++) {
#pragma omp parallel num_threads(tsize)
      {
        PetscInt trank = omp_get_thread_num();
        CounterIncrement_kernel(trank,counters);
      }
    }
    ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD); CHKERRQ(ierr);
    ierr = PetscGetTime(&t1); CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_WORLD,"OpenMP inline time per kernel: %g us\n",1e6*(t1-t0)/N); CHKERRQ(ierr);
  }

  /* Variant 3: one serial call per launch (baseline for call overhead) */
  for (i=0; i<10; i++) {
    PetscReal t0,t1;
    ierr = PetscGetTime(&t0); CHKERRQ(ierr);
    for (j=0; j<N; j++) {
      CounterIncrement_kernel(0,counters);
    }
    ierr = PetscGetTime(&t1); CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_WORLD,"Serial inline time per single kernel: %g us\n",1e6*(t1-t0)/N); CHKERRQ(ierr);
  }

  /* Variant 4: serial loop over all tsize thread slots per launch */
  for (i=0; i<10; i++) {
    PetscReal t0,t1;
    ierr = PetscGetTime(&t0); CHKERRQ(ierr);
    for (j=0; j<N; j++) {
      for (k=0; k<tsize; k++) CounterIncrement_kernel(k,counters);
    }
    ierr = PetscGetTime(&t1); CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_WORLD,"Serial inline time per kernel: %g us\n",1e6*(t1-t0)/N); CHKERRQ(ierr);
  }

  /* free the per-thread counters on the threads that own them, then the table */
  ierr = PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterFree_kernel,1,counters); CHKERRQ(ierr);
  ierr = PetscThreadCommBarrier(PETSC_COMM_WORLD); CHKERRQ(ierr);
  ierr = PetscFree(counters); CHKERRQ(ierr);
  PetscFinalize();
  return 0;
}