int GaussSeidelPoisson1D(Vector u, double tol, int maxit) { int it=0, i, j; double max = tol+1; double rl = maxNorm(u); Vector b = createVector(u->len); Vector r = createVector(u->len); Vector v = createVector(u->len); copyVector(b, u); fillVector(u, 0.0); while (max > tol*rl && ++it < maxit) { copyVector(v, u); copyVector(u, b); for (i=0;i<r->len;++i) { if (i > 0) u->data[i] += v->data[i-1]; if (i < r->len-1) u->data[i] += v->data[i+1]; r->data[i] = u->data[i]-(2.0+alpha)*v->data[i]; u->data[i] /= (2.0+alpha); v->data[i] = u->data[i]; } max = maxNorm(r); } freeVector(b); freeVector(r); freeVector(v); return it; }
int GaussSeidelPoisson1Drb(Vector u, double tol, int maxit) { int it=0, i, j; double max = tol+1; double rl = maxNorm(u); Vector b = createVector(u->len); Vector r = createVector(u->len); Vector v = createVector(u->len); copyVector(b, u); fillVector(u, 0.0); while (max > tol && ++it < maxit) { copyVector(v, u); copyVector(u, b); for (j=0;j<2;++j) { #pragma omp parallel for schedule(static) for (i=j;i<r->len;i+=2) { if (i > 0) u->data[i] += v->data[i-1]; if (i < r->len-1) u->data[i] += v->data[i+1]; r->data[i] = u->data[i]-(2.0+alpha)*v->data[i]; u->data[i] /= (2.0+alpha); v->data[i] = u->data[i]; } } max = maxNorm(r); } freeVector(b); freeVector(r); freeVector(v); return it; }
int GaussJacobiPoisson1D(Vector u, double tol, int maxit) { int it=0, i; double rl; double max = tol+1; Vector b = createVector(u->len); Vector e = createVector(u->len); copyVector(b, u); fillVector(u, 0.0); rl = maxNorm(b); while (max > tol*rl && ++it < maxit) { copyVector(e, u); copyVector(u, b); #pragma omp parallel for schedule(static) for (i=0;i<e->len;++i) { if (i > 0) u->data[i] += e->data[i-1]; if (i < e->len-1) u->data[i] += e->data[i+1]; u->data[i] /= (2.0+alpha); } axpy(e, u, -1.0); max = maxNorm(e); } freeVector(b); freeVector(e); return it; }
int GaussJacobiPoisson1D(Vector u, double tol, int maxit) { int it=0, i; Vector b = cloneVector(u); Vector e = cloneVector(u); copyVector(b, u); fillVector(u, 0.0); double max = tol+1; while (max > tol && ++it < maxit) { copyVector(e, u); collectVector(e); copyVector(u, b); #pragma omp parallel for schedule(static) for (i=1;i<e->len-1;++i) { u->data[i] += e->data[i-1]; u->data[i] += e->data[i+1]; u->data[i] /= (2.0+alpha); } axpy(e, u, -1.0); e->data[0] = e->data[e->len-1] = 0.0; max = maxNorm(e); } freeVector(b); freeVector(e); return it; }
/* * Check for numerical errors in the Jacobian. Useful debugging aid when new * models are being incorporated. */ void TWOjacCheck(TWOdevice *pDevice, BOOLEAN tranAnalysis, TWOtranInfo *info) { int index, rIndex; double del, diff, tol, *dptr; if (TWOjacDebug) { if (!OneCarrier) { TWO_sysLoad(pDevice, tranAnalysis, info); } else if (OneCarrier == N_TYPE) { TWONsysLoad(pDevice, tranAnalysis, info); } else if (OneCarrier == P_TYPE) { TWOPsysLoad(pDevice, tranAnalysis, info); } /* * spPrint( pDevice->matrix, 0, 1, 1 ); */ pDevice->rhsNorm = maxNorm(pDevice->rhs, pDevice->numEqns); for (index = 1; index <= pDevice->numEqns; index++) { if (1e3 * ABS(pDevice->rhs[index]) > pDevice->rhsNorm) { fprintf(stderr, "eqn %d: res %11.4e, norm %11.4e\n", index, pDevice->rhs[index], pDevice->rhsNorm); } } for (index = 1; index <= pDevice->numEqns; index++) { pDevice->rhsImag[index] = pDevice->rhs[index]; } for (index = 1; index <= pDevice->numEqns; index++) { pDevice->copiedSolution[index] = pDevice->dcSolution[index]; del = 1e-4 * pDevice->abstol + 1e-6 * ABS(pDevice->dcSolution[index]); pDevice->dcSolution[index] += del; if (!OneCarrier) { TWO_rhsLoad(pDevice, tranAnalysis, info); } else if (OneCarrier == N_TYPE) { TWONrhsLoad(pDevice, tranAnalysis, info); } else if (OneCarrier == P_TYPE) { TWOPrhsLoad(pDevice, tranAnalysis, info); } pDevice->dcSolution[index] = pDevice->copiedSolution[index]; for (rIndex = 1; rIndex <= pDevice->numEqns; rIndex++) { diff = (pDevice->rhsImag[rIndex] - pDevice->rhs[rIndex]) / del; dptr = spFindElement(pDevice->matrix, rIndex, index); if (dptr != NULL) { tol = (1e-4 * pDevice->abstol) + (1e-2 * MAX(ABS(diff), ABS(*dptr))); if ((diff != 0.0) && (ABS(diff - *dptr) > tol)) { fprintf(stderr, "Mismatch[%d][%d]: FD = %11.4e, AJ = %11.4e\n\t FD-AJ = %11.4e vs. %11.4e\n", rIndex, index, diff, *dptr, ABS(diff - *dptr), tol); } } else { if (diff != 0.0) { fprintf(stderr, "Missing [%d][%d]: FD = %11.4e, AJ = 0.0\n", rIndex, index, diff); } } } } } }
int main(int argc, char** argv) { int rank, size; init_app(argc, argv, &rank, &size); if (argc < 2) { printf("usage: %s <N> [L]\n",argv[0]); close_app(); return 1; } /* the total number of grid points in each spatial direction is (N+1) */ /* the total number of degrees-of-freedom in each spatial direction is (N-1) */ int N = atoi(argv[1]); int M = N-1; double L=1.0; if (argc > 2) L = atof(argv[2]); double h = L/N; poisson_info_t ctx; ctx.A = createPoisson1D(M); Vector grid = createVector(M); for (int i=0;i<M;++i) grid->data[i] = (i+1)*h; Matrix u = createMatrix(M, M); evalMesh(u->as_vec, grid, grid, poisson_source); scaleVector(u->as_vec, h*h); double time = WallTime(); cg(evaluate, u, 1.e-6, &ctx); evalMesh2(u->as_vec, grid, grid, exact_solution, -1.0); double max = maxNorm(u->as_vec); if (rank == 0) { printf("elapsed: %f\n", WallTime()-time); printf("max: %f\n", max); } freeMatrix(u); freeVector(grid); freeMatrix(ctx.A); close_app(); return 0; }
int main(int argc, char** argv) { int i, j, N, flag; Matrix A=NULL, Q=NULL; Vector b, grid, e, lambda=NULL; double time, sum, h, tol=1e-4; if (argc < 3) { printf("need two parameters, N and flag [and tolerance]\n"); printf(" - N is the problem size (in each direction\n"); printf(" - flag = 1 -> Dense LU\n"); printf(" - flag = 2 -> Dense Cholesky\n"); printf(" - flag = 3 -> Full Gauss-Jacobi iterations\n"); printf(" - flag = 4 -> Full Gauss-Jacobi iterations using BLAS\n"); printf(" - flag = 5 -> Full Gauss-Seidel iterations\n"); printf(" - flag = 6 -> Full Gauss-Seidel iterations using BLAS\n"); printf(" - flag = 7 -> Full CG iterations\n"); printf(" - flag = 8 -> Matrix-less Gauss-Jacobi iterations\n"); printf(" - flag = 9 -> Matrix-less Gauss-Seidel iterations\n"); printf(" - flag = 10 -> Matrix-less Red-Black Gauss-Seidel iterations\n"); printf(" - flag = 11 -> Diagonalization\n"); printf(" - flag = 12 -> Diagonalization - FST\n"); printf(" - flag = 13 -> Matrix-less CG iterations\n"); return 1; } N=atoi(argv[1]); flag=atoi(argv[2]); if (argc > 3) tol = atof(argv[3]); if (N < 0) { printf("invalid problem size given\n"); return 2; } if (flag < 0 || flag > 13) { printf("invalid flag given\n"); return 3; } if (flag == 10 && (N-1)%2 != 0) { printf("need an even size for red-black iterations\n"); return 4; } if (flag == 12 && (N & (N-1)) != 0) { printf("need a power-of-two for fst-based diagonalization\n"); return 5; } h = 1.0/N; grid = equidistantMesh(0.0, 1.0, N); b = createVector(N-1); e = createVector(N-1); evalMeshInternal(b, grid, source); evalMeshInternal(e, grid, exact); scaleVector(b, pow(h, 2)); axpy(b, e, alpha); if (flag < 8) { A = createMatrix(N-1,N-1); diag(A, -1, -1.0); diag(A, 0, 2.0+alpha); diag(A, 1, -1.0); } if (flag >= 11 && flag < 13) lambda = generateEigenValuesP1D(N-1); if (flag == 11) Q = generateEigenMatrixP1D(N-1); time = WallTime(); if (flag == 1) { int* ipiv=NULL; lusolve(A, b, &ipiv); free(ipiv); } else if (flag == 2) llsolve(A,b,0); else if (flag == 3) printf("Gauss-Jacobi used %i iterations\n", GaussJacobi(A, b, tol, 10000000)); else if (flag == 4) printf("Gauss-Jacobi used %i iterations\n", GaussJacobiBlas(A, b, tol, 10000000)); else if (flag == 5) printf("Gauss-Seidel used %i iterations\n", GaussSeidel(A, b, tol, 10000000)); else if (flag == 6) printf("Gauss-Seidel used %i iterations\n", GaussSeidelBlas(A, b, tol, 10000000)); else if (flag == 7) printf("CG used %i iterations\n", cg(A, b, 1e-8)); else if (flag == 8) printf("Gauss-Jacobi used %i iterations\n", GaussJacobiPoisson1D(b, tol, 10000000)); else if (flag == 9) printf("Gauss-Jacobi used %i iterations\n", GaussSeidelPoisson1D(b, tol, 10000000)); else if (flag == 10) printf("Gauss-Jacobi used %i iterations\n", GaussSeidelPoisson1Drb(b, tol, 10000000)); else if (flag == 11) DiagonalizationPoisson1D(b,lambda,Q); else if (flag == 12) DiagonalizationPoisson1Dfst(b,lambda); else if (flag == 13) printf("CG used %i iterations\n", cgMatrixFree(Poisson1D, b, tol)); printf("elapsed: %f\n", WallTime()-time); evalMeshInternal(e, grid, exact); axpy(b,e,-1.0); printf("max error: %e\n", maxNorm(b)); if (A) freeMatrix(A); if (Q) freeMatrix(Q); freeVector(grid); freeVector(b); freeVector(e); if (lambda) freeVector(lambda); return 0; }
int main(int argc, char** argv) { int i, j, N, flag; Matrix A=NULL, Q=NULL; Vector b, grid, e, lambda=NULL; double time, sum, h, tol=1e-6; int rank, size; int mpi_top_coords; int mpi_top_sizes; init_app(argc, argv, &rank, &size); if (argc < 3) { printf("need two parameters, N and flag [and tolerance]\n"); printf(" - N is the problem size (in each direction\n"); printf(" - flag = 1 -> Matrix-free Gauss-Jacobi iterations\n"); printf(" - flag = 2 -> Matrix-free red-black Gauss-Seidel iterations\n"); printf(" - flag = 3 -> Matrix-free CG iterations\n"); printf(" - flag = 4 -> Matrix-free additive schwarz preconditioned+Cholesky CG iterations\n"); printf(" - flag = 5 -> Matrix-free additive schwarz preconditioned+CG CG iterations\n"); return 1; } N=atoi(argv[1]); flag=atoi(argv[2]); if (argc > 3) tol=atof(argv[3]); if (N < 0) { if (rank == 0) printf("invalid problem size given\n"); close_app(); return 2; } if (flag < 0 || flag > 5) { if (rank == 0) printf("invalid flag given\n"); close_app(); return 3; } if (flag == 2 && (N-1)%2 != 0 && ((N-1)/size) % 2 != 0) { if (rank == 0) printf("need an even size (per process) for red-black iterations\n"); close_app(); return 4; } // setup topology mpi_top_coords = 0; mpi_top_sizes = 0; MPI_Dims_create(size, 1, &mpi_top_sizes); int periodic = 0; MPI_Comm comm; MPI_Cart_create(MPI_COMM_WORLD, 1, &mpi_top_sizes, &periodic, 0, &comm); MPI_Cart_coords(comm, rank, 1, &mpi_top_coords); b = createVectorMPI(N+1, &comm, 1, 1); e = createVectorMPI(N+1, &comm, 1, 1); grid = equidistantMesh(0.0, 1.0, N); h = 1.0/N; evalMeshDispl(b, grid, source); scaleVector(b, pow(h, 2)); evalMeshDispl(e, grid, exact); axpy(b, e, alpha); b->data[0] = b->data[b->len-1] = 0.0; if (flag == 4) { int size = b->len; if (b->comm_rank == 0) size--; if (b->comm_rank == b->comm_size-1) size--; A1D = createMatrix(size, size); A1Dfactored = 0; diag(A1D, -1, -1.0); diag(A1D, 0, 2.0+alpha); diag(A1D, 1, -1.0); } int its=-1; char method[128]; time = WallTime(); if (flag == 1) { its=GaussJacobiPoisson1D(b, tol, 1000000); sprintf(method,"Gauss-Jacobi"); } if (flag == 2) { its=GaussSeidelPoisson1Drb(b, tol, 1000000); sprintf(method,"Gauss-Seidel"); } if (flag == 3) { its=cgMatrixFree(Poisson1D, b, tol); sprintf(method,"CG"); } if (flag == 4 || flag == 5) { its=pcgMatrixFree(Poisson1D, Poisson1DPre, b, tol); sprintf(method,"PCG"); } if (rank == 0) { printf("%s used %i iterations\n", method, its); printf("elapsed: %f\n", WallTime()-time); } evalMeshDispl(e, grid, exact); axpy(b,e,-1.0); b->data[0] = b->data[b->len-1] = 0.0; h = maxNorm(b); if (rank == 0) printf("max error: %e\n", h); if (A) freeMatrix(A); if (Q) freeMatrix(Q); freeVector(grid); freeVector(b); freeVector(e); if (lambda) freeVector(lambda); if (A1D) freeMatrix(A1D); MPI_Comm_free(&comm); close_app(); return 0; }
int main(int argc, char** argv) { int dim_n = DIM_N; int dim_m = DIM_M; int dim_k = DIM_K; // if (argc == 4) { dim_n = atoi(argv[1]); dim_m = atoi(argv[2]); dim_k = atoi(argv[3]); } int local_k; // #ifdef MPI MPI_Init(&argc, &argv); int num_tasks; int my_rank; MPI_Comm_size(MPI_COMM_WORLD, &num_tasks); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); local_k = dim_k/num_tasks; #endif // double* restrict storage1 = (double*)malloc(dim_n*dim_m*dim_k*sizeof(double)); double* restrict storage2 = (double*)malloc(dim_n*dim_m*dim_k*sizeof(double)); printf("3x3 stencil: M=%d N=%d K=%d global.\n", dim_n, dim_m, dim_k); printf("array size = %f MB\n", (double) dim_n*dim_m*dim_k*sizeof(double)/1024/1024); double alloctime = -myseconds(); init (storage1, dim_n, dim_m, dim_k); init (storage2, dim_n, dim_m, dim_k); alloctime += myseconds(); printf("Allocation time = %f s.\n\n", alloctime); int n_iters = 0; int nrep = NREP; int ii; double * st_read = storage2; double * st_write = storage1; double phi; double norminf, norml2; double time = - myseconds(); // do { // swaping the solutions double *tmp = st_read; st_read = st_write; st_write = tmp; // ++n_iters; // double ftime = -myseconds(); //PAPI_START; unsigned long long c0 = cycles(); // for (ii = 0; ii < nrep; ++ii) { laplacian(st_read, st_write, dim_m, dim_n, dim_k); } // unsigned long long c1 = cycles(); //unsigned long long cycles = (c1 - c0)/((unsigned long long) (dim_m - 1)*(dim_n - 1)); unsigned long long cycles = (c1 - c0)/nrep; // //PAPI_STOP; ftime += myseconds(); ftime /= nrep; //print(st_write + dim_n*dim_m, dim_m, dim_n); //PAPI_PRINT; // norminf = maxNorm(st_write, st_read, dim_n*dim_m*dim_k); norml2 = l2Norm(st_write, st_read, dim_n*dim_m*dim_k); double flops = (dim_m - 2)*(dim_n - 2)*(dim_k - 2)*6.; // printf("iter = %d, %f s. %ld c., linf = %g l2 = %g, %d flops, %ld c, %f F/c, %f GF/s\n", n_iters, ftime, cycles, norminf, norml2, (long long int) flops, flops/cycles, (double) flops/ftime/1e9); } while (norminf > EPSI); time += myseconds(); // printf("\n# iter= %d time= %g residual= %g\n", n_iters, time, norml2); // free(storage1); free(storage2); // #ifdef MPI MPI_Finalize(); #endif return 0; }
int main(int argc, char** argv) { int rank, size; init_app(argc, argv, &rank, &size); if (argc < 2) { printf("usage: %s <N> [L]\n",argv[0]); close_app(); return 1; } /* the total number of grid points in each spatial direction is (N+1) */ /* the total number of degrees-of-freedom in each spatial direction is (N-1) */ int N = atoi(argv[1]); int M = N-1; double L=1; if (argc > 2) L = atof(argv[2]); double h = L/N; Vector lambda = createEigenValues(M); Vector grid = createVector(M); for (int i=0;i<M;++i) grid->data[i] = (i+1)*h; Matrix u = createMatrix(M, M); Matrix ut = createMatrix(M, M); evalMesh(u->as_vec, grid, grid, poisson_source); scaleVector(u->as_vec, h*h); int NN = 4*N; Vector z = createVector(NN); double time = WallTime(); for (int j=0; j < M; j++) fst(u->data[j], &N, z->data, &NN); transposeMatrix(ut, u); for (int i=0; i < M; i++) fstinv(ut->data[i], &N, z->data, &NN); for (int j=0; j < M; j++) for (int i=0; i < M; i++) ut->data[j][i] /= lambda->data[i]+lambda->data[j]; for (int i=0; i < M; i++) fst(ut->data[i], &N, z->data, &NN); transposeMatrix(u, ut); for (int j=0; j < M; j++) fstinv(u->data[j], &N, z->data, &NN); evalMesh2(u->as_vec, grid, grid, exact_solution, -1.0); double max = maxNorm(u->as_vec); if (rank == 0) { printf("elapsed: %f\n", WallTime()-time); printf("max: %f\n", max); } freeMatrix(u); freeMatrix(ut); freeVector(grid); freeVector(z); freeVector(lambda); close_app(); return 0; }
int TWOnewDelta(TWOdevice *pDevice, BOOLEAN tranAnalysis, TWOtranInfo *info) { int index, iterNum = 0; double newNorm; double fib, lambda, fibn, fibp; BOOLEAN acceptable = FALSE, error = FALSE; lambda = 1.0; fibn = 1.0; fibp = 1.0; /* * copy the contents of dcSolution into copiedSolution and modify * dcSolution by adding the deltaSolution */ for (index = 1; index <= pDevice->numEqns; ++index) { pDevice->copiedSolution[index] = pDevice->dcSolution[index]; pDevice->dcSolution[index] += pDevice->dcDeltaSolution[index]; } if (pDevice->poissonOnly) { TWOQrhsLoad(pDevice); } else if (!OneCarrier) { TWO_rhsLoad(pDevice, tranAnalysis, info); } else if (OneCarrier == N_TYPE) { TWONrhsLoad(pDevice, tranAnalysis, info); } else if (OneCarrier == P_TYPE) { TWOPrhsLoad(pDevice, tranAnalysis, info); } newNorm = maxNorm(pDevice->rhs, pDevice->numEqns); if (pDevice->rhsNorm <= pDevice->abstol) { lambda = 0.0; newNorm = pDevice->rhsNorm; } else if (newNorm < pDevice->rhsNorm) { acceptable = TRUE; } else { /* chop the step size so that deltax is acceptable */ if (TWOdcDebug) { fprintf(stdout, " %11.4e %11.4e\n", newNorm, lambda); } while (!acceptable) { iterNum++; if (iterNum > NORM_RED_MAXITERS) { error = TRUE; lambda = 0.0; /* Don't break out until after we've reset the device. */ } fib = fibp; fibp = fibn; fibn += fib; lambda *= (fibp / fibn); for (index = 1; index <= pDevice->numEqns; index++) { pDevice->dcSolution[index] = pDevice->copiedSolution[index] + lambda * pDevice->dcDeltaSolution[index]; } if (pDevice->poissonOnly) { TWOQrhsLoad(pDevice); } else if (!OneCarrier) { TWO_rhsLoad(pDevice, tranAnalysis, info); } else if (OneCarrier == N_TYPE) { TWONrhsLoad(pDevice, tranAnalysis, info); } else if (OneCarrier == P_TYPE) { TWOPrhsLoad(pDevice, tranAnalysis, info); } newNorm = maxNorm(pDevice->rhs, pDevice->numEqns); if (error) { break; } if (newNorm <= pDevice->rhsNorm) { acceptable = TRUE; } if (TWOdcDebug) { fprintf(stdout, " %11.4e %11.4e\n", newNorm, lambda); } } } /* restore the previous dcSolution and the new deltaSolution */ pDevice->rhsNorm = newNorm; for (index = 1; index <= pDevice->numEqns; index++) { pDevice->dcSolution[index] = pDevice->copiedSolution[index]; pDevice->dcDeltaSolution[index] *= lambda; } return (error); }
/* the iteration driving loop and convergence check */ void TWOdcSolve(TWOdevice *pDevice, int iterationLimit, BOOLEAN newSolver, BOOLEAN tranAnalysis, TWOtranInfo *info) { TWOnode *pNode; TWOelem *pElem; int size = pDevice->numEqns; int index, eIndex, error; int timesConverged = 0; BOOLEAN quitLoop; BOOLEAN debug; BOOLEAN negConc = FALSE; double *rhs = pDevice->rhs; double *solution = pDevice->dcSolution; double *delta = pDevice->dcDeltaSolution; double poissNorm, contNorm; double startTime, totalStartTime; double totalTime, loadTime, factorTime, solveTime, updateTime, checkTime; double orderTime = 0.0; totalTime = loadTime = factorTime = solveTime = updateTime = checkTime = 0.0; totalStartTime = SPfrontEnd->IFseconds(); quitLoop = FALSE; debug = (!tranAnalysis && TWOdcDebug) || (tranAnalysis && TWOtranDebug); pDevice->iterationNumber = 0; pDevice->converged = FALSE; if (debug) { if (pDevice->poissonOnly) { fprintf(stdout, "Equilibrium Solution:\n"); } else { fprintf(stdout, "Bias Solution:\n"); } fprintf(stdout, "Iteration RHS Norm\n"); } while (!(pDevice->converged || (pDevice->iterationNumber > iterationLimit) || quitLoop)) { pDevice->iterationNumber++; if ((!pDevice->poissonOnly) && (iterationLimit > 0) &&(!tranAnalysis)) { TWOjacCheck(pDevice, tranAnalysis, info); } /* LOAD */ startTime = SPfrontEnd->IFseconds(); if (pDevice->poissonOnly) { TWOQsysLoad(pDevice); } else if (!OneCarrier) { TWO_sysLoad(pDevice, tranAnalysis, info); } else if (OneCarrier == N_TYPE) { TWONsysLoad(pDevice, tranAnalysis, info); } else if (OneCarrier == P_TYPE) { TWOPsysLoad(pDevice, tranAnalysis, info); } pDevice->rhsNorm = maxNorm(rhs, size); loadTime += SPfrontEnd->IFseconds() - startTime; if (debug) { fprintf(stdout, "%7d %11.4e%s\n", pDevice->iterationNumber - 1, pDevice->rhsNorm, negConc ? " negative conc encountered" : ""); negConc = FALSE; } /* FACTOR */ startTime = SPfrontEnd->IFseconds(); error = spFactor(pDevice->matrix); factorTime += SPfrontEnd->IFseconds() - startTime; if (newSolver) { if (pDevice->iterationNumber == 1) { orderTime = factorTime; } else if (pDevice->iterationNumber == 2) { orderTime -= factorTime - orderTime; factorTime -= orderTime; if (pDevice->poissonOnly) { pDevice->pStats->orderTime[STAT_SETUP] += orderTime; } else { pDevice->pStats->orderTime[STAT_DC] += orderTime; } newSolver = FALSE; } } if (foundError(error)) { if (error == spSINGULAR) { int badRow, badCol; spWhereSingular(pDevice->matrix, &badRow, &badCol); printf("***** singular at (%d,%d)\n", badRow, badCol); } pDevice->converged = FALSE; quitLoop = TRUE; continue; } /* SOLVE */ startTime = SPfrontEnd->IFseconds(); spSolve(pDevice->matrix, rhs, delta, NULL, NULL); solveTime += SPfrontEnd->IFseconds() - startTime; /* UPDATE */ startTime = SPfrontEnd->IFseconds(); /* Use norm reducing Newton method for DC bias solutions only. */ if ((!pDevice->poissonOnly) && (iterationLimit > 0) && (!tranAnalysis) && (pDevice->rhsNorm > 1e-1)) { error = TWOnewDelta(pDevice, tranAnalysis, info); if (error) { pDevice->converged = FALSE; quitLoop = TRUE; updateTime += SPfrontEnd->IFseconds() - startTime; continue; } } for (index = 1; index <= size; index++) { solution[index] += delta[index]; } updateTime += SPfrontEnd->IFseconds() - startTime; /* CHECK CONVERGENCE */ startTime = SPfrontEnd->IFseconds(); /* Check if updates have gotten sufficiently small. */ if (pDevice->iterationNumber != 1) { pDevice->converged = TWOdeltaConverged(pDevice); } /* Check if the residual is smaller than abstol. */ if (pDevice->converged && (!pDevice->poissonOnly) && (!tranAnalysis)) { if (!OneCarrier) { TWO_rhsLoad(pDevice, tranAnalysis, info); } else if (OneCarrier == N_TYPE) { TWONrhsLoad(pDevice, tranAnalysis, info); } else if (OneCarrier == P_TYPE) { TWOPrhsLoad(pDevice, tranAnalysis, info); } pDevice->rhsNorm = maxNorm(rhs, size); if (pDevice->rhsNorm > pDevice->abstol) { pDevice->converged = FALSE; } if ((++timesConverged >= 2) && (pDevice->rhsNorm < 1e3 * pDevice->abstol)) { pDevice->converged = TRUE; } else if (timesConverged >= 5) { pDevice->converged = FALSE; quitLoop = TRUE; continue; } } else if (pDevice->converged && pDevice->poissonOnly) { TWOQrhsLoad(pDevice); pDevice->rhsNorm = maxNorm(rhs, size); if (pDevice->rhsNorm > pDevice->abstol) { pDevice->converged = FALSE; } if (++timesConverged >= 5) { pDevice->converged = TRUE; } } /* Check if the carrier concentrations are negative. */ if (pDevice->converged && (!pDevice->poissonOnly)) { /* Clear garbage entry since carrier-free elements reference it. */ solution[0] = 0.0; for (eIndex = 1; eIndex <= pDevice->numElems; eIndex++) { pElem = pDevice->elements[eIndex]; for (index = 0; index <= 3; index++) { if (pElem->evalNodes[index]) { pNode = pElem->pNodes[index]; if (solution[pNode->nEqn] < 0.0) { pDevice->converged = FALSE; negConc = TRUE; if (tranAnalysis) { quitLoop = TRUE; } else { solution[pNode->nEqn] = 0.0; } } if (solution[pNode->pEqn] < 0.0) { pDevice->converged = FALSE; negConc = TRUE; if (tranAnalysis) { quitLoop = TRUE; } else { solution[pNode->pEqn] = 0.0; } } } } } if (!pDevice->converged) { if (!OneCarrier) { TWO_rhsLoad(pDevice, tranAnalysis, info); } else if (OneCarrier == N_TYPE) { TWONrhsLoad(pDevice, tranAnalysis, info); } else if (OneCarrier == P_TYPE) { TWOPrhsLoad(pDevice, tranAnalysis, info); } pDevice->rhsNorm = maxNorm(rhs, size); } } checkTime += SPfrontEnd->IFseconds() - startTime; } totalTime += SPfrontEnd->IFseconds() - totalStartTime; if (tranAnalysis) { pDevice->pStats->loadTime[STAT_TRAN] += loadTime; pDevice->pStats->factorTime[STAT_TRAN] += factorTime; pDevice->pStats->solveTime[STAT_TRAN] += solveTime; pDevice->pStats->updateTime[STAT_TRAN] += updateTime; pDevice->pStats->checkTime[STAT_TRAN] += checkTime; pDevice->pStats->numIters[STAT_TRAN] += pDevice->iterationNumber; } else if (pDevice->poissonOnly) { pDevice->pStats->loadTime[STAT_SETUP] += loadTime; pDevice->pStats->factorTime[STAT_SETUP] += factorTime; pDevice->pStats->solveTime[STAT_SETUP] += solveTime; pDevice->pStats->updateTime[STAT_SETUP] += updateTime; pDevice->pStats->checkTime[STAT_SETUP] += checkTime; pDevice->pStats->numIters[STAT_SETUP] += pDevice->iterationNumber; } else { pDevice->pStats->loadTime[STAT_DC] += loadTime; pDevice->pStats->factorTime[STAT_DC] += factorTime; pDevice->pStats->solveTime[STAT_DC] += solveTime; pDevice->pStats->updateTime[STAT_DC] += updateTime; pDevice->pStats->checkTime[STAT_DC] += checkTime; pDevice->pStats->numIters[STAT_DC] += pDevice->iterationNumber; } if (debug) { if (!tranAnalysis) { pDevice->rhsNorm = maxNorm(rhs, size); fprintf(stdout, "%7d %11.4e%s\n", pDevice->iterationNumber, pDevice->rhsNorm, negConc ? " negative conc in solution" : ""); } if (pDevice->converged) { if (!pDevice->poissonOnly) { poissNorm = contNorm = 0.0; rhs[0] = 0.0; /* Make sure garbage entry is clear. */ for (eIndex = 1; eIndex <= pDevice->numElems; eIndex++) { pElem = pDevice->elements[eIndex]; for (index = 0; index <= 3; index++) { if (pElem->evalNodes[index]) { pNode = pElem->pNodes[index]; poissNorm = MAX(poissNorm,ABS(rhs[pNode->psiEqn])); contNorm = MAX(contNorm,ABS(rhs[pNode->nEqn])); contNorm = MAX(contNorm,ABS(rhs[pNode->pEqn])); } } } fprintf(stdout, "Residual: %11.4e C/um poisson, %11.4e A/um continuity\n", poissNorm * EpsNorm * VNorm * 1e-4, contNorm * JNorm * LNorm * 1e-4); } else { fprintf(stdout, "Residual: %11.4e C/um poisson\n", pDevice->rhsNorm * EpsNorm * VNorm * 1e-4); } } } }
int main(int argc, char** argv) { int rank, size; init_app(argc, argv, &rank, &size); if (argc < 2) { printf("usage: %s <N> [L]\n",argv[0]); close_app(); return 1; } /* the total number of grid points in each spatial direction is (N+1) */ /* the total number of degrees-of-freedom in each spatial direction is (N-1) */ int N = atoi(argv[1]); int M = N-1; double L=1.0; if (argc > 2) L = atof(argv[2]); double h = L/N; Vector grid = createVector(M); for (int i=0;i<M;++i) grid->data[i] = (i+1)*h; int coords[2] = {0}; int sizes[2] = {1}; #ifdef HAVE_MPI sizes[0] = sizes[1] = 0; MPI_Dims_create(size,2,sizes); int periodic[2]; periodic[0] = periodic[1] = 0; MPI_Comm comm; MPI_Cart_create(MPI_COMM_WORLD,2,sizes,periodic,0,&comm); MPI_Cart_coords(comm,rank,2,coords); #endif int* len[2]; int* displ[2]; splitVector(M, sizes[0], &len[0], &displ[0]); splitVector(M, sizes[1], &len[1], &displ[1]); #ifdef HAVE_MPI Matrix u = createMatrixMPI(len[0][coords[0]]+2, len[1][coords[1]]+2, M, M, &comm); #else Matrix u = createMatrix(M+2, M+2); #endif evalMeshDispl(u, grid, grid, poisson_source, displ[0][coords[0]], displ[1][coords[1]]); scaleVector(u->as_vec, h*h); double time = WallTime(); GS(u, 1e-6, 5000); evalMesh2Displ(u, grid, grid, exact_solution, -1.0, displ[0][coords[0]], displ[1][coords[1]]); double max = maxNorm(u->as_vec); if (rank == 0) { printf("elapsed: %f\n", WallTime()-time); printf("max: %f\n", max); } freeMatrix(u); freeVector(grid); for (int i=0;i<2;++i) { free(len[i]); free(displ[i]); } close_app(); return 0; }