예제 #1
0
파일: poisson1D.c 프로젝트: akva2/tma4280
int GaussSeidelPoisson1D(Vector u, double tol, int maxit)
{
  int it=0, i, j;
  double max = tol+1;
  double rl = maxNorm(u);
  Vector b = createVector(u->len);
  Vector r = createVector(u->len);
  Vector v = createVector(u->len);
  copyVector(b, u);
  fillVector(u, 0.0);
  while (max > tol*rl && ++it < maxit) {
    copyVector(v, u);
    copyVector(u, b);
    for (i=0;i<r->len;++i) {
      if (i > 0)
        u->data[i] += v->data[i-1];
      if (i < r->len-1)
        u->data[i] += v->data[i+1];
      r->data[i] = u->data[i]-(2.0+alpha)*v->data[i];
      u->data[i] /= (2.0+alpha);
      v->data[i] = u->data[i];
    }
    max = maxNorm(r);
  }
  freeVector(b);
  freeVector(r);
  freeVector(v);

  return it;
}
예제 #2
0
파일: poisson1D.c 프로젝트: akva2/tma4280
int GaussSeidelPoisson1Drb(Vector u, double tol, int maxit)
{
  int it=0, i, j;
  double max = tol+1;
  double rl = maxNorm(u);
  Vector b = createVector(u->len);
  Vector r = createVector(u->len);
  Vector v = createVector(u->len);
  copyVector(b, u);
  fillVector(u, 0.0);
  while (max > tol && ++it < maxit) {
    copyVector(v, u);
    copyVector(u, b);
    for (j=0;j<2;++j) {
#pragma omp parallel for schedule(static)
      for (i=j;i<r->len;i+=2) {
        if (i > 0)
          u->data[i] += v->data[i-1];
        if (i < r->len-1)
          u->data[i] += v->data[i+1];
        r->data[i] = u->data[i]-(2.0+alpha)*v->data[i];
        u->data[i] /= (2.0+alpha);
        v->data[i] = u->data[i];
      }
    }
    max = maxNorm(r);
  }
  freeVector(b);
  freeVector(r);
  freeVector(v);

  return it;
}
예제 #3
0
파일: poisson1D.c 프로젝트: akva2/tma4280
int GaussJacobiPoisson1D(Vector u, double tol, int maxit)
{
  int it=0, i;
  double rl;
  double max = tol+1;
  Vector b = createVector(u->len);
  Vector e = createVector(u->len);
  copyVector(b, u);
  fillVector(u, 0.0);
  rl = maxNorm(b);
  while (max > tol*rl && ++it < maxit) {
    copyVector(e, u);
    copyVector(u, b);
#pragma omp parallel for schedule(static)
    for (i=0;i<e->len;++i) {
      if (i > 0)
        u->data[i] += e->data[i-1];
      if (i < e->len-1)
        u->data[i] += e->data[i+1];
      u->data[i] /= (2.0+alpha);
    }
    axpy(e, u, -1.0);
    max = maxNorm(e);
  }
  freeVector(b);
  freeVector(e);

  return it;
}
예제 #4
0
int GaussJacobiPoisson1D(Vector u, double tol, int maxit)
{
  int it=0, i;
  Vector b = cloneVector(u);
  Vector e = cloneVector(u);
  copyVector(b, u);
  fillVector(u, 0.0);
  double max = tol+1;
  while (max > tol && ++it < maxit) {
    copyVector(e, u);
    collectVector(e);
    copyVector(u, b);
#pragma omp parallel for schedule(static)
    for (i=1;i<e->len-1;++i) {
      u->data[i] += e->data[i-1];
      u->data[i] += e->data[i+1];
      u->data[i] /= (2.0+alpha);
    }
    axpy(e, u, -1.0);
    e->data[0] = e->data[e->len-1] = 0.0;
    max = maxNorm(e);
  }
  freeVector(b);
  freeVector(e);

  return it;
}
예제 #5
0
파일: twosolve.c 프로젝트: Anastien/ngspice
/*
 * Check for numerical errors in the Jacobian.  Useful debugging aid when new
 * models are being incorporated.
 */
void
TWOjacCheck(TWOdevice *pDevice, BOOLEAN tranAnalysis, TWOtranInfo *info)
{
  int index, rIndex;
  double del, diff, tol, *dptr;

  if (TWOjacDebug) {
    if (!OneCarrier) {
      TWO_sysLoad(pDevice, tranAnalysis, info);
    } else if (OneCarrier == N_TYPE) {
      TWONsysLoad(pDevice, tranAnalysis, info);
    } else if (OneCarrier == P_TYPE) {
      TWOPsysLoad(pDevice, tranAnalysis, info);
    }
    /*
     * spPrint( pDevice->matrix, 0, 1, 1 );
     */
    pDevice->rhsNorm = maxNorm(pDevice->rhs, pDevice->numEqns);
    for (index = 1; index <= pDevice->numEqns; index++) {
      if (1e3 * ABS(pDevice->rhs[index]) > pDevice->rhsNorm) {
	fprintf(stderr, "eqn %d: res %11.4e, norm %11.4e\n",
	    index, pDevice->rhs[index], pDevice->rhsNorm);
      }
    }
    for (index = 1; index <= pDevice->numEqns; index++) {
      pDevice->rhsImag[index] = pDevice->rhs[index];
    }
    for (index = 1; index <= pDevice->numEqns; index++) {
      pDevice->copiedSolution[index] = pDevice->dcSolution[index];
      del = 1e-4 * pDevice->abstol + 1e-6 * ABS(pDevice->dcSolution[index]);
      pDevice->dcSolution[index] += del;
      if (!OneCarrier) {
	TWO_rhsLoad(pDevice, tranAnalysis, info);
      } else if (OneCarrier == N_TYPE) {
	TWONrhsLoad(pDevice, tranAnalysis, info);
      } else if (OneCarrier == P_TYPE) {
	TWOPrhsLoad(pDevice, tranAnalysis, info);
      }
      pDevice->dcSolution[index] = pDevice->copiedSolution[index];
      for (rIndex = 1; rIndex <= pDevice->numEqns; rIndex++) {
	diff = (pDevice->rhsImag[rIndex] - pDevice->rhs[rIndex]) / del;
	dptr = spFindElement(pDevice->matrix, rIndex, index);
	if (dptr != NULL) {
	  tol = (1e-4 * pDevice->abstol) + (1e-2 * MAX(ABS(diff), ABS(*dptr)));
	  if ((diff != 0.0) && (ABS(diff - *dptr) > tol)) {
	    fprintf(stderr, "Mismatch[%d][%d]: FD = %11.4e, AJ = %11.4e\n\t FD-AJ = %11.4e vs. %11.4e\n",
		rIndex, index, diff, *dptr,
		ABS(diff - *dptr), tol);
	  }
	} else {
	  if (diff != 0.0) {
	    fprintf(stderr, "Missing [%d][%d]: FD = %11.4e, AJ = 0.0\n",
		rIndex, index, diff);
	  }
	}
      }
    }
  }
}
예제 #6
0
int main(int argc, char** argv)
{
  int rank, size;
  init_app(argc, argv, &rank, &size);

  if (argc < 2) {
    printf("usage: %s <N> [L]\n",argv[0]);
    close_app();
    return 1;
  }

  /* the total number of grid points in each spatial direction is (N+1) */
  /* the total number of degrees-of-freedom in each spatial direction is (N-1) */
  int N  = atoi(argv[1]);
  int M  = N-1;
  double L=1.0;
  if (argc > 2)
    L = atof(argv[2]);

  double h = L/N;
  poisson_info_t ctx;
  ctx.A = createPoisson1D(M);

  Vector grid = createVector(M);
  for (int i=0;i<M;++i)
    grid->data[i] = (i+1)*h;

  Matrix u = createMatrix(M, M);
  evalMesh(u->as_vec, grid, grid, poisson_source);
  scaleVector(u->as_vec, h*h);

  double time = WallTime();
  cg(evaluate, u, 1.e-6, &ctx);

  evalMesh2(u->as_vec, grid, grid, exact_solution, -1.0);
  double max = maxNorm(u->as_vec);

  if (rank == 0) {
    printf("elapsed: %f\n", WallTime()-time);
    printf("max: %f\n", max);
  }

  freeMatrix(u);
  freeVector(grid);
  freeMatrix(ctx.A);

  close_app();
  return 0;
}
예제 #7
0
파일: poisson1D.c 프로젝트: akva2/tma4280
int main(int argc, char** argv)
{
  int i, j, N, flag;
  Matrix A=NULL, Q=NULL;
  Vector b, grid, e, lambda=NULL;
  double time, sum, h, tol=1e-4;

  if (argc < 3) {
    printf("need two parameters, N and flag [and tolerance]\n");
    printf(" - N is the problem size (in each direction\n");
    printf(" - flag = 1  -> Dense LU\n");
    printf(" - flag = 2  -> Dense Cholesky\n");
    printf(" - flag = 3  -> Full Gauss-Jacobi iterations\n");
    printf(" - flag = 4  -> Full Gauss-Jacobi iterations using BLAS\n");
    printf(" - flag = 5  -> Full Gauss-Seidel iterations\n");
    printf(" - flag = 6  -> Full Gauss-Seidel iterations using BLAS\n");
    printf(" - flag = 7  -> Full CG iterations\n");
    printf(" - flag = 8  -> Matrix-less Gauss-Jacobi iterations\n");
    printf(" - flag = 9  -> Matrix-less Gauss-Seidel iterations\n");
    printf(" - flag = 10 -> Matrix-less Red-Black Gauss-Seidel iterations\n");
    printf(" - flag = 11 -> Diagonalization\n");
    printf(" - flag = 12 -> Diagonalization - FST\n");
    printf(" - flag = 13 -> Matrix-less CG iterations\n");
    return 1;
  }
  N=atoi(argv[1]);
  flag=atoi(argv[2]);
  if (argc > 3)
    tol = atof(argv[3]);
  if (N < 0) {
    printf("invalid problem size given\n");
    return 2;
  }

  if (flag < 0 || flag > 13) {
    printf("invalid flag given\n");
    return 3;
  }

  if (flag == 10 && (N-1)%2 != 0) {
    printf("need an even size for red-black iterations\n");
    return 4;
  }
  if (flag == 12 && (N & (N-1)) != 0) {
    printf("need a power-of-two for fst-based diagonalization\n");
    return 5;
  }

  h = 1.0/N;

  grid = equidistantMesh(0.0, 1.0, N);
  b = createVector(N-1);
  e = createVector(N-1);
  evalMeshInternal(b, grid, source);
  evalMeshInternal(e, grid, exact);
  scaleVector(b, pow(h, 2));
  axpy(b, e, alpha);

  if (flag < 8) {
    A = createMatrix(N-1,N-1);
    diag(A, -1, -1.0);
    diag(A, 0, 2.0+alpha);
    diag(A, 1, -1.0);
  }

  if (flag >= 11 && flag < 13)
    lambda = generateEigenValuesP1D(N-1);
  if (flag == 11)
    Q = generateEigenMatrixP1D(N-1);

  time = WallTime();

  if (flag == 1) {
    int* ipiv=NULL;
    lusolve(A, b, &ipiv);
    free(ipiv);
  } else if (flag == 2)
    llsolve(A,b,0);
  else if (flag == 3)
    printf("Gauss-Jacobi used %i iterations\n",
           GaussJacobi(A, b, tol, 10000000));
  else if (flag == 4)
    printf("Gauss-Jacobi used %i iterations\n",
           GaussJacobiBlas(A, b, tol, 10000000));
  else if (flag == 5)
    printf("Gauss-Seidel used %i iterations\n",
           GaussSeidel(A, b, tol, 10000000));
  else if (flag == 6)
    printf("Gauss-Seidel used %i iterations\n",
           GaussSeidelBlas(A, b, tol, 10000000));
  else if (flag == 7)
    printf("CG used %i iterations\n", cg(A, b, 1e-8));
  else if (flag == 8)
    printf("Gauss-Jacobi used %i iterations\n",
           GaussJacobiPoisson1D(b, tol, 10000000));
  else if (flag == 9)
    printf("Gauss-Jacobi used %i iterations\n",
           GaussSeidelPoisson1D(b, tol, 10000000));
  else if (flag == 10)
    printf("Gauss-Jacobi used %i iterations\n",
           GaussSeidelPoisson1Drb(b, tol, 10000000));
  else if (flag == 11)
           DiagonalizationPoisson1D(b,lambda,Q);
  else if (flag == 12)
           DiagonalizationPoisson1Dfst(b,lambda);
  else if (flag == 13)
    printf("CG used %i iterations\n", cgMatrixFree(Poisson1D, b, tol));

  printf("elapsed: %f\n", WallTime()-time);

  evalMeshInternal(e, grid, exact);
  axpy(b,e,-1.0);

  printf("max error: %e\n", maxNorm(b));
  
  if (A)
    freeMatrix(A);
  if (Q)
    freeMatrix(Q);
  freeVector(grid);
  freeVector(b);
  freeVector(e);
  if (lambda)
    freeVector(lambda);
  return 0;
}
예제 #8
0
int main(int argc, char** argv)
{
  int i, j, N, flag;
  Matrix A=NULL, Q=NULL;
  Vector b, grid, e, lambda=NULL;
  double time, sum, h, tol=1e-6;
  int rank, size;
  int mpi_top_coords;
  int mpi_top_sizes;

  init_app(argc, argv, &rank, &size);

  if (argc < 3) {
    printf("need two parameters, N and flag [and tolerance]\n");
    printf(" - N is the problem size (in each direction\n");
    printf(" - flag = 1  -> Matrix-free Gauss-Jacobi iterations\n");
    printf(" - flag = 2  -> Matrix-free red-black Gauss-Seidel iterations\n");
    printf(" - flag = 3  -> Matrix-free CG iterations\n");
    printf(" - flag = 4  -> Matrix-free additive schwarz preconditioned+Cholesky CG iterations\n");
    printf(" - flag = 5  -> Matrix-free additive schwarz preconditioned+CG CG iterations\n");
    return 1;
  }
  N=atoi(argv[1]);
  flag=atoi(argv[2]);
  if (argc > 3)
    tol=atof(argv[3]);

  if (N < 0) {
    if (rank == 0)
      printf("invalid problem size given\n");
    close_app();
    return 2;
  }

  if (flag < 0 || flag > 5) {
    if (rank == 0)
      printf("invalid flag given\n");
    close_app();
    return 3;
  }

  if (flag == 2 && (N-1)%2 != 0 && ((N-1)/size) % 2 != 0) {
    if (rank == 0)
      printf("need an even size (per process) for red-black iterations\n");
    close_app();
    return 4;
  }

  // setup topology
  mpi_top_coords = 0;
  mpi_top_sizes = 0;
  MPI_Dims_create(size, 1, &mpi_top_sizes);
  int periodic = 0;
  MPI_Comm comm;
  MPI_Cart_create(MPI_COMM_WORLD, 1, &mpi_top_sizes, &periodic, 0, &comm);
  MPI_Cart_coords(comm, rank, 1, &mpi_top_coords);

  b = createVectorMPI(N+1, &comm, 1, 1);
  e = createVectorMPI(N+1, &comm, 1, 1);

  grid = equidistantMesh(0.0, 1.0, N);
  h = 1.0/N;

  evalMeshDispl(b, grid, source);
  scaleVector(b, pow(h, 2));
  evalMeshDispl(e, grid, exact);
  axpy(b, e, alpha);
  b->data[0] = b->data[b->len-1] = 0.0;

  if (flag == 4) {
    int size = b->len;
    if (b->comm_rank == 0)
      size--;
    if (b->comm_rank == b->comm_size-1)
      size--;
    A1D = createMatrix(size, size);
    A1Dfactored = 0;
    diag(A1D, -1, -1.0);
    diag(A1D, 0, 2.0+alpha);
    diag(A1D, 1, -1.0);
  }

  int its=-1;
  char method[128];
  time = WallTime();
  if (flag == 1) {
    its=GaussJacobiPoisson1D(b, tol, 1000000);
    sprintf(method,"Gauss-Jacobi");
  }
  if (flag == 2) {
    its=GaussSeidelPoisson1Drb(b, tol, 1000000);
    sprintf(method,"Gauss-Seidel");
  }
  if (flag == 3) {
    its=cgMatrixFree(Poisson1D, b, tol);
    sprintf(method,"CG");
  }
  if (flag == 4 || flag == 5) {
    its=pcgMatrixFree(Poisson1D, Poisson1DPre, b, tol);
    sprintf(method,"PCG");
  }
  if (rank == 0) {
    printf("%s used %i iterations\n", method, its);
    printf("elapsed: %f\n", WallTime()-time);
  }

  evalMeshDispl(e, grid, exact);
  axpy(b,e,-1.0);
  b->data[0] = b->data[b->len-1] = 0.0;

  h = maxNorm(b);
  if (rank == 0)
    printf("max error: %e\n", h);
  
  if (A)
    freeMatrix(A);
  if (Q)
    freeMatrix(Q);
  freeVector(grid);
  freeVector(b);
  freeVector(e);
  if (lambda)
    freeVector(lambda);
  if (A1D)
    freeMatrix(A1D);

  MPI_Comm_free(&comm);

  close_app();
  return 0;
}
예제 #9
0
int main(int argc, char** argv) 
{
	int dim_n = DIM_N;
	int dim_m = DIM_M;
	int dim_k = DIM_K;
	//      
	if (argc == 4)
	{
		dim_n = atoi(argv[1]);
		dim_m = atoi(argv[2]);
		dim_k = atoi(argv[3]);
	}
	int local_k;
	//
#ifdef MPI
	MPI_Init(&argc, &argv);
	int num_tasks;
	int my_rank;
	MPI_Comm_size(MPI_COMM_WORLD, &num_tasks);
	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
	local_k = dim_k/num_tasks; 
#endif
	//
	double* restrict storage1 = (double*)malloc(dim_n*dim_m*dim_k*sizeof(double));
	double* restrict storage2 = (double*)malloc(dim_n*dim_m*dim_k*sizeof(double));

	printf("3x3 stencil: M=%d N=%d K=%d global.\n", dim_n, dim_m, dim_k);

	printf("array size = %f MB\n", (double) dim_n*dim_m*dim_k*sizeof(double)/1024/1024);

	double alloctime = -myseconds();
	init (storage1, dim_n, dim_m, dim_k);
	init (storage2, dim_n, dim_m, dim_k);
	alloctime += myseconds();
	printf("Allocation time = %f s.\n\n", alloctime);

	int n_iters = 0;
	int nrep    = NREP;
	int ii;

	double * st_read  = storage2;
	double * st_write = storage1;

	double phi;
	double norminf, norml2;
	double time = - myseconds();

	//
	do 
	{
		// swaping the solutions
		double *tmp = st_read;
		st_read     = st_write;
		st_write    = tmp;
		//
		++n_iters;
		//
		double ftime = -myseconds();
		//PAPI_START;
		unsigned long long c0 = cycles();
		//
		for (ii = 0; ii < nrep; ++ii)
		{
			laplacian(st_read, st_write, dim_m, dim_n, dim_k);
		}
		//
		unsigned long long c1 = cycles();
		//unsigned long long cycles = (c1 - c0)/((unsigned long long) (dim_m - 1)*(dim_n - 1));
		unsigned long long cycles = (c1 - c0)/nrep;
		//
		//PAPI_STOP;
		ftime += myseconds();
		ftime /= nrep;
		//print(st_write + dim_n*dim_m, dim_m, dim_n);
		//PAPI_PRINT;
		//
		norminf = maxNorm(st_write, st_read, dim_n*dim_m*dim_k);
		norml2   = l2Norm(st_write, st_read, dim_n*dim_m*dim_k);
		double flops = (dim_m - 2)*(dim_n - 2)*(dim_k - 2)*6.;
		//
		printf("iter = %d, %f s. %ld c., linf = %g l2 = %g, %d flops, %ld c, %f F/c, %f GF/s\n", n_iters, ftime, cycles, norminf, norml2, (long long int) flops, flops/cycles, (double) flops/ftime/1e9); 
	} while (norminf > EPSI);
	time += myseconds();
	//
	printf("\n# iter= %d time= %g residual= %g\n", n_iters, time, norml2); 
	//
	free(storage1);
	free(storage2);
	//
#ifdef MPI
	MPI_Finalize();
#endif
	return 0;
}
예제 #10
0
int main(int argc, char** argv)
{
  int rank, size;
  init_app(argc, argv, &rank, &size);

  if (argc < 2) {
    printf("usage: %s <N> [L]\n",argv[0]);
    close_app();
    return 1;
  }

  /* the total number of grid points in each spatial direction is (N+1) */
  /* the total number of degrees-of-freedom in each spatial direction is (N-1) */
  int N  = atoi(argv[1]);
  int M  = N-1;
  double L=1;
  if (argc > 2)
    L = atof(argv[2]);

  double h = L/N;
  Vector lambda = createEigenValues(M);

  Vector grid = createVector(M);
  for (int i=0;i<M;++i)
    grid->data[i] = (i+1)*h;

  Matrix u = createMatrix(M, M);
  Matrix ut = createMatrix(M, M);
  evalMesh(u->as_vec, grid, grid, poisson_source);
  scaleVector(u->as_vec, h*h);

  int NN = 4*N;
  Vector z    = createVector(NN);

  double time = WallTime();

  for (int j=0; j < M; j++)
    fst(u->data[j], &N, z->data, &NN);

  transposeMatrix(ut, u);

  for (int i=0; i < M; i++)
    fstinv(ut->data[i], &N, z->data, &NN);

  for (int j=0; j < M; j++)
    for (int i=0; i < M; i++)
      ut->data[j][i] /= lambda->data[i]+lambda->data[j];

  for (int i=0; i < M; i++)
    fst(ut->data[i], &N, z->data, &NN);

  transposeMatrix(u, ut);

  for (int j=0; j < M; j++)
    fstinv(u->data[j], &N, z->data, &NN);

  evalMesh2(u->as_vec, grid, grid, exact_solution, -1.0);
  double max = maxNorm(u->as_vec);

  if (rank == 0) {
    printf("elapsed: %f\n", WallTime()-time);
    printf("max: %f\n", max);
  }

  freeMatrix(u);
  freeMatrix(ut);
  freeVector(grid);
  freeVector(z);
  freeVector(lambda);

  close_app();
  return 0;
}
예제 #11
0
파일: twosolve.c 프로젝트: Anastien/ngspice
int
TWOnewDelta(TWOdevice *pDevice, BOOLEAN tranAnalysis, TWOtranInfo *info)
{
  int index, iterNum = 0;
  double newNorm;
  double fib, lambda, fibn, fibp;
  BOOLEAN acceptable = FALSE, error = FALSE;

  lambda = 1.0;
  fibn = 1.0;
  fibp = 1.0;

  /*
   * copy the contents of dcSolution into copiedSolution and modify
   * dcSolution by adding the deltaSolution
   */

  for (index = 1; index <= pDevice->numEqns; ++index) {
    pDevice->copiedSolution[index] = pDevice->dcSolution[index];
    pDevice->dcSolution[index] += pDevice->dcDeltaSolution[index];
  }

  if (pDevice->poissonOnly) {
    TWOQrhsLoad(pDevice);
  } else if (!OneCarrier) {
    TWO_rhsLoad(pDevice, tranAnalysis, info);
  } else if (OneCarrier == N_TYPE) {
    TWONrhsLoad(pDevice, tranAnalysis, info);
  } else if (OneCarrier == P_TYPE) {
    TWOPrhsLoad(pDevice, tranAnalysis, info);
  }
  newNorm = maxNorm(pDevice->rhs, pDevice->numEqns);
  if (pDevice->rhsNorm <= pDevice->abstol) {
    lambda = 0.0;
    newNorm = pDevice->rhsNorm;
  } else if (newNorm < pDevice->rhsNorm) {
    acceptable = TRUE;
  } else {
    /* chop the step size so that deltax is acceptable */
    if (TWOdcDebug) {
      fprintf(stdout, "          %11.4e  %11.4e\n",
	  newNorm, lambda);
    }
    while (!acceptable) {
      iterNum++;

      if (iterNum > NORM_RED_MAXITERS) {
	error = TRUE;
	lambda = 0.0;
	/* Don't break out until after we've reset the device. */
      }
      fib = fibp;
      fibp = fibn;
      fibn += fib;
      lambda *= (fibp / fibn);

      for (index = 1; index <= pDevice->numEqns; index++) {
	pDevice->dcSolution[index] = pDevice->copiedSolution[index]
	    + lambda * pDevice->dcDeltaSolution[index];
      }
      if (pDevice->poissonOnly) {
	TWOQrhsLoad(pDevice);
      } else if (!OneCarrier) {
	TWO_rhsLoad(pDevice, tranAnalysis, info);
      } else if (OneCarrier == N_TYPE) {
	TWONrhsLoad(pDevice, tranAnalysis, info);
      } else if (OneCarrier == P_TYPE) {
	TWOPrhsLoad(pDevice, tranAnalysis, info);
      }
      newNorm = maxNorm(pDevice->rhs, pDevice->numEqns);
      if (error) {
	break;
      }
      if (newNorm <= pDevice->rhsNorm) {
	acceptable = TRUE;
      }
      if (TWOdcDebug) {
	fprintf(stdout, "          %11.4e  %11.4e\n",
	    newNorm, lambda);
      }
    }
  }
  /* restore the previous dcSolution and the new deltaSolution */
  pDevice->rhsNorm = newNorm;
  for (index = 1; index <= pDevice->numEqns; index++) {
    pDevice->dcSolution[index] = pDevice->copiedSolution[index];
    pDevice->dcDeltaSolution[index] *= lambda;
  }
  return (error);
}
예제 #12
0
파일: twosolve.c 프로젝트: Anastien/ngspice
/* the iteration driving loop and convergence check */
void
TWOdcSolve(TWOdevice *pDevice, int iterationLimit, BOOLEAN newSolver, 
           BOOLEAN tranAnalysis, TWOtranInfo *info)
{
  TWOnode *pNode;
  TWOelem *pElem;
  int size = pDevice->numEqns;
  int index, eIndex, error;
  int timesConverged = 0;
  BOOLEAN quitLoop;
  BOOLEAN debug;
  BOOLEAN negConc = FALSE;
  double *rhs = pDevice->rhs;
  double *solution = pDevice->dcSolution;
  double *delta = pDevice->dcDeltaSolution;
  double poissNorm, contNorm;
  double startTime, totalStartTime;
  double totalTime, loadTime, factorTime, solveTime, updateTime, checkTime;
  double orderTime = 0.0;

  totalTime = loadTime = factorTime = solveTime = updateTime = checkTime = 0.0;
  totalStartTime = SPfrontEnd->IFseconds();

  quitLoop = FALSE;
  debug = (!tranAnalysis && TWOdcDebug) || (tranAnalysis && TWOtranDebug);
  pDevice->iterationNumber = 0;
  pDevice->converged = FALSE;

  if (debug) {
    if (pDevice->poissonOnly) {
      fprintf(stdout, "Equilibrium Solution:\n");
    } else {
      fprintf(stdout, "Bias Solution:\n");
    }
    fprintf(stdout, "Iteration  RHS Norm\n");
  }
  while (!(pDevice->converged || (pDevice->iterationNumber > iterationLimit)
	  || quitLoop)) {
    pDevice->iterationNumber++;

    if ((!pDevice->poissonOnly) && (iterationLimit > 0)
	&&(!tranAnalysis)) {
      TWOjacCheck(pDevice, tranAnalysis, info);
    }

    /* LOAD */
    startTime = SPfrontEnd->IFseconds();
    if (pDevice->poissonOnly) {
      TWOQsysLoad(pDevice);
    } else if (!OneCarrier) {
      TWO_sysLoad(pDevice, tranAnalysis, info);
    } else if (OneCarrier == N_TYPE) {
      TWONsysLoad(pDevice, tranAnalysis, info);
    } else if (OneCarrier == P_TYPE) {
      TWOPsysLoad(pDevice, tranAnalysis, info);
    }
    pDevice->rhsNorm = maxNorm(rhs, size);
    loadTime += SPfrontEnd->IFseconds() - startTime;
    if (debug) {
      fprintf(stdout, "%7d   %11.4e%s\n",
	  pDevice->iterationNumber - 1, pDevice->rhsNorm,
	  negConc ? "   negative conc encountered" : "");
      negConc = FALSE;
    }

    /* FACTOR */
    startTime = SPfrontEnd->IFseconds();
    error = spFactor(pDevice->matrix);
    factorTime += SPfrontEnd->IFseconds() - startTime;
    if (newSolver) {
      if (pDevice->iterationNumber == 1) {
	orderTime = factorTime;
      } else if (pDevice->iterationNumber == 2) {
	orderTime -= factorTime - orderTime;
	factorTime -= orderTime;
	if (pDevice->poissonOnly) {
	  pDevice->pStats->orderTime[STAT_SETUP] += orderTime;
	} else {
	  pDevice->pStats->orderTime[STAT_DC] += orderTime;
	}
	newSolver = FALSE;
      }
    }
    if (foundError(error)) {
      if (error == spSINGULAR) {
	int badRow, badCol;
	spWhereSingular(pDevice->matrix, &badRow, &badCol);
	printf("*****  singular at (%d,%d)\n", badRow, badCol);
      }
      pDevice->converged = FALSE;
      quitLoop = TRUE;
      continue;
    }

    /* SOLVE */
    startTime = SPfrontEnd->IFseconds();
    spSolve(pDevice->matrix, rhs, delta, NULL, NULL);
    solveTime += SPfrontEnd->IFseconds() - startTime;

    /* UPDATE */
    startTime = SPfrontEnd->IFseconds();
    /* Use norm reducing Newton method for DC bias solutions only. */
    if ((!pDevice->poissonOnly) && (iterationLimit > 0)
	&& (!tranAnalysis) && (pDevice->rhsNorm > 1e-1)) {
      error = TWOnewDelta(pDevice, tranAnalysis, info);
      if (error) {
	pDevice->converged = FALSE;
	quitLoop = TRUE;
	updateTime += SPfrontEnd->IFseconds() - startTime;
	continue;
      }
    }
    for (index = 1; index <= size; index++) {
      solution[index] += delta[index];
    }
    updateTime += SPfrontEnd->IFseconds() - startTime;

    /* CHECK CONVERGENCE */
    startTime = SPfrontEnd->IFseconds();
    /* Check if updates have gotten sufficiently small. */
    if (pDevice->iterationNumber != 1) {
      pDevice->converged = TWOdeltaConverged(pDevice);
    }
    /* Check if the residual is smaller than abstol. */
    if (pDevice->converged && (!pDevice->poissonOnly)
	&& (!tranAnalysis)) {
      if (!OneCarrier) {
	TWO_rhsLoad(pDevice, tranAnalysis, info);
      } else if (OneCarrier == N_TYPE) {
	TWONrhsLoad(pDevice, tranAnalysis, info);
      } else if (OneCarrier == P_TYPE) {
	TWOPrhsLoad(pDevice, tranAnalysis, info);
      }
      pDevice->rhsNorm = maxNorm(rhs, size);
      if (pDevice->rhsNorm > pDevice->abstol) {
	pDevice->converged = FALSE;
      }
      if ((++timesConverged >= 2)
	  && (pDevice->rhsNorm < 1e3 * pDevice->abstol)) {
	pDevice->converged = TRUE;
      } else if (timesConverged >= 5) {
	pDevice->converged = FALSE;
	quitLoop = TRUE;
	continue;
      }
    } else if (pDevice->converged && pDevice->poissonOnly) {
      TWOQrhsLoad(pDevice);
      pDevice->rhsNorm = maxNorm(rhs, size);
      if (pDevice->rhsNorm > pDevice->abstol) {
	pDevice->converged = FALSE;
      }
      if (++timesConverged >= 5) {
	pDevice->converged = TRUE;
      }
    }
    /* Check if the carrier concentrations are negative. */
    if (pDevice->converged && (!pDevice->poissonOnly)) {
      /* Clear garbage entry since carrier-free elements reference it. */
      solution[0] = 0.0;
      for (eIndex = 1; eIndex <= pDevice->numElems; eIndex++) {
	pElem = pDevice->elements[eIndex];
	for (index = 0; index <= 3; index++) {
	  if (pElem->evalNodes[index]) {
	    pNode = pElem->pNodes[index];
	    if (solution[pNode->nEqn] < 0.0) {
	      pDevice->converged = FALSE;
	      negConc = TRUE;
	      if (tranAnalysis) {
		quitLoop = TRUE;
	      } else {
		solution[pNode->nEqn] = 0.0;
	      }
	    }
	    if (solution[pNode->pEqn] < 0.0) {
	      pDevice->converged = FALSE;
	      negConc = TRUE;
	      if (tranAnalysis) {
		quitLoop = TRUE;
	      } else {
		solution[pNode->pEqn] = 0.0;
	      }
	    }
	  }
	}
      }
      if (!pDevice->converged) {
	if (!OneCarrier) {
	  TWO_rhsLoad(pDevice, tranAnalysis, info);
	} else if (OneCarrier == N_TYPE) {
	  TWONrhsLoad(pDevice, tranAnalysis, info);
	} else if (OneCarrier == P_TYPE) {
	  TWOPrhsLoad(pDevice, tranAnalysis, info);
	}
	pDevice->rhsNorm = maxNorm(rhs, size);
      }
    }
    checkTime += SPfrontEnd->IFseconds() - startTime;
  }
  totalTime += SPfrontEnd->IFseconds() - totalStartTime;

  if (tranAnalysis) {
    pDevice->pStats->loadTime[STAT_TRAN] += loadTime;
    pDevice->pStats->factorTime[STAT_TRAN] += factorTime;
    pDevice->pStats->solveTime[STAT_TRAN] += solveTime;
    pDevice->pStats->updateTime[STAT_TRAN] += updateTime;
    pDevice->pStats->checkTime[STAT_TRAN] += checkTime;
    pDevice->pStats->numIters[STAT_TRAN] += pDevice->iterationNumber;
  } else if (pDevice->poissonOnly) {
    pDevice->pStats->loadTime[STAT_SETUP] += loadTime;
    pDevice->pStats->factorTime[STAT_SETUP] += factorTime;
    pDevice->pStats->solveTime[STAT_SETUP] += solveTime;
    pDevice->pStats->updateTime[STAT_SETUP] += updateTime;
    pDevice->pStats->checkTime[STAT_SETUP] += checkTime;
    pDevice->pStats->numIters[STAT_SETUP] += pDevice->iterationNumber;
  } else {
    pDevice->pStats->loadTime[STAT_DC] += loadTime;
    pDevice->pStats->factorTime[STAT_DC] += factorTime;
    pDevice->pStats->solveTime[STAT_DC] += solveTime;
    pDevice->pStats->updateTime[STAT_DC] += updateTime;
    pDevice->pStats->checkTime[STAT_DC] += checkTime;
    pDevice->pStats->numIters[STAT_DC] += pDevice->iterationNumber;
  }

  if (debug) {
    if (!tranAnalysis) {
      pDevice->rhsNorm = maxNorm(rhs, size);
      fprintf(stdout, "%7d   %11.4e%s\n",
	  pDevice->iterationNumber, pDevice->rhsNorm,
	  negConc ? "   negative conc in solution" : "");
    }
    if (pDevice->converged) {
      if (!pDevice->poissonOnly) {
	poissNorm = contNorm = 0.0;
	rhs[0] = 0.0;		/* Make sure garbage entry is clear. */
	for (eIndex = 1; eIndex <= pDevice->numElems; eIndex++) {
	  pElem = pDevice->elements[eIndex];
	  for (index = 0; index <= 3; index++) {
	    if (pElem->evalNodes[index]) {
	      pNode = pElem->pNodes[index];
	      poissNorm = MAX(poissNorm,ABS(rhs[pNode->psiEqn]));
	      contNorm = MAX(contNorm,ABS(rhs[pNode->nEqn]));
	      contNorm = MAX(contNorm,ABS(rhs[pNode->pEqn]));
	    }
	  }
	}
	fprintf(stdout,
	    "Residual: %11.4e C/um poisson, %11.4e A/um continuity\n",
	    poissNorm * EpsNorm * VNorm * 1e-4,
	    contNorm * JNorm * LNorm * 1e-4);
      } else {
	fprintf(stdout, "Residual: %11.4e C/um poisson\n",
	    pDevice->rhsNorm * EpsNorm * VNorm * 1e-4);
      }
    }
  }
}
예제 #13
0
int main(int argc, char** argv)
{
  int rank, size;
  init_app(argc, argv, &rank, &size);

  if (argc < 2) {
    printf("usage: %s <N> [L]\n",argv[0]);
    close_app();
    return 1;
  }

  /* the total number of grid points in each spatial direction is (N+1) */
  /* the total number of degrees-of-freedom in each spatial direction is (N-1) */
  int N  = atoi(argv[1]);
  int M  = N-1;
  double L=1.0;
  if (argc > 2)
    L = atof(argv[2]);

  double h = L/N;

  Vector grid = createVector(M);
  for (int i=0;i<M;++i)
    grid->data[i] = (i+1)*h;

  int coords[2] = {0};
  int sizes[2] = {1};
#ifdef HAVE_MPI
  sizes[0] = sizes[1] = 0;
  MPI_Dims_create(size,2,sizes);
  int periodic[2];
  periodic[0] = periodic[1] = 0;
  MPI_Comm comm;
  MPI_Cart_create(MPI_COMM_WORLD,2,sizes,periodic,0,&comm);
  MPI_Cart_coords(comm,rank,2,coords);
#endif

  int* len[2];
  int* displ[2];
  splitVector(M, sizes[0], &len[0], &displ[0]);
  splitVector(M, sizes[1], &len[1], &displ[1]);

#ifdef HAVE_MPI
  Matrix u = createMatrixMPI(len[0][coords[0]]+2, len[1][coords[1]]+2, M, M, &comm);
#else
  Matrix u = createMatrix(M+2, M+2);
#endif
  evalMeshDispl(u, grid, grid, poisson_source,
                displ[0][coords[0]], displ[1][coords[1]]);
  scaleVector(u->as_vec, h*h);

  double time = WallTime();
  GS(u, 1e-6, 5000);

  evalMesh2Displ(u, grid, grid, exact_solution, -1.0,
                 displ[0][coords[0]], displ[1][coords[1]]);
  double max = maxNorm(u->as_vec);

  if (rank == 0) {
    printf("elapsed: %f\n", WallTime()-time);
    printf("max: %f\n", max);
  }

  freeMatrix(u);
  freeVector(grid);
  for (int i=0;i<2;++i) {
    free(len[i]);
    free(displ[i]);
  }

  close_app();
  return 0;
}