Example #1
0
/*
 * This function returns the solution of Ax = b
 *
 * The function employs LU decomposition:
 * If A=L U with L lower and U upper triangular, then the original system
 * amounts to solving
 * L y = b, U x = y
 *
 * A is mxm, b is mx1
 *
 * The function returns 0 in case of error, 1 if successful
 *
 * This function is often called repetitively to solve problems of identical
 * dimensions. To avoid repetitive malloc's and free's, allocated memory is
 * retained between calls and free'd-malloc'ed when not of the appropriate size.
 * A call with NULL as the first argument forces this memory to be released.
 */
int AX_EQ_B_LU(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
{
__STATIC__ LM_REAL *buf=NULL;
__STATIC__ int buf_sz=0;

int a_sz, ipiv_sz, tot_sz;
register int i, j;
int info, *ipiv, nrhs=1;
LM_REAL *a;
   
    if(!A)
#ifdef LINSOLVERS_RETAIN_MEMORY
    {
      if(buf) free(buf);
      buf=NULL;
      buf_sz=0;

      return 1;
    }
#else
      return 1; /* NOP */
#endif /* LINSOLVERS_RETAIN_MEMORY */
   
    /* calculate required memory size */
    ipiv_sz=m;
    a_sz=m*m;
    tot_sz=a_sz*sizeof(LM_REAL) + ipiv_sz*sizeof(int); /* should be arranged in that order for proper doubles alignment */

#ifdef LINSOLVERS_RETAIN_MEMORY
    if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
      if(buf) free(buf); /* free previously allocated memory */

      buf_sz=tot_sz;
      buf=(LM_REAL *)malloc(buf_sz);
      if(!buf){
        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
        exit(1);
      }
    }
#else
      buf_sz=tot_sz;
      buf=(LM_REAL *)malloc(buf_sz);
      if(!buf){
        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
        exit(1);
      }
#endif /* LINSOLVERS_RETAIN_MEMORY */

    a=buf;
    ipiv=(int *)(a+a_sz);

    /* store A (column major!) into a and B into x */
	  for(i=0; i<m; i++){
		  for(j=0; j<m; j++)
        a[i+j*m]=A[i*m+j];

      x[i]=B[i];
    }

  /* LU decomposition for A */
	GETRF((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info);  
	if(info!=0){
		if(info<0){
      fprintf(stderr, RCAT(RCAT("argument %d of ", GETRF) " illegal in ", AX_EQ_B_LU) "()\n", -info);
			exit(1);
		}
		else{
      fprintf(stderr, RCAT(RCAT("singular matrix A for ", GETRF) " in ", AX_EQ_B_LU) "()\n");
#ifndef LINSOLVERS_RETAIN_MEMORY
      free(buf);
#endif

			return 0;
		}
	}

  /* solve the system with the computed LU */
  GETRS("N", (int *)&m, (int *)&nrhs, a, (int *)&m, ipiv, x, (int *)&m, (int *)&info);
	if(info!=0){
		if(info<0){
			fprintf(stderr, RCAT(RCAT("argument %d of ", GETRS) " illegal in ", AX_EQ_B_LU) "()\n", -info);
			exit(1);
		}
		else{
			fprintf(stderr, RCAT(RCAT("unknown error for ", GETRS) " in ", AX_EQ_B_LU) "()\n");
#ifndef LINSOLVERS_RETAIN_MEMORY
      free(buf);
#endif

			return 0;
		}
	}

#ifndef LINSOLVERS_RETAIN_MEMORY
  free(buf);
#endif

	return 1;
}
Example #2
0
int MAIN__(int argc, char *argv[]){

  FLOAT *a, *b;
  blasint *ipiv;

  blasint m, i, j, info;
  blasint unit =   1;

  int from =   1;
  int to   = 200;
  int step =   1;

  FLOAT maxerr;

  struct timeval start, stop;
  double time1, time2;

  argc--;argv++; 

  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}

  fprintf(stderr, "From : %3d  To : %3d Step = %3d\n", from, to, step);

  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }
    
  if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }
  
  if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }
  
#ifdef linux
  srandom(getpid());
#endif

  fprintf(stderr, "   SIZE       Residual     Decompose            Solve           Total\n");

  for(m = from; m <= to; m += step){
    
    fprintf(stderr, " %6d : ", (int)m);

    for(j = 0; j < m; j++){
      for(i = 0; i < m * COMPSIZE; i++){
	a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }
    }
    
    for (i = 0; i < m * COMPSIZE; ++i) b[i] = 0.;
    
    for (j = 0; j < m; ++j) {
      for (i = 0; i < m * COMPSIZE; ++i) {
	b[i] += a[i + j * m * COMPSIZE];
      }
    }

    gettimeofday( &start, (struct timezone *)0);

    GETRF (&m, &m, a, &m, ipiv, &info);

    gettimeofday( &stop, (struct timezone *)0);

    if (info) {
      fprintf(stderr, "Matrix is not singular .. %d\n", info);
      exit(1);
    }
    
    time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

    gettimeofday( &start, (struct timezone *)0);

    GETRS("N", &m, &unit, a, &m, ipiv, b, &m, &info);

    gettimeofday( &stop, (struct timezone *)0);

    if (info) {
      fprintf(stderr, "Matrix is not singular .. %d\n", info);
      exit(1);
    }
    
    time2 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

    maxerr = 0.;

    for(i = 0; i < m; i++){
#ifndef XDOUBLE
      if (maxerr < fabs(b[i * COMPSIZE] - 1.0)) maxerr = fabs(b[i * COMPSIZE] - 1.0);
#ifdef COMPLEX
      if (maxerr < fabs(b[i * COMPSIZE] + 1)) maxerr = fabs(b[i * COMPSIZE + 1]);
#endif
#else
      if (maxerr < fabsl(b[i * COMPSIZE] - 1.0L)) maxerr = fabsl(b[i * COMPSIZE] - 1.0L);
#ifdef COMPLEX
      if (maxerr < fabsl(b[i * COMPSIZE] + 1)) maxerr = fabsl(b[i * COMPSIZE + 1]);
#endif
#endif
    }
    
#ifdef XDOUBLE
    fprintf(stderr,"  %Le ", maxerr);
#else
    fprintf(stderr,"  %e ", maxerr);
#endif

    fprintf(stderr,
	    " %10.2f MFlops %10.2f MFlops %10.2f MFlops\n", 
	    COMPSIZE * COMPSIZE * 2. / 3. * (double)m * (double)m * (double)m / time1 * 1.e-6,
	    COMPSIZE * COMPSIZE * 2.      * (double)m * (double)m             / time2 * 1.e-6,
	    COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m) / (time1 + time2) * 1.e-6);

#if 0
    if (
#ifdef DOUBLE
	maxerr > 1.e-8
#else
	maxerr > 1.e-1
#endif
	) {
      fprintf(stderr, "Error is too large.\n");
      exit(1);
    }
#endif

  }

  return 0;
}
Example #3
0
File: Axb_core.c Project: vopl/sp
/*
 * This function returns the solution of Ax = b
 *
 * The function employs LU decomposition:
 * If A=L U with L lower and U upper triangular, then the original system
 * amounts to solving
 * L y = b, U x = y
 *
 * A is mxm, b is mx1
 *
 * The function returns 0 in case of error,
 * 1 if successfull
 *
 * This function is often called repetitively to solve problems of identical
 * dimensions. To avoid repetitive malloc's and free's, allocated memory is
 * retained between calls and free'd-malloc'ed when not of the appropriate size.
 * A call with NULL as the first argument forces this memory to be released.
 */
int AX_EQ_B_LU(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
{
__STATIC__ LM_REAL *buf=NULL;
__STATIC__ int buf_sz=0;

LM_REAL stackBuf[2048];
const int stackBuf_sz=2048;

int a_sz, ipiv_sz, b_sz, work_sz, tot_sz;
register int i, j;
int info, *ipiv, nrhs=1;
LM_REAL *a, *b, *work;
   
    if(!A)
#ifdef LINSOLVERS_RETAIN_MEMORY
    {
      if(buf) free(buf);
      buf=NULL;
      buf_sz=0;

      return 1;
    }
#else
      return 1; /* NOP */
#endif /* LINSOLVERS_RETAIN_MEMORY */
   
    /* calculate required memory size */
    ipiv_sz=m;
    a_sz=m*m;
    b_sz=m;
    work_sz=100*m; /* this is probably too much */
    tot_sz=ipiv_sz + a_sz + b_sz + work_sz; // ipiv_sz counted as LM_REAL here, no harm is done though

#ifdef LINSOLVERS_RETAIN_MEMORY
    if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
      if(buf) free(buf); /* free previously allocated memory */

      buf_sz=tot_sz;
      buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
      if(!buf){
        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
        exit(1);
      }
    }
#else
      buf_sz=tot_sz;
	  if(buf_sz <= stackBuf_sz)
	  {
		  buf=stackBuf;
	  }
	  else
	  {
		  buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
		  if(!buf){
			  fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
			  exit(1);
		  }
	  }
#endif /* LINSOLVERS_RETAIN_MEMORY */

    ipiv=(int *)buf;
    a=(LM_REAL *)(ipiv + ipiv_sz);
    b=a+a_sz;
    work=b+b_sz;

    /* store A (column major!) into a and B into b */
	  for(i=0; i<m; i++){
		  for(j=0; j<m; j++)
        a[i+j*m]=A[i*m+j];

      b[i]=B[i];
    }

  /* LU decomposition for A */
	GETRF((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info);  
	if(info!=0){
		if(info<0){
      fprintf(stderr, RCAT(RCAT("argument %d of ", GETRF) " illegal in ", AX_EQ_B_LU) "()\n", -info);
			exit(1);
		}
		else{
      fprintf(stderr, RCAT(RCAT("singular matrix A for ", GETRF) " in ", AX_EQ_B_LU) "()\n");
#ifndef LINSOLVERS_RETAIN_MEMORY
	  if(buf != stackBuf) free(buf);
#endif

			return 0;
		}
	}

  /* solve the system with the computed LU */
  GETRS("N", (int *)&m, (int *)&nrhs, a, (int *)&m, ipiv, b, (int *)&m, (int *)&info);
	if(info!=0){
		if(info<0){
			fprintf(stderr, RCAT(RCAT("argument %d of ", GETRS) " illegal in ", AX_EQ_B_LU) "()\n", -info);
			exit(1);
		}
		else{
			fprintf(stderr, RCAT(RCAT("unknown error for ", GETRS) " in ", AX_EQ_B_LU) "()\n");
#ifndef LINSOLVERS_RETAIN_MEMORY
			if(buf != stackBuf) free(buf);
#endif

			return 0;
		}
	}

	/* copy the result in x */
	for(i=0; i<m; i++){
		x[i]=b[i];
	}

#ifndef LINSOLVERS_RETAIN_MEMORY
	if(buf != stackBuf) free(buf);
#endif

	return 1;
}
Example #4
0
void tet_hp_cns::minvrt() {
	int i,j,k,n,tind,msgn,sgn,sind,v0;
	Array<FLT,2> spokemass;
	int last_phase, mp_phase;
	
	Array<double,1> lcl(NV), lclug(NV),lclres(NV),uavg(NV);
	Array<TinyVector<double,MXGP>,2> P(NV,NV);
	Array<TinyVector<double,MXGP>,1> u1d(NV),res1d(NV),temp1d(NV);
	Array<TinyVector<double,MXTM>,1> ucoef(NV),rcoef(NV),tcoef(NV);
	
	if (basis::tet(log2p).p > 2) {
		*gbl->log << "cns minvrt only works for p = 1 and 2" << endl;
		exit(4);
	}
	
	/* LOOP THROUGH EDGES */
	if (basis::tet(log2p).em > 0) {
		for(int eind = 0; eind<nseg;++eind) {
			/* SUBTRACT SIDE CONTRIBUTIONS TO VERTICES */         
			for (k=0; k <basis::tet(log2p).em; ++k) {
				for (i=0; i<2; ++i) {
					v0 = seg(eind).pnt(i);
					for(n=0;n<NV;++n)
						gbl->res.v(v0,n) -= basis::tet(log2p).sfmv(i,k)*gbl->res.e(eind,k,n);
				}
			}
		}		
	}
	
	gbl->res.v(Range(0,npnt-1),Range::all()) *= gbl->vprcn(Range(0,npnt-1),Range::all())*basis::tet(log2p).vdiag;

	/* LOOP THROUGH VERTICES */
	for(int i=0;i<npnt;++i){
		
		for(int n = 0; n < NV; ++n)
			lclres(n) = gbl->res.v(i,n);

		
		if(gbl->preconditioner == 0 || gbl->preconditioner == 1) {
			for(int n = 0; n < NV; ++n)
				lclug(n) = ug.v(i,n);

			switch_variables(lclug,lclres);

			for(int j=0;j<NV;++j){
				FLT lcl0 = lclres(j);
				for(int k=0;k<j;++k){
					lcl0 -= gbl->vpreconditioner(i,j,k)*lclres(k);
				}
				lclres(j) = lcl0/gbl->vpreconditioner(i,j,j);
			}
		}
		else {		
			int info,ipiv[NV];
			Array<double,2> P(NV,NV);
			
			for(int j=0;j<NV;++j)
				for(int k=0;k<NV;++k)
					P(j,k) = gbl->vpreconditioner(i,j,k);
				
			GETRF(NV, NV, P.data(), NV, ipiv, info);

			if (info != 0) {
				*gbl->log << "DGETRF FAILED FOR CNS MINVRT" << std::endl;
				sim::abort(__LINE__,__FILE__,gbl->log);
			}
			
			char trans[] = "T";
			GETRS(trans,NV,1,P.data(),NV,ipiv,lclres.data(),NV,info);
		}
		
		for(int n = 0; n < NV; ++n)
			gbl->res.v(i,n) = lclres(n);
		
	}
	
	for(last_phase = false, mp_phase = 0; !last_phase; ++mp_phase) {
		pc0load(mp_phase,gbl->res.v.data());
		pmsgpass(boundary::all_phased,mp_phase,boundary::symmetric);
		last_phase = true;
		last_phase &= pc0wait_rcv(mp_phase,gbl->res.v.data());
	}

	/* APPLY VERTEX DIRICHLET B.C.'S */
	for(i=0;i<nfbd;++i)
		hp_fbdry(i)->vdirichlet();
	
	for(i=0;i<nebd;++i)
		hp_ebdry(i)->vdirichlet3d();        
	
	for(i=0;i<nvbd;++i)
		hp_vbdry(i)->vdirichlet3d();
	
	if(basis::tet(log2p).em == 0) return;
	
	/* LOOP THROUGH SIDES */    
	for(int sind=0;sind<nseg;++sind) {
		
		for(int n = 0; n < NV; ++n)
			lclres(n) = gbl->res.e(sind,0,n);

		Array<FLT,2> P(NV,NV);
		for(int j=0;j<NV;++j){
			for(int k=0;k<NV;++k){
				P(j,k) = gbl->epreconditioner(sind,j,k);
				//P(j,k) = 0.5*(gbl->vpreconditioner(seg(sind).pnt(0),j,k)+gbl->vpreconditioner(seg(sind).pnt(1),j,k));
			}
		}

		if(gbl->preconditioner == 0 || gbl->preconditioner == 1) {
			for(int n = 0; n < NV; ++n)
				uavg(n) = 0.5*(ug.v(seg(sind).pnt(0),n)+ug.v(seg(sind).pnt(1),n));
				
			switch_variables(uavg,lclres);
			
			for(int j=0;j<NV;++j){
				FLT lcl0 = lclres(j);
				for(int k=0;k<j;++k){
					lcl0 -= P(j,k)*lclres(k);
				}
				lclres(j) = lcl0/P(j,j);
			}
		}
		else {
			int info,ipiv[NV];
			
			GETRF(NV, NV, P.data(), NV, ipiv, info);
			
			if (info != 0) {
				*gbl->log << "DGETRF FAILED FOR CNS MINVRT EDGE" << std::endl;
				sim::abort(__LINE__,__FILE__,gbl->log);
			}
			
			char trans[] = "T";
			GETRS(trans,NV,1,P.data(),NV,ipiv,lclres.data(),NV,info);
		}
		
		for(int n = 0; n < NV; ++n)
			gbl->res.e(sind,0,n) = lclres(n);
		
	}
	
	/* REMOVE VERTEX CONTRIBUTION FROM SIDE MODES */
	/* SOLVE FOR SIDE MODES */
	/* PART 1 REMOVE VERTEX CONTRIBUTIONS */
	for(tind=0;tind<ntet;++tind) {         
		for(i=0;i<4;++i) {
			v0 = tet(tind).pnt(i);
			for(n=0;n<NV;++n)
				uht(n)(i) = gbl->res.v(v0,n)*gbl->iprcn(tind,n);
		}
		/* edges */
		for(i=0;i<6;++i) {
			sind = tet(tind).seg(i);
			sgn  = tet(tind).sgn(i);
			for(j=0;j<4;++j) {
				msgn = 1;
				for(k=0;k<basis::tet(log2p).em;++k) {
					for(n=0;n<NV;++n)
						gbl->res.e(sind,k,n) -= msgn*basis::tet(log2p).vfms(j,4+k+i*basis::tet(log2p).em)*uht(n)(j);
					msgn *= sgn;
				}
			}
		}				
	}
	
	
	basis::tet(log2p).ediag(0) = 100.0;//for fast convergence 
	//basis::tet(log2p).ediag(0) = 48.0; //for accuracy mass lumped edge modes
	gbl->res.e(Range(0,nseg-1),0,Range::all()) *= gbl->eprcn(Range(0,nseg-1),Range::all())*basis::tet(log2p).ediag(0);
	
	for(last_phase = false, mp_phase = 0; !last_phase; ++mp_phase) {
		sc0load(mp_phase,gbl->res.e.data(),0,0,gbl->res.e.extent(secondDim));
		smsgpass(boundary::all_phased,mp_phase,boundary::symmetric);
		last_phase = true;
		last_phase &= sc0wait_rcv(mp_phase,gbl->res.e.data(),0,0,gbl->res.e.extent(secondDim));
	}
	
	/* APPLY DIRCHLET B.C.S TO MODE */
	for(int i=0;i<nfbd;++i)
		hp_fbdry(i)->edirichlet();
	
	for (int i=0;i<nebd;++i) 
		hp_ebdry(i)->edirichlet3d();	
	
	return;
}