/* * This function returns the solution of Ax = b * * The function employs LU decomposition: * If A=L U with L lower and U upper triangular, then the original system * amounts to solving * L y = b, U x = y * * A is mxm, b is mx1 * * The function returns 0 in case of error, 1 if successful * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */ int AX_EQ_B_LU(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m) { __STATIC__ LM_REAL *buf=NULL; __STATIC__ int buf_sz=0; int a_sz, ipiv_sz, tot_sz; register int i, j; int info, *ipiv, nrhs=1; LM_REAL *a; if(!A) #ifdef LINSOLVERS_RETAIN_MEMORY { if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } #else return 1; /* NOP */ #endif /* LINSOLVERS_RETAIN_MEMORY */ /* calculate required memory size */ ipiv_sz=m; a_sz=m*m; tot_sz=a_sz*sizeof(LM_REAL) + ipiv_sz*sizeof(int); /* should be arranged in that order for proper doubles alignment */ #ifdef LINSOLVERS_RETAIN_MEMORY if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n"); exit(1); } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; ipiv=(int *)(a+a_sz); /* store A (column major!) into a and B into x */ for(i=0; i<m; i++){ for(j=0; j<m; j++) a[i+j*m]=A[i*m+j]; x[i]=B[i]; } /* LU decomposition for A */ GETRF((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info); if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("argument %d of ", GETRF) " illegal in ", AX_EQ_B_LU) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT(RCAT("singular matrix A for ", GETRF) " in ", AX_EQ_B_LU) "()\n"); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } /* solve the system with the computed LU */ GETRS("N", (int *)&m, (int *)&nrhs, a, (int *)&m, ipiv, x, (int *)&m, (int *)&info); if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("argument %d of ", GETRS) " illegal in ", AX_EQ_B_LU) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT(RCAT("unknown error for ", GETRS) " in ", AX_EQ_B_LU) "()\n"); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 1; }
int MAIN__(int argc, char *argv[]){ FLOAT *a, *b; blasint *ipiv; blasint m, i, j, info; blasint unit = 1; int from = 1; int to = 200; int step = 1; FLOAT maxerr; struct timeval start, stop; double time1, time2; argc--;argv++; if (argc > 0) { from = atol(*argv); argc--; argv++;} if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} if (argc > 0) { step = atol(*argv); argc--; argv++;} fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step); if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ fprintf(stderr,"Out of Memory!!\n");exit(1); } if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){ fprintf(stderr,"Out of Memory!!\n");exit(1); } if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){ fprintf(stderr,"Out of Memory!!\n");exit(1); } #ifdef linux srandom(getpid()); #endif fprintf(stderr, " SIZE Residual Decompose Solve Total\n"); for(m = from; m <= to; m += step){ fprintf(stderr, " %6d : ", (int)m); for(j = 0; j < m; j++){ for(i = 0; i < m * COMPSIZE; i++){ a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; } } for (i = 0; i < m * COMPSIZE; ++i) b[i] = 0.; for (j = 0; j < m; ++j) { for (i = 0; i < m * COMPSIZE; ++i) { b[i] += a[i + j * m * COMPSIZE]; } } gettimeofday( &start, (struct timezone *)0); GETRF (&m, &m, a, &m, ipiv, &info); gettimeofday( &stop, (struct timezone *)0); if (info) { fprintf(stderr, "Matrix is not singular .. %d\n", info); exit(1); } time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; gettimeofday( &start, (struct timezone *)0); GETRS("N", &m, &unit, a, &m, ipiv, b, &m, &info); gettimeofday( &stop, (struct timezone *)0); if (info) { fprintf(stderr, "Matrix is not singular .. %d\n", info); exit(1); } time2 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; maxerr = 0.; for(i = 0; i < m; i++){ #ifndef XDOUBLE if (maxerr < fabs(b[i * COMPSIZE] - 1.0)) maxerr = fabs(b[i * COMPSIZE] - 1.0); #ifdef COMPLEX if (maxerr < fabs(b[i * COMPSIZE] + 1)) maxerr = fabs(b[i * COMPSIZE + 1]); #endif #else if (maxerr < fabsl(b[i * COMPSIZE] - 1.0L)) maxerr = fabsl(b[i * COMPSIZE] - 1.0L); #ifdef COMPLEX if (maxerr < fabsl(b[i * COMPSIZE] + 1)) maxerr = fabsl(b[i * COMPSIZE + 1]); #endif #endif } #ifdef XDOUBLE fprintf(stderr," %Le ", maxerr); #else fprintf(stderr," %e ", maxerr); #endif fprintf(stderr, " %10.2f MFlops %10.2f MFlops %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. / 3. * (double)m * (double)m * (double)m / time1 * 1.e-6, COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / time2 * 1.e-6, COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m) / (time1 + time2) * 1.e-6); #if 0 if ( #ifdef DOUBLE maxerr > 1.e-8 #else maxerr > 1.e-1 #endif ) { fprintf(stderr, "Error is too large.\n"); exit(1); } #endif } return 0; }
/* * This function returns the solution of Ax = b * * The function employs LU decomposition: * If A=L U with L lower and U upper triangular, then the original system * amounts to solving * L y = b, U x = y * * A is mxm, b is mx1 * * The function returns 0 in case of error, * 1 if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */ int AX_EQ_B_LU(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m) { __STATIC__ LM_REAL *buf=NULL; __STATIC__ int buf_sz=0; LM_REAL stackBuf[2048]; const int stackBuf_sz=2048; int a_sz, ipiv_sz, b_sz, work_sz, tot_sz; register int i, j; int info, *ipiv, nrhs=1; LM_REAL *a, *b, *work; if(!A) #ifdef LINSOLVERS_RETAIN_MEMORY { if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } #else return 1; /* NOP */ #endif /* LINSOLVERS_RETAIN_MEMORY */ /* calculate required memory size */ ipiv_sz=m; a_sz=m*m; b_sz=m; work_sz=100*m; /* this is probably too much */ tot_sz=ipiv_sz + a_sz + b_sz + work_sz; // ipiv_sz counted as LM_REAL here, no harm is done though #ifdef LINSOLVERS_RETAIN_MEMORY if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; if(buf_sz <= stackBuf_sz) { buf=stackBuf; } else { buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n"); exit(1); } } #endif /* LINSOLVERS_RETAIN_MEMORY */ ipiv=(int *)buf; a=(LM_REAL *)(ipiv + ipiv_sz); b=a+a_sz; work=b+b_sz; /* store A (column major!) into a and B into b */ for(i=0; i<m; i++){ for(j=0; j<m; j++) a[i+j*m]=A[i*m+j]; b[i]=B[i]; } /* LU decomposition for A */ GETRF((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info); if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("argument %d of ", GETRF) " illegal in ", AX_EQ_B_LU) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT(RCAT("singular matrix A for ", GETRF) " in ", AX_EQ_B_LU) "()\n"); #ifndef LINSOLVERS_RETAIN_MEMORY if(buf != stackBuf) free(buf); #endif return 0; } } /* solve the system with the computed LU */ GETRS("N", (int *)&m, (int *)&nrhs, a, (int *)&m, ipiv, b, (int *)&m, (int *)&info); if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("argument %d of ", GETRS) " illegal in ", AX_EQ_B_LU) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT(RCAT("unknown error for ", GETRS) " in ", AX_EQ_B_LU) "()\n"); #ifndef LINSOLVERS_RETAIN_MEMORY if(buf != stackBuf) free(buf); #endif return 0; } } /* copy the result in x */ for(i=0; i<m; i++){ x[i]=b[i]; } #ifndef LINSOLVERS_RETAIN_MEMORY if(buf != stackBuf) free(buf); #endif return 1; }
void tet_hp_cns::minvrt() { int i,j,k,n,tind,msgn,sgn,sind,v0; Array<FLT,2> spokemass; int last_phase, mp_phase; Array<double,1> lcl(NV), lclug(NV),lclres(NV),uavg(NV); Array<TinyVector<double,MXGP>,2> P(NV,NV); Array<TinyVector<double,MXGP>,1> u1d(NV),res1d(NV),temp1d(NV); Array<TinyVector<double,MXTM>,1> ucoef(NV),rcoef(NV),tcoef(NV); if (basis::tet(log2p).p > 2) { *gbl->log << "cns minvrt only works for p = 1 and 2" << endl; exit(4); } /* LOOP THROUGH EDGES */ if (basis::tet(log2p).em > 0) { for(int eind = 0; eind<nseg;++eind) { /* SUBTRACT SIDE CONTRIBUTIONS TO VERTICES */ for (k=0; k <basis::tet(log2p).em; ++k) { for (i=0; i<2; ++i) { v0 = seg(eind).pnt(i); for(n=0;n<NV;++n) gbl->res.v(v0,n) -= basis::tet(log2p).sfmv(i,k)*gbl->res.e(eind,k,n); } } } } gbl->res.v(Range(0,npnt-1),Range::all()) *= gbl->vprcn(Range(0,npnt-1),Range::all())*basis::tet(log2p).vdiag; /* LOOP THROUGH VERTICES */ for(int i=0;i<npnt;++i){ for(int n = 0; n < NV; ++n) lclres(n) = gbl->res.v(i,n); if(gbl->preconditioner == 0 || gbl->preconditioner == 1) { for(int n = 0; n < NV; ++n) lclug(n) = ug.v(i,n); switch_variables(lclug,lclres); for(int j=0;j<NV;++j){ FLT lcl0 = lclres(j); for(int k=0;k<j;++k){ lcl0 -= gbl->vpreconditioner(i,j,k)*lclres(k); } lclres(j) = lcl0/gbl->vpreconditioner(i,j,j); } } else { int info,ipiv[NV]; Array<double,2> P(NV,NV); for(int j=0;j<NV;++j) for(int k=0;k<NV;++k) P(j,k) = gbl->vpreconditioner(i,j,k); GETRF(NV, NV, P.data(), NV, ipiv, info); if (info != 0) { *gbl->log << "DGETRF FAILED FOR CNS MINVRT" << std::endl; sim::abort(__LINE__,__FILE__,gbl->log); } char trans[] = "T"; GETRS(trans,NV,1,P.data(),NV,ipiv,lclres.data(),NV,info); } for(int n = 0; n < NV; ++n) gbl->res.v(i,n) = lclres(n); } for(last_phase = false, mp_phase = 0; !last_phase; ++mp_phase) { pc0load(mp_phase,gbl->res.v.data()); pmsgpass(boundary::all_phased,mp_phase,boundary::symmetric); last_phase = true; last_phase &= pc0wait_rcv(mp_phase,gbl->res.v.data()); } /* APPLY VERTEX DIRICHLET B.C.'S */ for(i=0;i<nfbd;++i) hp_fbdry(i)->vdirichlet(); for(i=0;i<nebd;++i) hp_ebdry(i)->vdirichlet3d(); for(i=0;i<nvbd;++i) hp_vbdry(i)->vdirichlet3d(); if(basis::tet(log2p).em == 0) return; /* LOOP THROUGH SIDES */ for(int sind=0;sind<nseg;++sind) { for(int n = 0; n < NV; ++n) lclres(n) = gbl->res.e(sind,0,n); Array<FLT,2> P(NV,NV); for(int j=0;j<NV;++j){ for(int k=0;k<NV;++k){ P(j,k) = gbl->epreconditioner(sind,j,k); //P(j,k) = 0.5*(gbl->vpreconditioner(seg(sind).pnt(0),j,k)+gbl->vpreconditioner(seg(sind).pnt(1),j,k)); } } if(gbl->preconditioner == 0 || gbl->preconditioner == 1) { for(int n = 0; n < NV; ++n) uavg(n) = 0.5*(ug.v(seg(sind).pnt(0),n)+ug.v(seg(sind).pnt(1),n)); switch_variables(uavg,lclres); for(int j=0;j<NV;++j){ FLT lcl0 = lclres(j); for(int k=0;k<j;++k){ lcl0 -= P(j,k)*lclres(k); } lclres(j) = lcl0/P(j,j); } } else { int info,ipiv[NV]; GETRF(NV, NV, P.data(), NV, ipiv, info); if (info != 0) { *gbl->log << "DGETRF FAILED FOR CNS MINVRT EDGE" << std::endl; sim::abort(__LINE__,__FILE__,gbl->log); } char trans[] = "T"; GETRS(trans,NV,1,P.data(),NV,ipiv,lclres.data(),NV,info); } for(int n = 0; n < NV; ++n) gbl->res.e(sind,0,n) = lclres(n); } /* REMOVE VERTEX CONTRIBUTION FROM SIDE MODES */ /* SOLVE FOR SIDE MODES */ /* PART 1 REMOVE VERTEX CONTRIBUTIONS */ for(tind=0;tind<ntet;++tind) { for(i=0;i<4;++i) { v0 = tet(tind).pnt(i); for(n=0;n<NV;++n) uht(n)(i) = gbl->res.v(v0,n)*gbl->iprcn(tind,n); } /* edges */ for(i=0;i<6;++i) { sind = tet(tind).seg(i); sgn = tet(tind).sgn(i); for(j=0;j<4;++j) { msgn = 1; for(k=0;k<basis::tet(log2p).em;++k) { for(n=0;n<NV;++n) gbl->res.e(sind,k,n) -= msgn*basis::tet(log2p).vfms(j,4+k+i*basis::tet(log2p).em)*uht(n)(j); msgn *= sgn; } } } } basis::tet(log2p).ediag(0) = 100.0;//for fast convergence //basis::tet(log2p).ediag(0) = 48.0; //for accuracy mass lumped edge modes gbl->res.e(Range(0,nseg-1),0,Range::all()) *= gbl->eprcn(Range(0,nseg-1),Range::all())*basis::tet(log2p).ediag(0); for(last_phase = false, mp_phase = 0; !last_phase; ++mp_phase) { sc0load(mp_phase,gbl->res.e.data(),0,0,gbl->res.e.extent(secondDim)); smsgpass(boundary::all_phased,mp_phase,boundary::symmetric); last_phase = true; last_phase &= sc0wait_rcv(mp_phase,gbl->res.e.data(),0,0,gbl->res.e.extent(secondDim)); } /* APPLY DIRCHLET B.C.S TO MODE */ for(int i=0;i<nfbd;++i) hp_fbdry(i)->edirichlet(); for (int i=0;i<nebd;++i) hp_ebdry(i)->edirichlet3d(); return; }