void nlSparseMatrixConstruct( NLSparseMatrix* M, NLuint m, NLuint n, NLenum storage ) { NLuint i ; M->m = m ; M->n = n ; M->storage = storage ; if(storage & NL_MATRIX_STORE_ROWS) { M->row = NL_NEW_ARRAY(NLRowColumn, m) ; for(i=0; i<n; i++) { nlRowColumnConstruct(&(M->row[i])) ; } } else { M->row = NULL ; } if(storage & NL_MATRIX_STORE_COLUMNS) { M->column = NL_NEW_ARRAY(NLRowColumn, n) ; for(i=0; i<n; i++) { nlRowColumnConstruct(&(M->column[i])) ; } } else { M->column = NULL ; } M->diag_size = MIN(m,n) ; M->diag = NL_NEW_ARRAY(NLdouble, M->diag_size) ; }
NLuint nlSolve_CG_precond() { NLdouble* b = nlCurrentContext->b ; NLdouble* x = nlCurrentContext->x ; NLdouble eps = nlCurrentContext->threshold ; NLuint max_iter = nlCurrentContext->max_iterations ; NLint N = nlCurrentContext->n ; NLdouble* r = NL_NEW_ARRAY(NLdouble, N) ; NLdouble* d = NL_NEW_ARRAY(NLdouble, N) ; NLdouble* h = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *Ad = h; NLuint its=0; NLdouble rh, alpha, beta; NLdouble b_square = ddot(N,b,1,b,1); NLdouble err=eps*eps*b_square; NLint i; NLdouble * Ax=NL_NEW_ARRAY(NLdouble,nlCurrentContext->n); NLdouble accu =0.0; NLdouble curr_err; nlCurrentContext->matrix_vector_prod(x,r); daxpy(N,-1.,b,1,r,1); nlCurrentContext->precond_vector_prod(r,d); dcopy(N,d,1,h,1); rh=ddot(N,r,1,h,1); curr_err = ddot(N,r,1,r,1); while ( curr_err >err && its < max_iter) { if(!(its % 100)) { printf ( "%d : %.10e -- %.10e\n", its, curr_err, err ) ; } nlCurrentContext->matrix_vector_prod(d,Ad); alpha=rh/ddot(N,d,1,Ad,1); daxpy(N,-alpha,d,1,x,1); daxpy(N,-alpha,Ad,1,r,1); nlCurrentContext->precond_vector_prod(r,h); beta=1./rh; rh=ddot(N,r,1,h,1); beta*=rh; dscal(N,beta,d,1); daxpy(N,1.,h,1,d,1); ++its; // calcul de l'erreur courante curr_err = ddot(N,r,1,r,1); } nlCurrentContext->matrix_vector_prod(x,Ax); for(i = 0 ; i < N ; ++i) accu+=(Ax[i]-b[i])*(Ax[i]-b[i]); printf("in OpenNL : ||Ax-b||/||b|| = %e\n",sqrt(accu)/sqrt(b_square)); NL_DELETE_ARRAY(Ax); NL_DELETE_ARRAY(r) ; NL_DELETE_ARRAY(d) ; NL_DELETE_ARRAY(h) ; return its; }
NLuint nlSolve_CG() { NLdouble* b = nlCurrentContext->b ; NLdouble* x = nlCurrentContext->x ; NLdouble eps = nlCurrentContext->threshold ; NLuint max_iter = nlCurrentContext->max_iterations ; NLint N = nlCurrentContext->n ; NLdouble *g = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *r = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *p = NL_NEW_ARRAY(NLdouble, N) ; NLuint its=0; NLint i; NLdouble t, tau, sig, rho, gam; NLdouble b_square=ddot(N,b,1,b,1); NLdouble err=eps*eps*b_square; NLdouble accu =0.0; NLdouble * Ax=NL_NEW_ARRAY(NLdouble,nlCurrentContext->n); NLdouble curr_err; nlCurrentContext->matrix_vector_prod(x,g); daxpy(N,-1.,b,1,g,1); dscal(N,-1.,g,1); dcopy(N,g,1,r,1); curr_err = ddot(N,g,1,g,1); while ( curr_err >err && its < max_iter) { if(!(its % 100)) { printf ( "%d : %.10e -- %.10e\n", its, curr_err, err ) ; } nlCurrentContext->matrix_vector_prod(r,p); rho=ddot(N,p,1,p,1); sig=ddot(N,r,1,p,1); tau=ddot(N,g,1,r,1); t=tau/sig; daxpy(N,t,r,1,x,1); daxpy(N,-t,p,1,g,1); gam=(t*t*rho-tau)/tau; dscal(N,gam,r,1); daxpy(N,1.,g,1,r,1); ++its; curr_err = ddot(N,g,1,g,1); } nlCurrentContext->matrix_vector_prod(x,Ax); for(i = 0 ; i < N ; ++i) accu+=(Ax[i]-b[i])*(Ax[i]-b[i]); printf("in OpenNL : ||Ax-b||/||b|| = %e\n",sqrt(accu)/sqrt(b_square)); NL_DELETE_ARRAY(Ax); NL_DELETE_ARRAY(g) ; NL_DELETE_ARRAY(r) ; NL_DELETE_ARRAY(p) ; return its; }
void nlBeginSystem() { nlTransition(NL_STATE_INITIAL, NL_STATE_SYSTEM) ; nl_assert(nlCurrentContext->nb_variables > 0) ; nlCurrentContext->variable = NL_NEW_ARRAY( NLVariable, nlCurrentContext->nb_variables ) ; nlCurrentContext->alloc_variable = NL_TRUE ; }
void nlRowColumnGrow(NLRowColumn* c) { if(c->capacity != 0) { c->capacity = 2 * c->capacity ; c->coeff = NL_RENEW_ARRAY(NLCoeff, c->coeff, c->capacity) ; } else { c->capacity = 4 ; c->coeff = NL_NEW_ARRAY(NLCoeff, c->capacity) ; } }
void nlBeginMatrix() { NLuint i ; NLuint n = 0 ; NLenum storage = NL_MATRIX_STORE_ROWS ; nlTransition(NL_STATE_SYSTEM, NL_STATE_MATRIX) ; if(!nlCurrentContext->matrix_already_set) { for(i=0; i<nlCurrentContext->nb_variables; i++) { if(!nlCurrentContext->variable[i].locked) { nlCurrentContext->variable[i].index = n ; n++ ; } else { nlCurrentContext->variable[i].index = ~0 ; } } nlCurrentContext->n = n ; /* SSOR preconditioner requires rows and columns */ if(nlCurrentContext->preconditioner == NL_PRECOND_SSOR) { storage = (storage | NL_MATRIX_STORE_COLUMNS) ; } /* a least squares problem results in a symmetric matrix */ if(nlCurrentContext->least_squares && !nlSolverIsCNC(nlCurrentContext->solver)) { nlCurrentContext->symmetric = NL_TRUE ; } if(nlCurrentContext->symmetric) { storage = (storage | NL_MATRIX_STORE_SYMMETRIC) ; } /* SuperLU storage does not support symmetric storage */ if( nlCurrentContext->solver == NL_SUPERLU_EXT || nlCurrentContext->solver == NL_PERM_SUPERLU_EXT || nlCurrentContext->solver == NL_SYMMETRIC_SUPERLU_EXT ) { storage = (storage & ~NL_MATRIX_STORE_SYMMETRIC) ; } /* CHOLMOD storage requires columns */ if(nlCurrentContext->solver == NL_CHOLMOD_EXT) { storage = (storage & ~NL_MATRIX_STORE_ROWS) ; storage = (storage | NL_MATRIX_STORE_COLUMNS) ; } nlSparseMatrixConstruct(&nlCurrentContext->M, n, n, storage) ; nlCurrentContext->alloc_M = NL_TRUE ; nlCurrentContext->x = NL_NEW_ARRAY(NLdouble, n) ; nlCurrentContext->alloc_x = NL_TRUE ; nlCurrentContext->b = NL_NEW_ARRAY(NLdouble, n) ; nlCurrentContext->alloc_b = NL_TRUE ; nlVariablesToVector() ; nlRowColumnConstruct(&nlCurrentContext->af) ; nlCurrentContext->alloc_af = NL_TRUE ; nlRowColumnConstruct(&nlCurrentContext->al) ; nlCurrentContext->alloc_al = NL_TRUE ; nlRowColumnConstruct(&nlCurrentContext->xl) ; nlCurrentContext->alloc_xl = NL_TRUE ; nlCurrentContext->current_row = 0 ; } else { nl_assert(nlCurrentContext->alloc_M) ; nl_assert(nlCurrentContext->alloc_x) ; nl_assert(nlCurrentContext->alloc_b) ; nlRowColumnConstruct(&nlCurrentContext->af) ; nlCurrentContext->alloc_af = NL_TRUE ; nlRowColumnConstruct(&nlCurrentContext->al) ; nlCurrentContext->alloc_al = NL_TRUE ; nlRowColumnConstruct(&nlCurrentContext->xl) ; nlCurrentContext->alloc_xl = NL_TRUE ; nlCurrentContext->current_row = 0 ; } }
NLuint nlSolve_GMRES() { NLdouble* b = nlCurrentContext->b ; NLdouble* x = nlCurrentContext->x ; NLdouble eps = nlCurrentContext->threshold ; NLint max_iter = nlCurrentContext->max_iterations ; NLint n = nlCurrentContext->n ; NLint m = nlCurrentContext->inner_iterations ; typedef NLdouble *NLdoubleP; NLdouble *V = NL_NEW_ARRAY(NLdouble, n*(m+1) ) ; NLdouble *U = NL_NEW_ARRAY(NLdouble, m*(m+1)/2 ) ; NLdouble *r = NL_NEW_ARRAY(NLdouble, n ) ; NLdouble *y = NL_NEW_ARRAY(NLdouble, m+1 ) ; NLdouble *c = NL_NEW_ARRAY(NLdouble, m ) ; NLdouble *s = NL_NEW_ARRAY(NLdouble, m ) ; NLdouble **v = NL_NEW_ARRAY(NLdoubleP, m+1 ) ; NLdouble * Ax = NL_NEW_ARRAY(NLdouble,nlCurrentContext->n); NLdouble accu =0.0; NLint i, j, io, uij, u0j ; NLint its = -1 ; NLdouble beta, h, rd, dd, nrm2b ; for ( i=0; i<=m; ++i ){ v[i]=V+i*n ; } nrm2b=dnrm2(n,b,1); io=0; do { /* outer loop */ ++io; nlCurrentContext->matrix_vector_prod(x,r); daxpy(n,-1.,b,1,r,1); beta=dnrm2(n,r,1); dcopy(n,r,1,v[0],1); dscal(n,1./beta,v[0],1); y[0]=beta; j=0; uij=0; do { /* inner loop: j=0,...,m-1 */ u0j=uij; nlCurrentContext->matrix_vector_prod(v[j],v[j+1]); dgemv( Transpose,n,j+1,1.,V,n,v[j+1],1,0.,U+u0j,1 ); dgemv( NoTranspose,n,j+1,-1.,V,n,U+u0j,1,1.,v[j+1],1 ); h=dnrm2(n,v[j+1],1); dscal(n,1./h,v[j+1],1); for (i=0; i<j; ++i ) { /* rotiere neue Spalte */ double tmp = c[i]*U[uij]-s[i]*U[uij+1]; U[uij+1] = s[i]*U[uij]+c[i]*U[uij+1]; U[uij] = tmp; ++uij; } { /* berechne neue Rotation */ rd = U[uij]; dd = sqrt(rd*rd+h*h); c[j] = rd/dd; s[j] = -h/dd; U[uij] = dd; ++uij; } { /* rotiere rechte Seite y (vorher: y[j+1]=0) */ y[j+1] = s[j]*y[j]; y[j] = c[j]*y[j]; } ++j; } while ( j<m && fabs(y[j])>=eps*nrm2b ) ; { /* minimiere bzgl Y */ dtpsv( UpperTriangle, NoTranspose, NotUnitTriangular, j,U,y,1 ); /* correct X */ dgemv(NoTranspose,n,j,-1.,V,n,y,1,1.,x,1); } } while ( fabs(y[j])>=eps*nrm2b && (m*(io-1)+j) < max_iter); /* Count the inner iterations */ its = m*(io-1)+j; nlCurrentContext->matrix_vector_prod(x,Ax); for(i = 0 ; i < n ; ++i) accu+=(Ax[i]-b[i])*(Ax[i]-b[i]); printf("in OpenNL : ||Ax-b||/||b|| = %e\n",sqrt(accu)/nrm2b); NL_DELETE_ARRAY(Ax); NL_DELETE_ARRAY(V) ; NL_DELETE_ARRAY(U) ; NL_DELETE_ARRAY(r) ; NL_DELETE_ARRAY(y) ; NL_DELETE_ARRAY(c) ; NL_DELETE_ARRAY(s) ; NL_DELETE_ARRAY(v) ; return its; }
NLuint nlSolve_BICGSTAB_precond() { NLdouble* b = nlCurrentContext->b ; NLdouble* x = nlCurrentContext->x ; NLdouble eps = nlCurrentContext->threshold ; NLuint max_iter = nlCurrentContext->max_iterations ; NLint N = nlCurrentContext->n ; NLint i; NLdouble *rT = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *d = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *h = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *u = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *Sd = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *t = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *aux = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *s = h; NLdouble rTh, rTSd, rTr, alpha, beta, omega, st, tt; NLuint its=0; NLdouble b_square = ddot(N,b,1,b,1); NLdouble err = eps*eps*b_square; NLdouble *r = NL_NEW_ARRAY(NLdouble, N); NLdouble * Ax = NL_NEW_ARRAY(NLdouble,nlCurrentContext->n); NLdouble accu =0.0; nlCurrentContext->matrix_vector_prod(x,r); daxpy(N,-1.,b,1,r,1); nlCurrentContext->precond_vector_prod(r,d); dcopy(N,d,1,h,1); dcopy(N,h,1,rT,1); nl_assert( ddot(N,rT,1,rT,1)>1e-40 ); rTh=ddot(N,rT,1,h,1); rTr=ddot(N,r,1,r,1); while ( rTr>err && its < max_iter) { if(!(its % 100)) { printf ( "%d : %.10e -- %.10e\n", its, rTr, err ) ; } nlCurrentContext->matrix_vector_prod(d,aux); nlCurrentContext->precond_vector_prod(aux,Sd); rTSd=ddot(N,rT,1,Sd,1); nl_assert( fabs(rTSd)>1e-40 ); alpha=rTh/rTSd; daxpy(N,-alpha,aux,1,r,1); dcopy(N,h,1,s,1); daxpy(N,-alpha,Sd,1,s,1); nlCurrentContext->matrix_vector_prod(s,aux); nlCurrentContext->precond_vector_prod(aux,t); daxpy(N,1.,t,1,u,1); dscal(N,alpha,u,1); st=ddot(N,s,1,t,1); tt=ddot(N,t,1,t,1); if ( fabs(st)<1e-40 || fabs(tt)<1e-40 ) { omega = 0.; } else { omega = st/tt; } daxpy(N,-omega,aux,1,r,1); daxpy(N,-alpha,d,1,x,1); daxpy(N,-omega,s,1,x,1); dcopy(N,s,1,h,1); daxpy(N,-omega,t,1,h,1); beta=(alpha/omega)/rTh; rTh=ddot(N,rT,1,h,1); beta*=rTh; dscal(N,beta,d,1); daxpy(N,1.,h,1,d,1); daxpy(N,-beta*omega,Sd,1,d,1); rTr=ddot(N,r,1,r,1); ++its; } nlCurrentContext->matrix_vector_prod(x,Ax); for(i = 0 ; i < N ; ++i){ accu+=(Ax[i]-b[i])*(Ax[i]-b[i]); } printf("in OpenNL : ||Ax-b||/||b|| = %e\n",sqrt(accu)/sqrt(b_square)); NL_DELETE_ARRAY(Ax); NL_DELETE_ARRAY(r); NL_DELETE_ARRAY(rT); NL_DELETE_ARRAY(d); NL_DELETE_ARRAY(h); NL_DELETE_ARRAY(u); NL_DELETE_ARRAY(Sd); NL_DELETE_ARRAY(t); NL_DELETE_ARRAY(aux); return its; }
NLboolean nlFactorize_SUPERLU() { /* OpenNL Context */ NLSparseMatrix* M = &(nlCurrentContext->M) ; NLuint n = nlCurrentContext->n ; NLuint nnz = nlSparseMatrixNNZ(M) ; /* Number of Non-Zero coeffs */ superlu_context* context = (superlu_context*)(nlCurrentContext->direct_solver_context) ; if(context == NULL) { nlCurrentContext->direct_solver_context = malloc(sizeof(superlu_context)) ; context = (superlu_context*)(nlCurrentContext->direct_solver_context) ; } /* SUPERLU variables */ NLint info ; SuperMatrix A, AC ; /* Temporary variables */ NLRowColumn* Ci = NULL ; NLuint i,j,count ; /* Sanity checks */ nl_assert(!(M->storage & NL_MATRIX_STORE_SYMMETRIC)) ; nl_assert(M->storage & NL_MATRIX_STORE_ROWS) ; nl_assert(M->m == M->n) ; set_default_options(&(context->options)) ; switch(nlCurrentContext->solver) { case NL_SUPERLU_EXT: { context->options.ColPerm = NATURAL ; } break ; case NL_PERM_SUPERLU_EXT: { context->options.ColPerm = COLAMD ; } break ; case NL_SYMMETRIC_SUPERLU_EXT: { context->options.ColPerm = MMD_AT_PLUS_A ; context->options.SymmetricMode = YES ; } break ; default: { nl_assert_not_reached ; } break ; } StatInit(&(context->stat)) ; /* * Step 1: convert matrix M into SUPERLU compressed column representation * ---------------------------------------------------------------------- */ NLint* xa = NL_NEW_ARRAY(NLint, n+1) ; NLdouble* a = NL_NEW_ARRAY(NLdouble, nnz) ; NLint* asub = NL_NEW_ARRAY(NLint, nnz) ; count = 0 ; for(i = 0; i < n; i++) { Ci = &(M->row[i]) ; xa[i] = count ; for(j = 0; j < Ci->size; j++) { a[count] = Ci->coeff[j].value ; asub[count] = Ci->coeff[j].index ; count++ ; } } xa[n] = nnz ; dCreate_CompCol_Matrix( &A, n, n, nnz, a, asub, xa, SLU_NR, /* Row wise */ SLU_D, /* doubles */ SLU_GE /* general storage */ ); /* * Step 2: factorize matrix * ------------------------ */ context->perm_c = NL_NEW_ARRAY(NLint, n) ; context->perm_r = NL_NEW_ARRAY(NLint, n) ; NLint* etree = NL_NEW_ARRAY(NLint, n) ; get_perm_c(context->options.ColPerm, &A, context->perm_c) ; sp_preorder(&(context->options), &A, context->perm_c, etree, &AC) ; int panel_size = sp_ienv(1) ; int relax = sp_ienv(2) ; dgstrf(&(context->options), &AC, relax, panel_size, etree, NULL, 0, context->perm_c, context->perm_r, &(context->L), &(context->U), &(context->stat), &info) ; /* * Step 3: cleanup * --------------- */ NL_DELETE_ARRAY(xa) ; NL_DELETE_ARRAY(a) ; NL_DELETE_ARRAY(asub) ; NL_DELETE_ARRAY(etree) ; Destroy_SuperMatrix_Store(&A); Destroy_CompCol_Permuted(&AC); StatFree(&(context->stat)); return NL_TRUE ; }