float tanh2(float x) { float a = sabs(x); a = 6 + a *(6 + a * (3 + a)); return ((x<0) ? -1 : 1) * (a-6)/(a+6); }
LIS_INT lis_precon_create_ilut_csr(LIS_SOLVER solver, LIS_PRECON precon) { #ifdef _OPENMP LIS_INT err; LIS_INT i,j,k,jj; LIS_INT is,ie,my_rank,nprocs; LIS_INT n,lfil,len; LIS_SCALAR t,tol,m; LIS_MATRIX A; LIS_MATRIX_ILU L,U; LIS_VECTOR D; LIS_REAL tnorm, tolnorm; LIS_SCALAR fact,lxu,*wn,*w; LIS_INT lenu,lenl,col,jpos,jrow,upos; LIS_INT *jbuf,*iw; LIS_DEBUG_FUNC_IN; A = solver->A; n = A->n; tol = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN]; m = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN]; lfil = (LIS_INT)((double)A->nnz/(2.0*n))*m; nprocs = omp_get_max_threads(); L = NULL; U = NULL; err = lis_matrix_ilu_create(n,1,&L); if( err ) return err; err = lis_matrix_ilu_create(n,1,&U); if( err ) return err; err = lis_matrix_ilu_setCR(L); if( err ) return err; err = lis_matrix_ilu_setCR(U); if( err ) return err; err = lis_vector_duplicate(A,&D); if( err ) { return err; } w = (LIS_SCALAR *)lis_malloc(nprocs*(n+1)*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w"); if( w==NULL ) { LIS_SETERR_MEM(nprocs*(n+1)*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } wn = (LIS_SCALAR *)lis_malloc(nprocs*n*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w"); if( wn==NULL ) { LIS_SETERR_MEM(nprocs*n*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } jbuf = (LIS_INT *)lis_malloc(nprocs*n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw"); if( jbuf==NULL ) { LIS_SETERR_MEM(nprocs*n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } iw = (LIS_INT *)lis_malloc(nprocs*n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw"); if( iw==NULL ) { LIS_SETERR_MEM(nprocs*n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } #pragma omp parallel private(is,ie,my_rank,i,j,k,jj,tnorm,tolnorm,len,lenu,lenl,col,t,jpos,jrow,fact,lxu,upos) { my_rank = omp_get_thread_num(); LIS_GET_ISIE(my_rank,nprocs,n,is,ie); for(i=is;i<ie;i++) iw[my_rank*n+i] = -1; for(i=is;i<ie;i++) { tnorm = 0; k = 0; for(j=A->ptr[i];j<A->ptr[i+1];j++) { jj = A->index[j]; if( jj<is || jj>=ie ) continue; tnorm += fabs(A->value[j]); k++; } tnorm = tnorm / (double)k; tolnorm = tol * tnorm; lenu = 0; lenl = 0; jbuf[my_rank*n+i] = i; w[my_rank*n+i] = 0; iw[my_rank*n+i] = i; for(j=A->ptr[i];j<A->ptr[i+1];j++) { col = A->index[j]; if( col<is || col>=ie ) continue; t = A->value[j]; if( col < i ) { jbuf[my_rank*n+lenl] = col; iw[my_rank*n+col] = lenl; w[my_rank*n+lenl] = t; lenl++; } else if( col == i ) { w[my_rank*n+i] = t; } else { lenu++; jpos = i + lenu; jbuf[my_rank*n+jpos] = col; iw[my_rank*n+col] = jpos; w[my_rank*n+jpos] = t; } } j = -1; len = 0; while( ++j < lenl ) { jrow = jbuf[my_rank*n+j]; jpos = j; for(k=j+1;k<lenl;k++) { if( jbuf[my_rank*n+k]<jrow ) { jrow = jbuf[my_rank*n+k]; jpos = k; } } if( jpos!=j ) { col = jbuf[my_rank*n+j]; jbuf[my_rank*n+j] = jbuf[my_rank*n+jpos]; jbuf[my_rank*n+jpos] = col; iw[my_rank*n+jrow] = j; iw[my_rank*n+col] = jpos; t = w[my_rank*n+j]; w[my_rank*n+j] = w[my_rank*n+jpos]; w[my_rank*n+jpos] = t; } fact = w[my_rank*n+j] * D->value[jrow]; w[my_rank*n+j] = fact; iw[my_rank*n+jrow] = -1; for(k=0;k<U->nnz[jrow];k++) { col = U->index[jrow][k]; jpos = iw[my_rank*n+col]; lxu = -fact * U->value[jrow][k]; if( fabs(lxu) < tolnorm && jpos==-1 ) continue; if( col >= i ) { if( jpos == -1 ) { lenu++; upos = i + lenu; jbuf[my_rank*n+upos] = col; iw[my_rank*n+col] = upos; w[my_rank*n+upos] = lxu; } else { w[my_rank*n+jpos] += lxu; } } else { if( jpos == -1 ) { jbuf[my_rank*n+lenl] = col; iw[my_rank*n+col] = lenl; w[my_rank*n+lenl] = lxu; lenl++; } else { w[my_rank*n+jpos] += lxu; } } } } iw[my_rank*n+i] = -1; for(j=0;j<lenu;j++) { iw[ my_rank*n+jbuf[my_rank*n+i+j+1] ] = -1; } D->value[i] = 1.0 / w[my_rank*n+i]; len = _min(lfil,lenl); for(j=0;j<lenl;j++) { wn[my_rank*n+j] = fabs(w[my_rank*n+j]); iw[my_rank*n+j] = j; } lis_sort_di(0,lenl-1,&wn[my_rank*n],&iw[my_rank*n]); lis_sort_i(0,len-1,&iw[my_rank*n]); L->nnz[i] = len; if( len>0 ) { L->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT)); L->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR)); } for(j=0;j<len;j++) { jpos = iw[my_rank*n+j]; L->index[i][j] = jbuf[my_rank*n+jpos]; L->value[i][j] = w[my_rank*n+jpos]; } for(j=0;j<lenl;j++) iw[my_rank*n+j] = -1; len = _min(lfil,lenu); for(j=0;j<lenu;j++) { wn[my_rank*n+j] = fabs(w[my_rank*n+i+j+1]); iw[my_rank*n+j] = i+j+1; } lis_sort_di(0,lenu-1,&wn[my_rank*n],&iw[my_rank*n]); lis_sort_i(0,len-1,&iw[my_rank*n]); U->nnz[i] = len; if( len>0 ) { U->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT)); U->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR)); } for(j=0;j<len;j++) { jpos = iw[my_rank*n+j]; U->index[i][j] = jbuf[my_rank*n+jpos]; U->value[i][j] = w[my_rank*n+jpos]; } for(j=0;j<lenu;j++) iw[my_rank*n+j] = -1; } } precon->L = L; precon->U = U; precon->D = D; lis_free2(4,w,iw,wn,jbuf); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; #else LIS_INT err; LIS_INT i,j,k; LIS_INT n,lfil,len; LIS_SCALAR t,tol,m; LIS_MATRIX A; LIS_MATRIX_ILU L,U; LIS_VECTOR D; LIS_REAL tnorm, tolnorm; LIS_SCALAR fact,lxu,*wn,*w; LIS_INT lenu,lenl,col,jpos,jrow,upos; LIS_INT *jbuf,*iw; LIS_DEBUG_FUNC_IN; A = solver->A; n = A->n; tol = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN]; m = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN]; lfil = (LIS_INT)(((double)A->nnz/(2.0*n))*m); L = NULL; U = NULL; err = lis_matrix_ilu_create(n,1,&L); if( err ) return err; err = lis_matrix_ilu_create(n,1,&U); if( err ) return err; err = lis_matrix_ilu_setCR(L); if( err ) return err; err = lis_matrix_ilu_setCR(U); if( err ) return err; err = lis_vector_duplicate(A,&D); if( err ) { return err; } w = (LIS_SCALAR *)lis_malloc((n+1)*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w"); if( w==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } wn = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w"); if( wn==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_SCALAR)); return LIS_OUT_OF_MEMORY; } jbuf = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw"); if( jbuf==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } iw = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw"); if( iw==NULL ) { LIS_SETERR_MEM(n*sizeof(LIS_INT)); return LIS_OUT_OF_MEMORY; } for(i=0;i<n;i++) iw[i] = -1; for(i=0;i<n;i++) { tnorm = 0; for(j=A->ptr[i];j<A->ptr[i+1];j++) { tnorm += sabs(A->value[j]); } tnorm = tnorm / (double)(A->ptr[i+1]-A->ptr[i]); tolnorm = tol * tnorm; lenu = 0; lenl = 0; jbuf[i] = i; w[i] = 0; iw[i] = i; for(j=A->ptr[i];j<A->ptr[i+1];j++) { col = A->index[j]; #ifdef USE_MPI if( col>n-1 ) continue; #endif t = A->value[j]; if( col < i ) { jbuf[lenl] = col; iw[col] = lenl; w[lenl] = t; lenl++; } else if( col == i ) { w[i] = t; } else { lenu++; jpos = i + lenu; jbuf[jpos] = col; iw[col] = jpos; w[jpos] = t; } } j = -1; len = 0; while( ++j < lenl ) { jrow = jbuf[j]; jpos = j; for(k=j+1;k<lenl;k++) { if( jbuf[k]<jrow ) { jrow = jbuf[k]; jpos = k; } } if( jpos!=j ) { col = jbuf[j]; jbuf[j] = jbuf[jpos]; jbuf[jpos] = col; iw[jrow] = j; iw[col] = jpos; t = w[j]; w[j] = w[jpos]; w[jpos] = t; } fact = w[j] * D->value[jrow]; w[j] = fact; iw[jrow] = -1; for(k=0;k<U->nnz[jrow];k++) { col = U->index[jrow][k]; jpos = iw[col]; lxu = -fact * U->value[jrow][k]; if( sabs(lxu) < tolnorm && jpos==-1 ) continue; if( col >= i ) { if( jpos == -1 ) { lenu++; upos = i + lenu; jbuf[upos] = col; iw[col] = upos; w[upos] = lxu; } else { w[jpos] += lxu; } } else { if( jpos == -1 ) { jbuf[lenl] = col; iw[col] = lenl; w[lenl] = lxu; lenl++; } else { w[jpos] += lxu; } } } /* for(kk=0;kk<bs;kk++) { w[bs*len+kk] = -buf_fact[kk]; } jbuf[len] = jrow; len++;*/ } iw[i] = -1; for(j=0;j<lenu;j++) { iw[ jbuf[i+j+1] ] = -1; } D->value[i] = 1.0 / w[i]; len = _min(lfil,lenl); for(j=0;j<lenl;j++) { wn[j] = sabs(w[j]); iw[j] = j; } lis_sort_di(0,lenl-1,wn,iw); lis_sort_i(0,len-1,iw); L->nnz[i] = len; if( len>0 ) { L->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT)); L->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR)); } for(j=0;j<len;j++) { jpos = iw[j]; L->index[i][j] = jbuf[jpos]; L->value[i][j] = w[jpos]; } for(j=0;j<lenl;j++) iw[j] = -1; len = _min(lfil,lenu); for(j=0;j<lenu;j++) { wn[j] = sabs(w[i+j+1]); iw[j] = i+j+1; } lis_sort_di(0,lenu-1,wn,iw); lis_sort_i(0,len-1,iw); U->nnz[i] = len; if( len>0 ) { U->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT)); U->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR)); } for(j=0;j<len;j++) { jpos = iw[j]; U->index[i][j] = jbuf[jpos]; U->value[i][j] = w[jpos]; } for(j=0;j<lenu;j++) iw[j] = -1; } precon->L = L; precon->U = U; precon->D = D; lis_free2(4,w,iw,wn,jbuf); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; #endif }
LIS_INT lis_gmres(LIS_SOLVER solver) { LIS_MATRIX A; LIS_VECTOR b,x; LIS_VECTOR r,s,z,*v; LIS_SCALAR *h; LIS_SCALAR aa,bb,rr,a2,b2,t; LIS_REAL tnrm2; LIS_REAL bnrm2,nrm2,tol; LIS_INT iter,maxiter,n,output; double time,ptime; LIS_REAL rnorm; LIS_INT i,j,k,m; LIS_INT ii,i1,iiv,i1v,iih,jj; LIS_INT h_dim; LIS_INT cs,sn; LIS_DEBUG_FUNC_IN; A = solver->A; b = solver->b; x = solver->x; n = A->n; maxiter = solver->options[LIS_OPTIONS_MAXITER]; output = solver->options[LIS_OPTIONS_OUTPUT]; m = solver->options[LIS_OPTIONS_RESTART]; h_dim = m+1; ptime = 0.0; s = solver->work[0]; r = solver->work[1]; z = solver->work[2]; v = &solver->work[3]; h = (LIS_SCALAR *)lis_malloc( sizeof(LIS_SCALAR)*(h_dim+1)*(h_dim+2),"lis_gmres::h" ); cs = (m+1)*h_dim; sn = (m+2)*h_dim; /* r = M^-1 * (b - A * x) */ lis_matvec(A,x,z); lis_vector_xpay(b,-1.0,z); lis_psolve(solver,z,v[0]); /* Initial Residual */ if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) ) { lis_free(h); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } tol = solver->tol; iter=0; while( iter<maxiter ) { /* first column of V */ /* v = r / ||r||_2 */ lis_vector_nrm2(v[0],&rnorm); lis_vector_scale(1.0/rnorm,v[0]); /* s = ||r||_2 e_1 */ lis_vector_set_all(0,s); s->value[0] = rnorm; i = 0; do { iter++; i++; ii = i-1; i1 = i; iiv = i-1; i1v = i; iih = (i-1)*h_dim; /* z = M^-1 * v */ time = lis_wtime(); lis_psolve(solver,v[iiv],z); ptime += lis_wtime()-time; /* w = A * z */ lis_matvec(A,z,v[i1v]); for(k=0;k<i;k++) { /* h[k,i] = <w,v[k]> */ /* w = w - h[k,i] * v[k] */ lis_vector_dot(v[i1v],v[k],&t); h[k+iih] = t; lis_vector_axpy(-t,v[k],v[i1v]); } /* h[i+1,i] = ||w|| */ /* v[i+1] = w / h[i+1,i] */ lis_vector_nrm2(v[i1v],&tnrm2); h[i1+iih] = tnrm2; lis_vector_scale(1.0/tnrm2,v[i1v]); for(k=1;k<=ii;k++) { jj = k-1; t = h[jj+iih]; aa = h[jj+cs]*t; aa += h[jj+sn]*h[k+iih]; bb = -h[jj+sn]*t; bb += h[jj+cs]*h[k+iih]; h[jj+iih] = aa; h[k+iih] = bb; } aa = h[ii+iih]; bb = h[i1+iih]; a2 = aa*aa; b2 = bb*bb; rr = sqrt(a2+b2); if( rr==0.0 ) rr=1.0e-17; h[ii+cs] = aa/rr; h[ii+sn] = bb/rr; s->value[i1] = -h[ii+sn]*s->value[ii]; s->value[ii] = h[ii+cs]*s->value[ii]; aa = h[ii+cs]*h[ii+iih]; aa += h[ii+sn]*h[i1+iih]; h[ii+iih] = aa; /* convergence check */ nrm2 = sabs(s->value[i1])*bnrm2; if( output ) { if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2; if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2); } if( tol >= nrm2 ) break; } while( i<m && iter <maxiter ); /* Solve H * Y = S for upper Hessenberg matrix H */ s->value[ii] = s->value[ii]/h[ii+iih]; for(k=1;k<=ii;k++) { jj = ii-k; t = s->value[jj]; for(j=jj+1;j<=ii;j++) { t -= h[jj+j*h_dim]*s->value[j]; } s->value[jj] = t/h[jj+jj*h_dim]; } /* z = z + y * v */ #ifdef _OPENMP #pragma omp parallel for private(k) #endif for(k=0;k<n;k++) { z->value[k] = s->value[0]*v[0]->value[k]; } for(j=1;j<=ii;j++) { lis_vector_axpy(s->value[j],v[j],z); } /* r = M^-1 * z */ time = lis_wtime(); lis_psolve(solver,z,r); ptime += lis_wtime()-time; /* x = x + r */ lis_vector_axpy(1,r,x); if( tol >= nrm2 ) { solver->retcode = LIS_SUCCESS; solver->iter = iter; solver->resid = nrm2; solver->ptime = ptime; lis_free(h); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } for(j=1;j<=i;j++) { jj = i1-j+1; s->value[jj-1] = -h[jj-1+sn]*s->value[jj]; s->value[jj] = h[jj-1+cs]*s->value[jj]; } for(j=0;j<=i1;j++) { t = s->value[j]; if( j==0 ) t = t-1.0; lis_vector_axpy(t,v[j],v[0]); } } solver->retcode = LIS_MAXITER; solver->iter = iter+1; solver->resid = nrm2; lis_free(h); LIS_DEBUG_FUNC_OUT; return LIS_MAXITER; }
LIS_INT lis_eii(LIS_ESOLVER esolver) { LIS_MATRIX A; LIS_VECTOR x; LIS_SCALAR evalue, ievalue; LIS_SCALAR lshift; LIS_INT emaxiter; LIS_REAL tol; LIS_INT iter,iter2,output; LIS_REAL nrm2,resid; LIS_VECTOR z,q; LIS_SOLVER solver; double time,itime,ptime,p_c_time,p_i_time; LIS_INT err; LIS_PRECON precon; LIS_INT nsol, precon_type; char solvername[128], preconname[128]; LIS_DEBUG_FUNC_IN; emaxiter = esolver->options[LIS_EOPTIONS_MAXITER]; tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]; lshift = esolver->lshift; output = esolver->options[LIS_EOPTIONS_OUTPUT]; A = esolver->A; x = esolver->x; if (esolver->options[LIS_EOPTIONS_INITGUESS_ONES] ) { lis_vector_set_all(1.0,x); } evalue = 1.0; z = esolver->work[0]; q = esolver->work[1]; iter=0; ievalue = 1/(evalue); #ifdef _LONG__DOUBLE if( output & (A->my_rank==0) ) printf("local shift : %Le\n", lshift); #else #if defined(_COMPLEX) if( output & (A->my_rank==0) ) printf("local shift : "CFMT"\n", cfmt(lshift)); #else if( output & (A->my_rank==0) ) printf("local shift : %e\n", lshift); #endif #endif if (lshift != 0) lis_matrix_shift_diagonal(A, lshift); lis_solver_create(&solver); lis_solver_set_option("-i bicg -p none",solver); lis_solver_set_optionC(solver); lis_solver_get_solver(solver, &nsol); lis_solver_get_precon(solver, &precon_type); lis_solver_get_solvername(nsol, solvername); lis_solver_get_preconname(precon_type, preconname); if( output & (A->my_rank==0) ) printf("linear solver : %s\n", solvername); if( output & (A->my_rank==0) ) printf("preconditioner : %s\n", preconname); /* create preconditioner */ solver->A = A; err = lis_precon_create(solver, &precon); if( err ) { lis_solver_work_destroy(solver); solver->retcode = err; return err; } while (iter<emaxiter) { iter = iter+1; /* x = x / ||x||_2 */ lis_vector_nrm2(x, &nrm2); lis_vector_scale(1/nrm2, x); /* z = (A - lshift I)^-1 * x */ lis_solve_kernel(A, x, z, solver, precon); lis_solver_get_iter(solver,&iter2); /* 1/evalue = <x,z> */ lis_vector_dot(x, z, &ievalue); /* resid = ||z - 1/evalue * x||_2 / |1/evalue| */ lis_vector_axpyz(-ievalue,x,z,q); lis_vector_nrm2(q, &resid); resid = sabs(resid/ievalue); /* x = z */ lis_vector_copy(z,x); /* convergence check */ lis_solver_get_timeex(solver,&time,&itime,&ptime,&p_c_time,&p_i_time); esolver->ptime += solver->ptime; esolver->itime += solver->itime; esolver->p_c_time += solver->p_c_time; esolver->p_i_time += solver->p_i_time; if( output ) { if( output & LIS_EPRINT_MEM ) esolver->rhistory[iter] = resid; if( output & LIS_EPRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,resid); } if( tol >= resid ) { esolver->retcode = LIS_SUCCESS; esolver->iter[0] = iter; esolver->resid[0] = resid; esolver->evalue[0] = 1/ievalue; lis_vector_nrm2(x, &nrm2); lis_vector_scale(1/nrm2, x); if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift); lis_precon_destroy(precon); lis_solver_destroy(solver); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; } } lis_precon_destroy(precon); esolver->retcode = LIS_MAXITER; esolver->iter[0] = iter; esolver->resid[0] = resid; esolver->evalue[0] = 1/ievalue; lis_vector_nrm2(x, &nrm2); lis_vector_scale(1/nrm2, x); if (lshift != 0) { lis_matrix_shift_diagonal(A, -lshift); } lis_solver_destroy(solver); LIS_DEBUG_FUNC_OUT; return LIS_MAXITER; }