Ejemplo n.º 1
0
float tanh2(float x)
{
	float a = sabs(x);
	a = 6 + a *(6 + a * (3 + a));
	return ((x<0) ? -1 : 1) * (a-6)/(a+6); 
}
Ejemplo n.º 2
0
LIS_INT lis_precon_create_ilut_csr(LIS_SOLVER solver, LIS_PRECON precon)
{
#ifdef _OPENMP
	LIS_INT	err;
	LIS_INT	i,j,k,jj;
	LIS_INT	is,ie,my_rank,nprocs;
	LIS_INT	n,lfil,len;
	LIS_SCALAR t,tol,m;
	LIS_MATRIX A;
	LIS_MATRIX_ILU L,U;
	LIS_VECTOR D;

	LIS_REAL tnorm, tolnorm;
	LIS_SCALAR fact,lxu,*wn,*w;
	LIS_INT	lenu,lenl,col,jpos,jrow,upos;
	LIS_INT	*jbuf,*iw;

	LIS_DEBUG_FUNC_IN;


	A      = solver->A;
	n      = A->n;
	tol    = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];
	m      = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN];
	lfil   = (LIS_INT)((double)A->nnz/(2.0*n))*m;
	nprocs = omp_get_max_threads();

	L      = NULL;
	U      = NULL;


	err = lis_matrix_ilu_create(n,1,&L);
	if( err ) return err;
	err = lis_matrix_ilu_create(n,1,&U);
	if( err ) return err;
	err = lis_matrix_ilu_setCR(L);
	if( err ) return err;
	err = lis_matrix_ilu_setCR(U);
	if( err ) return err;
	err = lis_vector_duplicate(A,&D);
	if( err )
	{
		return err;
	}

	w   = (LIS_SCALAR *)lis_malloc(nprocs*(n+1)*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w");
	if( w==NULL )
	{
		LIS_SETERR_MEM(nprocs*(n+1)*sizeof(LIS_SCALAR));
		return LIS_OUT_OF_MEMORY;
	}
	wn = (LIS_SCALAR *)lis_malloc(nprocs*n*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w");
	if( wn==NULL )
	{
		LIS_SETERR_MEM(nprocs*n*sizeof(LIS_SCALAR));
		return LIS_OUT_OF_MEMORY;
	}

	jbuf   = (LIS_INT *)lis_malloc(nprocs*n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw");
	if( jbuf==NULL )
	{
		LIS_SETERR_MEM(nprocs*n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}
	iw   = (LIS_INT *)lis_malloc(nprocs*n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw");
	if( iw==NULL )
	{
		LIS_SETERR_MEM(nprocs*n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}


	#pragma omp parallel private(is,ie,my_rank,i,j,k,jj,tnorm,tolnorm,len,lenu,lenl,col,t,jpos,jrow,fact,lxu,upos)
	{
		my_rank  = omp_get_thread_num();
		LIS_GET_ISIE(my_rank,nprocs,n,is,ie);

		for(i=is;i<ie;i++) iw[my_rank*n+i] = -1;

		for(i=is;i<ie;i++)
		{
			tnorm = 0;
			k = 0;
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				jj = A->index[j];
				if( jj<is || jj>=ie ) continue;
				tnorm += fabs(A->value[j]);
				k++;
			}
			tnorm   = tnorm / (double)k;
			tolnorm = tol * tnorm;

			lenu = 0;
			lenl = 0;
			jbuf[my_rank*n+i] = i;
			w[my_rank*n+i] = 0;
			iw[my_rank*n+i] = i;

			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				col = A->index[j];
				if( col<is || col>=ie ) continue;
				t = A->value[j];
				if( col < i )
				{
					jbuf[my_rank*n+lenl] = col;
					iw[my_rank*n+col] = lenl;
					w[my_rank*n+lenl] = t;
					lenl++;
				}
				else if( col == i )
				{
					w[my_rank*n+i] = t;
				}
				else
				{
					lenu++;
					jpos = i + lenu;
					jbuf[my_rank*n+jpos] = col;
					iw[my_rank*n+col] = jpos;
					w[my_rank*n+jpos] = t;
				}
			}

			j = -1;
			len = 0;

			while( ++j < lenl )
			{
				jrow = jbuf[my_rank*n+j];
				jpos = j;
				for(k=j+1;k<lenl;k++)
				{
					if( jbuf[my_rank*n+k]<jrow )
					{
						jrow = jbuf[my_rank*n+k];
						jpos = k;
					}
				}
				if( jpos!=j )
				{
					col = jbuf[my_rank*n+j];
					jbuf[my_rank*n+j] = jbuf[my_rank*n+jpos];
					jbuf[my_rank*n+jpos] = col;
					iw[my_rank*n+jrow] = j;
					iw[my_rank*n+col] = jpos;
					t = w[my_rank*n+j];
					w[my_rank*n+j] = w[my_rank*n+jpos];
					w[my_rank*n+jpos] = t;
				}
				fact = w[my_rank*n+j] * D->value[jrow];
				w[my_rank*n+j] = fact;
				iw[my_rank*n+jrow] = -1;

				for(k=0;k<U->nnz[jrow];k++)
				{
					col = U->index[jrow][k];
					jpos = iw[my_rank*n+col];
					lxu = -fact * U->value[jrow][k];

					if( fabs(lxu) < tolnorm && jpos==-1 ) continue;
					if( col >= i )
					{
						if( jpos == -1 )
						{
							lenu++;
							upos = i + lenu;
							jbuf[my_rank*n+upos] = col;
							iw[my_rank*n+col] = upos;
							w[my_rank*n+upos] = lxu;
						}
						else
						{
							w[my_rank*n+jpos] += lxu;
						}
					}
					else
					{
						if( jpos == -1 )
						{
							jbuf[my_rank*n+lenl] = col;
							iw[my_rank*n+col] = lenl;
							w[my_rank*n+lenl] = lxu;
							lenl++;
						}
						else
						{
							w[my_rank*n+jpos] += lxu;
						}
					}
				}
			}

			iw[my_rank*n+i] = -1;
			for(j=0;j<lenu;j++)
			{
				iw[ my_rank*n+jbuf[my_rank*n+i+j+1] ] = -1;
			}

			D->value[i] = 1.0 / w[my_rank*n+i];


			len = _min(lfil,lenl);
			for(j=0;j<lenl;j++)
			{
				wn[my_rank*n+j] = fabs(w[my_rank*n+j]);
				iw[my_rank*n+j] = j;
			}
			lis_sort_di(0,lenl-1,&wn[my_rank*n],&iw[my_rank*n]);
			lis_sort_i(0,len-1,&iw[my_rank*n]);
			
			L->nnz[i] = len;
			if( len>0 )
			{
				L->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
				L->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR));
			}
			for(j=0;j<len;j++)
			{
				jpos = iw[my_rank*n+j];
				L->index[i][j] = jbuf[my_rank*n+jpos];
				L->value[i][j] = w[my_rank*n+jpos];
			}
			for(j=0;j<lenl;j++) iw[my_rank*n+j] = -1;

			len = _min(lfil,lenu);
			for(j=0;j<lenu;j++)
			{
				wn[my_rank*n+j] = fabs(w[my_rank*n+i+j+1]);
				iw[my_rank*n+j] = i+j+1;
			}
			lis_sort_di(0,lenu-1,&wn[my_rank*n],&iw[my_rank*n]);
			lis_sort_i(0,len-1,&iw[my_rank*n]);
			
			U->nnz[i] = len;
			if( len>0 )
			{
				U->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
				U->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR));
			}
			for(j=0;j<len;j++)
			{
				jpos = iw[my_rank*n+j];
				U->index[i][j] = jbuf[my_rank*n+jpos];
				U->value[i][j] = w[my_rank*n+jpos];
			}
			for(j=0;j<lenu;j++) iw[my_rank*n+j] = -1;
		}
	}

	precon->L  = L;
	precon->U  = U;
	precon->D  = D;

	lis_free2(4,w,iw,wn,jbuf);

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
#else
	LIS_INT	err;
	LIS_INT	i,j,k;
	LIS_INT	n,lfil,len;
	LIS_SCALAR t,tol,m;
	LIS_MATRIX A;
	LIS_MATRIX_ILU L,U;
	LIS_VECTOR D;

	LIS_REAL tnorm, tolnorm;
	LIS_SCALAR fact,lxu,*wn,*w;
	LIS_INT	lenu,lenl,col,jpos,jrow,upos;
	LIS_INT	*jbuf,*iw;

	LIS_DEBUG_FUNC_IN;


	A      = solver->A;
	n      = A->n;
	tol    = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];
	m      = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN];
	lfil   = (LIS_INT)(((double)A->nnz/(2.0*n))*m);

	L      = NULL;
	U      = NULL;


	err = lis_matrix_ilu_create(n,1,&L);
	if( err ) return err;
	err = lis_matrix_ilu_create(n,1,&U);
	if( err ) return err;
	err = lis_matrix_ilu_setCR(L);
	if( err ) return err;
	err = lis_matrix_ilu_setCR(U);
	if( err ) return err;
	err = lis_vector_duplicate(A,&D);
	if( err )
	{
		return err;
	}

	w   = (LIS_SCALAR *)lis_malloc((n+1)*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w");
	if( w==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		return LIS_OUT_OF_MEMORY;
	}
	wn = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w");
	if( wn==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		return LIS_OUT_OF_MEMORY;
	}

	jbuf   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw");
	if( jbuf==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}
	iw   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw");
	if( iw==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}


	for(i=0;i<n;i++) iw[i] = -1;

	for(i=0;i<n;i++)
	{
		tnorm = 0;
		for(j=A->ptr[i];j<A->ptr[i+1];j++)
		{
			tnorm += sabs(A->value[j]);
		}
		tnorm   = tnorm / (double)(A->ptr[i+1]-A->ptr[i]);
		tolnorm = tol * tnorm;

		lenu = 0;
		lenl = 0;
		jbuf[i] = i;
		w[i] = 0;
		iw[i] = i;

		for(j=A->ptr[i];j<A->ptr[i+1];j++)
		{
			col = A->index[j];
			#ifdef USE_MPI
				if( col>n-1 ) continue;
			#endif
			t = A->value[j];
			if( col < i )
			{
				jbuf[lenl] = col;
				iw[col] = lenl;
				w[lenl] = t;
				lenl++;
			}
			else if( col == i )
			{
				w[i] = t;
			}
			else
			{
				lenu++;
				jpos = i + lenu;
				jbuf[jpos] = col;
				iw[col] = jpos;
				w[jpos] = t;
			}
		}

		j = -1;
		len = 0;

		while( ++j < lenl )
		{
			jrow = jbuf[j];
			jpos = j;
			for(k=j+1;k<lenl;k++)
			{
				if( jbuf[k]<jrow )
				{
					jrow = jbuf[k];
					jpos = k;
				}
			}
			if( jpos!=j )
			{
				col = jbuf[j];
				jbuf[j] = jbuf[jpos];
				jbuf[jpos] = col;
				iw[jrow] = j;
				iw[col] = jpos;
				t = w[j];
				w[j] = w[jpos];
				w[jpos] = t;
			}
			fact = w[j] * D->value[jrow];
			w[j] = fact;
			iw[jrow] = -1;

			for(k=0;k<U->nnz[jrow];k++)
			{
				col = U->index[jrow][k];
				jpos = iw[col];
				lxu = -fact * U->value[jrow][k];

				if( sabs(lxu) < tolnorm && jpos==-1 ) continue;
				if( col >= i )
				{
					if( jpos == -1 )
					{
						lenu++;
						upos = i + lenu;
						jbuf[upos] = col;
						iw[col] = upos;
						w[upos] = lxu;
					}
					else
					{
						w[jpos] += lxu;
					}
				}
				else
				{
					if( jpos == -1 )
					{
						jbuf[lenl] = col;
						iw[col] = lenl;
						w[lenl] = lxu;
						lenl++;
					}
					else
					{
						w[jpos] += lxu;
					}
				}
			}
/*			for(kk=0;kk<bs;kk++)
			{
				w[bs*len+kk] = -buf_fact[kk];
			}
			jbuf[len] = jrow;
			len++;*/
		}

		iw[i] = -1;
		for(j=0;j<lenu;j++)
		{
			iw[ jbuf[i+j+1] ] = -1;
		}

		D->value[i] = 1.0 / w[i];


		len = _min(lfil,lenl);
		for(j=0;j<lenl;j++)
		{
			wn[j] = sabs(w[j]);
			iw[j] = j;
		}
		lis_sort_di(0,lenl-1,wn,iw);
		lis_sort_i(0,len-1,iw);
		
		L->nnz[i] = len;
		if( len>0 )
		{
			L->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
			L->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR));
		}
		for(j=0;j<len;j++)
		{
			jpos = iw[j];
			L->index[i][j] = jbuf[jpos];
			L->value[i][j] = w[jpos];
		}
		for(j=0;j<lenl;j++) iw[j] = -1;

		len = _min(lfil,lenu);
		for(j=0;j<lenu;j++)
		{
			wn[j] = sabs(w[i+j+1]);
			iw[j] = i+j+1;
		}
		lis_sort_di(0,lenu-1,wn,iw);
		lis_sort_i(0,len-1,iw);
		
		U->nnz[i] = len;
		if( len>0 )
		{
			U->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
			U->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR));
		}
		for(j=0;j<len;j++)
		{
			jpos = iw[j];
			U->index[i][j] = jbuf[jpos];
			U->value[i][j] = w[jpos];
		}
		for(j=0;j<lenu;j++) iw[j] = -1;
	}

	precon->L  = L;
	precon->U  = U;
	precon->D  = D;

	lis_free2(4,w,iw,wn,jbuf);

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
#endif
}
Ejemplo n.º 3
0
LIS_INT lis_gmres(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,s,z,*v;
	LIS_SCALAR *h;
	LIS_SCALAR aa,bb,rr,a2,b2,t;
	LIS_REAL tnrm2;

	LIS_REAL bnrm2,nrm2,tol;
	LIS_INT iter,maxiter,n,output;
	double time,ptime;

	LIS_REAL rnorm;
	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	m       = solver->options[LIS_OPTIONS_RESTART];
	h_dim   = m+1;
	ptime   = 0.0;

	s       = solver->work[0];
	r       = solver->work[1];
	z       = solver->work[2];
	v       = &solver->work[3];

	h       = (LIS_SCALAR *)lis_malloc( sizeof(LIS_SCALAR)*(h_dim+1)*(h_dim+2),"lis_gmres::h" );
	cs      = (m+1)*h_dim;
	sn      = (m+2)*h_dim;

	/* r = M^-1 * (b - A * x) */
	lis_matvec(A,x,z);
	lis_vector_xpay(b,-1.0,z);
	lis_psolve(solver,z,v[0]);
	
	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;


	iter=0;
	while( iter<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2(v[0],&rnorm);
		lis_vector_scale(1.0/rnorm,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_all(0,s);
		s->value[0] = rnorm;

		i = 0;
		do
		{
			iter++;
			i++;
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;


			/* z = M^-1 * v */
			time = lis_wtime();
			lis_psolve(solver,v[iiv],z);
			ptime += lis_wtime()-time;

			/* w = A * z */
			lis_matvec(A,z,v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>          */
				/* w        = w - h[k,i] * v[k] */
				lis_vector_dot(v[i1v],v[k],&t);
				h[k+iih] = t;
				lis_vector_axpy(-t,v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2(v[i1v],&tnrm2);
			h[i1+iih] = tnrm2;
			lis_vector_scale(1.0/tnrm2,v[i1v]);

			for(k=1;k<=ii;k++)
			{
				jj  =  k-1;
				t   =  h[jj+iih];
				aa  =  h[jj+cs]*t;
				aa +=  h[jj+sn]*h[k+iih];
				bb  = -h[jj+sn]*t;
				bb +=  h[jj+cs]*h[k+iih];
				h[jj+iih] = aa;
				h[k+iih] = bb;
			}
			aa = h[ii+iih];
			bb = h[i1+iih];
			a2 = aa*aa;
			b2 = bb*bb;
			rr = sqrt(a2+b2);
			if( rr==0.0 ) rr=1.0e-17;
			h[ii+cs] = aa/rr;
			h[ii+sn] = bb/rr;
			s->value[i1] = -h[ii+sn]*s->value[ii];
			s->value[ii] =  h[ii+cs]*s->value[ii];

			aa  =  h[ii+cs]*h[ii+iih];
			aa +=  h[ii+sn]*h[i1+iih];
			h[ii+iih] = aa;

			/* convergence check */
			nrm2 = sabs(s->value[i1])*bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter <maxiter );

		/* Solve H * Y = S for upper Hessenberg matrix H */
		s->value[ii] = s->value[ii]/h[ii+iih];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t  = s->value[jj];
			for(j=jj+1;j<=ii;j++)
			{
				t -= h[jj+j*h_dim]*s->value[j];
			}
			s->value[jj] = t/h[jj+jj*h_dim];
		}
		/* z = z + y * v */
		#ifdef _OPENMP
		#pragma omp parallel for private(k)
		#endif
		for(k=0;k<n;k++)
		{
			z->value[k] = s->value[0]*v[0]->value[k];
		}
		for(j=1;j<=ii;j++)
		{
			lis_vector_axpy(s->value[j],v[j],z);
		}

		/* r = M^-1 * z */
		time = lis_wtime();
		lis_psolve(solver,z,r);
		ptime += lis_wtime()-time;

		/* x = x + r */
		lis_vector_axpy(1,r,x);

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			s->value[jj-1] = -h[jj-1+sn]*s->value[jj];
			s->value[jj]   =  h[jj-1+cs]*s->value[jj];
		}

		for(j=0;j<=i1;j++)
		{
			t = s->value[j];
			if( j==0 ) t = t-1.0;
			lis_vector_axpy(t,v[j],v[0]);
		}
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter+1;
	solver->resid     = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
Ejemplo n.º 4
0
LIS_INT lis_eii(LIS_ESOLVER esolver)
{
  LIS_MATRIX A;
  LIS_VECTOR x;
  LIS_SCALAR evalue, ievalue;
  LIS_SCALAR lshift;
  LIS_INT emaxiter;
  LIS_REAL tol;
  LIS_INT iter,iter2,output;
  LIS_REAL nrm2,resid;
  LIS_VECTOR z,q;
  LIS_SOLVER solver;
  double time,itime,ptime,p_c_time,p_i_time;

  LIS_INT err;
  LIS_PRECON precon;
  LIS_INT nsol, precon_type;
  char solvername[128], preconname[128];

  LIS_DEBUG_FUNC_IN;

  emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
  tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]; 
  lshift = esolver->lshift;
  output  = esolver->options[LIS_EOPTIONS_OUTPUT];

  A = esolver->A;
  x = esolver->x;
  if (esolver->options[LIS_EOPTIONS_INITGUESS_ONES] ) 
    {
      lis_vector_set_all(1.0,x);
    }
  evalue = 1.0;
  z = esolver->work[0];
  q = esolver->work[1];

  iter=0;
  ievalue = 1/(evalue);
#ifdef _LONG__DOUBLE
  if( output & (A->my_rank==0) ) printf("local shift           : %Le\n", lshift);
#else
#if defined(_COMPLEX)
  if( output & (A->my_rank==0) ) printf("local shift           : "CFMT"\n", cfmt(lshift));
#else
  if( output & (A->my_rank==0) ) printf("local shift           : %e\n", lshift);
#endif
#endif
  if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);
  lis_solver_create(&solver);
  lis_solver_set_option("-i bicg -p none",solver);
  lis_solver_set_optionC(solver);
  lis_solver_get_solver(solver, &nsol);
  lis_solver_get_precon(solver, &precon_type);
  lis_solver_get_solvername(nsol, solvername);
  lis_solver_get_preconname(precon_type, preconname);
  if( output & (A->my_rank==0) ) printf("linear solver         : %s\n", solvername);
  if( output & (A->my_rank==0) ) printf("preconditioner        : %s\n", preconname);

  /* create preconditioner */
  solver->A = A;
  err = lis_precon_create(solver, &precon);
  if( err )
    {
      lis_solver_work_destroy(solver);
      solver->retcode = err;
      return err;
    }

  while (iter<emaxiter)
    {
      iter = iter+1;

      /* x = x / ||x||_2 */
      lis_vector_nrm2(x, &nrm2);
      lis_vector_scale(1/nrm2, x);

      /* z = (A - lshift I)^-1 * x */
      lis_solve_kernel(A, x, z, solver, precon);
      lis_solver_get_iter(solver,&iter2);

      /* 1/evalue = <x,z> */
      lis_vector_dot(x, z, &ievalue); 

      /* resid = ||z - 1/evalue * x||_2 / |1/evalue| */
      lis_vector_axpyz(-ievalue,x,z,q); 
      lis_vector_nrm2(q, &resid); 
      resid = sabs(resid/ievalue);

      /* x = z */
      lis_vector_copy(z,x);

      /* convergence check */
      lis_solver_get_timeex(solver,&time,&itime,&ptime,&p_c_time,&p_i_time);
      esolver->ptime += solver->ptime;
      esolver->itime += solver->itime;
      esolver->p_c_time += solver->p_c_time;
      esolver->p_i_time += solver->p_i_time;

      if( output )
	{
	  if( output & LIS_EPRINT_MEM ) esolver->rhistory[iter] = resid;
	  if( output & LIS_EPRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,resid);
	}

      if( tol >= resid ) 
	{
	  esolver->retcode    = LIS_SUCCESS;
	  esolver->iter[0]    = iter;
	  esolver->resid[0]   = resid;
	  esolver->evalue[0]  = 1/ievalue;
	  lis_vector_nrm2(x, &nrm2);
	  lis_vector_scale(1/nrm2, x);
	  if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
	  lis_precon_destroy(precon);
	  lis_solver_destroy(solver); 
	  LIS_DEBUG_FUNC_OUT;
	  return LIS_SUCCESS;
	}
    }

  lis_precon_destroy(precon);

  esolver->retcode    = LIS_MAXITER;
  esolver->iter[0]    = iter;
  esolver->resid[0]   = resid;
  esolver->evalue[0]  = 1/ievalue;
  lis_vector_nrm2(x, &nrm2);
  lis_vector_scale(1/nrm2, x);
  if (lshift != 0) 
    {
      lis_matrix_shift_diagonal(A, -lshift);
    }
  lis_solver_destroy(solver); 
  LIS_DEBUG_FUNC_OUT;
  return LIS_MAXITER;
}