double LisMPIDiscreteVector::getNorm2()
{
    double val = .0;
    int err = lis_vector_nrm2(_v, &val);
    CHKERR(err);
    return val;
}
Exemple #2
0
void lis_vector_nrm2_f(LIS_VECTOR_F *x, LIS_REAL *value, LIS_INT *ierr)
{
	LIS_DEBUG_FUNC_IN;

	*ierr = lis_vector_nrm2((LIS_VECTOR)LIS_V2P(x),value);
	if( *ierr )	return;

	LIS_DEBUG_FUNC_OUT;
	return;
}
LIS_INT lis_solver_get_residual_nrm2_r(LIS_VECTOR r, LIS_SOLVER solver, LIS_REAL *res)
{
	LIS_DEBUG_FUNC_IN;

	lis_vector_nrm2(r,res);
	*res = *res * solver->bnrm;

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
void lis_idrs_orth(LIS_INT s, LIS_VECTOR *P)
{
	LIS_INT n,i,j;
	LIS_REAL r;
	LIS_SCALAR d;

	n = P[0]->n;
	for(j=0;j<s;j++)
	{
		lis_vector_nrm2(P[j],&r);
		r = 1.0/r;
		lis_vector_scale(r,P[j]);
		for(i=j+1;i<s;i++)
		{
			lis_vector_dot(P[j],P[i],&d);
			lis_vector_axpy(-d,P[j],P[i]);
		}
	}
}
Exemple #5
0
LIS_INT lis_jacobi(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR d,r,t,s;
	LIS_REAL bnrm2,nrm2,tol;
	LIS_INT iter,maxiter,output;
	double time,ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	tol     = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	ptime   = 0.0;

	r       = solver->work[0];
	t       = solver->work[1];
	s       = solver->work[2];
	d       = solver->work[3];

	lis_vector_nrm2(b,&bnrm2);
	bnrm2   = 1.0 / bnrm2;

	lis_matrix_get_diagonal(A,d);
	lis_vector_reciprocal(d);

	for( iter=1; iter<=maxiter; iter++ )
	{
		/* x += D^{-1}(b - Ax) */
		time = lis_wtime();
		lis_psolve(solver,x,s);
		ptime += lis_wtime() - time;
		lis_matvec(A,s,t);
/*		lis_matvec(A,x,t);*/
		lis_vector_axpyz(-1,t,b,r);
		lis_vector_nrm2(r,&nrm2);
		lis_vector_pmul(r,d,r);
		lis_vector_axpy(1,r,x);

		/* convergence check */
		nrm2 = nrm2 * bnrm2;

		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol >= nrm2 )
		{
			time = lis_wtime();
			lis_psolve(solver,x,s);
			ptime += lis_wtime() - time;
			lis_vector_copy(s,x);
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}
	}

	lis_psolve(solver,x,s);
	lis_vector_copy(s,x);
	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
LIS_INT lis_fgmres(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,s, *z, *v;
	LIS_SCALAR *h;
	LIS_SCALAR aa,bb,rr,a2,b2,t;

	LIS_REAL   bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes;

	LIS_REAL   rnorm;
	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,i1h,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	M       = solver->precon;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	m       = solver->options[LIS_OPTIONS_RESTART];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	h_dim   = m+1;
	ptimes  = 0.0;

	s       = solver->work[0];
	r       = solver->work[1];
	z       = &solver->work[2];
	v       = &solver->work[m+2];

	h       = (LIS_SCALAR *)lis_malloc( sizeof(LIS_SCALAR) * (h_dim+1) * (h_dim+2),"lis_gmres::h" );
	cs      = (m+1)*h_dim;
	sn      = (m+2)*h_dim;


	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;
	rnorm   = 1.0 / bnrm2;


	iter=0;
	while( iter<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_scale(bnrm2,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_all(0,s);
		s->value[0] = rnorm;

		i = 0;
		do
		{
			iter++;
			i++;
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;
			i1h = i*h_dim;


			/* z = M^-1 v */
			times = lis_wtime();
			lis_psolve(solver, v[iiv], z[iiv]);
			ptimes += lis_wtime()-times;

			/* v = Az */
			LIS_MATVEC(A,z[iiv], v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>       */
				/* w        = w - h[k,i]v[k] */
				lis_vector_dot(v[i1v],v[k],&t);
				h[k + iih] = t;
				lis_vector_axpy(-t,v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2(v[i1v],&t);
			h[i1 + iih] = t;
			lis_vector_scale(1.0/t,v[i1v]);

			for(k=1;k<=ii;k++)
			{
				jj  = k-1;
				t   =  h[jj + iih];
				aa  =  h[jj + cs]*t;
				aa +=  h[jj + sn]*h[k  + iih];
				bb  = -h[jj + sn]*t;
				bb +=  h[jj + cs]*h[k  + iih];
				h[jj + iih] = aa;
				h[k  + iih] = bb;
			}
			aa = h[ii + iih];
			bb = h[i1 + iih];
			a2 = aa*aa;
			b2 = bb*bb;
			rr = sqrt(a2 + b2);
			if( rr==0.0 ) rr=1.0e-17;
			h[ii + cs] = aa / rr;
			h[ii + sn] = bb / rr;
			s->value[i1] = -h[ii + sn]*s->value[ii];
			s->value[ii] =  h[ii + cs]*s->value[ii];

			aa  =  h[ii + cs]*h[ii + iih];
			aa +=  h[ii + sn]*h[i1 + iih];
			h[ii   + iih] = aa;

			/* convergence check */
			nrm2 = fabs(s->value[i1]);

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter <maxiter );

		/* Solve H*Y =S for upper triangular H */
		s->value[ii] = s->value[ii] / h[ii + iih];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t  = s->value[jj];
			for(j=jj+1;j<=ii;j++)
			{
				t -= h[jj + j*h_dim]*s->value[j];
			}
			s->value[jj] = t / h[jj + jj*h_dim];
		}
		/* x = x + zy */
		for(j=0;j<=ii;j++)
		{
			lis_vector_axpy(s->value[j],z[j],x);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		LIS_MATVEC(A,x,v[0]);
		lis_vector_xpay(b,-1.0,v[0]);
		lis_vector_nrm2(v[0],&rnorm);
		bnrm2 = 1.0 / rnorm;
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter+1;
	solver->resid     = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
Exemple #7
0
LIS_INT lis_eii_quad(LIS_ESOLVER esolver)
{
  LIS_MATRIX A;
  LIS_VECTOR x;
  LIS_SCALAR evalue, ievalue;
  LIS_SCALAR lshift;
  LIS_INT emaxiter;
  LIS_REAL tol;
  LIS_INT iter,iter2,output;
  LIS_REAL nrm2,resid;
  LIS_QUAD_PTR qdot_xz;
  LIS_VECTOR z,q;
  LIS_SOLVER solver;
  double time,itime,ptime,p_c_time,p_i_time;
  LIS_INT err;
  LIS_PRECON precon;
  LIS_INT nsol, precon_type;
  char solvername[128], preconname[128];

  LIS_DEBUG_FUNC_IN;

  emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
  tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]; 
  lshift = esolver->lshift;
  output  = esolver->options[LIS_EOPTIONS_OUTPUT];

  A = esolver->A;
  x = esolver->x;
  if (esolver->options[LIS_EOPTIONS_INITGUESS_ONES] ) 
    {
      lis_vector_set_all(1.0,x);
    }
  evalue = 1.0;
  z = esolver->work[0];
  q = esolver->work[1];

  LIS_QUAD_SCALAR_MALLOC(qdot_xz,0,1);

  iter=0;
  ievalue = 1/(evalue);
#ifdef _LONG__DOUBLE
  if( output & (A->my_rank==0) ) printf("local shift           : %Le\n", lshift);
#else
  if( output & (A->my_rank==0) ) printf("local shift           : %e\n", lshift);
#endif
  if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);
  lis_solver_create(&solver);
  lis_solver_set_option("-i bicg -p none -precision quad",solver);
  lis_solver_set_optionC(solver);
  lis_solver_get_solver(solver, &nsol);
  lis_solver_get_precon(solver, &precon_type);
  lis_solver_get_solvername(nsol, solvername);
  lis_solver_get_preconname(precon_type, preconname);
  if( output & (A->my_rank==0) ) printf("linear solver         : %s\n", solvername);
  if( output & (A->my_rank==0) ) printf("preconditioner        : %s\n", preconname);

  /* create preconditioner */
  solver->A = A;
  err = lis_precon_create(solver, &precon);
  if( err )
    {
      lis_solver_work_destroy(solver);
      solver->retcode = err;
      return err;
    }

  while (iter<emaxiter)
    {
      iter = iter+1;

      /* x = x / ||x||_2 */
      lis_vector_nrm2(x, &nrm2);
      lis_vector_scale(1/nrm2, x);

      /* z = (A - lshift I)^-1 * x */
      lis_solve_kernel(A, x, z, solver, precon);
      lis_solver_get_iter(solver,&iter2);

      /* 1/evalue = <x,z> */
      lis_vector_dotex_mmm(x, z, &qdot_xz);
      lis_quad_minus((LIS_QUAD *)qdot_xz.hi);
      lis_vector_axpyzex_mmmm(qdot_xz,x,z,q);
      lis_quad_minus((LIS_QUAD *)qdot_xz.hi);
      ievalue = qdot_xz.hi[0];

      /* resid = ||z - 1/evalue * x||_2 / |1/evalue| */
      lis_vector_nrm2(q, &resid);
      resid = fabs(resid/ievalue);

      /* x = z */
      lis_vector_copy(z,x);

      /* convergence check */
      lis_solver_get_timeex(solver,&time,&itime,&ptime,&p_c_time,&p_i_time);
      esolver->ptime += solver->ptime;
      esolver->itime += solver->itime;
      esolver->p_c_time += solver->p_c_time;
      esolver->p_i_time += solver->p_i_time;

      if( output )
	{
	  if( output & LIS_EPRINT_MEM ) esolver->rhistory[iter] = resid;
	  if( output & LIS_EPRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,resid);
	}

      if( tol >= resid ) 
	{
	  esolver->retcode    = LIS_SUCCESS;
	  esolver->iter[0]    = iter;
	  esolver->resid[0]   = resid;
	  esolver->evalue[0]  = 1/ievalue;
	  lis_vector_nrm2(x, &nrm2);
	  lis_vector_scale(1/nrm2, x);
	  if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
	  lis_precon_destroy(precon);
	  lis_solver_destroy(solver); 
	  LIS_DEBUG_FUNC_OUT;
	  return LIS_SUCCESS;
	}
    }

  lis_precon_destroy(precon);

  esolver->retcode    = LIS_MAXITER;
  esolver->iter[0]    = iter;
  esolver->resid[0]   = resid;
  esolver->evalue[0]  = 1/ievalue;
  lis_vector_nrm2(x, &nrm2);
  lis_vector_scale(1/nrm2, x);
  if (lshift != 0) 
    {
      lis_matrix_shift_diagonal(A, -lshift);
    }
  lis_solver_destroy(solver); 
  LIS_DEBUG_FUNC_OUT;
  return LIS_MAXITER;
}
Exemple #8
0
LIS_INT lis_gmres(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,s,z,*v;
	LIS_SCALAR *h;
	LIS_SCALAR aa,bb,rr,a2,b2,t;
	LIS_REAL tnrm2;

	LIS_REAL bnrm2,nrm2,tol;
	LIS_INT iter,maxiter,n,output;
	double time,ptime;

	LIS_REAL rnorm;
	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	m       = solver->options[LIS_OPTIONS_RESTART];
	h_dim   = m+1;
	ptime   = 0.0;

	s       = solver->work[0];
	r       = solver->work[1];
	z       = solver->work[2];
	v       = &solver->work[3];

	h       = (LIS_SCALAR *)lis_malloc( sizeof(LIS_SCALAR)*(h_dim+1)*(h_dim+2),"lis_gmres::h" );
	cs      = (m+1)*h_dim;
	sn      = (m+2)*h_dim;

	/* r = M^-1 * (b - A * x) */
	lis_matvec(A,x,z);
	lis_vector_xpay(b,-1.0,z);
	lis_psolve(solver,z,v[0]);
	
	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;


	iter=0;
	while( iter<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2(v[0],&rnorm);
		lis_vector_scale(1.0/rnorm,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_all(0,s);
		s->value[0] = rnorm;

		i = 0;
		do
		{
			iter++;
			i++;
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;


			/* z = M^-1 * v */
			time = lis_wtime();
			lis_psolve(solver,v[iiv],z);
			ptime += lis_wtime()-time;

			/* w = A * z */
			lis_matvec(A,z,v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>          */
				/* w        = w - h[k,i] * v[k] */
				lis_vector_dot(v[i1v],v[k],&t);
				h[k+iih] = t;
				lis_vector_axpy(-t,v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2(v[i1v],&tnrm2);
			h[i1+iih] = tnrm2;
			lis_vector_scale(1.0/tnrm2,v[i1v]);

			for(k=1;k<=ii;k++)
			{
				jj  =  k-1;
				t   =  h[jj+iih];
				aa  =  h[jj+cs]*t;
				aa +=  h[jj+sn]*h[k+iih];
				bb  = -h[jj+sn]*t;
				bb +=  h[jj+cs]*h[k+iih];
				h[jj+iih] = aa;
				h[k+iih] = bb;
			}
			aa = h[ii+iih];
			bb = h[i1+iih];
			a2 = aa*aa;
			b2 = bb*bb;
			rr = sqrt(a2+b2);
			if( rr==0.0 ) rr=1.0e-17;
			h[ii+cs] = aa/rr;
			h[ii+sn] = bb/rr;
			s->value[i1] = -h[ii+sn]*s->value[ii];
			s->value[ii] =  h[ii+cs]*s->value[ii];

			aa  =  h[ii+cs]*h[ii+iih];
			aa +=  h[ii+sn]*h[i1+iih];
			h[ii+iih] = aa;

			/* convergence check */
			nrm2 = sabs(s->value[i1])*bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter <maxiter );

		/* Solve H * Y = S for upper Hessenberg matrix H */
		s->value[ii] = s->value[ii]/h[ii+iih];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t  = s->value[jj];
			for(j=jj+1;j<=ii;j++)
			{
				t -= h[jj+j*h_dim]*s->value[j];
			}
			s->value[jj] = t/h[jj+jj*h_dim];
		}
		/* z = z + y * v */
		#ifdef _OPENMP
		#pragma omp parallel for private(k)
		#endif
		for(k=0;k<n;k++)
		{
			z->value[k] = s->value[0]*v[0]->value[k];
		}
		for(j=1;j<=ii;j++)
		{
			lis_vector_axpy(s->value[j],v[j],z);
		}

		/* r = M^-1 * z */
		time = lis_wtime();
		lis_psolve(solver,z,r);
		ptime += lis_wtime()-time;

		/* x = x + r */
		lis_vector_axpy(1,r,x);

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			s->value[jj-1] = -h[jj-1+sn]*s->value[jj];
			s->value[jj]   =  h[jj-1+cs]*s->value[jj];
		}

		for(j=0;j<=i1;j++)
		{
			t = s->value[j];
			if( j==0 ) t = t-1.0;
			lis_vector_axpy(t,v[j],v[0]);
		}
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter+1;
	solver->resid     = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
LIS_INT lis_solver_get_initial_residual(LIS_SOLVER solver, LIS_PRECON M, LIS_VECTOR t, LIS_VECTOR r, LIS_SCALAR *bnrm2)
{
	LIS_INT			output,conv;
	#ifdef USE_QUAD_PRECISION
		LIS_INT	i;
	#endif
	LIS_MATRIX	A;
	LIS_VECTOR	x,b,p,xx;
	LIS_SCALAR	nrm2;
	LIS_REAL	tol,tol_w,tol_switch;

	LIS_DEBUG_FUNC_IN;

	A  = solver->A;
	b  = solver->b;
	x  = solver->x;
	xx = solver->xx;
	output     = solver->options[LIS_OPTIONS_OUTPUT];
	conv       = solver->options[LIS_OPTIONS_CONV_COND];
	tol        = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol_w      = solver->params[LIS_PARAMS_RESID_WEIGHT-LIS_OPTIONS_LEN];
	tol_switch = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];


	/* Initial Residual */
	if( M==NULL )
	{
		p = r;
	}
	else
	{
		p = t;
	}

	if( !solver->options[LIS_OPTIONS_INITGUESS_ZEROS] )
	{
		#ifndef USE_QUAD_PRECISION
			lis_matvec(A,x,p);           /* p = Ax    */
			lis_vector_xpay(b,-1,p);     /* p = b - p */
		#else
			if( solver->precision==LIS_PRECISION_DOUBLE )
			{
				lis_matvec(A,x,p);           /* p = Ax    */
				lis_vector_xpay(b,-1,p);     /* p = b - p */
			}
			else
			{
				lis_matvec(A,xx,p);           /* p = Ax    */
				lis_vector_xpay(b,-1,p);     /* p = b - p */
				
				#ifdef _OPENMP
				#pragma omp parallel for private(i)
				#endif
				for(i=0;i<A->n;i++)
				{
					p->value_lo[i] = 0.0;
				}
				
			}
		#endif
	}
	else
	{
		#ifndef USE_QUAD_PRECISION
			lis_vector_copy(b,p);
		#else
			if( solver->precision==LIS_PRECISION_DOUBLE )
			{
				lis_vector_copy(b,p);
			}
			else
			{
				lis_vector_copyex_nm(b,p);
			}
		#endif
	}

	switch(conv)
	{
	case LIS_CONV_COND_NRM2_R:
		lis_vector_nrm2(p,&nrm2);
		*bnrm2 = nrm2;
		solver->tol = tol;
		solver->tol_switch = tol_switch;
		break;
	case LIS_CONV_COND_NRM2_B:
		lis_vector_nrm2(p,&nrm2);
		lis_vector_nrm2(b,bnrm2);
		solver->tol = tol;
		solver->tol_switch = tol_switch;
		break;
	case LIS_CONV_COND_NRM1_B:
		lis_vector_nrm1(p,&nrm2);
		lis_vector_nrm1(b,bnrm2);
		solver->tol = *bnrm2*tol_w + tol;
		solver->tol_switch = *bnrm2*tol_w + tol_switch;
		break;
	}
	if( *bnrm2 == 0.0 )
	{
		*bnrm2 = 1.0;
	}
	else
	{
		*bnrm2 = 1.0 / *bnrm2;
	}
	solver->bnrm = *bnrm2;
	nrm2 = nrm2 * *bnrm2;

	if( output && (r->precision==LIS_PRECISION_QUAD && solver->precision!=LIS_PRECISION_SWITCH) )
	{
		if( output & LIS_PRINT_MEM ) solver->residual[0] = nrm2;
		if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", 0, nrm2); 
	}
	if( nrm2 <= solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN] )
	{
		solver->retcode = LIS_SUCCESS;
		solver->iter    = 1;
		solver->resid   = nrm2; 
		LIS_DEBUG_FUNC_OUT;
		return LIS_FAILS;
	}

	if( M!=NULL )
	{
		/* r = M^-1 * p */
		lis_psolve(solver, p, r);
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
Exemple #10
0
LIS_INT lis_gs(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,t,s;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output;
	double time,ptime;

	LIS_INT err;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	tol     = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	ptime   = 0.0;

	r       = solver->work[0];
	t       = solver->work[1];
	s       = solver->work[2];

	lis_vector_nrm2(b,&bnrm2);
	bnrm2   = 1.0 / bnrm2;

	err = lis_matrix_split(A);
	if( err ) return err;
	if( A->use_wd!=LIS_SOLVER_GS )
	{
		if( !A->WD )
		{
			err = lis_matrix_diag_duplicate(A->D,&A->WD);
			if( err ) return err;
		}
		lis_matrix_diag_copy(A->D,A->WD);
		lis_matrix_diag_inverse(A->WD);
		A->use_wd = LIS_SOLVER_GS;
	}

	for( iter=1; iter<=maxiter; iter++ )
	{
		/* x += (D-L)^{-1}(b - Ax) */
		time = lis_wtime();
		lis_psolve(solver,x,s);
		ptime += lis_wtime() - time;
		lis_matvec(A,s,t);
/*		lis_matvec(A,x,t);*/
		lis_vector_axpyz(-1,t,b,r);
		lis_vector_nrm2(r,&nrm2);
		lis_matrix_solve(A,r,t,LIS_MATRIX_LOWER);
		lis_vector_axpy(1,t,x);

		/* convergence check */
		nrm2 = nrm2 * bnrm2;

		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol >= nrm2 )
		{
			time = lis_wtime();
			lis_psolve(solver,x,s);
			ptime += lis_wtime() - time;
			lis_vector_copy(s,x);
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}
	}

	lis_psolve(solver,x,s);
	lis_vector_copy(s,x);
	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
LIS_INT lis_epi_quad(LIS_ESOLVER esolver)
{
  LIS_MATRIX        A;
  LIS_VECTOR        x;
  LIS_SCALAR        evalue;
  LIS_INT               emaxiter;
  LIS_REAL          tol;
  LIS_INT               iter,output;
  LIS_INT               nprocs,my_rank;
  LIS_REAL          nrm2,resid;
  LIS_QUAD_PTR      qdot_xz;
  LIS_VECTOR        z,q;
  double            times, ptimes;

  LIS_DEBUG_FUNC_IN;

  emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
  tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]; 
  output  = esolver->options[LIS_EOPTIONS_OUTPUT];

  A = esolver->A;
  x = esolver->x;
  if (esolver->options[LIS_EOPTIONS_INITGUESS_ONES] ) 
    {
      lis_vector_set_all(1.0,x);
    }
  z = esolver->work[0];
  q = esolver->work[1];

  LIS_QUAD_SCALAR_MALLOC(qdot_xz,0,1);

  iter=0;
  while (iter<emaxiter)
    {
      iter = iter+1;

      /* x = x / ||x||_2 */
      lis_vector_nrm2(x, &nrm2);
      lis_vector_scale(1/nrm2, x);

      /* z = A * x */
      lis_matvec(A,x,z);

      /* evalue = <x,z> */
      lis_vector_dotex_mmm(x, z, &qdot_xz);
      lis_quad_minus((LIS_QUAD *)qdot_xz.hi);

      /* resid = ||z - evalue * x||_2 / |evalue| */
      lis_vector_axpyzex_mmmm(qdot_xz,x,z,q);
      lis_quad_minus((LIS_QUAD *)qdot_xz.hi);
      lis_vector_nrm2(q, &resid); 
      evalue = qdot_xz.hi[0];
      resid = fabs(resid / evalue);

      /* x = z */
      lis_vector_copy(z, x);

      /* convergence check */
      if( output )
  {
    if( output & LIS_EPRINT_MEM ) esolver->residual[iter] = resid;
    if( output & LIS_EPRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,resid);
  }

      if( tol >= resid )
  {
    esolver->retcode    = LIS_SUCCESS;
    esolver->iter       = iter;
    esolver->resid      = resid;
    esolver->evalue[0] = evalue;
    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
  }
    }
  esolver->retcode    = LIS_MAXITER;
  esolver->iter      = iter;
  esolver->resid     = resid;
  esolver->evalue[0] = evalue;
  LIS_DEBUG_FUNC_OUT;
  return LIS_MAXITER;
}
LIS_INT main(LIS_INT argc, char* argv[])
{
  LIS_MATRIX		A,A0;
  LIS_VECTOR		b,x,v;
  LIS_SCALAR		ntimes,nmflops,nnrm2;
  LIS_SCALAR		*value;

  LIS_INT		nprocs,my_rank;
  int    		int_nprocs,int_my_rank;
  LIS_INT		nthreads, maxthreads;
  LIS_INT		gn,nnz,mode;
  LIS_INT		i,j,jj,j0,j1,l,k,n,np,h,ih;
  LIS_INT		m,nn,ii;
  LIS_INT		block;
  LIS_INT		rn,rmin,rmax,rb;
  LIS_INT		is,ie,clsize,ci,*iw;
  LIS_INT		err,iter,matrix_type;
  LIS_INT	       	*ptr,*index;
  double		mem,val,ra,rs,ri,ria,ca,time,time2,convtime,val2,nnzs,nnzap,nnzt;
  double		commtime,comptime,flops;
  FILE			*file;
  char path[1024];

  LIS_DEBUG_FUNC_IN;
    
  lis_initialize(&argc, &argv);

#ifdef USE_MPI
  MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs);
  MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank);
  nprocs = int_nprocs;
  my_rank = int_my_rank;
#else
  nprocs  = 1;
  my_rank = 0;
#endif

  if( argc < 4 )
    {
      if( my_rank==0 ) printf("Usage: spmvtest5 matrix_filename matrix_type iter [block] \n");
      lis_finalize();
      exit(0);
    }

  file = fopen(argv[1], "r");
  if( file==NULL ) CHKERR(1);

  matrix_type  = atoi(argv[2]);
  iter = atoi(argv[3]);
  if (argv[4] == NULL) {
    block = 2;
  }
  else {
    block = atoi(argv[4]);
  }

  if( matrix_type<1 || matrix_type>11 )
    {
      if( my_rank==0 ) printf("matrix_type=%d <1 or matrix_type=%d >11\n",matrix_type,matrix_type);
      CHKERR(1);
    }
  if( iter<=0 )
    {
      if( my_rank==0 ) printf("iter=%d <= 0\n",iter);
      CHKERR(1);
    }

  if( my_rank==0 )
    {
      printf("\n");
      printf("number of processes = %d\n",nprocs);
    }

#ifdef _OPENMP
  if( my_rank==0 )
    {
      nthreads = omp_get_num_procs();
      maxthreads = omp_get_max_threads();
      printf("max number of threads = %d\n", nthreads);
      printf("number of threads = %d\n", maxthreads);
    }
#else
      nthreads = 1;
      maxthreads = 1;
#endif

  /* create matrix and vectors */
  lis_matrix_create(LIS_COMM_WORLD,&A0);
  err = lis_input(A0,NULL,NULL,argv[1]);
  CHKERR(err);

  n   = A0->n;
  gn  = A0->gn;
  nnz = A0->nnz;
  np  = A0->np-n;
#ifdef USE_MPI
  MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
  nnzap = (double)i / (double)nprocs;
  nnzt  = ((double)nnz -nnzap)*((double)nnz -nnzap);
  nnz   = i;
  MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm);
  nnzs  = (nnzs / (double)nprocs)/nnzap;
  MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
  np = i;
#endif

  if( my_rank==0 ) 
    {
      printf("block size of BSR and BSC = %d x %d\n",block,block);
      printf("iteration count = %d\n\n",iter);
    }

  err = lis_vector_duplicate(A0,&x);
  if( err ) CHKERR(err);
  err = lis_vector_duplicate(A0,&b);
  if( err ) CHKERR(err);

  lis_matrix_get_range(A0,&is,&ie);
  for(i=0;i<n;i++)
    {
      err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x);
    }

  lis_matrix_duplicate(A0,&A);
  lis_matrix_set_type(A,matrix_type);
  err = lis_matrix_convert(A0,A);
  if( err ) CHKERR(err);
  if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_BSC )
    {
      A->bnr = block;
      A->bnc = block;
    }
		    
  comptime = 0.0;
  commtime = 0.0;

  for(i=0;i<iter;i++)
    {
#ifdef USE_MPI
      MPI_Barrier(A->comm);
      time = lis_wtime();
      lis_send_recv(A->commtable,x->value);
      commtime += lis_wtime() - time;
#endif
      time2 = lis_wtime();
      lis_matvec(A,x,b);
      comptime += lis_wtime() - time2;
    }
  lis_vector_nrm2(b,&val);

  if( my_rank==0 )
    {
      flops = 2.0*nnz*iter*1.0e-6 / comptime;
      printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
    }

  lis_matrix_destroy(A);
  lis_matrix_destroy(A0);
  lis_vector_destroy(b);
  lis_vector_destroy(x);

  lis_finalize();

  LIS_DEBUG_FUNC_OUT;

  return 0;
}
Exemple #13
0
LIS_INT main(LIS_INT argc, char* argv[])
{
  LIS_MATRIX		A,A0;
  LIS_VECTOR		b,x,v;
  LIS_SCALAR		ntimes,nmflops,nnrm2;
  LIS_SCALAR		*value;

  LIS_INT		nprocs,my_rank;
  int    		int_nprocs,int_my_rank;
  LIS_INT		nthreads, maxthreads;
  LIS_INT		gn,nnz,mode;
  LIS_INT		i,ii,j,jj,j0,j1,l,k,n,np,h,ih;
  LIS_INT		rn,rmin,rmax,rb;
  LIS_INT		is,ie,clsize,ci,*iw;
  LIS_INT		err,iter,matrix_type,s,ss,se;
  LIS_INT	       	*ptr,*index;
  double		mem,ra,rs,ri,ria,ca,time,time2,convtime,nnzs,nnzap,nnzt;
  LIS_SCALAR            val;
  double		commtime,comptime,flops;
  FILE			*file;
  char path[1024];

  LIS_DEBUG_FUNC_IN;
    
  lis_initialize(&argc, &argv);

#ifdef USE_MPI
  MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs);
  MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank);
  nprocs = int_nprocs;
  my_rank = int_my_rank;
#else
  nprocs  = 1;
  my_rank = 0;
#endif

  if( argc < 3 )
    {
      if( my_rank==0 ) 
	{
	  printf("Usage: %s n iter [matrix_type]\n", argv[0]);
	}
      CHKERR(1);	  
    }
  
  n  = atoi(argv[1]);
  iter = atoi(argv[2]);
  if (argv[3] == NULL) {
    s = 0;
  }
  else {
    s = atoi(argv[3]);
  }

  if( n<=0 )
    {
#ifdef _LONGLONG
      if( my_rank==0 ) printf("n=%lld <=0\n",n);
#else
      if( my_rank==0 ) printf("n=%d <=0\n",n);
#endif
      CHKERR(1);	  
    }
  if( iter<=0 )
    {
#ifdef _LONGLONG
      if( my_rank==0 ) printf("iter=%lld <= 0\n",iter);
#else
      if( my_rank==0 ) printf("iter=%d <= 0\n",iter);
#endif
      CHKERR(1);
    }
  if( s<0 || s>11 )
    {
#ifdef _LONGLONG
      if( my_rank==0 ) printf("matrix_type=%lld < 0 or matrix_type=%lld > 11\n",s,s);
#else
      if( my_rank==0 ) printf("matrix_type=%d < 0 or matrix_type=%d > 11\n",s,s);
#endif
      CHKERR(1);
    }

  if( my_rank==0 ) 
    {
      printf("\n");
#ifdef _LONGLONG
      printf("number of processes = %lld\n",nprocs);
#else
      printf("number of processes = %d\n",nprocs);
#endif
    }

#ifdef _OPENMP
  nthreads = omp_get_num_procs();
  maxthreads = omp_get_max_threads();
  if( my_rank==0 ) 
    {
#ifdef _LONGLONG
      printf("max number of threads = %lld\n", nthreads);
      printf("number of threads = %lld\n", maxthreads);
#else
      printf("max number of threads = %d\n", nthreads);
      printf("number of threads = %d\n", maxthreads);
#endif
    }
#else
  nthreads = 1;
  maxthreads = 1;
#endif

  /* create matrix and vectors */
  lis_matrix_create(LIS_COMM_WORLD,&A0);
  lis_matrix_set_size(A0,0,n);
  lis_matrix_get_size(A0,&n,&gn);
  lis_matrix_get_range(A0,&is,&ie);
  k = 0;
    for(i=is;i<ie;i++)
    {
      if( i>0   )  lis_matrix_set_value(LIS_INS_VALUE,i,i-1,-1.0,A0);
      if( i<gn-1 ) lis_matrix_set_value(LIS_INS_VALUE,i,i+1,-1.0,A0);
      lis_matrix_set_value(LIS_INS_VALUE,i,i,2.0,A0);
    }
  err = lis_matrix_assemble(A0);
  CHKERR(err);

  n   = A0->n;
  gn  = A0->gn;
  nnz = A0->nnz;
  np  = A0->np-n;

#ifdef USE_MPI
  MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
  nnzap = (double)i / (double)nprocs;
  nnzt  = ((double)nnz -nnzap)*((double)nnz -nnzap);
  nnz   = i;
  MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm);
  nnzs  = (nnzs / (double)nprocs)/nnzap;
  MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
  np = i;
#endif

  if( my_rank==0 ) 
    {
#ifdef _LONGLONG
      printf("matrix size = %lld x %lld (%lld nonzero entries)\n",gn,gn,nnz);
      printf("iteration count = %lld\n\n",iter);
#else
      printf("matrix size = %d x %d (%d nonzero entries)\n",gn,gn,nnz);
      printf("iteration count = %d\n\n",iter);
#endif
    }

  err = lis_vector_duplicate(A0,&x);
  if( err ) CHKERR(err);
  err = lis_vector_duplicate(A0,&b);
  if( err ) CHKERR(err);

  lis_matrix_get_range(A0,&is,&ie);

  for(i=0;i<n;i++)
    {
      err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x);
    }

  /* 
     MPI version of VBR is not implemented.
     DNS is also excluded to reduce memory usage.
  */

  if (s==0) 
    {
      ss = 1;
      se = 11;
    }
  else
    {
      ss = s;
      se = s+1;
    }
	
  for (matrix_type=ss;matrix_type<se;matrix_type++)
    {
      if ( nprocs>1 && matrix_type==9 ) continue;
      lis_matrix_duplicate(A0,&A);
      lis_matrix_set_type(A,matrix_type);
      err = lis_matrix_convert(A0,A);
      if( err ) CHKERR(err);
	  
      comptime = 0.0;
      commtime = 0.0;

      for(i=0;i<iter;i++)
	{
#ifdef USE_MPI
	  MPI_Barrier(A->comm);
	  time = lis_wtime();
	  lis_send_recv(A->commtable,x->value);
	  commtime += lis_wtime() - time;
	  MPI_Barrier(A->comm);
#endif
	  time2 = lis_wtime();
	  lis_matvec(A,x,b);
	  comptime += lis_wtime() - time2;
	}
      lis_vector_nrm2(b,&val);

      if( my_rank==0 )
	{
	  flops = 2.0*nnz*iter*1.0e-6 / comptime;
#ifdef USE_MPI
#ifdef _LONG__DOUBLE
#ifdef _LONGLONG
	  printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#else
	  printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#endif
#else
#ifdef _LONGLONG
	  printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#else
	  printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#endif
#endif
#else
#ifdef _LONG__DOUBLE
#ifdef _LONGLONG
	  printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#else
	  printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#endif
#else
#ifdef _LONGLONG
	  printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#else
	  printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#endif
#endif
#endif
	}
      lis_matrix_destroy(A);
    }

  lis_matrix_destroy(A0);
  lis_vector_destroy(b);
  lis_vector_destroy(x);

  lis_finalize();

  LIS_DEBUG_FUNC_OUT;

  return 0;
}
LIS_INT lis_esi_quad(LIS_ESOLVER esolver)
{
  LIS_MATRIX        A;
  LIS_VECTOR        x, Ax;
  LIS_SCALAR        xAx, xx, mu, lshift;
  LIS_INT               ss;
  LIS_INT               emaxiter;
  LIS_REAL          tol;
  LIS_INT               i,j,k;
  LIS_SCALAR        evalue,dotvr;
  LIS_INT               iter,giter,output,niesolver;
  LIS_INT               nprocs,my_rank;
  LIS_REAL          nrm2,dot,resid,resid0;
  LIS_QUAD_PTR      qdot_vv, qdot_vr;
  LIS_VECTOR        *v,r,q;
  LIS_SOLVER        solver;
  LIS_PRECON        precon;
  double	    times,itimes,ptimes,p_c_times,p_i_times;
  LIS_INT		    err;
  LIS_INT           nsol, precon_type;
  char              solvername[128], preconname[128];

  LIS_DEBUG_FUNC_IN;

  A = esolver->A;
  x = esolver->x;

  ss = esolver->options[LIS_EOPTIONS_SUBSPACE];
  emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
  tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]; 
  lshift = esolver->lshift;
  output  = esolver->options[LIS_EOPTIONS_OUTPUT];
  niesolver = esolver->options[LIS_EOPTIONS_INNER_ESOLVER];

  r = esolver->work[0];
  q = esolver->work[1];
  v = &esolver->work[2];
  Ax = esolver->work[3];

  LIS_QUAD_SCALAR_MALLOC(qdot_vv,0,1);
  LIS_QUAD_SCALAR_MALLOC(qdot_vr,1,1);

  lis_vector_set_all(1.0,r);
  lis_vector_nrm2(r, &nrm2);
  lis_vector_scale(1/nrm2,r);
	  
  switch ( niesolver )
    {
    case LIS_ESOLVER_II:
      lis_solver_create(&solver);
      lis_solver_set_option("-i bicg -p ilu -precision quad",solver);
      lis_solver_set_optionC(solver);
      lis_solver_get_solver(solver, &nsol);
      lis_solver_get_precon(solver, &precon_type);
      lis_get_solvername(nsol, solvername);
      lis_get_preconname(precon_type, preconname);
      printf("solver     : %s %d\n", solvername, nsol);
      printf("precon     : %s %d\n", preconname, precon_type);
      if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
      if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);
      break;
    case LIS_ESOLVER_AII:
      lis_solver_create(&solver);
      lis_solver_set_option("-i bicg -p ilu -precision quad",solver);
      lis_solver_set_optionC(solver);
      lis_solver_get_solver(solver, &nsol);
      lis_solver_get_precon(solver, &precon_type);
      lis_get_solvername(nsol, solvername);
      lis_get_preconname(precon_type, preconname);
      printf("solver     : %s %d\n", solvername, nsol);
      printf("precon     : %s %d\n", preconname, precon_type);
      if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
      if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);
      lis_vector_set_all(1.0,q);
      lis_solve(A, q, x, solver);
      lis_precon_create(solver, &precon);
      solver->precon = precon;
      break;
    case LIS_ESOLVER_RQI:
      lis_solver_create(&solver);
      lis_solver_set_option("-p ilu -precision quad -maxiter 10",solver);
      lis_solver_set_optionC(solver);
      lis_solver_get_solver(solver, &nsol);
      lis_solver_get_precon(solver, &precon_type);
      lis_get_solvername(nsol, solvername);
      lis_get_preconname(precon_type, preconname);
      printf("solver     : %s %d\n", solvername, nsol);
      printf("precon     : %s %d\n", preconname, precon_type);
      if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
      if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);
      break;
    }

  giter=0;
  j=0;
  while (j<ss)
    {
      lis_vector_duplicate(A,&esolver->evector[j]); 
      j = j+1;
      lis_vector_copy(r, v[j]);

      if (niesolver==LIS_ESOLVER_II || niesolver==LIS_ESOLVER_RQI)
	{
	  /* create preconditioner */
	  solver->A = A;
	  err = lis_precon_create(solver, &precon);
	  if( err )
	    {
	      lis_solver_work_destroy(solver);
	      solver->retcode = err;
	      return err;
	    }
	}

      if (niesolver==LIS_ESOLVER_RQI)
	{
	  lis_vector_nrm2(x, &nrm2);
	  lis_vector_scale(1/nrm2, x);
	  lis_matvec(A, x, Ax);
	  lis_vector_dot(x, Ax, &xAx);
	  lis_vector_dot(x, x, &xx);
	  mu = xAx / xx;
	}

      iter = 0;
      while (iter<emaxiter)
	{
	  /* diagonalization */
	  iter = iter+1;
	  giter = giter+1;
	  for (k=1;k<j;k++)
	    { 
	      lis_vector_dotex_mmm(v[j], v[k], &qdot_vv);
	      lis_quad_minus((LIS_QUAD *)qdot_vv.hi);
	      lis_vector_axpyex_mmm(qdot_vv,v[k],v[j]);
	    }

	  switch( niesolver )
	    {
	    case LIS_ESOLVER_PI:
	      lis_matvec(A,v[j],r); 
	      break;
	    case LIS_ESOLVER_II:
	      lis_solve_kernel(A, v[j], r, solver, precon);
	      break;
	    case LIS_ESOLVER_AII:
	      lis_psolve(solver, v[j], r); 
	      break;
	    case LIS_ESOLVER_RQI:
	      lis_vector_nrm2(v[j], &nrm2);
	      lis_vector_scale(1/nrm2, v[j]);
	      lis_matrix_shift_diagonal(A, -mu);
	      lis_solve_kernel(A, v[j], r, solver, precon);
	      lis_matrix_shift_diagonal(A, mu);
	      break;
	    }

	  if ( j==1 && ( niesolver==LIS_ESOLVER_II || niesolver==LIS_ESOLVER_AII || niesolver==LIS_ESOLVER_RQI ))
	    {
	      lis_solver_get_timeex(solver,&times,&itimes,&ptimes,&p_c_times,&p_i_times);
	      esolver->ptimes += solver->ptimes;
	      esolver->itimes += solver->itimes;
	      esolver->p_c_times += solver->p_c_times;
	      esolver->p_i_times += solver->p_i_times;
	    }

	  lis_vector_nrm2(r, &nrm2);
	  lis_vector_dotex_mmm(v[j], r, &qdot_vr);
	  lis_quad_minus((LIS_QUAD *)qdot_vr.hi);
	  lis_vector_axpyzex_mmmm(qdot_vr,v[j],r,q);
	  lis_quad_minus((LIS_QUAD *)qdot_vr.hi);	  
	  dotvr = qdot_vr.hi[0];
	  mu = mu + 1/dotvr;

	  lis_vector_nrm2(q, &resid);
	  resid = fabs(resid / dotvr);
	  lis_vector_scale(1/nrm2,r);
	  lis_vector_copy(r, v[j]);
	  if ( j==1 ) 
	    {
	      if( output & LIS_PRINT_MEM ) esolver->residual[iter] = resid; 
	      if( output & LIS_PRINT_OUT ) printf("iter: %5d  residual = %e\n", iter, resid);
	      esolver->iter = iter;
	      esolver->resid = resid;
	    }
	  if (tol>resid) break;
	}

      if (niesolver==LIS_ESOLVER_II || niesolver==LIS_ESOLVER_RQI)
	{
	  lis_precon_destroy(precon);
	}

      switch ( niesolver )
	{
	case LIS_ESOLVER_PI:
	  esolver->evalue[j-1] = dotvr;
	  break;
	case LIS_ESOLVER_II:
	  esolver->evalue[j-1] = 1/dotvr;
	  break;
	case LIS_ESOLVER_AII:
	  esolver->evalue[j-1] = 1/dotvr;
	  break;
	case LIS_ESOLVER_RQI:
	  esolver->evalue[j-1] = mu;
	  break;
	}
      lis_vector_copy(v[j], esolver->evector[j-1]);  

      if (A->my_rank==0 && ss>1)
	{
#ifdef _LONGLONG
	  printf("Subspace: mode number              = %lld\n", j-1);
#else
	  printf("Subspace: mode number              = %d\n", j-1);
#endif
	  printf("Subspace: eigenvalue               = %e\n", esolver->evalue[j-1]);
#ifdef _LONGLONG
	  printf("Subspace: number of iterations     = %lld\n",iter);
#else
	  printf("Subspace: number of iterations     = %d\n",iter);
#endif
	  printf("Subspace: relative residual 2-norm = %e\n",resid);
	}
    }
  
  lis_vector_copy(esolver->evector[esolver->options[LIS_EOPTIONS_MODE]], esolver->x);

  switch ( niesolver )
    {
    case LIS_ESOLVER_II:
      if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
      lis_solver_destroy(solver);
      break;
    case LIS_ESOLVER_AII:
      if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
      lis_precon_destroy(precon);
      lis_solver_destroy(solver);
      break;
    case LIS_ESOLVER_RQI:
      if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
      lis_solver_destroy(solver);
      break;
    }

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}
Exemple #15
0
LIS_INT lis_epi(LIS_ESOLVER esolver)
{
  LIS_Comm comm;
  LIS_MATRIX A;
  LIS_VECTOR v,y,q;
  LIS_SCALAR theta;
  LIS_INT emaxiter;
  LIS_REAL tol;
  LIS_INT iter,output;
  LIS_SCALAR oshift,ishift;
  LIS_REAL nrm2,resid;

  LIS_DEBUG_FUNC_IN;
  
  comm = LIS_COMM_WORLD;  

  emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
  tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]; 
  output  = esolver->options[LIS_EOPTIONS_OUTPUT];
#ifdef _COMPLEX
  oshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN] + esolver->params[LIS_EPARAMS_SHIFT_IM - LIS_EOPTIONS_LEN] * _Complex_I;
#else
  oshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN];
#endif	

  A = esolver->A;
  v = esolver->x;
  if (esolver->options[LIS_EOPTIONS_INITGUESS_ONES] ) 
    {
      lis_vector_set_all(1.0,v);
    }
  y = esolver->work[0];
  q = esolver->work[1];

  if ( esolver->ishift != 0.0 ) oshift = ishift;  
  if ( oshift != 0.0 ) lis_matrix_shift_diagonal(A, oshift);  

  if( output )
    {
#ifdef _COMPLEX
      lis_printf(comm,"shift                 : (%e, %e)\n", (double)creal(oshift), (double)cimag(oshift));
#else
      lis_printf(comm,"shift                 : %e\n", (double)oshift);
#endif
    }

  iter=0;
  while (iter<emaxiter)
    {
      iter = iter+1;

      /* v = v / ||v||_2 */
      lis_vector_nrm2(v, &nrm2);
      lis_vector_scale(1.0/nrm2, v);

      /* y = A * v */
      lis_matvec(A,v,y);

      /* theta = <v,y> */
      lis_vector_dot(v, y, &theta);   

      /* resid = ||y - theta * v||_2 / |theta| */
      lis_vector_axpyz(-theta, v, y, q); 
      lis_vector_nrm2(q, &resid); 
      resid = resid / fabs(theta);

      /* v = y */
      lis_vector_copy(y, v);

      /* convergence check */
      if( output )
	{
	  if( output & LIS_EPRINT_MEM ) esolver->rhistory[iter] = resid;
	  if( output & LIS_EPRINT_OUT ) lis_print_rhistory(comm,iter,resid);
	}

      if( tol >= resid )
	{
	  esolver->retcode    = LIS_SUCCESS;
	  esolver->iter[0]    = iter;
	  esolver->resid[0]   = resid;
	  esolver->evalue[0]  = theta + oshift;
	  lis_vector_nrm2(v, &nrm2);
	  lis_vector_scale(1.0/nrm2, v);
	  if ( oshift != 0.0 ) lis_matrix_shift_diagonal(A, -oshift);	  
	  LIS_DEBUG_FUNC_OUT;
	  return LIS_SUCCESS;
	}
    }

  esolver->retcode   = LIS_MAXITER;
  esolver->iter[0]   = iter;
  esolver->resid[0]  = resid;
  esolver->evalue[0] = theta + oshift;
  lis_vector_nrm2(v, &nrm2);
  lis_vector_scale(1.0/nrm2, v);
  if ( oshift != 0.0 ) lis_matrix_shift_diagonal(A, -oshift);  
  LIS_DEBUG_FUNC_OUT;
  return LIS_MAXITER;
}
Exemple #16
0
LIS_INT main(LIS_INT argc, char* argv[])
{
  LIS_MATRIX A,A0;
  LIS_VECTOR b,x;
  LIS_SCALAR *value;
  LIS_INT nprocs,my_rank;
  int int_nprocs,int_my_rank;
  LIS_INT nthreads,maxthreads;
  LIS_INT gn,nnz,np;
  LIS_INT i,j,k,si,sj,sk,ii,jj,ctr;
  LIS_INT l,m,n,nn;
  LIS_INT is,ie;
  LIS_INT err,iter,matrix_type,storage,ss,se;
  LIS_INT *ptr,*index;
  double time,time2,nnzs,nnzap,nnzt;
  LIS_SCALAR val;
  double commtime,comptime,flops;

  LIS_DEBUG_FUNC_IN;
    
  lis_initialize(&argc, &argv);

#ifdef USE_MPI
  MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs);
  MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank);
  nprocs = int_nprocs;
  my_rank = int_my_rank;
#else
  nprocs  = 1;
  my_rank = 0;
#endif

  if( argc < 5 )
    {
      if( my_rank==0 ) 
	{
	  printf("Usage: %s l m n iter [matrix_type]\n", argv[0]);
	}
      CHKERR(1);
    }

  l  = atoi(argv[1]);
  m  = atoi(argv[2]);
  n  = atoi(argv[3]);
  iter = atoi(argv[4]);
  if (argv[5] == NULL) {
    storage = 0;
  }
  else {
    storage = atoi(argv[5]);
  }

  if( iter<=0 )
    {
#ifdef _LONG__LONG
      if( my_rank==0 ) printf("iter=%lld <= 0\n",iter);
#else
      if( my_rank==0 ) printf("iter=%d <= 0\n",iter);
#endif
      CHKERR(1);
    }
  if( l<=0 || m<=0 || n<=0 )
    {
#ifdef _LONG__LONG
      if( my_rank==0 ) printf("l=%lld <=0, m=%lld <=0 or n=%lld <=0\n",l,m,n);
#else
      if( my_rank==0 ) printf("l=%d <=0, m=%d <=0 or n=%d <=0\n",l,m,n);
#endif
      CHKERR(1);
    }
  if( storage<0 || storage>11 )
    {
#ifdef _LONG__LONG
      if( my_rank==0 ) printf("matrix_type=%lld < 0 or matrix_type=%lld > 11\n",storage,storage);
#else
      if( my_rank==0 ) printf("matrix_type=%d < 0 or matrix_type=%d > 11\n",storage,storage);
#endif
      CHKERR(1);
    }

  if( my_rank==0 )
    {
      printf("\n");
#ifdef _LONG__LONG
      printf("number of processes = %lld\n",nprocs);
#else
      printf("number of processes = %d\n",nprocs);
#endif
    }

#ifdef _OPENMP
  nthreads = omp_get_num_procs();
  maxthreads = omp_get_max_threads();
  if( my_rank==0 )
    {
#ifdef _LONG__LONG
      printf("max number of threads = %lld\n", nthreads);
      printf("number of threads = %lld\n", maxthreads);
#else
      printf("max number of threads = %d\n", nthreads);
      printf("number of threads = %d\n", maxthreads);
#endif
    }
#else
  nthreads = 1;
  maxthreads = 1;
#endif

  /* create matrix and vectors */
  nn = l*m*n;
  err = lis_matrix_create(LIS_COMM_WORLD,&A0);
  err = lis_matrix_set_size(A0,0,nn);
  CHKERR(err);

  ptr   = (LIS_INT *)malloc((A0->n+1)*sizeof(LIS_INT));
  if( ptr==NULL ) CHKERR(1);
  index = (LIS_INT *)malloc(27*A0->n*sizeof(LIS_INT));
  if( index==NULL ) CHKERR(1);
  value = (LIS_SCALAR *)malloc(27*A0->n*sizeof(LIS_SCALAR));
  if( value==NULL ) CHKERR(1);

  lis_matrix_get_range(A0,&is,&ie);
  ctr = 0;
  for(ii=is;ii<ie;ii++)
    {
      i = ii/(m*n);
      j = (ii - i*m*n)/n;
      k = ii - i*m*n - j*n;
      for(si=-1;si<=1;si++) {
	if( i+si>-1 && i+si<l ) {
	  for(sj=-1;sj<=1;sj++) {
	    if( j+sj>-1 && j+sj<m ) {
	      for(sk=-1;sk<=1;sk++) {
		if( k+sk>-1 && k+sk<n ) {
		  jj = ii + si*m*n + sj*n + sk; 
		  index[ctr] = jj; 
		  if( jj==ii ) { value[ctr++] = 26.0;}
		  else { value[ctr++] = -1.0;}
		}
	      }
	    }
	  }
	}
      }
      ptr[ii-is+1] = ctr;
    }
  ptr[0] = 0;
  err = lis_matrix_set_csr(ptr[ie-is],ptr,index,value,A0);
  CHKERR(err);
  err = lis_matrix_assemble(A0);
  CHKERR(err);

  n   = A0->n;
  gn  = A0->gn;
  nnz = A0->nnz;
  np  = A0->np-n;

#ifdef USE_MPI
  MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
  nnzap = (double)i / (double)nprocs;
  nnzt  = ((double)nnz -nnzap)*((double)nnz -nnzap);
  nnz   = i;
  MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm);
  nnzs  = (nnzs / (double)nprocs)/nnzap;
  MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
  np = i;
#endif

  if( my_rank==0 ) 
    {
#ifdef _LONG__LONG
      printf("matrix size = %lld x %lld (%lld nonzero entries)\n",gn,gn,nnz);
      printf("number of iterations = %lld\n\n",iter);
#else
      printf("matrix size = %d x %d (%d nonzero entries)\n",gn,gn,nnz);
      printf("number of iterations = %d\n\n",iter);
#endif
    }

  err = lis_vector_duplicate(A0,&x);
  if( err ) CHKERR(err);
  err = lis_vector_duplicate(A0,&b);
  if( err ) CHKERR(err);

  lis_matrix_get_range(A0,&is,&ie);
  for(i=0;i<n;i++)
    {
      err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x);
    }
  for(i=0;i<n;i++)
    {
      lis_sort_id(A0->ptr[i],A0->ptr[i+1]-1,A0->index,A0->value);
    }
		
  /* 
     MPI version of VBR is not implemented.
     DNS is also excluded to reduce memory usage.
  */

  if (storage==0) 
    {
      ss = 1;
      se = 11;
    }
  else
    {
      ss = storage;
      se = storage+1;
    }
	
  for (matrix_type=ss;matrix_type<se;matrix_type++)
    {
      if ( nprocs>1 && matrix_type==9 ) continue;
      lis_matrix_duplicate(A0,&A);
      lis_matrix_set_type(A,matrix_type);
      err = lis_matrix_convert(A0,A);
      if( err ) CHKERR(err);
		    
      comptime = 0.0;
      commtime = 0.0;

      for(i=0;i<iter;i++)
	{
#ifdef USE_MPI
	  MPI_Barrier(A->comm);
	  time = lis_wtime();
	  lis_send_recv(A->commtable,x->value);
	  commtime += lis_wtime() - time;
#endif
	  time2 = lis_wtime();
	  lis_matvec(A,x,b);
	  comptime += lis_wtime() - time2;
	}
      lis_vector_nrm2(b,&val);

      if( my_rank==0 )
	{
	  flops = 2.0*nnz*iter*1.0e-6 / comptime;
#ifdef USE_MPI
#ifdef _LONG__DOUBLE
#ifdef _LONG__LONG
	  printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#else
	  printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#endif
#else
#ifdef _LONG__LONG
	  printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#else
	  printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#endif
#endif
#else
#ifdef _LONG__DOUBLE
#ifdef _LONG__LONG
	  printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#else
	  printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#endif
#else
#ifdef _LONG__LONG
	  printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#else
	  printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#endif
#endif
#endif
	}
      lis_matrix_destroy(A);
    }

  lis_matrix_destroy(A0);
  lis_vector_destroy(b);
  lis_vector_destroy(x);

  lis_finalize();

  LIS_DEBUG_FUNC_OUT;

  return 0;
}
Exemple #17
0
LIS_INT lis_egpi(LIS_ESOLVER esolver)
{
  LIS_Comm comm;
  LIS_MATRIX A,B;
  LIS_VECTOR w,v,y,q;
  LIS_SCALAR eta,theta;
  LIS_INT emaxiter;
  LIS_REAL tol;
  LIS_INT iter,iter2,output;
  LIS_SCALAR oshift,ishift;
  LIS_REAL nrm2,resid;
  LIS_SOLVER solver;
  double time,itime,ptime,p_c_time,p_i_time;

  LIS_INT err;
  LIS_PRECON precon;
  LIS_INT nsol, precon_type;
  char solvername[128], preconname[128];

  LIS_DEBUG_FUNC_IN;

  comm = LIS_COMM_WORLD;  

  emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
  tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]; 
  output  = esolver->options[LIS_EOPTIONS_OUTPUT];
#ifdef _COMPLEX
  oshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN] + esolver->params[LIS_EPARAMS_SHIFT_IM - LIS_EOPTIONS_LEN] * _Complex_I;
#else
  oshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN];
#endif	
  
  A = esolver->A;
  B = esolver->B;  
  v = esolver->x;
  if (esolver->options[LIS_EOPTIONS_INITGUESS_ONES] ) 
    {
      lis_vector_set_all(1.0,v);
    }
  w = esolver->work[0];  
  y = esolver->work[1];
  q = esolver->work[2];

  if ( esolver->ishift != 0.0 ) oshift = ishift;
  if ( oshift != 0.0 ) lis_matrix_shift_matrix(A, B, oshift);

  if( output )
    {
#ifdef _COMPLEX
      lis_printf(comm,"shift                 : (%e, %e)\n", (double)creal(oshift), (double)cimag(oshift));
#else
      lis_printf(comm,"shift                 : %e\n", (double)oshift);
#endif
    }
  
  lis_solver_create(&solver);
  lis_solver_set_option("-i bicg -p none",solver);
  err = lis_solver_set_optionC(solver);
  CHKERR(err);
  lis_solver_get_solver(solver, &nsol);
  lis_solver_get_precon(solver, &precon_type);
  lis_solver_get_solvername(nsol, solvername);
  lis_solver_get_preconname(precon_type, preconname);
  if( output )
    {
      lis_printf(comm,"linear solver         : %s\n", solvername);
      lis_printf(comm,"preconditioner        : %s\n", preconname);
    }

  /* create preconditioner */
  solver->A = B;
  err = lis_precon_create(solver, &precon);
  if( err )
    {
      lis_solver_work_destroy(solver);
      solver->retcode = err;
      return err;
    }

  iter=0;
  while (iter<emaxiter)
    {
      iter = iter+1;

      /* v = v / ||v||_2 */
      lis_vector_nrm2(v, &nrm2);
      lis_vector_scale(1.0/nrm2, v);

      /* w = A * v */
      lis_matvec(A, v, w);

      /* v = v / <v,w>^1/2, w = w / <v,w>^1/2 */
      lis_vector_dot(v, w, &eta);
      eta = sqrt(eta);
      lis_vector_scale(1.0/eta, v);
      lis_vector_scale(1.0/eta, w);

      /* y = B^-1 * w */
      err = lis_solve_kernel(B, w, y, solver, precon);
      if( err )
	{
	  lis_solver_work_destroy(solver);	  
	  solver->retcode = err;
	  return err;
	}
      lis_solver_get_iter(solver, &iter2);

      /* theta = <w,y> */
      lis_vector_dot(w, y, &theta); 

      /* resid = ||y - theta * v||_2 / |theta| */
      lis_vector_axpyz(-theta, v, y, q); 
      lis_vector_nrm2(q, &resid); 
      resid = resid / fabs(theta);

      /* v = y */
      lis_vector_copy(y, v);

      /* convergence check */
      lis_solver_get_timeex(solver,&time,&itime,&ptime,&p_c_time,&p_i_time);
      esolver->ptime += solver->ptime;
      esolver->itime += solver->itime;
      esolver->p_c_time += solver->p_c_time;
      esolver->p_i_time += solver->p_i_time;

      if( output )
	{
	  if( output & LIS_EPRINT_MEM ) esolver->rhistory[iter] = resid;
	  if( output & LIS_EPRINT_OUT ) lis_print_rhistory(comm,iter,resid);
	}

      if( tol >= resid )
	{
	  esolver->retcode    = LIS_SUCCESS;
	  esolver->iter[0]    = iter;
	  esolver->resid[0]   = resid;
	  esolver->evalue[0]  = theta + oshift;
	  lis_vector_nrm2(v, &nrm2);
	  lis_vector_scale(1.0/nrm2, v);
	  if ( oshift != 0.0 ) lis_matrix_shift_matrix(A, B, -oshift);	  
	  lis_precon_destroy(precon);
	  lis_solver_destroy(solver); 
	  LIS_DEBUG_FUNC_OUT;
	  return LIS_SUCCESS;
	}
    }

  lis_precon_destroy(precon);

  esolver->retcode   = LIS_MAXITER;
  esolver->iter[0]   = iter;
  esolver->resid[0]  = resid;
  esolver->evalue[0] = theta + oshift;
  lis_vector_nrm2(v, &nrm2);
  lis_vector_scale(1.0/nrm2, v);
  if ( oshift != 0.0 ) lis_matrix_shift_matrix(A, B, -oshift);  
  lis_solver_destroy(solver); 
  LIS_DEBUG_FUNC_OUT;
  return LIS_MAXITER;
}
LIS_INT lis_solve_kernel(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, LIS_SOLVER solver, LIS_PRECON precon)
{
	LIS_INT			nsolver, precon_type, maxiter;
	LIS_INT			err;
	LIS_SCALAR	*residual;
	LIS_VECTOR	xx;

	LIS_INT output;
	LIS_INT scale;
	LIS_INT conv_cond;
	LIS_INT precision,is_use_at,storage,block;
	LIS_INT i,n,np;
	double p_c_times, p_i_times,itimes;
	LIS_SCALAR nrm2,tol,tol_w;
	LIS_VECTOR t;
	LIS_VECTOR bb;
	LIS_MATRIX AA,B;
	LIS_MATRIX At;
	char buf[64];

	LIS_DEBUG_FUNC_IN;

	nsolver     = solver->options[LIS_OPTIONS_SOLVER];
	precon_type = solver->options[LIS_OPTIONS_PRECON];
	maxiter     = solver->options[LIS_OPTIONS_MAXITER];
	output      = solver->options[LIS_OPTIONS_OUTPUT];
	scale       = solver->options[LIS_OPTIONS_SCALE];
	precision   = solver->options[LIS_OPTIONS_PRECISION];
	is_use_at   = solver->options[LIS_OPTIONS_USE_AT];
	storage     = solver->options[LIS_OPTIONS_STORAGE];
	block       = solver->options[LIS_OPTIONS_STORAGE_BLOCK];
	conv_cond   = solver->options[LIS_OPTIONS_CONV_COND];
	tol         = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol_w       = solver->params[LIS_PARAMS_RESID_WEIGHT-LIS_OPTIONS_LEN];
	solver->precision = precision;

	if( nsolver < 1 || nsolver > LIS_SOLVERS_LEN )
	{
		LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_OPTIONS_SOLVER is %d (Set between 1 to %d)\n",nsolver, LIS_SOLVERS_LEN);
		return LIS_ERR_ILL_ARG;
	}
	if( precon_type < 0 || precon_type > precon_register_type )
	{
		LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_OPTIONS_PRECON is %d (Set between 0 to %d)\n",precon_type, precon_register_type-1);
		return LIS_ERR_ILL_ARG;
	}
	if( maxiter<0 )
	{
		LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_OPTIONS_MAXITER(=%d) is less than 0\n",maxiter);
		return LIS_ERR_ILL_ARG;
	}
	#ifdef USE_MPI
	if( precon_type == LIS_PRECON_TYPE_SAAMG  && solver->A->nprocs < 2)
	{
		LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter A->nprocs (=%d) is less than 2 (Set more than 1 when using parallel version of SAAMG)\n",solver->A->nprocs);
		return LIS_ERR_ILL_ARG;
	}
	#endif
	#ifdef USE_QUAD_PRECISION
		if( precision==LIS_PRECISION_QUAD && lis_solver_execute_quad[nsolver]==NULL )
		{
			LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Quad precision solver %s is not implemented\n",lis_solvername[nsolver]);
			return LIS_ERR_NOT_IMPLEMENTED;
		}
		else if( precision==LIS_PRECISION_SWITCH && lis_solver_execute_switch[nsolver]==NULL )
		{
			LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Switch solver %s is not implemented\n",lis_solvername[nsolver]);
			return LIS_ERR_NOT_IMPLEMENTED;
		}
		if( solver->options[LIS_OPTIONS_SWITCH_MAXITER]==-1 )
		{
			solver->options[LIS_OPTIONS_SWITCH_MAXITER] = maxiter;
		}
	#endif

	err = lis_solver_check_params[nsolver](solver);
	if( err )
	{
		solver->retcode = err;
		return err;
	}
	/* end parameter check */

	solver->A        = A;
	solver->b        = b;

	/* create initial vector */
	#ifndef USE_QUAD_PRECISION
		err = lis_vector_duplicate(A,&xx);
	#else
		if( precision==LIS_PRECISION_DOUBLE )
		{
			err = lis_vector_duplicate(A,&xx);
		}
		else
		{
			err = lis_vector_duplicateex(LIS_PRECISION_QUAD,A,&xx);
		}
	#endif
	if( err )
	{
		solver->retcode = err;
		return err;
	}
	if( solver->options[LIS_OPTIONS_INITGUESS_ZEROS] )
	{
	  if( output ) lis_printf(A->comm,"initial vector x = 0\n");
		#ifndef USE_QUAD_PRECISION
			lis_vector_set_all(0.0,xx);
		#else
			if( precision==LIS_PRECISION_DOUBLE )
			{
				lis_vector_set_all(0.0,xx);
			}
			else
			{
				lis_vector_set_allex_nm(0.0,xx);
			}
		#endif
	}
	else
	{
	  if( output ) lis_printf(A->comm,"initial vector x = user defined\n"); 
		#ifndef USE_QUAD_PRECISION
			lis_vector_copy(x,xx);
		#else
			if( precision==LIS_PRECISION_DOUBLE )
			{
				lis_vector_copy(x,xx);
			}
			else
			{
				lis_vector_copyex_nm(x,xx);
			}
		#endif
	}

	/* create residual history vector */
	if( solver->residual ) lis_free(solver->residual);
	residual = (LIS_SCALAR *)lis_malloc((maxiter+2)*sizeof(LIS_SCALAR),"lis_solve::residual");
	if( residual==NULL )
	{
		LIS_SETERR_MEM((maxiter+2)*sizeof(LIS_SCALAR));
		lis_vector_destroy(xx);
		solver->retcode = err;
		return err;
	}
	residual[0] = 1.0;


	n       = A->n;
	np      = A->np;
	t       = NULL;
	At      = NULL;


	p_c_times = lis_wtime();
	if( precon_type==LIS_PRECON_TYPE_IS )
	{
		if( solver->d==NULL )
		{
			err = lis_vector_duplicate(A,&solver->d);
			if( err )
			{
				return err;
			}
		}
		if( !A->is_scaled )
		{
			lis_matrix_scaling(A,b,solver->d,LIS_SCALE_JACOBI);
		}
		else if( !b->is_scaled )
		{
			#ifdef _OPENMP
			#pragma omp parallel for
			#endif
			for(i=0;i<n;i++)
			{
				b->value[i] = b->value[i]*solver->d->value[i];
			}
		}
		if( nsolver >= LIS_SOLVER_JACOBI && nsolver <= LIS_SOLVER_SOR )
		{
			solver->options[LIS_OPTIONS_ISLEVEL] = 0;
		}
	}
	else if( nsolver >= LIS_SOLVER_JACOBI && nsolver <= LIS_SOLVER_SOR && precon_type!=LIS_PRECON_TYPE_NONE )
	{
		if( solver->d==NULL )
		{
			err = lis_vector_duplicate(A,&solver->d);
			if( err )
			{
				return err;
			}
		}
		if( !A->is_scaled )
		{
			lis_matrix_scaling(A,b,solver->d,LIS_SCALE_JACOBI);
		}
	}
	else if( scale )
	{
		if( storage==LIS_MATRIX_BSR && scale==LIS_SCALE_JACOBI )
		{
			if( A->matrix_type!=LIS_MATRIX_BSR )
			{
				err = lis_matrix_duplicate(A,&B);
				if( err ) return err;
				lis_matrix_set_blocksize(B,block,block,NULL,NULL);
				lis_matrix_set_type(B,storage);
				err = lis_matrix_convert(A,B);
				if( err ) return err;
				lis_matrix_storage_destroy(A);
				lis_matrix_DLU_destroy(A);
				lis_matrix_diag_destroy(A->WD);
				if( A->l2g_map ) lis_free( A->l2g_map );
				if( A->commtable ) lis_commtable_destroy( A->commtable );
				if( A->ranges ) lis_free( A->ranges );
				err = lis_matrix_copy_struct(B,A);
				if( err ) return err;
				lis_free(B);
			}
			err = lis_matrix_split(A);
			if( err ) return err;
			err = lis_matrix_diag_duplicate(A->D,&solver->WD);
			if( err ) return err;
			lis_matrix_diag_copy(A->D,solver->WD);
			lis_matrix_diag_inverse(solver->WD);
			lis_matrix_bscaling_bsr(A,solver->WD);
			lis_vector_duplicate(A,&t);
			lis_matrix_diag_matvec(solver->WD,b,t);
			lis_vector_copy(t,b);
			lis_vector_destroy(t);
			t = NULL;
		}
		else
		{
			if( solver->d==NULL )
			{
				err = lis_vector_duplicate(A,&solver->d);
				if( err )
				{
					return err;
				}
			}
			if( scale==LIS_SCALE_JACOBI && nsolver==LIS_SOLVER_CG )
			{
				scale = LIS_SCALE_SYMM_DIAG;
			}
			if( !A->is_scaled )
			{
				lis_matrix_scaling(A,b,solver->d,scale);
			}
			else if( !b->is_scaled )
			{
				#ifdef _OPENMP
				#pragma omp parallel for
				#endif
				for(i=0;i<n;i++)
				{
					b->value[i] = b->value[i]*solver->d->value[i];
				}
			}
		}
	}

/*	precon_type = precon->precon_type;*/
	if( precon_type==LIS_PRECON_TYPE_IS )
	{
		if( nsolver < LIS_SOLVER_JACOBI || nsolver > LIS_SOLVER_SOR )
		{
			AA = solver->A;
			bb = solver->b;
		}
		else
		{
			AA = precon->A;
			bb = precon->Pb;
		}
	}
	else
	{
		AA = A;
		bb = b;
	}

	p_c_times = lis_wtime() - p_c_times;
	itimes = lis_wtime();

	/* Matrix Convert */
	solver->A  = AA;
	solver->b  = bb;
	err = lis_matrix_convert_self(solver);
	if( err )
	{
		lis_vector_destroy(xx);
		lis_solver_work_destroy(solver);
		lis_free(residual);
		solver->retcode = err;
		return err;
	}
	block = solver->A->bnr;

	if( A->my_rank==0 )
	{
	  if( output ) printf("precision : %s\n", lis_precisionname[precision]); 
	  if( output ) printf("solver    : %s %d\n", lis_solvername[nsolver],nsolver); 
		switch( precon_type )
		{
		case LIS_PRECON_TYPE_ILU:
			i = solver->options[LIS_OPTIONS_FILL];
			if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_VBR )
			{
			  if( output ) sprintf(buf,"Block %s(%d)",lis_preconname[precon_type],i); 
			}
			else
			{
			  if( output ) sprintf(buf,"%s(%d)",lis_preconname[precon_type],i); 
			}
			break;
		default:
		  if( output ) sprintf(buf,"%s",lis_preconname[precon_type]); 
			break;
		}
		if( solver->options[LIS_OPTIONS_ADDS] && precon_type )
		{
		  if( output ) printf("precon    : %s + additive schwarz\n", buf); 
		}
		else
		{
		  if( output ) printf("precon    : %s\n", buf); 
		}
	}
	switch(conv_cond)
	{
	case LIS_CONV_COND_NRM2_R:
	case LIS_CONV_COND_NRM2_B:
		if( A->my_rank==0 )
		{
		  if( output ) ("CONV_COND : ||r||_2 <= %6.1e*||r_0||_2\n", tol); 
		}
		break;
	case LIS_CONV_COND_NRM1_B:
		lis_vector_nrm1(b,&nrm2);
		nrm2 = nrm2*tol_w + tol;
		if( A->my_rank==0 )
		{
		  if( output ) printf("conv_cond : ||r||_1 <= %6.1e*||b||_1 + %6.1e = %6.1e\n", tol_w,tol,nrm2);
		}
		break;
	}
	if( A->my_rank==0 )
	{
		if( AA->matrix_type==LIS_MATRIX_BSR || AA->matrix_type==LIS_MATRIX_BSC )
		{
		  if( output ) printf("storage   : %s(%d x %d)\n", lis_storagename[AA->matrix_type-1],block,block); 
		}
		else
		{
		  if( output ) printf("storage   : %s\n", lis_storagename[AA->matrix_type-1]); 
		}
	}


	/* create work vector */
	err = lis_solver_malloc_work[nsolver](solver); 
	if( err )
	{
		lis_vector_destroy(xx);
		lis_precon_destroy(precon);
		solver->retcode = err;
		return err;
	}
	if( nsolver==LIS_SOLVER_BICG && is_use_at )
	{
	  if( output ) lis_printf(A->comm,"Use At\n"); 
		lis_matrix_duplicate(AA,&At);
		lis_matrix_set_type(At,LIS_USE_AT_TYPE[AA->matrix_type]);
		lis_matrix_convert(AA,At);
		solver->At = At;
	}

	solver->x        = xx;
	solver->xx       = x;
	solver->precon   = precon;
	solver->residual = residual;

	/* execute solver */
	#ifndef USE_QUAD_PRECISION
		err = lis_solver_execute[nsolver](solver);
	#else
		if( precision==LIS_PRECISION_DOUBLE )
		{
			err = lis_solver_execute[nsolver](solver);
		}
		else if( precision==LIS_PRECISION_QUAD )
		{
			err = lis_solver_execute_quad[nsolver](solver);
		}
		else if( precision==LIS_PRECISION_SWITCH )
		{
			err = lis_solver_execute_switch[nsolver](solver);
		}
	#endif
	solver->retcode = err;

	if( scale==LIS_SCALE_SYMM_DIAG && precon_type!=LIS_PRECON_TYPE_IS)
	{
		#ifdef _OPENMP
		#pragma omp parallel for
		#endif
		for(i=0;i<n;i++)
		{
			x->value[i] = xx->value[i]*solver->d->value[i];
		}
	}
	else
	{
		#ifndef USE_QUAD_PRECISION
			lis_vector_copy(xx,x);
		#else
			if( precision==LIS_PRECISION_DOUBLE )
			{
				lis_vector_copy(xx,x);
			}
			else
			{
				lis_vector_copyex_mn(xx,x);
			}
		#endif
	}
	itimes = lis_wtime() - itimes - solver->ptimes;
	p_i_times = solver->ptimes;
	solver->ptimes = p_c_times + p_i_times;
	solver->p_c_times = p_c_times;
	solver->p_i_times = p_i_times;
	solver->times  = solver->ptimes + itimes;
	solver->itimes = itimes;
	lis_solver_work_destroy(solver);
	lis_vector_duplicate(A,&t);
	xx->precision = LIS_PRECISION_DEFAULT;
	lis_matvec(A,xx,t);
	lis_vector_xpay(b,-1.0,t);
	if( scale==LIS_SCALE_SYMM_DIAG && precon_type!=LIS_PRECON_TYPE_IS)
	{
		#ifdef _OPENMP
		#pragma omp parallel for
		#endif
		for(i=0;i<n;i++)
		{
			t->value[i] = t->value[i]/solver->d->value[i];
		}
	}
	lis_vector_nrm2(t,&nrm2);
	/*
	solver->resid = nrm2;
	*/
	if( A->my_rank==0 )
	{
		if( err )
		{
		  if( output ) printf("lis_solve : %s(code=%d)\n\n",lis_returncode[err],err); 

		}
		else
		{
		  if( output ) printf("lis_solve : normal end\n\n"); 
		}
	}
	if( precision==LIS_PRECISION_DOUBLE )
	{
		solver->iter2 = solver->iter;
	}
	else if( precision==LIS_PRECISION_QUAD )
	{
		solver->iter2 = 0;
	}


	lis_vector_destroy(t);
/*	lis_vector_destroy(d);*/
	lis_vector_destroy(xx);

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
Exemple #19
0
LIS_INT main(LIS_INT argc, char* argv[])
{
  LIS_MATRIX A,A0;
  LIS_VECTOR b,x;
  LIS_INT nprocs,my_rank;
  int int_nprocs,int_my_rank;
  LIS_INT nthreads, maxthreads;
  LIS_INT nnz;
  LIS_INT i,n,np;
  LIS_INT block;
  LIS_INT is,ie;
  LIS_INT err,iter,matrix_type;
  double time,time2,nnzs,nnzap,nnzt;
  LIS_SCALAR val;
  double commtime,comptime,flops;
  char path[1024];
  FILE *file;
  
  LIS_DEBUG_FUNC_IN;
    
  lis_initialize(&argc, &argv);

#ifdef USE_MPI
  MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs);
  MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank);
  nprocs = int_nprocs;
  my_rank = int_my_rank;
#else
  nprocs  = 1;
  my_rank = 0;
#endif

  if( argc < 3 )
    {
      if( my_rank==0 ) 
	{
	  printf("Usage: %s matrix_filename_list iter [block] \n", argv[0]);
	}
      lis_finalize();
      exit(0);
    }

  file = fopen(argv[1], "r");
  if( file==NULL ) CHKERR(1);

  iter = atoi(argv[2]);
  if (argv[3] == NULL) {
    block = 2;
  }
  else {
    block = atoi(argv[3]);
  }

  if( iter<=0 )
    {
#ifdef _LONG__LONG
      printf("iter=%lld <= 0\n",iter);
#else
      printf("iter=%d <= 0\n",iter);
#endif
      CHKERR(1);
    }

  if( my_rank==0 )
    {
      printf("\n");
#ifdef _LONG__LONG
      printf("number of processes = %lld\n",nprocs);
#else
      printf("number of processes = %d\n",nprocs);
#endif
    }

#ifdef _OPENMP
  if( my_rank==0 )
    {
      nthreads = omp_get_num_procs();
      maxthreads = omp_get_max_threads();
#ifdef _LONG__LONG
      printf("max number of threads = %lld\n", nthreads);
      printf("number of threads = %lld\n", maxthreads);
#else
      printf("max number of threads = %d\n", nthreads);
      printf("number of threads = %d\n", maxthreads);
#endif
    }
#else
      nthreads = 1;
      maxthreads = 1;
#endif

  /* create matrix and vectors */
  while( fscanf(file, "%s\n", path)==1 )
    {

      if( my_rank==0 )
	{
	  printf("matrix_filename = %s\n", path);
	}
      lis_matrix_create(LIS_COMM_WORLD,&A0);
      err = lis_input(A0,NULL,NULL,path);
      if( err ) CHKERR(err);

      n   = A0->n;
      nnz = A0->nnz;
      np  = A0->np-n;
#ifdef USE_MPI
      MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
      nnzap = (double)i / (double)nprocs;
      nnzt  = ((double)nnz -nnzap)*((double)nnz -nnzap);
      nnz   = i;
      MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm);
      nnzs  = (nnzs / (double)nprocs)/nnzap;
      MPI_Allreduce(&np,&i,1,LIS_MPI_INT,MPI_SUM,A0->comm);
      np = i;
#endif

      if( my_rank==0 ) 
	{
#ifdef _LONG__LONG
	  printf("block size of BSR and BSC = %lld x %lld\n",block,block);
	  printf("number of iterations = %lld\n\n",iter);
#else
	  printf("block size of BSR and BSC = %d x %d\n",block,block);
	  printf("number of iterations = %d\n\n",iter);
#endif
	}

      err = lis_vector_duplicate(A0,&x);
      if( err ) CHKERR(err);
      err = lis_vector_duplicate(A0,&b);
      if( err ) CHKERR(err);

      lis_matrix_get_range(A0,&is,&ie);
      for(i=0;i<n;i++)
	{
	  err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x);
	}
		
      /* 
	 MPI version of VBR is not implemented.
	 DNS is also excluded to reduce memory usage.
      */

      for (matrix_type=1;matrix_type<11;matrix_type++)
	{
	  if ( nprocs>1 && matrix_type==9 ) continue;
	  lis_matrix_duplicate(A0,&A);
	  lis_matrix_set_type(A,matrix_type);
	  err = lis_matrix_convert(A0,A);
	  if( err ) CHKERR(err);

	  if( my_rank==0 ) 
	    {
	      if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_BSC )
		{
		  A->bnr = block;
		  A->bnc = block;
		}
	    }

	  comptime = 0.0;
	  commtime = 0.0;

	  for(i=0;i<iter;i++)
	    {
#ifdef USE_MPI
	      MPI_Barrier(A->comm);
	      time = lis_wtime();
	      lis_send_recv(A->commtable,x->value);
	      commtime += lis_wtime() - time;
#endif
	      time2 = lis_wtime();
	      lis_matvec(A,x,b);
	      comptime += lis_wtime() - time2;
	    }
	  lis_vector_nrm2(b,&val);

	  if( my_rank==0 )
	    {
	      flops = 2.0*nnz*iter*1.0e-6 / comptime;
#ifdef USE_MPI
#ifdef _LONG__DOUBLE
#ifdef _LONG__LONG
	      printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#else
	      printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#endif
#else
#ifdef _LONG__LONG
	      printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#else
	      printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, communication = %e sec, communication/computation = %3.3f %%, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,commtime,commtime/comptime*100,val);
#endif
#endif
#else
#ifdef _LONG__DOUBLE
#ifdef _LONG__LONG
	      printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#else
	      printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %Le\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#endif
#else
#ifdef _LONG__LONG
	      printf("matrix_type = %2lld (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#else
	      printf("matrix_type = %2d (%s), computation = %e sec, %8.3f MFLOPS, 2-norm = %e\n",matrix_type,lis_storagename2[matrix_type-1],comptime,flops,val);
#endif
#endif
#endif
	    }
	  lis_matrix_destroy(A);
	}
      
      lis_matrix_destroy(A0);
      lis_vector_destroy(b);
      lis_vector_destroy(x);
    }

  fclose(file);

  lis_finalize();

  LIS_DEBUG_FUNC_OUT;

  return 0;
}
Exemple #20
0
LIS_INT lis_fgmres_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,s,*z,*v;
	LIS_QUAD *h;
	LIS_QUAD_PTR aa,bb,rr,a2,b2,t,one,tmp;

	LIS_REAL bnrm2,nrm2,tol;
	LIS_INT iter,maxiter,n,output;
	double time,ptime;

	LIS_REAL rnorm;
	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	m       = solver->options[LIS_OPTIONS_RESTART];
	h_dim   = m+1;
	ptime   = 0.0;

	s       = solver->work[0];
	r       = solver->work[1];
	z       = &solver->work[2];
	v       = &solver->work[m+2];

	h       = (LIS_QUAD *)lis_malloc( sizeof(LIS_QUAD)*(h_dim+1)*(h_dim+2),"lis_fgmres_quad::h" );
	cs      = (m+1)*h_dim;
	sn      = (m+2)*h_dim;

	LIS_QUAD_SCALAR_MALLOC(aa,0,1);
	LIS_QUAD_SCALAR_MALLOC(bb,1,1);
	LIS_QUAD_SCALAR_MALLOC(rr,2,1);
	LIS_QUAD_SCALAR_MALLOC(a2,3,1);
	LIS_QUAD_SCALAR_MALLOC(b2,4,1);
	LIS_QUAD_SCALAR_MALLOC(t,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(one,7,1);

	one.hi[0]   = 1.0;
	one.lo[0]   = 0.0;

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;
	rnorm   = 1.0/bnrm2;


	iter=0;
	while( iter<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_scaleex_nm(bnrm2,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_allex_nm(0.0,s);
		s->value[0]    = rnorm;
		s->value_lo[0] = 0.0;

		i = 0;
		do
		{
			iter++;
			i++;
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;


			/* z = M^-1 * v */
			time = lis_wtime();
			lis_psolve(solver,v[iiv],z[iiv]);
			ptime += lis_wtime()-time;

			/* w = A * z */
			lis_matvec(A,z[iiv], v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>          */
				/* w        = w - h[k,i] * v[k] */
				lis_vector_dotex_mmm(v[i1v],v[k],&t);
				h[k+iih].hi = t.hi[0];
				h[k+iih].lo = t.lo[0];
				lis_quad_minus((LIS_QUAD *)t.hi);
				lis_vector_axpyex_mmm(t,v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2ex_mm(v[i1v],&t);
			h[i1+iih].hi = t.hi[0];
			h[i1+iih].lo = t.lo[0];
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)t.hi);
			lis_vector_scaleex_mm(tmp,v[i1v]);

			for(k=1;k<=ii;k++)
			{
				jj  = k-1;
				t.hi[0]   =  h[jj+iih].hi;
				t.lo[0]   =  h[jj+iih].lo;
				lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)t.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
				lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)t.hi);
				lis_quad_minus((LIS_QUAD *)bb.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)bb.hi,(LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi);
				h[jj+iih].hi = aa.hi[0];
				h[jj+iih].lo = aa.lo[0];
				h[k+iih].hi = bb.hi[0];
				h[k+iih].lo = bb.lo[0];
			}
			aa.hi[0] = h[ii+iih].hi;
			aa.lo[0] = h[ii+iih].lo;
			bb.hi[0] = h[i1+iih].hi;
			bb.lo[0] = h[i1+iih].lo;
			lis_quad_sqr((LIS_QUAD *)a2.hi,(LIS_QUAD *)aa.hi);
			lis_quad_sqr((LIS_QUAD *)b2.hi,(LIS_QUAD *)bb.hi);
			lis_quad_add((LIS_QUAD *)rr.hi,(LIS_QUAD *)a2.hi,(LIS_QUAD *)b2.hi);
			lis_quad_sqrt((LIS_QUAD *)rr.hi,(LIS_QUAD *)rr.hi);
			if( rr.hi[0]==0.0 )
			{
				rr.hi[0]=1.0e-17;
				rr.lo[0]=0.0;
			}
			lis_quad_div((LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)aa.hi,(LIS_QUAD *)rr.hi);
			lis_quad_div((LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)bb.hi,(LIS_QUAD *)rr.hi);
			tmp.hi[0] = s->value[ii];
			tmp.lo[0] = s->value_lo[ii];
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)tmp.hi);
			lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)tmp.hi);
			lis_quad_minus((LIS_QUAD *)aa.hi);
			s->value[i1] = aa.hi[0];
			s->value_lo[i1] = aa.lo[0];
			s->value[ii] = bb.hi[0];
			s->value_lo[ii] = bb.lo[0];

			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)&h[ii+iih]);
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)&h[i1+iih]);
			lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
			h[ii+iih].hi = aa.hi[0];
			h[ii+iih].lo = aa.lo[0];

			/* convergence check */
			nrm2 = fabs(s->value[i1]);

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter <maxiter );

		/* Solve H * Y = S for upper Hessenberg matrix H */
		tmp.hi[0] = s->value[ii];
		tmp.lo[0] = s->value_lo[ii];
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+iih]);
		s->value[ii] = tmp.hi[0];
		s->value_lo[ii] = tmp.lo[0];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t.hi[0]  = s->value[jj];
			t.lo[0]  = s->value_lo[jj];
			for(j=jj+1;j<=ii;j++)
			{
				tmp.hi[0] = s->value[j];
				tmp.lo[0] = s->value_lo[j];
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+j*h_dim]);
				lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)tmp.hi);
			}
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)&h[jj+jj*h_dim]);
			s->value[jj] = tmp.hi[0];
			s->value_lo[jj] = tmp.lo[0];
		}
		/* x = x + y * z */
		for(j=0;j<=ii;j++)
		{
			aa.hi[0] = s->value[j];
			aa.lo[0] = s->value_lo[j];
			lis_vector_axpyex_mmm(aa,z[j],x);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		lis_matvec(A,x,v[0]);
		lis_vector_xpay(b,-1.0,v[0]);
		memset(v[0]->value_lo,0,n*sizeof(LIS_SCALAR));
		lis_vector_nrm2(v[0],&rnorm);
		bnrm2 = 1.0/rnorm;
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter+1;
	solver->resid     = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
LIS_INT lis_ecg(LIS_ESOLVER esolver)
{
  LIS_MATRIX        A;
  LIS_VECTOR        x;
  LIS_SCALAR        evalue;
  LIS_INT               emaxiter;
  LIS_REAL          tol;
  LIS_INT               iter,iter3,nsolver,i,j,output;
  LIS_INT               nprocs,my_rank;
  LIS_REAL          nrm2,resid,resid3;
  LIS_SCALAR        lshift;
  LIS_VECTOR        b,D,r,w,p,Aw,Ax,Ap,ones,Ds;
  LIS_SCALAR        *SA, *SB, *SW, *v3, *SAv3, *SBv3, *z3, *q3, *SBz3, evalue3, ievalue3;
  LIS_SOLVER        solver;
  LIS_PRECON        precon;
  LIS_MATRIX        A0;
  LIS_VECTOR        x0,z,q;
  double	    times,itimes,ptimes,p_c_times,p_i_times;
  LIS_INT           nsol, precon_type;
  char              solvername[128], preconname[128];

  A = esolver->A;
  x = esolver->x;
  
  emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
  tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]; 
  output  = esolver->options[LIS_EOPTIONS_OUTPUT];
  lshift = esolver->lshift;

  if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
  if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);

  SA = (LIS_SCALAR *)lis_malloc(3*3*sizeof(LIS_SCALAR), "lis_ecg::SA");
  SB = (LIS_SCALAR *)lis_malloc(3*3*sizeof(LIS_SCALAR), "lis_ecg::SB");
  SW = (LIS_SCALAR *)lis_malloc(3*3*sizeof(LIS_SCALAR), "lis_ecg::SW");
  v3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::v3");
  SAv3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::SAv3");
  SBv3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::SBv3");
  SBz3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::SBz3");
  z3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::z3");
  q3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::q3");

  b = esolver->work[0];
  D = esolver->work[1];
  Ds = esolver->work[2];
  r = esolver->work[3];
  w = esolver->work[4];
  p = esolver->work[5];
  Aw = esolver->work[6];
  Ax = esolver->work[7];
  Ap = esolver->work[8];

  lis_vector_set_all(1.0,b);
  lis_vector_nrm2(b, &nrm2);
  lis_vector_scale(1/nrm2, b);
  lis_solver_create(&solver);
  lis_solver_set_option("-i bicg -p ilu",solver);
  lis_solver_set_optionC(solver);
  lis_solver_get_solver(solver, &nsol);
  lis_solver_get_precon(solver, &precon_type);
  lis_get_solvername(nsol, solvername);
  lis_get_preconname(precon_type, preconname);
  printf("solver     : %s %d\n", solvername, nsol);
  printf("precon     : %s %d\n", preconname, precon_type);
  lis_solve(A, b, x, solver);
  lis_vector_copy(b,Ax);

  lis_vector_nrm2(x, &nrm2);
  lis_vector_set_all(0.0,p);
  lis_vector_set_all(0.0,Ap);

  lis_precon_create(solver, &precon);
  solver->precon = precon;

  iter=0;

  while (iter<emaxiter)
    {
      iter = iter + 1;

      lis_vector_dot(x,Ax,&evalue);
      lis_vector_axpyz(-(evalue),x,Ax,r); 
      lis_vector_nrm2(r, &nrm2);
      resid = fabs(nrm2/(evalue));

      if( output )
	{
	  if( output & LIS_EPRINT_MEM ) esolver->residual[iter] = resid;
	  if( output & LIS_EPRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, resid);
	}

      if (resid<tol) break;  

      lis_psolve(solver, x, w);
      lis_vector_copy(x,Aw);
      lis_vector_nrm2(w, &nrm2);

      lis_vector_dot(w,Aw,&SA[0]);
      lis_vector_dot(x,Aw,&SA[3]);
      lis_vector_dot(p,Aw,&SA[6]);
      SA[1] = SA[3];
      lis_vector_dot(x,Ax,&SA[4]);
      lis_vector_dot(p,Ax,&SA[7]);
      SA[2] = SA[6];
      SA[5] = SA[7];
      lis_vector_dot(p,Ap,&SA[8]);

      lis_vector_dot(w,w,&SB[0]);
      lis_vector_dot(x,w,&SB[3]);
      lis_vector_dot(p,w,&SB[6]);
      SB[1] = SB[3];
      lis_vector_dot(x,x,&SB[4]);
      lis_vector_dot(p,x,&SB[7]);
      SB[2] = SB[6];
      SB[5] = SB[7];
      lis_vector_dot(p,p,&SB[8]);
      
      lis_array_set_all(3, 1.0, v3);

      iter3=0;
      while (iter3<emaxiter)
	{
	  iter3 = iter3 + 1;
	  lis_array_nrm2(3, v3, &nrm2); 
	  lis_array_scale(3, 1/nrm2, v3);
	  lis_array_matvec(3, SB, v3, SBv3, LIS_INS_VALUE);
	  lis_array_invvec(3, SA, SBv3, z3);
	  lis_array_dot2(3, SBv3, z3, &ievalue3);
	  if (ievalue3==0) 
	    {
	      printf("ievalue3 is zero\n");
	      lis_precon_destroy(precon);
	      lis_solver_destroy(solver);
	      esolver->iter       = iter;
	      esolver->resid      = resid;
	      esolver->evalue[0] = evalue;

	      if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
	      lis_free(SA);
	      lis_free(SB);
	      lis_free(SW);
	      lis_free(v3);
	      lis_free(SAv3);
	      lis_free(SBv3);
	      lis_free(SBz3);
	      lis_free(z3);
	      lis_free(q3);
	      return LIS_BREAKDOWN;
	    }
	  lis_array_axpyz(3, -ievalue3, SBv3, z3, q3);
	  lis_array_nrm2(3, q3, &resid3); 
	  resid3 = fabs(resid3 / ievalue3);
	  if (resid3<1e-12) break;   
	  lis_array_copy(3,z3,v3);
	}

      evalue3 = 1 / ievalue3;
      
      lis_vector_scale(v3[0],w);  
      lis_vector_axpy(v3[2],p,w);
      lis_vector_xpay(w,v3[1],x);
      lis_vector_copy(w,p);
      
      lis_vector_scale(v3[0],Aw);  
      lis_vector_axpy(v3[2],Ap,Aw);
      lis_vector_xpay(Aw,v3[1],Ax);
      lis_vector_copy(Aw,Ap);
      
      lis_vector_nrm2(x,&nrm2);
      lis_vector_scale(1/nrm2,x);
      lis_vector_scale(1/nrm2,Ax);
      
      lis_vector_nrm2(p,&nrm2);
      lis_vector_scale(1/nrm2,p);
      lis_vector_scale(1/nrm2,Ap);
      
      lis_solver_get_timeex(solver,&times,&itimes,&ptimes,&p_c_times,&p_i_times);
      esolver->ptimes += solver->ptimes;
      esolver->itimes += solver->itimes;
      esolver->p_c_times += solver->p_c_times;
      esolver->p_i_times += solver->p_i_times;

    }

  lis_precon_destroy(precon);
  lis_solver_destroy(solver);

  esolver->iter       = iter;
  esolver->resid      = resid;
  esolver->evalue[0] = evalue;

  if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);

  lis_free(SA);
  lis_free(SB);
  lis_free(SW);
  lis_free(v3);
  lis_free(SAv3);
  lis_free(SBv3);
  lis_free(SBz3);
  lis_free(z3);
  lis_free(q3);

  if (resid<tol) 
    {
      esolver->retcode = LIS_SUCCESS;
      return LIS_SUCCESS;
    }
  else
    {
      esolver->retcode = LIS_MAXITER;
      return LIS_MAXITER;
    }
}
Exemple #22
0
LIS_INT lis_gmres_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,s,z,*v;
	LIS_QUAD *h;
	LIS_SCALAR *hd;
	LIS_QUAD_PTR aa,bb,rr,a2,b2,t,one,tmp;
	LIS_QUAD_PTR rnorm;
	LIS_REAL bnrm2,nrm2,tol,tol2;
	LIS_INT iter,maxiter,n,output;
	LIS_INT iter2,maxiter2;
	double time,ptime;

	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter  = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output   = solver->options[LIS_OPTIONS_OUTPUT];
	tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
	m        = solver->options[LIS_OPTIONS_RESTART];
	h_dim    = m+1;
	ptime    = 0.0;

	s       = solver->work[0];
	r       = solver->work[1];
	z       = solver->work[2];
	v       = &solver->work[3];

	LIS_QUAD_SCALAR_MALLOC(aa,0,1);
	LIS_QUAD_SCALAR_MALLOC(bb,1,1);
	LIS_QUAD_SCALAR_MALLOC(rr,2,1);
	LIS_QUAD_SCALAR_MALLOC(a2,3,1);
	LIS_QUAD_SCALAR_MALLOC(b2,4,1);
	LIS_QUAD_SCALAR_MALLOC(t,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(one,7,1);
	LIS_QUAD_SCALAR_MALLOC(rnorm,8,1);

	h       = (LIS_QUAD *)lis_malloc( sizeof(LIS_QUAD)*(h_dim+1)*(h_dim+2),"lis_gmres_switch::h" );
	hd      = (LIS_SCALAR *)h;
	cs      = (m+1)*h_dim;
	sn      = (m+2)*h_dim;
	one.hi[0]   = 1.0;
	one.lo[0]   = 0.0;

	z->precision = LIS_PRECISION_DEFAULT;

	/* r = M^-1 * (b - A * x) */
	lis_matvec(A,x,z);
	lis_vector_xpay(b,-1.0,z);
	lis_psolve(solver,z,v[0]);

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2     = solver->tol_switch;


	iter=0;
	while( iter<maxiter2 )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2(v[0],&rnorm.hi[0]);
		lis_vector_scale(1.0/rnorm.hi[0],v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_all(0,s);
		s->value[0] = rnorm.hi[0];

		i = 0;
		do
		{
			iter++;
			i++;
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;


			/* z = M^-1 * v */
			time = lis_wtime();
			lis_psolve(solver,v[iiv],z);
			ptime += lis_wtime()-time;

			/* w = A * z */
			lis_matvec(A,z, v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>          */
				/* w        = w - h[k,i] * v[k] */
				lis_vector_dot(v[i1v],v[k],&t.hi[0]);
				hd[k+iih] = t.hi[0];
				lis_vector_axpy(-t.hi[0],v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2(v[i1v],&t.hi[0]);
			hd[i1+iih] = t.hi[0];
			lis_vector_scale(1.0/t.hi[0],v[i1v]);

			for(k=1;k<=ii;k++)
			{
				jj        = k-1;
				t.hi[0]   =  hd[jj+iih];
				aa.hi[0]  =  hd[jj+cs]*t.hi[0];
				aa.hi[0] +=  hd[jj+sn]*hd[k+iih];
				bb.hi[0]  = -hd[jj+sn]*t.hi[0];
				bb.hi[0] +=  hd[jj+cs]*hd[k+iih];
				hd[jj+iih] = aa.hi[0];
				hd[k+iih] = bb.hi[0];
			}
			aa.hi[0] = hd[ii+iih];
			bb.hi[0] = hd[i1+iih];
			a2.hi[0] = aa.hi[0]*aa.hi[0];
			b2.hi[0] = bb.hi[0]*bb.hi[0];
			rr.hi[0] = sqrt(a2.hi[0]+b2.hi[0]);
			if( rr.hi[0]==0.0 ) rr.hi[0]=1.0e-17;
			hd[ii+cs] = aa.hi[0]/rr.hi[0];
			hd[ii+sn] = bb.hi[0]/rr.hi[0];
			s->value[i1] = -hd[ii+sn]*s->value[ii];
			s->value[ii] =  hd[ii+cs]*s->value[ii];

			aa.hi[0]  =  hd[ii+cs]*hd[ii+iih];
			aa.hi[0] +=  hd[ii+sn]*hd[i1+iih];
			hd[ii+iih] = aa.hi[0];

			/* convergence check */
			nrm2 = fabs(s->value[i1])*bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
			}

			if( tol2 >= nrm2 ) break;
		} while( i<m && iter <maxiter2 );

		/* Solve H * Y = S for upper Hessenberg matrix H */
		s->value[ii] = s->value[ii]/hd[ii+iih];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t.hi[0]  = s->value[jj];
			for(j=jj+1;j<=ii;j++)
			{
				t.hi[0] -= hd[jj+j*h_dim]*s->value[j];
			}
			s->value[jj] = t.hi[0]/hd[jj+jj*h_dim];
		}
		/* z = z + y * v */
		for(k=0;k<n;k++)
		{
			z->value[k] = s->value[0]*v[0]->value[k];
		}
		for(j=1;j<=ii;j++)
		{
			lis_vector_axpy(s->value[j],v[j],z);
		}
		/* r = M^-1 * z */
		time = lis_wtime();
		lis_psolve(solver,z,r);
		ptime += lis_wtime()-time;

		/* x = x + r */
		lis_vector_axpy(1,r,x);

		if( tol2 >= nrm2 )
		{
			solver->iter       = iter;
			solver->iter2      = iter;
			solver->ptime      = ptime;
			break;
		}

		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			s->value[jj-1] = -hd[jj-1+sn]*s->value[jj];
			s->value[jj]   =  hd[jj-1+cs]*s->value[jj];
		}

		for(j=0;j<=i1;j++)
		{
			t.hi[0] = s->value[j];
			if( j==0 ) t.hi[0] = t.hi[0]-1.0;
			lis_vector_axpy(t.hi[0],v[j],v[0]);
		}
	}

	/* Initial Residual */
	z->precision = LIS_PRECISION_QUAD;

	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);

	lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2);
	tol     = solver->tol;


	iter2=iter;
	while( iter2<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2ex_mm(v[0],&rnorm);
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)rnorm.hi);
		lis_vector_scaleex_mm(tmp,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_allex_nm(0.0,s);
		s->value[0]    = rnorm.hi[0];
		s->value_lo[0] = rnorm.lo[0];

		i = 0;
		do
		{
			iter2++;
			i++;
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;


			/* z = M^-1 * v */
			time = lis_wtime();
			lis_psolve(solver,v[iiv],z);
			ptime += lis_wtime()-time;

			/* w = A * z */
			lis_matvec(A,z, v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>          */
				/* w        = w - h[k,i] * v[k] */
				lis_vector_dotex_mmm(v[i1v],v[k],&t);
				h[k+iih].hi = t.hi[0];
				h[k+iih].lo = t.lo[0];
				lis_quad_minus((LIS_QUAD *)t.hi);
				lis_vector_axpyex_mmm(t,v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2ex_mm(v[i1v],&t);
			h[i1+iih].hi = t.hi[0];
			h[i1+iih].lo = t.lo[0];
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)t.hi);
			lis_vector_scaleex_mm(tmp,v[i1v]);

			for(k=1;k<=ii;k++)
			{
				jj  = k-1;
				t.hi[0]   =  h[jj+iih].hi;
				t.lo[0]   =  h[jj+iih].lo;
				lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)t.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
				lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)t.hi);
				lis_quad_minus((LIS_QUAD *)bb.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)bb.hi,(LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi);
				h[jj+iih].hi = aa.hi[0];
				h[jj+iih].lo = aa.lo[0];
				h[k+iih].hi = bb.hi[0];
				h[k+iih].lo = bb.lo[0];
			}
			aa.hi[0] = h[ii+iih].hi;
			aa.lo[0] = h[ii+iih].lo;
			bb.hi[0] = h[i1+iih].hi;
			bb.lo[0] = h[i1+iih].lo;
			lis_quad_sqr((LIS_QUAD *)a2.hi,(LIS_QUAD *)aa.hi);
			lis_quad_sqr((LIS_QUAD *)b2.hi,(LIS_QUAD *)bb.hi);
			lis_quad_add((LIS_QUAD *)rr.hi,(LIS_QUAD *)a2.hi,(LIS_QUAD *)b2.hi);
			lis_quad_sqrt((LIS_QUAD *)rr.hi,(LIS_QUAD *)rr.hi);
			lis_quad_div((LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)aa.hi,(LIS_QUAD *)rr.hi);
			lis_quad_div((LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)bb.hi,(LIS_QUAD *)rr.hi);
			tmp.hi[0] = s->value[ii];
			tmp.lo[0] = s->value_lo[ii];
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)tmp.hi);
			lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)tmp.hi);
			lis_quad_minus((LIS_QUAD *)aa.hi);
			s->value[i1] = aa.hi[0];
			s->value_lo[i1] = aa.lo[0];
			s->value[ii] = bb.hi[0];
			s->value_lo[ii] = bb.lo[0];

			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)&h[ii+iih]);
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)&h[i1+iih]);
			lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
			h[ii+iih].hi = aa.hi[0];
			h[ii+iih].lo = aa.lo[0];

			/* convergence check */
			nrm2 = fabs(s->value[i1])*bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter2] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter2 <maxiter );

		/* Solve H * Y = S for upper Hessenberg matrix H */
		tmp.hi[0] = s->value[ii];
		tmp.lo[0] = s->value_lo[ii];
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+iih]);
		s->value[ii] = tmp.hi[0];
		s->value_lo[ii] = tmp.lo[0];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t.hi[0]  = s->value[jj];
			t.lo[0]  = s->value_lo[jj];
			for(j=jj+1;j<=ii;j++)
			{
				tmp.hi[0] = s->value[j];
				tmp.lo[0] = s->value_lo[j];
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+j*h_dim]);
				lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)tmp.hi);
			}
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)&h[jj+jj*h_dim]);
			s->value[jj] = tmp.hi[0];
			s->value_lo[jj] = tmp.lo[0];
		}
		/* z = z + y * v */
		for(k=0;k<n;k++)
		{
			aa.hi[0] = s->value[0];
			aa.lo[0] = s->value_lo[0];
			bb.hi[0] = v[0]->value[k];
			bb.lo[0] = v[0]->value_lo[k];
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)bb.hi);
			z->value[k] = tmp.hi[0];
			z->value_lo[k] = tmp.lo[0];
		}
		for(j=1;j<=ii;j++)
		{
			aa.hi[0] = s->value[j];
			aa.lo[0] = s->value_lo[j];
			lis_vector_axpyex_mmm(aa,v[j],z);
		}
		/* r = M^-1 * z */
		time = lis_wtime();
		lis_psolve(solver,z,r);
		ptime += lis_wtime()-time;

		/* x = x + r */
		lis_vector_axpyex_mmm(one,r,x);

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter2;
			solver->iter2      = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			tmp.hi[0] = s->value[jj];
			tmp.lo[0] = s->value_lo[jj];
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj-1+sn]);
			lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj-1+cs]);
			lis_quad_minus((LIS_QUAD *)aa.hi);
			s->value[jj-1] = aa.hi[0];
			s->value_lo[jj-1] = aa.lo[0];
			s->value[jj] = bb.hi[0];
			s->value_lo[jj] = bb.lo[0];
		}

		for(j=0;j<=i1;j++)
		{
			t.hi[0] = s->value[j];
			t.lo[0] = s->value_lo[j];
			if( j==0 )
			{
				lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)one.hi);
			}
			lis_vector_axpyex_mmm(t,v[j],v[0]);
		}
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter       = iter2+1;
	solver->iter2      = iter;
	solver->resid     = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
LIS_INT lis_ecr(LIS_ESOLVER esolver)
{
  LIS_MATRIX        A;
  LIS_VECTOR        x;
  LIS_SCALAR        evalue;
  LIS_INT               emaxiter;
  LIS_REAL          tol;
  LIS_INT               iter,i,j,output;
  LIS_INT               nprocs,my_rank;
  LIS_REAL          nrm2,resid;
  LIS_SCALAR        lshift;
  LIS_VECTOR        r,p,Ax,Ar,Ap;
  LIS_SCALAR        alpha, beta;
  LIS_SCALAR        rAp, rp, ApAp, pAp, pp, ArAp, pAr;
  double	    times,itimes,ptimes,p_c_times,p_i_times;

  A = esolver->A;
  x = esolver->x;

  emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
  tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]; 
  output  = esolver->options[LIS_EOPTIONS_OUTPUT];
  lshift = esolver->lshift;

  if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
  if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);

  r = esolver->work[0];
  p = esolver->work[1];
  Ax = esolver->work[2];
  Ar = esolver->work[3];
  Ap = esolver->work[4];

  lis_vector_set_all(1.0,x);
  lis_vector_nrm2(x,&nrm2);
  lis_vector_scale(1/nrm2,x);

  lis_matvec(A,x,Ax);

  lis_vector_dot(x,Ax,&evalue);
  lis_vector_axpyz(-evalue,x,Ax,r); 
  lis_vector_scale(-1.0,r);

  lis_vector_copy(r,p);

  lis_matvec(A,p,Ap);

  iter=0;

  while (iter<emaxiter)
    {
      iter = iter + 1;
      
      lis_vector_dot(r,Ap,&rAp);
      lis_vector_dot(r,p,&rp);
      lis_vector_dot(Ap,Ap,&ApAp);
      lis_vector_dot(p,Ap,&pAp); 
      lis_vector_dot(p,p,&pp);

      alpha = (rAp - evalue * rp) / (ApAp - evalue * (2.0 * pAp - evalue * pp));

      lis_vector_axpy(alpha,p,x);
      lis_matvec(A,x,Ax);

      lis_vector_dot(x,Ax,&evalue);
      lis_vector_nrm2(x, &nrm2);
      evalue = evalue / (nrm2 * nrm2);
      lis_vector_axpyz(-evalue,x,Ax,r); 
      lis_vector_scale(-1.0,r);
      lis_matvec(A,r,Ar);

      lis_vector_dot(Ar,Ap,&ArAp);
      lis_vector_dot(p,Ar,&pAr);      
      lis_vector_dot(r,Ap,&rAp);      
      lis_vector_dot(r,p,&rp);

      beta = - (ArAp - evalue * ((pAr + rAp) - evalue * rp))/ (ApAp - evalue * (2.0 * pAp - evalue * pp));

      lis_vector_xpay(r,beta,p);

      lis_vector_nrm2(r,&nrm2);
      resid = fabs(nrm2 / (evalue));

      if( output )
	{
	  if( output & LIS_EPRINT_MEM ) esolver->residual[iter] = resid;
	  if( output & LIS_EPRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, resid);
	}

      if (resid<tol) break;  
      
    }

  esolver->iter       = iter;
  esolver->resid      = resid;
  esolver->evalue[0] = evalue;

  if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);

  if (resid<tol) 
    {
      esolver->retcode = LIS_SUCCESS;
      return LIS_SUCCESS;
    }
  else
    {
      esolver->retcode = LIS_MAXITER;
      return LIS_MAXITER;
    }
}
Exemple #24
0
LIS_INT lis_minres(LIS_SOLVER solver)
{
  LIS_Comm comm;  
  LIS_MATRIX A;
  LIS_VECTOR b,x;
  LIS_VECTOR v1,v2,v3,v4,w0,w1,w2;
  LIS_REAL nrm2,tol;
  LIS_SCALAR alpha;
  LIS_REAL beta2,beta3;
  LIS_SCALAR gamma1,gamma2,gamma3;
  LIS_SCALAR delta,eta;
  LIS_SCALAR sigma1,sigma2,sigma3;
  LIS_SCALAR rho1,rho2,rho3;
  LIS_REAL r0_euc,r_euc; 
  LIS_INT iter,maxiter,output;
  double time,ptime;

  LIS_DEBUG_FUNC_IN;

  comm = LIS_COMM_WORLD;
  
  A       = solver->A;
  b       = solver->b;
  x       = solver->x;
  tol     = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
  maxiter = solver->options[LIS_OPTIONS_MAXITER];
  output  = solver->options[LIS_OPTIONS_OUTPUT];
  ptime   = 0.0;

  v1       = solver->work[0];
  v2       = solver->work[1];
  v3       = solver->work[2];
  v4       = solver->work[3];
  w0       = solver->work[4];
  w1       = solver->work[5];
  w2       = solver->work[6];

  /* Lanczos algorithm */
  lis_matvec(A,x,v2); 
  lis_vector_xpay(b,-1.0,v2);

  time = lis_wtime();
  lis_psolve(solver,v2,v3);
  ptime += lis_wtime()-time;
  lis_vector_copy(v3,v2);

  /* Compute elements of Hermitian tridiagonal matrix */
  lis_vector_nrm2(v2,&r_euc); 
  eta = beta2 = r0_euc = r_euc; 
  gamma2 = gamma1 = 1.0; 
  sigma2 = sigma1 = 0.0;

  lis_vector_set_all(0.0,v1); 
  lis_vector_set_all(0.0,w0); 
  lis_vector_set_all(0.0,w1);

  nrm2 = r_euc / r0_euc; 

  for(iter=1;iter<=maxiter;iter++)
    {

      /* Lanczos algorithm */
      lis_vector_scale(1.0 / beta2,v2); 

      lis_matvec(A,v2,v3); 
      time = lis_wtime();

      lis_psolve(solver,v3,v4);
      ptime += lis_wtime()-time;

      lis_vector_dot(v2,v4,&alpha);
      lis_vector_axpy(-alpha,v2,v4);
      lis_vector_axpy(-beta2,v1,v4);
      lis_vector_nrm2(v4,&beta3);

      /* Compute elements of Hermitian tridiagonal matrix */
      delta = gamma2 * alpha - gamma1 * sigma2 * beta2;
      rho1 = sqrt(delta * delta + beta3 * beta3); 
      rho2 = sigma2 * alpha + gamma1 * gamma2 * beta2; 
      rho3 = sigma1 * beta2;
      gamma3 = delta / rho1; 
      sigma3 = beta3 / rho1;

      lis_vector_axpyz(-rho3,w0,v2,w2); 
      lis_vector_axpy(-rho2,w1,w2); 
      lis_vector_scale(1.0 / rho1,w2);

      lis_vector_axpy(gamma3 * eta,w2,x);

      /* convergence check */
      r_euc *= fabs(sigma3);
      nrm2 = r_euc / r0_euc;
      
      if( output )
	{
	  if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
	  if( output & LIS_PRINT_OUT ) lis_print_rhistory(comm,iter,nrm2);
	}
      
      if( nrm2 <= tol )
	{ 
	  solver->retcode    = LIS_SUCCESS;
	  solver->iter       = iter;
	  solver->resid      = nrm2;
	  solver->ptime      = ptime;
	  LIS_DEBUG_FUNC_OUT;
	  return LIS_SUCCESS;
	}

      eta *= -sigma3;

      lis_vector_copy(v2,v1); 
      lis_vector_copy(v4,v2);
      lis_vector_copy(w1,w0); 
      lis_vector_copy(w2,w1);

      beta2 = beta3;
      gamma1 = gamma2; 
      gamma2 = gamma3; 
      sigma1 = sigma2; 
      sigma2 = sigma3;

    }

  lis_vector_destroy(v1);
  lis_vector_destroy(v2); 
  lis_vector_destroy(v3);
  lis_vector_destroy(v4);
  lis_vector_destroy(w0); 
  lis_vector_destroy(w1); 
  lis_vector_destroy(w2);

  solver->retcode   = LIS_MAXITER;
  solver->iter      = iter;
  solver->resid     = nrm2;
  LIS_DEBUG_FUNC_OUT;
  return LIS_MAXITER;
}
Exemple #25
0
LIS_INT lis_eai_quad(LIS_ESOLVER esolver)
{
  LIS_MATRIX A;
  LIS_INT ss,ic;
  LIS_INT emaxiter,iter0,hqriter;
  LIS_REAL tol,hqrerr,D;
  LIS_INT i,j;
  LIS_INT output, niesolver;
  LIS_REAL nrm2,resid0; 
  LIS_VECTOR *v,w;
  LIS_SCALAR *h,*hq,*hr,evalue,evalue0;
  LIS_SOLVER solver;
  LIS_ESOLVER esolver2;
  char esolvername[128],solvername[128],preconname[128];
  LIS_INT nsol,precon_type;

  ss = esolver->options[LIS_EOPTIONS_SUBSPACE];
  emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
  tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]; 
  output  = esolver->options[LIS_EOPTIONS_OUTPUT];
  niesolver = esolver->options[LIS_EOPTIONS_INNER_ESOLVER];

  h = (LIS_SCALAR *)lis_malloc(ss*ss*sizeof(LIS_SCALAR), "lis_eai_quad::h");
  hq = (LIS_SCALAR *)lis_malloc(ss*ss*sizeof(LIS_SCALAR), "lis_eai_quad::hq");
  hr = (LIS_SCALAR *)lis_malloc(ss*ss*sizeof(LIS_SCALAR), "lis_eai_quad::hr");
  
  A = esolver->A;
  w = esolver->work[0];
  v = &esolver->work[1];
  lis_vector_set_all(0.0,v[0]);
  lis_vector_set_all(1.0,w);
  lis_vector_nrm2(w, &nrm2);

  lis_solver_create(&solver);
  lis_solver_set_option("-i bicg -p none",solver);  
  lis_solver_set_optionC(solver);
  lis_solver_get_solver(solver, &nsol);
  lis_solver_get_precon(solver, &precon_type);
  lis_solver_get_solvername(nsol, solvername);
  lis_solver_get_preconname(precon_type, preconname);
  lis_esolver_get_esolvername(niesolver, esolvername);
  if( A->my_rank==0 ) printf("inner eigensolver     : %s\n", esolvername);
  if( A->my_rank==0 ) printf("linear solver         : %s\n", solvername);
  if( A->my_rank==0 ) printf("preconditioner        : %s\n", preconname);

  for (i=0;i<ss*ss;i++) h[i] = 0.0;

  j=-1;
  while (j<ss-1)
    {
      j = j+1;
      lis_vector_copy(w, v[j]);

      /* w = A * v(j) */
      lis_matvec(A, v[j], w);

      /* reorthogonalization */
      for (i=0;i<=j;i++)
	{
	  /* h(i,j) = <v(i), w> */
	  lis_vector_dot(v[i], w, &h[i+j*ss]);
	  /* w = w - h(i,j) * v(i) */
	  lis_vector_axpy(-h[i+j*ss], v[i], w); 
	}

      /* h(j+1,j) = ||w||_2 */
      lis_vector_nrm2(w, &h[j+1+j*ss]);

      /* convergence check */
      if (fabs(h[j+1+j*ss])<tol) break;

      /* v(j+1) = w / h(i+1,j) */
      lis_vector_scale(1/h[j+1+j*ss],w);
      lis_vector_copy(w,v[j+1]);
      
    }

  /* compute eigenvalues of a real upper
     Hessenberg matrix H(j) = SH'(j)S^* */
  lis_array_qr(ss,h,hq,hr,&hqriter,&hqrerr);


  if( A->my_rank==0 ) 
    {
#ifdef _LONG__LONG
      printf("size of subspace      : %lld\n\n", ss);
#else
      printf("size of subspace      : %d\n\n", ss);
#endif
      if( output ) printf("approximate eigenvalues in subspace:\n\n");


      i=0;
      while (i<ss-1) 
	{
	  i = i + 1;
	  if (fabs(h[i+(i-1)*ss])<tol)
	    {
#ifdef _LONG__LONG
	      printf("Arnoldi: mode number              = %lld\n",i-1);
#else
	      printf("Arnoldi: mode number              = %d\n",i-1);
#endif	  
#ifdef _LONG__DOUBLE
	      printf("Arnoldi: eigenvalue               = %Le\n",h[i-1+(i-1)*ss]);
#else
	      printf("Arnoldi: eigenvalue               = %e\n",h[i-1+(i-1)*ss]);
#endif
	      esolver->evalue[i-1] = h[i-1+(i-1)*ss];
	    }
	  else
	    {
	      D = (h[i-1+(i-1)*ss]-h[i+i*ss]) * (h[i-1+(i-1)*ss]-h[i+i*ss])
		+ 4 * h[i-1+i*ss] * h[i+(i-1)*ss];
	      if (D<0)
		{
#ifdef _LONG__LONG
		  printf("Arnoldi: mode number              = %lld\n",i-1);
#else
		  printf("Arnoldi: mode number              = %d\n",i-1);
#endif
#ifdef _LONG__DOUBLE	      
		  printf("Arnoldi: eigenvalue               = %Le + %Le i\n", (h[i-1+(i-1)*ss]+h[i+i*ss])/2, sqrt(-D)/2);
#else
		  printf("Arnoldi: eigenvalue               = %e + %e i\n", (h[i-1+(i-1)*ss]+h[i+i*ss])/2, sqrt(-D)/2);
#endif
#ifdef _LONG__LONG	      
		  printf("Arnoldi: mode number              = %lld\n",i);
#else
		  printf("Arnoldi: mode number              = %d\n",i);
#endif
#ifdef _LONG__DOUBLE	      	      
		  printf("Arnoldi: eigenvalue               = %Le - %Le i\n", (h[i-1+(i-1)*ss]+h[i+i*ss])/2, sqrt(-D)/2);
#else
		  printf("Arnoldi: eigenvalue               = %e - %e i\n", (h[i-1+(i-1)*ss]+h[i+i*ss])/2, sqrt(-D)/2);
#endif	      
		  esolver->evalue[i-1] = (h[i-1+(i-1)*ss]+h[i+i*ss])/2;
		  esolver->evalue[i]   = (h[i-1+(i-1)*ss]+h[i+i*ss])/2;	      
		  i=i+1;
		}
	      else
		{
#ifdef _LONG__LONG	      	      
		  printf("Arnoldi: mode number              = %lld\n",i-1);
#else
		  printf("Arnoldi: mode number              = %d\n",i-1);
#endif
#ifdef _LONG__DOUBLE	      	      	      
		  printf("Arnoldi: eigenvalue               = %Le\n",h[i-1+(i-1)*ss]);
#else
		  printf("Arnoldi: eigenvalue               = %e\n",h[i-1+(i-1)*ss]);
#endif	      
		  esolver->evalue[i-1] = h[i-1+(i-1)*ss];
		}
	    }
	}
      if (i<ss)
	{
#ifdef _LONG__LONG	            
	  printf("Arnoldi: mode number              = %lld\n",i);
#else
	  printf("Arnoldi: mode number              = %d\n",i);
#endif
#ifdef _LONG__DOUBLE	      	      	      	      
	  printf("Arnoldi: eigenvalue               = %Le\n",h[i+i*ss]);
#else
	  printf("Arnoldi: eigenvalue               = %e\n",h[i+i*ss]);
#endif	      
	}

      if( output ) printf("\n");
      if( output ) printf("compute refined (real) eigenpairs, where imaginary parts are currently neglected:\n\n");
  
    }

  lis_esolver_create(&esolver2);
  esolver2->options[LIS_EOPTIONS_ESOLVER] = niesolver;
  esolver2->options[LIS_EOPTIONS_SUBSPACE] = 1;
  esolver2->options[LIS_EOPTIONS_MAXITER] = emaxiter;
  esolver2->options[LIS_EOPTIONS_OUTPUT] = esolver->options[LIS_EOPTIONS_OUTPUT];
  esolver2->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN] = tol;
  esolver2->eprecision = LIS_PRECISION_QUAD;

  /* compute refined (real) eigenpairs, where imaginary parts are currently neglected */

  for (i=0;i<ss;i++)
    {
      lis_vector_duplicate(A, &esolver->evector[i]); 
      esolver2->lshift = -(esolver->evalue[i]);
      lis_esolve(A, esolver->evector[i], &evalue, esolver2);
      lis_esolver_work_destroy(esolver2); 
      esolver->evalue[i] = evalue - esolver2->lshift;
      esolver->iter[i] = esolver2->iter[0];            
      esolver->resid[i] = esolver2->resid[0];

      if (i==0) 
	{
	  evalue0 = esolver->evalue[0];
	  iter0 = esolver2->iter[0];
	  resid0 = esolver2->resid[0];
	  if( output & LIS_EPRINT_MEM ) 
	    {
	      for (ic=0;ic<iter0+1;ic++)
		{
		  esolver->rhistory[ic] = esolver2->rhistory[ic]; 
		}
	    }
	  esolver->ptime = esolver2->ptime;
	  esolver->itime = esolver2->itime;
	  esolver->p_c_time = esolver2->p_c_time;
	  esolver->p_i_time = esolver2->p_i_time;
	}

      if (A->my_rank==0) 
	{

#ifdef _LONG__LONG
	  if( output ) printf("Arnoldi: mode number          = %lld\n", i);
#else
	  if( output ) printf("Arnoldi: mode number          = %d\n", i);
#endif
#ifdef _LONG__DOUBLE
	  if( output ) printf("Arnoldi: eigenvalue           = %Le\n", esolver->evalue[i]);
#else
	  if( output ) printf("Arnoldi: eigenvalue           = %e\n", esolver->evalue[i]);
#endif
#ifdef _LONG__LONG
	  if( output ) printf("Arnoldi: number of iterations = %lld\n",esolver2->iter[0]);
#else
	  if( output ) printf("Arnoldi: number of iterations = %d\n",esolver2->iter[0]);
#endif
#ifdef _LONG__DOUBLE
	  if( output ) printf("Arnoldi: relative residual    = %Le\n\n",esolver2->resid[0]);
#else
	  if( output ) printf("Arnoldi: relative residual    = %e\n\n",esolver2->resid[0]);
#endif
	}
    }

  esolver->evalue[0] = evalue0; 
  esolver->iter[0] = iter0;
  esolver->resid[0] = resid0;

  lis_vector_copy(esolver->evector[0], esolver->x);

  lis_esolver_destroy(esolver2); 

  lis_free(h); 
  lis_free(hq);
  lis_free(hr);

  lis_solver_destroy(solver);

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}