コード例 #1
0
LIS_INT lis_solver_set_shadowresidual(LIS_SOLVER solver, LIS_VECTOR r0, LIS_VECTOR rs0)
{
    unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4;
	LIS_INT i,n,resid;

	LIS_DEBUG_FUNC_IN;

	resid = solver->options[LIS_OPTIONS_INIT_SHADOW_RESID];
	if( resid==LIS_RANDOM )
	{
		n     = solver->A->n;
		init_by_array(init, length);

		#ifdef USE_QUAD_PRECISION
		if( solver->precision==LIS_PRECISION_DEFAULT )
		#endif
		{
			#ifdef _OPENMP
			#pragma omp parallel for private(i)
			#endif
			for(i=0;i<n;i++)
			{
				rs0->value[i] = genrand_real1();
			}
		}
		#ifdef USE_QUAD_PRECISION
		else
		{
			#ifdef _OPENMP
			#pragma omp parallel for private(i)
			#endif
			for(i=0;i<n;i++)
			{
				rs0->value[i]    = genrand_real1();
				rs0->value_lo[i] = 0.0;
			}
		}
		#endif
	}
	else
	{
		#ifdef USE_QUAD_PRECISION
		if( solver->precision==LIS_PRECISION_DEFAULT )
		#endif
		{
			lis_vector_copy(r0,rs0);
		}
		#ifdef USE_QUAD_PRECISION
		else
		{
			lis_vector_copyex_mm(r0,rs0);
		}
		#endif
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
コード例 #2
0
ファイル: lis_precon.c プロジェクト: iurisegtovich/lis
LIS_INT lis_psolvet_none(LIS_SOLVER solver, LIS_VECTOR b, LIS_VECTOR x)
{
	LIS_DEBUG_FUNC_IN;

	#ifndef USE_QUAD_PRECISION
		lis_vector_copy(b,x);
	#else
		if( solver->precision==LIS_PRECISION_DOUBLE )
		{
			lis_vector_copy(b,x);
		}
		else
		{
			lis_vector_copyex_mm(b,x);
		}
	#endif

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
コード例 #3
0
ファイル: lis_solver_orthomin.c プロジェクト: anishida/lis
LIS_INT lis_orthomin_quad(LIS_SOLVER solver)
{
	LIS_Comm comm;  
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, *p, *ap, *aptld;
	LIS_QUAD *dotsave;
	LIS_QUAD_PTR alpha, beta, tmp, one;

	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output,conv;
	double time,ptime;

	LIS_INT m,l,lmax,ip,ip0;

	LIS_DEBUG_FUNC_IN;

	comm = LIS_COMM_WORLD;

	A       = solver->A;
	M       = solver->precon;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	m       = solver->options[LIS_OPTIONS_RESTART];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,3,1);
	LIS_QUAD_SCALAR_MALLOC(one,4,1);

	r       = solver->work[0];
	rtld    = solver->work[1];
	p       = &solver->work[2];
	ap      = &solver->work[  (m+1)+2];
	aptld   = &solver->work[2*(m+1)+2];

	one.hi[0] = 1.0;
	one.lo[0] = 0.0;

	dotsave = (LIS_QUAD *)lis_malloc( sizeof(LIS_QUAD) * (m+1),"lis_orthomin_quad::dotsave" );

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,M,r,rtld,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	
	iter=1;
	while( iter<=maxiter )
	{
		ip = (iter-1) % (m+1);

		/* p[ip] = rtld */
		lis_vector_copyex_mm(rtld,p[ip]);

		/* ap[ip]    = A*p[ip] */
		/* aptld[ip] = M^-1 ap[ip] */
		lis_matvec(A,p[ip],ap[ip]);
		time = lis_wtime();
		lis_psolve(solver, ap[ip], aptld[ip]);
		ptime += lis_wtime()-time;

		lmax = _min(m,iter-1);
		for(l=1;l<=lmax;l++)
		{
			ip0 = (ip+m+1-l) % (m+1);
			/* beta = -<Ar[ip],Ap[ip0]> / <Ap[ip0],Ap[ip0]> */
			lis_vector_dotex_mmm(aptld[ip],aptld[ip0],&beta);
			lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,&dotsave[l-1]);
			lis_quad_minus((LIS_QUAD *)beta.hi);

			lis_vector_axpyex_mmm(beta,p[ip0]    ,p[ip]);
			lis_vector_axpyex_mmm(beta,ap[ip0]   ,ap[ip]);
			lis_vector_axpyex_mmm(beta,aptld[ip0],aptld[ip]);
		}
		for(l=m-1;l>0;l--)
		{
			dotsave[l] = dotsave[l-1];
		}

		lis_vector_dotex_mmm(aptld[ip],aptld[ip],&tmp);
		dotsave[0].hi = tmp.hi[0];
		dotsave[0].lo = tmp.lo[0];
		/* test breakdown */
		if( tmp.hi[0]==0.0 && tmp.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			lis_free(dotsave);
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}
		lis_quad_div(&dotsave[0],(LIS_QUAD *)one.hi,&dotsave[0]);

		/* alpha = <rtld,Aptld[ip]> */
		lis_vector_dotex_mmm(rtld,aptld[ip],&alpha);
		lis_quad_mul((LIS_QUAD *)alpha.hi,(LIS_QUAD *)alpha.hi,&dotsave[0]);

		lis_vector_axpyex_mmm( alpha,p[ip],x);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,ap[ip],r);
		lis_vector_axpyex_mmm(alpha,aptld[ip],rtld);
		lis_quad_minus((LIS_QUAD *)alpha.hi);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT ) lis_print_rhistory(comm,iter,nrm2);
		}

		if( tol > nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			lis_free(dotsave);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		iter++;
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	lis_free(dotsave);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
コード例 #4
0
LIS_INT lis_bicr_quad(LIS_SOLVER solver)
{
  LIS_MATRIX A,At;
  LIS_PRECON M;
  LIS_VECTOR b,x;
  LIS_VECTOR r,rtld, z,ztld,p, ptld, ap, map, az, aptld;
  LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1;
  LIS_REAL   bnrm2, nrm2, tol;
  LIS_INT iter,maxiter,n,output,conv;
  double times,ptimes;

  LIS_DEBUG_FUNC_IN;

  A       = solver->A;
  At      = solver->A;
  M       = solver->precon;
  b       = solver->b;
  x       = solver->x;
  n       = A->n;
  maxiter = solver->options[LIS_OPTIONS_MAXITER];
  output  = solver->options[LIS_OPTIONS_OUTPUT];
  conv    = solver->options[LIS_OPTIONS_CONV_COND];
  ptimes  = 0.0;

  r       = solver->work[0];
  rtld    = solver->work[1];
  z       = solver->work[2];
  ztld    = solver->work[3];
  p       = solver->work[4];
  ptld    = solver->work[5];
  ap      = solver->work[6];
  az      = solver->work[7];
  map     = solver->work[8];
  aptld   = solver->work[9];

  LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
  LIS_QUAD_SCALAR_MALLOC(beta,1,1);
  LIS_QUAD_SCALAR_MALLOC(rho,2,1);
  LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
  LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);

  /* Initial Residual */
  if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
  {
    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
  }
  tol     = solver->tol;

  lis_solver_set_shadowresidual(solver,r,rtld);

  lis_psolve(solver, r, z);
  lis_psolvet(solver, rtld, ztld);
  lis_vector_copyex_mm(z,p);
  lis_vector_copyex_mm(ztld,ptld);
  LIS_MATVEC(A,z,ap);
  lis_vector_dotex_mmm(ap,ztld,&rho_old);

  for( iter=1; iter<=maxiter; iter++ )
  {
    /* aptld = A^T * ptld */
    /* map   = M^-1 * ap  */
    LIS_MATVECT(A,ptld,aptld);
    times = lis_wtime();
    lis_psolve(solver, ap, map);
    ptimes += lis_wtime()-times;

    /* tmpdot1 = <map,aptld> */
    lis_vector_dotex_mmm(map,aptld,&tmpdot1);
    /* test breakdown */
    if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
    {
      solver->retcode   = LIS_BREAKDOWN;
      solver->iter      = iter;
      solver->resid     = nrm2;
      LIS_DEBUG_FUNC_OUT;
      return LIS_BREAKDOWN;
    }

    /* alpha = rho_old / tmpdot1 */
    /* x     = x + alpha*p   */
    /* r     = r - alpha*ap  */
    lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho_old.hi,(LIS_QUAD *)tmpdot1.hi);
    lis_vector_axpyex_mmm(alpha,p,x);
    lis_quad_minus((LIS_QUAD *)alpha.hi);
    lis_vector_axpyex_mmm(alpha,ap,r);
    /* convergence check */
    lis_solver_get_residual[conv](r,solver,&nrm2);

    if( output )
    {
      if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
      if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
    }

    if( tol >= nrm2 )
    {
      solver->retcode    = LIS_SUCCESS;
      solver->iter       = iter;
      solver->resid      = nrm2;
      solver->ptimes     = ptimes;
      LIS_DEBUG_FUNC_OUT;
      return LIS_SUCCESS;
    }
    
    /* rtld = rtld - alpha*aptld */
    /* z    = z - alpha*map      */
    /* ztld = M^-T * rtld        */
    /* az   = A * z              */
    /* rho = <az,ztld>           */
    lis_vector_axpyex_mmm(alpha,aptld,rtld);
    lis_vector_axpyex_mmm(alpha,map,z);
    times = lis_wtime();
    lis_psolvet(solver, rtld, ztld);
    ptimes += lis_wtime()-times;
    LIS_MATVEC(A,z,az);
    lis_vector_dotex_mmm(az,ztld,&rho);

    /* test breakdown */
    if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
    {
      solver->retcode   = LIS_BREAKDOWN;
      solver->iter      = iter;
      solver->resid     = nrm2;
      LIS_DEBUG_FUNC_OUT;
      return LIS_BREAKDOWN;
    }

    /* beta = rho / rho_old    */
    /* p    = z    + beta*p    */
    /* ptld = ztld + beta*ptld */
    /* ap   = az   + beta*ap   */
    lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
    lis_vector_xpayex_mmm(z,beta,p);
    lis_vector_xpayex_mmm(ztld,beta,ptld);
    lis_vector_xpayex_mmm(az,beta,ap);

    rho_old.hi[0] = rho.hi[0];
    rho_old.lo[0] = rho.lo[0];
  }

  solver->retcode   = LIS_MAXITER;
  solver->iter      = iter;
  solver->resid     = nrm2;
  LIS_DEBUG_FUNC_OUT;
  return LIS_MAXITER;
}
コード例 #5
0
ファイル: lis_matrix_csr.c プロジェクト: florianl/lis
LIS_INT lis_matrix_solvet_csr(LIS_MATRIX A, LIS_VECTOR B, LIS_VECTOR X, LIS_INT flag)
{
	LIS_INT i,j,jj,n;
	LIS_SCALAR t;
	LIS_SCALAR *x;
	#ifdef _OPENMP
		LIS_INT is,ie,my_rank,nprocs;
	#endif
	#ifdef USE_QUAD_PRECISION
		LIS_QUAD w1,w2;
		LIS_SCALAR *xl;
	#endif
	LIS_QUAD_DECLAR;

	LIS_DEBUG_FUNC_IN;

	n  = A->n;
	x  = X->value;
	#ifdef USE_QUAD_PRECISION
		xl = X->value_lo;
	#endif

	#ifdef USE_QUAD_PRECISION
		if( B->precision==LIS_PRECISION_DEFAULT )
		{
	#endif
			lis_vector_copy(B,X);
	#ifdef USE_QUAD_PRECISION
		}
		else
		{
			lis_vector_copyex_mm(B,X);
		}
	#endif
	switch(flag)
	{
	case LIS_MATRIX_LOWER:
		for(i=0;i<n;i++)
		{
			x[i]   = x[i] * A->WD->value[i];
			for(j=A->U->ptr[i];j<A->U->ptr[i+1];j++)
			{
				x[A->U->index[j]] -= A->U->value[j] * x[i];
			}
		}
		break;
	case LIS_MATRIX_UPPER:
		for(i=n-1;i>=0;i--)
		{
			x[i]   = x[i] * A->WD->value[i];
			for(j=A->L->ptr[i];j<A->L->ptr[i+1];j++)
			{
				x[A->L->index[j]] -= A->L->value[j] * x[i];
			}
		}
		break;
	case LIS_MATRIX_SSOR:
	#ifdef USE_QUAD_PRECISION
		if( B->precision==LIS_PRECISION_DEFAULT )
		{
	#endif
			#ifdef _OPENMP
				nprocs = omp_get_max_threads();
				#pragma omp parallel private(i,j,jj,t,is,ie,my_rank)
				{
					my_rank = omp_get_thread_num();
					LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
					for(i=is;i<ie;i++)
					{
						t   = x[i] * A->WD->value[i];
						for(j=A->U->ptr[i];j<A->U->ptr[i+1];j++)
						{
							jj = A->U->index[j];
							if( jj<is || jj>=ie ) continue;
							x[jj] -= A->U->value[j] * t;
						}
					}
					for(i=ie-1;i>=is;i--)
					{
						t    = x[i] * A->WD->value[i];
						x[i] = t;
						for(j=A->L->ptr[i];j<A->L->ptr[i+1];j++)
						{
							jj = A->L->index[j];
							if( jj<is ) continue;
							x[jj] -= A->L->value[j] * t;
						}
					}
				}
			#else
				for(i=0;i<n;i++)
				{
					t   = x[i] * A->WD->value[i];
					for(j=A->U->ptr[i];j<A->U->ptr[i+1];j++)
					{
						x[A->U->index[j]] -= A->U->value[j] * t;
					}
				}
				for(i=n-1;i>=0;i--)
				{
					t    = x[i] * A->WD->value[i];
					x[i] = t;
					for(j=A->L->ptr[i];j<A->L->ptr[i+1];j++)
					{
						x[A->L->index[j]] -= A->L->value[j] * t;
					}
				}
			#endif
	#ifdef USE_QUAD_PRECISION
		}
		else
		{
			#ifdef _OPENMP
				nprocs = omp_get_max_threads();
				#ifndef USE_SSE2
					#pragma omp parallel private(i,j,jj,is,ie,w1,my_rank,p1,p2,tq,bhi,blo,chi,clo,sh,sl,th,tl,eh,el)
				#else
					#pragma omp parallel private(i,j,jj,is,ie,w1,my_rank,bh,ch,sh,wh,th,bl,cl,sl,wl,tl,p1,p2,t0,t1,t2,eh)
				#endif
				{
					my_rank = omp_get_thread_num();
					LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
					for(i=is;i<ie;i++)
					{
						#ifndef USE_SSE2
							LIS_QUAD_MULD(w1.hi,w1.lo,x[i],xl[i],A->WD->value[i]);
						#else
							LIS_QUAD_MULD_SSE2(w1.hi,w1.lo,x[i],xl[i],A->WD->value[i]);
						#endif
						/* t   = x[i] * A->WD->value[i]; */
						for(j=A->U->ptr[i];j<A->U->ptr[i+1];j++)
						{
							jj = A->U->index[j];
							if( jj<is || jj>=ie ) continue;
							#ifndef USE_SSE2
								LIS_QUAD_FMAD(x[jj],xl[jj],x[jj],xl[jj],w1.hi,w1.lo,-A->U->value[j]);
							#else
								LIS_QUAD_FMAD_SSE2(x[jj],xl[jj],x[jj],xl[jj],w1.hi,w1.lo,-A->U->value[j]);
							#endif
							/* x[A->U->index[j]] -= A->U->value[j] * t; */
						}
					}
					for(i=ie-1;i>=is;i--)
					{
						#ifndef USE_SSE2
							LIS_QUAD_MULD(w1.hi,w1.lo,x[i],xl[i],A->WD->value[i]);
						#else
							LIS_QUAD_MULD_SSE2(w1.hi,w1.lo,x[i],xl[i],A->WD->value[i]);
						#endif
						x[i]  = w1.hi;
						xl[i] = w1.lo;
						/* t    = x[i] * A->WD->value[i]; */
						/* x[i] = t; */
						for(j=A->L->ptr[i];j<A->L->ptr[i+1];j++)
						{
							jj = A->L->index[j];
							if( jj<is || jj>=ie ) continue;
							#ifndef USE_SSE2
								LIS_QUAD_FMAD(x[jj],xl[jj],x[jj],xl[jj],w1.hi,w1.lo,-A->L->value[j]);
							#else
								LIS_QUAD_FMAD_SSE2(x[jj],xl[jj],x[jj],xl[jj],w1.hi,w1.lo,-A->L->value[j]);
							#endif
							/* x[A->L->index[j]] -= A->L->value[j] * t; */
						}
					}
				}
			#else
				for(i=0;i<n;i++)
				{
					#ifndef USE_SSE2
						LIS_QUAD_MULD(w1.hi,w1.lo,x[i],xl[i],A->WD->value[i]);
					#else
						LIS_QUAD_MULD_SSE2(w1.hi,w1.lo,x[i],xl[i],A->WD->value[i]);
					#endif
					/* t   = x[i] * A->WD->value[i]; */
					for(j=A->U->ptr[i];j<A->U->ptr[i+1];j++)
					{
						jj = A->U->index[j];
						#ifndef USE_SSE2
							LIS_QUAD_FMAD(x[jj],xl[jj],x[jj],xl[jj],w1.hi,w1.lo,-A->U->value[j]);
						#else
							LIS_QUAD_FMAD_SSE2(x[jj],xl[jj],x[jj],xl[jj],w1.hi,w1.lo,-A->U->value[j]);
						#endif
						/* x[A->U->index[j]] -= A->U->value[j] * t; */
					}
				}
				for(i=n-1;i>=0;i--)
				{
					#ifndef USE_SSE2
						LIS_QUAD_MULD(w1.hi,w1.lo,x[i],xl[i],A->WD->value[i]);
					#else
						LIS_QUAD_MULD_SSE2(w1.hi,w1.lo,x[i],xl[i],A->WD->value[i]);
					#endif
					x[i]  = w1.hi;
					xl[i] = w1.lo;
					/* t    = x[i] * A->WD->value[i]; */
					/* x[i] = t; */
					for(j=A->L->ptr[i];j<A->L->ptr[i+1];j++)
					{
						jj = A->L->index[j];
						#ifndef USE_SSE2
							LIS_QUAD_FMAD(x[jj],xl[jj],x[jj],xl[jj],w1.hi,w1.lo,-A->L->value[j]);
						#else
							LIS_QUAD_FMAD_SSE2(x[jj],xl[jj],x[jj],xl[jj],w1.hi,w1.lo,-A->L->value[j]);
						#endif
						/* x[A->L->index[j]] -= A->L->value[j] * t; */
					}
				}
			#endif
		}
	#endif
		break;
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
コード例 #6
0
ファイル: lis_precon_ads.c プロジェクト: florianl/lis
LIS_INT lis_psolve_adds(LIS_SOLVER solver, LIS_VECTOR B, LIS_VECTOR X)
{
	LIS_INT i,k,n,np,iter,ptype;
	LIS_SCALAR *b,*x,*w,*r,*rl;
	LIS_VECTOR W,R;
	LIS_PRECON precon;
	LIS_QUAD_DECLAR;

	LIS_DEBUG_FUNC_IN;

	precon = solver->precon;
	n     = precon->A->n;
	np    = precon->A->np;
	W     = precon->work[0];
	R     = precon->work[1];
	b     = B->value;
	x     = X->value;
	w     = W->value;
	r     = R->value;
	rl    = R->value_lo;
	iter  = solver->options[LIS_OPTIONS_ADDS_ITER];
	ptype = solver->options[LIS_OPTIONS_PRECON];

	#ifdef USE_QUAD_PRECISION
	if( solver->precision==LIS_PRECISION_DEFAULT )
	{
	#endif
		lis_vector_set_all(0.0,X);
		lis_vector_copy(B,R);
		for(k=0;k<iter+1;k++)
		{
			for(i=n;i<np;i++)
			{
				r[i] = 0.0;
			}

			lis_psolve_xxx[ptype](solver,R,W);
			#ifdef _OPENMP
			#pragma omp parallel for private(i)
			#endif
			for(i=0;i<n;i++)
			{
				x[i] += w[i];
			}
		
			if(k!=iter)
			{
				lis_matvec(precon->A,X,R);
				#ifdef _OPENMP
				#pragma omp parallel for private(i)
				#endif
				for(i=0;i<n;i++)
				{
					r[i] = b[i] - r[i];
				}
			}
		}
	#ifdef USE_QUAD_PRECISION
		}
		else
		{
			lis_vector_set_allex_nm(0.0,X);
			lis_vector_copyex_mm(B,R);
			for(k=0;k<iter+1;k++)
			{
				for(i=n;i<np;i++)
				{
					r[i] = 0.0;
					rl[i] = 0.0;
				}

				lis_psolve_xxx[ptype](solver,R,W);
				for(i=0;i<n;i++)
				{
					#ifndef USE_SSE2
						LIS_QUAD_ADD(X->value[i],X->value_lo[i],X->value[i],X->value_lo[i],W->value[i],W->value_lo[i]);
					#else
						LIS_QUAD_ADD_SSE2(X->value[i],X->value_lo[i],X->value[i],X->value_lo[i],W->value[i],W->value_lo[i]);
					#endif
	/*				x[i] += w[i];*/
				}
			
				if(k==iter) break;

				lis_matvec(precon->A,X,R);
				for(i=0;i<n;i++)
				{
					#ifndef USE_SSE2
						LIS_QUAD_ADD(R->value[i],R->value_lo[i],B->value[i],B->value_lo[i],-R->value[i],-R->value_lo[i]);
					#else
						LIS_QUAD_ADD_SSE2(R->value[i],R->value_lo[i],B->value[i],B->value_lo[i],-R->value[i],-R->value_lo[i]);
					#endif
	/*				r[i] = b[i] - r[i];*/
				}
			}
		}
	#endif

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
コード例 #7
0
LIS_INT lis_psolvet_ilut_csr(LIS_SOLVER solver, LIS_VECTOR B, LIS_VECTOR X)
{
#ifdef _OPENMP
  LIS_INT i,j,jj,n;
  LIS_INT is,ie,my_rank,nprocs;
  LIS_SCALAR *b,*x;
  LIS_MATRIX_ILU L,U;
  LIS_VECTOR D;
  LIS_PRECON  precon;
  LIS_QUAD_DECLAR;
  #ifdef USE_QUAD_PRECISION
    LIS_SCALAR *xl;
  #endif

  LIS_DEBUG_FUNC_IN;

  precon = solver->precon;
  L = precon->L;
  U = precon->U;
  D = precon->D;
  b = B->value;
  x = X->value;
  #ifdef USE_QUAD_PRECISION
    xl = X->value_lo;
  #endif
  n = solver->A->n;
  nprocs = omp_get_max_threads();

  #ifdef USE_QUAD_PRECISION
    if( B->precision==LIS_PRECISION_DEFAULT )
    {
  #endif
      lis_vector_copy(B,X);
      #pragma omp parallel private(i,j,jj,is,ie,my_rank)
      {
        my_rank = omp_get_thread_num();
        LIS_GET_ISIE(my_rank,nprocs,n,is,ie);

        for(i=is;i<ie;i++)
        {
          x[i] = D->value[i]*x[i];
          for(j=0;j<U->nnz[i];j++)
          {
            jj     = U->index[i][j];
            x[jj] -= U->value[i][j] * x[i];
          }
        }
        for(i=ie-1;i>=is;i--)
        {
          for(j=0;j<L->nnz[i];j++)
          {
            jj     = L->index[i][j];
            x[jj] -= L->value[i][j] * x[i];
          }
        }
      }
  #ifdef USE_QUAD_PRECISION
    }
    else
    {
      lis_vector_copyex_mm(B,X);
      nprocs = omp_get_max_threads();
      #ifndef USE_SSE2
        #pragma omp parallel private(i,j,jj,is,ie,my_rank,p1,p2,tq,bhi,blo,chi,clo,sh,sl,th,tl,eh,el)
      #else
        #pragma omp parallel private(i,j,jj,is,ie,my_rank,bh,ch,sh,wh,th,bl,cl,sl,wl,tl,p1,p2,t0,t1,t2,eh)
      #endif
      {
        my_rank = omp_get_thread_num();
        LIS_GET_ISIE(my_rank,nprocs,n,is,ie);

        for(i=is;i<ie;i++)
        {
          #ifndef USE_SSE2
            LIS_QUAD_MULD(x[i],xl[i],x[i],xl[i],D->value[i]);
          #else
            LIS_QUAD_MULD_SSE2(x[i],xl[i],x[i],xl[i],D->value[i]);
          #endif
/*          x[i] = D->value[i]*x[i];*/
          for(j=0;j<U->nnz[i];j++)
          {
            jj     = U->index[i][j];
            #ifndef USE_SSE2
              LIS_QUAD_FMAD(x[jj],xl[jj],x[jj],xl[jj],x[i],xl[i],-U->value[i][j]);
            #else
              LIS_QUAD_FMAD_SSE2(x[jj],xl[jj],x[jj],xl[jj],x[i],xl[i],-U->value[i][j]);
            #endif
/*            x[jj] -= U->value[i][j] * x[i];*/
          }
        }
        for(i=ie-1;i>=is;i--)
        {
          for(j=0;j<L->nnz[i];j++)
          {
            jj     = L->index[i][j];
            #ifndef USE_SSE2
              LIS_QUAD_FMAD(x[jj],xl[jj],x[jj],xl[jj],x[i],xl[i],-L->value[i][j]);
            #else
              LIS_QUAD_FMAD_SSE2(x[jj],xl[jj],x[jj],xl[jj],x[i],xl[i],-L->value[i][j]);
            #endif
/*            x[jj] -= L->value[i][j] * x[i];*/
          }
        }
      }
    }
  #endif

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
#else
  LIS_INT i,j,jj,n;
  LIS_SCALAR *b,*x;
  LIS_MATRIX_ILU L,U;
  LIS_VECTOR D;
  LIS_PRECON  precon;
  LIS_QUAD_DECLAR;
  #ifdef USE_QUAD_PRECISION
    LIS_SCALAR *xl;
  #endif


  LIS_DEBUG_FUNC_IN;

  precon = solver->precon;
  L = precon->L;
  U = precon->U;
  D = precon->D;
  b = B->value;
  x = X->value;
  #ifdef USE_QUAD_PRECISION
    xl = X->value_lo;
  #endif
  n = solver->A->n;

  #ifdef USE_QUAD_PRECISION
    if( B->precision==LIS_PRECISION_DEFAULT )
    {
  #endif
      lis_vector_copy(B,X);
      for(i=0; i<n; i++)
      {
        x[i] = D->value[i]*x[i];
        for(j=0;j<U->nnz[i];j++)
        {
          jj     = U->index[i][j];
          x[jj] -= U->value[i][j] * x[i];
        }
      }
      for(i=n-1; i>=0; i--)
      {
        for(j=0;j<L->nnz[i];j++)
        {
          jj     = L->index[i][j];
          x[jj] -= L->value[i][j] * x[i];
        }
      }
  #ifdef USE_QUAD_PRECISION
    }
    else
    {
      lis_vector_copy(B,X);
      for(i=0; i<n; i++)
      {
        #ifndef USE_SSE2
          LIS_QUAD_MULD(x[i],xl[i],x[i],xl[i],D->value[i]);
        #else
          LIS_QUAD_MULD_SSE2(x[i],xl[i],x[i],xl[i],D->value[i]);
        #endif
/*        x[i] = D->value[i]*x[i];*/
        for(j=0;j<U->nnz[i];j++)
        {
          jj     = U->index[i][j];
          #ifndef USE_SSE2
            LIS_QUAD_FMAD(x[jj],xl[jj],x[jj],xl[jj],x[i],xl[i],-U->value[i][j]);
          #else
            LIS_QUAD_FMAD_SSE2(x[jj],xl[jj],x[jj],xl[jj],x[i],xl[i],-U->value[i][j]);
          #endif
/*          x[jj] -= U->value[i][j] * x[i];*/
        }
      }
      for(i=n-1; i>=0; i--)
      {
        for(j=0;j<L->nnz[i];j++)
        {
          jj     = L->index[i][j];
          #ifndef USE_SSE2
            LIS_QUAD_FMAD(x[jj],xl[jj],x[jj],xl[jj],x[i],xl[i],-L->value[i][j]);
          #else
            LIS_QUAD_FMAD_SSE2(x[jj],xl[jj],x[jj],xl[jj],x[i],xl[i],-L->value[i][j]);
          #endif
/*          x[jj] -= L->value[i][j] * x[i];*/
        }
      }
    }
  #endif

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
#endif
}
コード例 #8
0
ファイル: lis_precon_hybrid.c プロジェクト: florianl/lis
LIS_INT lis_psolvet_hybrid(LIS_SOLVER solver, LIS_VECTOR B, LIS_VECTOR X)
{
	LIS_VECTOR xx;
	LIS_SOLVER solver2;
	LIS_INT	nsolver;
	LIS_PRECON precon;

	/*
	 *  Mx = b
	 *  M  = A
	 */

	LIS_DEBUG_FUNC_IN;

	precon      = solver->precon;
	solver2     = precon->solver;
	xx          = precon->solver->x;
	nsolver     = solver2->options[LIS_OPTIONS_SOLVER];
	solver2->b  = B;
	LIS_MATVEC  = lis_matvect;
	LIS_MATVECT = lis_matvec;

	if( solver2->options[LIS_OPTIONS_INITGUESS_ZEROS] )
	{
		#ifdef USE_QUAD_PRECISION
		if( solver->precision==LIS_PRECISION_DEFAULT )
		{
		#endif
			lis_vector_set_all(0,xx);
		#ifdef USE_QUAD_PRECISION
		}
		else
		{
			lis_vector_set_allex_nm(0,xx);
		}
		#endif
	}
	else
	{
		#ifdef USE_QUAD_PRECISION
		if( solver->precision==LIS_PRECISION_DEFAULT )
		{
		#endif
			lis_vector_copy(B,xx);
		#ifdef USE_QUAD_PRECISION
		}
		else
		{
			lis_vector_copyex_mm(B,xx);
		}
		#endif
	}

	/* execute solver */
	lis_solver_execute[nsolver](solver2);
	#ifdef USE_QUAD_PRECISION
	if( solver->precision==LIS_PRECISION_DEFAULT )
	{
	#endif
		lis_vector_copy(solver2->x,X);
	#ifdef USE_QUAD_PRECISION
	}
	else
	{
		lis_vector_copyex_mm(solver2->x,X);
	}
	#endif
	LIS_MATVEC  = lis_matvec;
	LIS_MATVECT = lis_matvect;


	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
コード例 #9
0
LIS_INT lis_bicgstab_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, t,p,v, s, phat, shat;
	LIS_QUAD_PTR alpha, beta, omega, rho, rho_old, tmpdot1, tmpdot2;
	LIS_REAL   bnrm2, nrm2, tol, tol2;
	LIS_INT iter,maxiter,n,output,conv;
	LIS_INT iter2,maxiter2;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	M       = solver->precon;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter  = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output   = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
	ptimes  = 0.0;

	rtld    = solver->work[0];
	r       = solver->work[1];
	s       = solver->work[1];
	t       = solver->work[2];
	p       = solver->work[3];
	v       = solver->work[4];
	phat    = solver->work[5];
	shat    = solver->work[6];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
	LIS_QUAD_SCALAR_MALLOC(omega,6,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot2,7,1);
	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	alpha.hi[0] = 1.0;
	alpha.lo[0] = 0.0;
	omega.hi[0] = 1.0;
	omega.lo[0] = 0.0;

	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, phat);
	lis_vector_set_allex_nm(0.0, s);
	lis_vector_set_allex_nm(0.0, shat);

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2     = solver->tol_switch;

	lis_solver_set_shadowresidual(solver,r,rtld);

	s->precision = LIS_PRECISION_DEFAULT;
	shat->precision = LIS_PRECISION_DEFAULT;
	p->precision = LIS_PRECISION_DEFAULT;
	phat->precision = LIS_PRECISION_DEFAULT;
	for( iter=1; iter<=maxiter2; iter++ )
	{
			/* rho = <rtld,r> */
			lis_vector_dot(rtld,r,&rho.hi[0]);

			/* test breakdown */
			if( rho.hi[0]==0.0 )
			{
				solver->retcode   = LIS_BREAKDOWN;
				solver->iter      = iter;
				solver->iter2     = iter;
				solver->resid     = nrm2;
				LIS_DEBUG_FUNC_OUT;
				return LIS_BREAKDOWN;
			}

			if( iter==1 )
			{
				lis_vector_copy(r,p);
			}
			else
			{
				/* beta = (rho / rho_old) * (alpha / omega) */
				beta.hi[0] = (rho.hi[0] / rho_old.hi[0]) * (alpha.hi[0] / omega.hi[0]);
		
				/* p = r + beta*(p - omega*v) */
				lis_vector_axpy(-omega.hi[0],v,p);
				lis_vector_xpay(r,beta.hi[0],p);
			}
			
			/* phat = M^-1 * p */
			times = lis_wtime();
			lis_psolve(solver, p, phat);
			ptimes += lis_wtime()-times;

			/* v = A * phat */
			LIS_MATVEC(A,phat,v);

			/* tmpdot1 = <rtld,v> */
			lis_vector_dot(rtld,v,&tmpdot1.hi[0]);
			/* test breakdown */
			/* */
			
			/* alpha = rho / tmpdot1 */
			alpha.hi[0] = rho.hi[0] / tmpdot1.hi[0];
			
			/* s = r - alpha*v */
			lis_vector_axpy(-alpha.hi[0],v,r);

			/* Early check for tolerance */
			lis_solver_get_residual[conv](s,solver,&nrm2);
			if( nrm2 <= tol2 )
			{
				if( output )
				{
					if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
					if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
				}

				lis_vector_axpy(alpha.hi[0],phat,x);
				solver->iter       = iter;
				solver->iter2     = iter;
				solver->ptimes     = ptimes;
				break;
			}

			/* shat = M^-1 * s */
			times = lis_wtime();
			lis_psolve(solver, s, shat);
			ptimes += lis_wtime()-times;

			/* t = A * shat */
			LIS_MATVEC(A,shat,t);

			/* tmpdot1 = <t,s> */
			/* tmpdot2 = <t,t> */
			/* omega   = tmpdot1 / tmpdot2 */
			lis_vector_dot(t,s,&tmpdot1.hi[0]);
			lis_vector_dot(t,t,&tmpdot2.hi[0]);
			omega.hi[0]   = tmpdot1.hi[0] / tmpdot2.hi[0];

			/* x = x + alpha*phat + omega*shat */
			lis_vector_axpy(alpha.hi[0],phat,x);
			lis_vector_axpy(omega.hi[0],shat,x);
			
			/* r = s - omega*t */
			lis_vector_axpy(-omega.hi[0],t,r);
			
			/* convergence check */
			lis_solver_get_residual[conv](r,solver,&nrm2);
			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
			}

			if( nrm2 <= tol2 )
			{
				solver->iter       = iter;
				solver->iter2     = iter;
				solver->ptimes     = ptimes;
				break;
			}
			
			if( omega.hi[0]==0.0 )
			{
				solver->retcode   = LIS_BREAKDOWN;
				solver->iter      = iter;
				solver->iter2     = iter;
				solver->resid     = nrm2;
				LIS_DEBUG_FUNC_OUT;
				return LIS_BREAKDOWN;
			}
			rho_old.hi[0] = rho.hi[0];
	}

	s->precision = LIS_PRECISION_QUAD;
	shat->precision = LIS_PRECISION_QUAD;
	p->precision = LIS_PRECISION_QUAD;
	phat->precision = LIS_PRECISION_QUAD;

	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);
	rho_old.hi[0] = 1.0;
	alpha.hi[0] = 1.0;
	omega.hi[0] = 1.0;

	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, phat);
	lis_vector_set_allex_nm(0.0, s);
	lis_vector_set_allex_nm(0.0, shat);

	/* Initial Residual */
	lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
	tol     = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	for( iter2=iter+1; iter2<=maxiter; iter2++ )
	{
			/* rho = <rtld,r> */
			lis_vector_dotex_mmm(rtld,r,&rho);

			/* test breakdown */
			if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
			{
				solver->retcode   = LIS_BREAKDOWN;
				solver->iter       = iter2;
				solver->iter2      = iter;
				solver->resid     = nrm2;
				LIS_DEBUG_FUNC_OUT;
				return LIS_BREAKDOWN;
			}

			if( iter2==1 )
			{
				lis_vector_copyex_mm(r,p);
			}
			else
			{
				/* beta = (rho / rho_old) * (alpha / omega) */
				lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
				lis_quad_div((LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)omega.hi);
				lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmpdot1.hi);
		
				/* p = r + beta*(p - omega*v) */
				lis_quad_minus((LIS_QUAD *)omega.hi);
				lis_vector_axpyex_mmm(omega,v,p);
				lis_vector_xpayex_mmm(r,beta,p);
			}
			
			/* phat = M^-1 * p */
			times = lis_wtime();
			lis_psolve(solver, p, phat);
			ptimes += lis_wtime()-times;

			/* v = A * phat */
			LIS_MATVEC(A,phat,v);

			/* tmpdot1 = <rtld,v> */
			lis_vector_dotex_mmm(rtld,v,&tmpdot1);
			/* test breakdown */
			/* */
			
			/* alpha = rho / tmpdot1 */
			lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);
			
			/* s = r - alpha*v */
			lis_quad_minus((LIS_QUAD *)alpha.hi);
			lis_vector_axpyex_mmm(alpha,v,r);

			/* Early check for tolerance */
			lis_solver_get_residual[conv](s,solver,&nrm2);
			if( tol > nrm2 )
			{
				if( output )
				{
					if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
					if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter2, nrm2);
				}

				lis_quad_minus((LIS_QUAD *)alpha.hi);
				lis_vector_axpyex_mmm(alpha,phat,x);
				solver->retcode    = LIS_SUCCESS;
				solver->iter       = iter2;
				solver->iter2      = iter;
				solver->resid      = nrm2;
				solver->ptimes     = ptimes;
				LIS_DEBUG_FUNC_OUT;
				return LIS_SUCCESS;
			}

			/* shat = M^-1 * s */
			times = lis_wtime();
			lis_psolve(solver, s, shat);
			ptimes += lis_wtime()-times;

			/* t = A * shat */
			LIS_MATVEC(A,shat,t);

			/* tmpdot1 = <t,s> */
			/* tmpdot2 = <t,t> */
			/* omega   = tmpdot1 / tmpdot2 */
			lis_vector_dotex_mmm(t,s,&tmpdot1);
			lis_vector_dotex_mmm(t,t,&tmpdot2);
			lis_quad_div((LIS_QUAD *)omega.hi,(LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)tmpdot2.hi);

			/* x = x + alpha*phat + omega*shat */
			lis_quad_minus((LIS_QUAD *)alpha.hi);
			lis_vector_axpyex_mmm(alpha,phat,x);
			lis_vector_axpyex_mmm(omega,shat,x);
			
			/* r = s - omega*t */
			lis_quad_minus((LIS_QUAD *)omega.hi);
			lis_vector_axpyex_mmm(omega,t,r);
			lis_quad_minus((LIS_QUAD *)omega.hi);
			
			/* convergence check */
			lis_solver_get_residual[conv](r,solver,&nrm2);
			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter2, nrm2);
			}

			if( tol > nrm2 )
			{
				solver->retcode    = LIS_SUCCESS;
				solver->iter       = iter2;
				solver->iter2      = iter;
				solver->resid      = nrm2;
				solver->ptimes     = ptimes;
				LIS_DEBUG_FUNC_OUT;
				return LIS_SUCCESS;
			}
			
			if( omega.hi[0]==0.0 && omega.lo[0]==0.0 )
			{
				solver->retcode   = LIS_BREAKDOWN;
				solver->iter       = iter2;
				solver->iter2      = iter;
				solver->resid     = nrm2;
				LIS_DEBUG_FUNC_OUT;
				return LIS_BREAKDOWN;
			}
			rho_old.hi[0] = rho.hi[0];
			rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter       = iter2;
	solver->iter2      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
コード例 #10
0
LIS_INT lis_bicrstab_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, p, s, ap, ms, map, ams, z;
	LIS_QUAD_PTR alpha, beta, omega, rho, rho_old, tmpdot1, tmpdot2;
	LIS_REAL   bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	M       = solver->precon;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptimes  = 0.0;

	rtld    = solver->work[0];
	r       = solver->work[1];
	s       = solver->work[2];
	ms      = solver->work[3];
	ams     = solver->work[4];
	p       = solver->work[5];
	ap      = solver->work[6];
	map     = solver->work[7];
	z       = solver->work[8];
	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
	LIS_QUAD_SCALAR_MALLOC(omega,6,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot2,7,1);

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	lis_solver_set_shadowresidual(solver,r,p);

	LIS_MATVECT(A,p,rtld);
	times = lis_wtime();
	lis_psolve(solver, r, z);
	ptimes += lis_wtime()-times;
	lis_vector_copyex_mm(z,p);
	lis_vector_dotex_mmm(rtld,z,&rho_old);
	
	for( iter=1; iter<=maxiter; iter++ )
	{
		/* ap      = A * p             */
		/* map     = M^-1 * ap         */
		/* tmpdot1 = <rtld,map>        */
		/* alpha   = rho_old / tmpdot1 */
		/* s       = r - alpha*ap      */
		LIS_MATVEC(A,p,ap);
		times = lis_wtime();
		lis_psolve(solver, ap, map);
		ptimes += lis_wtime()-times;
		lis_vector_dotex_mmm(rtld,map,&tmpdot1);
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho_old.hi,(LIS_QUAD *)tmpdot1.hi);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyzex_mmmm(alpha,ap,r,s);

		/* Early check for tolerance */
		lis_solver_get_residual[conv](s,solver,&nrm2);
		if( nrm2 <= tol )
		{
			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
			}

			lis_quad_minus((LIS_QUAD *)alpha.hi);
			lis_vector_axpyex_mmm(alpha,p,x);
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* ms      = z - alpha*map     */
		/* ams     = A * ms            */
		/* tmpdot1 = <ams,s>           */
		/* tmpdot2 = <ams,ams>         */
		/* omega   = tmpdot1 / tmpdot2 */
		lis_vector_axpyzex_mmmm(alpha,map,z,ms);
		LIS_MATVEC(A,ms,ams);
		lis_vector_dotex_mmm(ams,s,&tmpdot1);
		lis_vector_dotex_mmm(ams,ams,&tmpdot2);
		lis_quad_div((LIS_QUAD *)omega.hi,(LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)tmpdot2.hi);

		/* x = x + alpha*p  + omega*ms  */
		/* r = s - omega*ams            */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,p,x);
		lis_vector_axpyex_mmm(omega,ms,x);
		lis_quad_minus((LIS_QUAD *)omega.hi);
		lis_vector_axpyzex_mmmm(omega,ams,s,r);
		
		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}
		
		/* z   = M^-1 * r */
		/* rho = <rtld,z> */
		times = lis_wtime();
		lis_psolve(solver, r, z);
		ptimes += lis_wtime()-times;
		lis_vector_dotex_mmm(rtld,z,&rho);
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / omega) */
		/* p    = z + beta*(p - omega*map)          */
		lis_quad_minus((LIS_QUAD *)omega.hi);
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
		lis_quad_div((LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)omega.hi);
		lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmpdot1.hi);
		lis_quad_minus((LIS_QUAD *)omega.hi);
		lis_vector_axpyex_mmm(omega,map,p);
		lis_vector_xpayex_mmm(z,beta,p);

		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
コード例 #11
0
ファイル: lis_solver_bicgsafe.c プロジェクト: florianl/lis
LIS_INT lis_bicgsafe_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, rhat, p, ptld, phat;
	LIS_VECTOR t, ttld, that, t0, t0hat;
	LIS_VECTOR y, w, u, z;
	LIS_QUAD_PTR alpha, beta, rho, rho_old;
	LIS_QUAD_PTR qsi, eta, one;
	LIS_QUAD_PTR tmp, tmpdot[5];
	LIS_REAL bnrm2, nrm2, tol, tol2;
	LIS_INT iter,maxiter,output,conv;
	LIS_INT iter2,maxiter2;
	double time,ptime;


	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	x       = solver->x;
	maxiter  = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output   = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
	ptime   = 0.0;

	rtld    = solver->work[0];
	r       = solver->work[1];
	rhat    = solver->work[2];
	p       = solver->work[3];
	ptld    = solver->work[4];
	phat    = solver->work[5];
	t       = solver->work[6];
	ttld    = solver->work[7];
	that    = solver->work[8];
	t0      = solver->work[9];
	t0hat   = solver->work[10];
	y       = solver->work[11];
	w       = solver->work[12];
	u       = solver->work[13];
	z       = solver->work[14];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
	LIS_QUAD_SCALAR_MALLOC(eta,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
	LIS_QUAD_SCALAR_MALLOC(one,13,1);

	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	alpha.hi[0] = 1.0;
	alpha.lo[0] = 0.0;
	qsi.hi[0] = 1.0;
	qsi.lo[0] = 0.0;
	one.hi[0] = -1.0;
	one.lo[0] = 0.0;


	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2     = solver->tol_switch;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, ttld);
	lis_vector_set_allex_nm(0.0, ptld);
	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, u);
	lis_vector_set_allex_nm(0.0, t);
	lis_vector_set_allex_nm(0.0, t0);

	for( iter=1; iter<=maxiter2; iter++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dot(rtld,r,&rho.hi[0]);

		/* test breakdown */
		if( rho.hi[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->iter2     = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / qsi) */
		beta.hi[0] = (rho.hi[0] / rho_old.hi[0]) * (alpha.hi[0] / qsi.hi[0]);

		/* w = ttld + beta*ptld */
		lis_vector_axpyz(beta.hi[0],ptld,ttld,w);

		/* rhat = M^-1 * r */
		time = lis_wtime();
		lis_psolve(solver, r, rhat);
		ptime += lis_wtime()-time;

		/* p = rhat + beta*(p - u) */
		lis_vector_axpy(-1,u,p);
		lis_vector_xpay(rhat,beta.hi[0],p);
		
		/* ptld = A * p */
		lis_matvec(A,p,ptld);

		/* tmpdot[0] = <rtld,ptld> */
		lis_vector_dot(rtld,ptld,&tmpdot[0].hi[0]);
		/* test breakdown */
		/* */
		
		/* alpha = rho / tmpdot[0] */
		alpha.hi[0] = rho.hi[0] / tmpdot[0].hi[0];

		/* y = t - r + alpha*(-w + ptld) */
		lis_vector_axpyz(-1,w,ptld,y);
		lis_vector_xpay(t,alpha.hi[0],y);
		lis_vector_axpy(-1,r,y);

		/* t = r - alpha*ptld */
		lis_vector_axpyz(-alpha.hi[0],ptld,r,t);

		/* that  = M^-1 * t */
		/* phat  = M^-1 * ptld */
		/* t0hat = M^-1 * t0 */
		time = lis_wtime();
		lis_psolve(solver, t, that);
		lis_psolve(solver, ptld, phat);
		lis_psolve(solver, t0, t0hat);
		ptime += lis_wtime()-time;

		/* ttld = A * that */
		lis_matvec(A,that,ttld);

		/* tmpdot[0] = <y,y>       */
		/* tmpdot[1] = <ttld,t>    */
		/* tmpdot[2] = <y,t>       */
		/* tmpdot[3] = <ttld,y>    */
		/* tmpdot[4] = <ttld,ttld> */
		lis_vector_dot(y,y,&tmpdot[0].hi[0]);
		lis_vector_dot(ttld,t,&tmpdot[1].hi[0]);
		lis_vector_dot(y,t,&tmpdot[2].hi[0]);
		lis_vector_dot(ttld,y,&tmpdot[3].hi[0]);
		lis_vector_dot(ttld,ttld,&tmpdot[4].hi[0]);
		if(iter==1)
		{
			qsi.hi[0] = tmpdot[1].hi[0] / tmpdot[4].hi[0];
			eta.hi[0] = 0.0;
		}
		else
		{
			tmp.hi[0] = tmpdot[4].hi[0]*tmpdot[0].hi[0]  - tmpdot[3].hi[0]*tmpdot[3].hi[0];
			qsi.hi[0] = (tmpdot[0].hi[0]*tmpdot[1].hi[0] - tmpdot[2].hi[0]*tmpdot[3].hi[0]) / tmp.hi[0];
			eta.hi[0] = (tmpdot[4].hi[0]*tmpdot[2].hi[0] - tmpdot[3].hi[0]*tmpdot[1].hi[0]) / tmp.hi[0];
		}

		/* u = qsi*phat + eta*(t0hat - rhat + beta*u) */
		lis_vector_xpay(t0hat,beta.hi[0],u);
		lis_vector_axpy(-1,rhat,u);
		lis_vector_scale(eta.hi[0],u);
		lis_vector_axpy(qsi.hi[0],phat,u);

		/* z = qsi*rhat + eta*z - alpha*u */
		lis_vector_scale(eta.hi[0],z);
		lis_vector_axpy(qsi.hi[0],rhat,z);
		lis_vector_axpy(-alpha.hi[0],u,z);

		/* x = x + alpha*p + z */
		lis_vector_axpy(alpha.hi[0],p,x);
		lis_vector_axpy(1,z,x);
		
		/* r = t - eta*y - qsi*ttld */
		lis_vector_axpyz(-eta.hi[0],y,t,r);
		lis_vector_axpy(-qsi.hi[0],ttld,r);
		
		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol2 >= nrm2 )
		{
			solver->iter       = iter;
			solver->iter2      = iter;
			solver->ptime      = ptime;
			break;
		}

		lis_vector_copy(t,t0);
		rho_old.hi[0] = rho.hi[0];
	}

	r->precision = LIS_PRECISION_QUAD;
	p->precision = LIS_PRECISION_QUAD;
	t->precision = LIS_PRECISION_QUAD;
	t0->precision = LIS_PRECISION_QUAD;
	ptld->precision = LIS_PRECISION_QUAD;
	that->precision = LIS_PRECISION_QUAD;

	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);

	rho_old.hi[0] = 1.0;
	alpha.hi[0] = 1.0;
	qsi.hi[0] = 1.0;
	one.hi[0] = -1.0;

	/* Initial Residual */
	lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
	tol     = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, ttld);
	lis_vector_set_allex_nm(0.0, ptld);
	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, u);
	lis_vector_set_allex_nm(0.0, t);
	lis_vector_set_allex_nm(0.0, t0);

	for( iter2=iter+1; iter2<=maxiter; iter2++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dotex_mmm(rtld,r,&rho);

		/* test breakdown */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter2;
			solver->iter2     = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / qsi) */
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)qsi.hi);
		lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmp.hi);

		/* w = ttld + beta*ptld */
		lis_vector_axpyzex_mmmm(beta,ptld,ttld,w);

		/* rhat = M^-1 * r */
		time = lis_wtime();
		lis_psolve(solver, r, rhat);
		ptime += lis_wtime()-time;

		/* p = rhat + beta*(p - u) */
		lis_vector_axpyex_mmm(one,u,p);
		lis_vector_xpayex_mmm(rhat,beta,p);
		
		/* ptld = A * p */
		lis_matvec(A,p,ptld);

		/* tmpdot[0] = <rtld,ptld> */
		lis_vector_dotex_mmm(rtld,ptld,&tmpdot[0]);
		/* test breakdown */
		/* */
		
		/* alpha = rho / tmpdot[0] */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot[0].hi);

		/* y = t - r + alpha*(-w + ptld) */
		lis_vector_axpyzex_mmmm(one,w,ptld,y);
		lis_vector_xpayex_mmm(t,alpha,y);
		lis_vector_axpyex_mmm(one,r,y);

		/* t = r - alpha*ptld */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyzex_mmmm(alpha,ptld,r,t);

		/* that  = M^-1 * t */
		/* phat  = M^-1 * ptld */
		/* t0hat = M^-1 * t0 */
		time = lis_wtime();
		lis_psolve(solver, t, that);
		lis_psolve(solver, ptld, phat);
		lis_psolve(solver, t0, t0hat);
		ptime += lis_wtime()-time;

		/* ttld = A * that */
		lis_matvec(A,that,ttld);

		/* tmpdot[0] = <y,y>       */
		/* tmpdot[1] = <ttld,t>    */
		/* tmpdot[2] = <y,t>       */
		/* tmpdot[3] = <ttld,y>    */
		/* tmpdot[4] = <ttld,ttld> */
		lis_vector_dotex_mmm(y,y,&tmpdot[0]);
		lis_vector_dotex_mmm(ttld,t,&tmpdot[1]);
		lis_vector_dotex_mmm(y,t,&tmpdot[2]);
		lis_vector_dotex_mmm(ttld,y,&tmpdot[3]);
		lis_vector_dotex_mmm(ttld,ttld,&tmpdot[4]);
		if(iter==1)
		{
			lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[1].hi,(LIS_QUAD *)tmpdot[4].hi);
			eta.hi[0] = 0.0;
			eta.lo[0] = 0.0;
		}
		else
		{
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[0].hi);
			lis_quad_sqr((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[3].hi);
			lis_quad_sub((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)qsi.hi);

			lis_quad_mul((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[1].hi);
			lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[2].hi,(LIS_QUAD *)tmpdot[3].hi);
			lis_quad_sub((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)eta.hi);
			lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmp.hi);

			lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[2].hi);
			lis_quad_mul((LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[3].hi,(LIS_QUAD *)tmpdot[1].hi);
			lis_quad_sub((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[0].hi);
			lis_quad_div((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmp.hi);
		}

		/* u = qsi*phat + eta*(t0hat - rhat + beta*u) */
		lis_vector_xpayex_mmm(t0hat,beta,u);
		lis_vector_axpyex_mmm(one,rhat,u);
		lis_vector_scaleex_mm(eta,u);
		lis_vector_axpyex_mmm(qsi,phat,u);

		/* z = qsi*rhat + eta*z - alpha*u */
		lis_vector_scaleex_mm(eta,z);
		lis_vector_axpyex_mmm(qsi,rhat,z);
		lis_vector_axpyex_mmm(alpha,u,z);

		/* x = x + alpha*p + z */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_quad_minus((LIS_QUAD *)one.hi);
		lis_vector_axpyex_mmm(alpha,p,x);
		lis_vector_axpyex_mmm(one,z,x);
		lis_quad_minus((LIS_QUAD *)one.hi);
		
		/* r = t - eta*y - qsi*ttld */
		lis_quad_minus((LIS_QUAD *)eta.hi);
		lis_quad_minus((LIS_QUAD *)qsi.hi);
		lis_vector_axpyzex_mmmm(eta,y,t,r);
		lis_vector_axpyex_mmm(qsi,ttld,r);
		lis_quad_minus((LIS_QUAD *)eta.hi);
		lis_quad_minus((LIS_QUAD *)qsi.hi);
		
		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter2] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol > nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter2;
			solver->iter2      = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		lis_vector_copyex_mm(t,t0);
		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}
	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->iter2     = iter2;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
コード例 #12
0
ファイル: lis_solver_bicgsafe.c プロジェクト: florianl/lis
LIS_INT lis_bicgsafe_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, rhat, p, ptld;
	LIS_VECTOR t, ttld;
	LIS_VECTOR y, v, u, utld, z;
	LIS_QUAD_PTR alpha, beta, rho, rho_old;
	LIS_QUAD_PTR qsi, eta;
	LIS_QUAD_PTR tmp, tmpdot[5],one;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output,conv;
	double time,ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;


	rtld    = solver->work[0];
	r       = solver->work[1];
	rhat    = solver->work[2];
	p       = solver->work[3];
	ptld    = solver->work[4];
	t       = solver->work[5];
	ttld    = solver->work[6];
	y       = solver->work[7];
	v       = solver->work[8];
	u       = solver->work[9];
	z       = solver->work[10];
	utld    = solver->work[11];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
	LIS_QUAD_SCALAR_MALLOC(eta,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
	LIS_QUAD_SCALAR_MALLOC(one,13,1);

	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	alpha.hi[0] = 1.0;
	alpha.lo[0] = 0.0;
	qsi.hi[0] = 1.0;
	qsi.lo[0] = 0.0;
	one.hi[0] = -1.0;
	one.lo[0] = 0.0;


	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0,p);
	lis_vector_set_allex_nm(0.0,u);
	lis_vector_set_allex_nm(0.0,ptld);
	lis_vector_set_allex_nm(0.0,utld);
	
	for( iter=1; iter<=maxiter; iter++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dotex_mmm(rtld,r,&rho);

		/* test breakdown */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / qsi) */
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)qsi.hi);
		lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmp.hi);

		/* rhat = M^-1 * r */
		/* v    = A * rhat */
		time = lis_wtime();
		lis_psolve(solver, r, rhat);
		ptime += lis_wtime()-time;
		lis_matvec(A,rhat,v);

		/* p = rhat + beta*(p - u) */
		lis_vector_axpyex_mmm(one,u,p);
		lis_vector_xpayex_mmm(rhat,beta,p);
		
		/* ptld = v + beta*(ptld - utld) */
		lis_vector_axpyex_mmm(one,utld,ptld);
		lis_vector_xpayex_mmm(v,beta,ptld);

		/* tmpdot[0] = <rtld,ptld> */
		lis_vector_dotex_mmm(rtld,ptld,&tmpdot[0]);
		/* test breakdown */
		/* */
		
		/* alpha = rho / tmpdot[0] */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot[0].hi);


		/* tmpdot[0] = <y,y>       */
		/* tmpdot[1] = <v,r>       */
		/* tmpdot[2] = <y,r>       */
		/* tmpdot[3] = <v,y>       */
		/* tmpdot[4] = <v,v>       */
		lis_vector_dotex_mmm(y,y,&tmpdot[0]);
		lis_vector_dotex_mmm(v,r,&tmpdot[1]);
		lis_vector_dotex_mmm(y,r,&tmpdot[2]);
		lis_vector_dotex_mmm(v,y,&tmpdot[3]);
		lis_vector_dotex_mmm(v,v,&tmpdot[4]);
		if(iter==1)
		{
			lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[1].hi,(LIS_QUAD *)tmpdot[4].hi);
			eta.hi[0] = 0.0;
			eta.lo[0] = 0.0;
		}
		else
		{
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[0].hi);
			lis_quad_sqr((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[3].hi);
			lis_quad_sub((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)qsi.hi);

			lis_quad_mul((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[1].hi);
			lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[2].hi,(LIS_QUAD *)tmpdot[3].hi);
			lis_quad_sub((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)eta.hi);
			lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmp.hi);

			lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[2].hi);
			lis_quad_mul((LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[3].hi,(LIS_QUAD *)tmpdot[1].hi);
			lis_quad_sub((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[0].hi);
			lis_quad_div((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmp.hi);
		}

		/* t = qsi*ptld + eta*y */
		lis_vector_copyex_mm(y,t);
		lis_vector_scaleex_mm(eta,t);
		lis_vector_axpyex_mmm(qsi,ptld,t);

		/* ttld  = M^-1 * t */
		time = lis_wtime();
		lis_psolve(solver, t, ttld);
		ptime += lis_wtime()-time;

		/* u    = ttld + eta*beta*u */
		/* utld = A * u             */
		lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)beta.hi);
		lis_vector_xpayex_mmm(ttld,tmp,u);
		lis_matvec(A,u,utld);

		/* z = qsi*rhat + eta*z - alpha*u */
		lis_vector_scaleex_mm(eta,z);
		lis_vector_axpyex_mmm(qsi,rhat,z);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,u,z);

		/* y = qsi*v + eta*y - alpha*utld */
		lis_vector_scaleex_mm(eta,y);
		lis_vector_axpyex_mmm(qsi,v,y);
		lis_vector_axpyex_mmm(alpha,utld,y);
		lis_quad_minus((LIS_QUAD *)alpha.hi);

		/* x = x + alpha*p + z */
		lis_vector_axpyex_mmm(alpha,p,x);
		lis_quad_minus((LIS_QUAD *)one.hi);
		lis_vector_axpyex_mmm(one,z,x);
		lis_quad_minus((LIS_QUAD *)one.hi);
		
		/* r = r - alpha*ptld - y */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,ptld,r);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(one,y,r);
		
		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol > nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
コード例 #13
0
ファイル: lis_solver_bicgsafe.c プロジェクト: florianl/lis
LIS_INT lis_bicrsafe_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, artld, mr, amr, p, ap, map;
	LIS_VECTOR y, my, u, au, z;
	LIS_QUAD_PTR alpha, beta, rho, rho_old;
	LIS_QUAD_PTR qsi, eta, one;
	LIS_QUAD_PTR tmp, tmpdot[5];
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output,conv;
	double time,ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	rtld    = solver->work[0];
	r       = solver->work[1];
	mr      = solver->work[2];
	amr     = solver->work[3];
	p       = solver->work[4];
	ap      = solver->work[5];
	map     = solver->work[6];
	my      = solver->work[7];
	y       = solver->work[8];
	u       = solver->work[9];
	z       = solver->work[10];
	au      = solver->work[11];
	artld   = solver->work[12];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
	LIS_QUAD_SCALAR_MALLOC(eta,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
	LIS_QUAD_SCALAR_MALLOC(one,13,1);


	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_matvect(A,rtld,artld);
	time = lis_wtime();
	lis_psolve(solver, r, mr);
	ptime += lis_wtime()-time;
	lis_matvec(A,mr,amr);
	lis_vector_dotex_mmm(rtld,amr,&rho_old);
	lis_vector_copyex_mm(amr,ap);
	lis_vector_copyex_mm(mr,p);
	one.hi[0] = -1.0;
	one.lo[0] = 0.0;

	
	for( iter=1; iter<=maxiter; iter++ )
	{
		/* map  = M^-1 * ap */
		time = lis_wtime();
		lis_psolve(solver, ap, map);
		ptime += lis_wtime()-time;

		/* tmpdot[0] = <artld,map> */
		/* alpha = rho_old / tmpdot[0] */
		lis_vector_dotex_mmm(artld,map,&tmpdot[0]);
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho_old.hi,(LIS_QUAD *)tmpdot[0].hi);


		/* tmpdot[0] = <y,y>           */
		/* tmpdot[1] = <amr,r>         */
		/* tmpdot[2] = <y,r>           */
		/* tmpdot[3] = <amr,y>         */
		/* tmpdot[4] = <amr,amr>       */
		lis_vector_dotex_mmm(y,y,&tmpdot[0]);
		lis_vector_dotex_mmm(amr,r,&tmpdot[1]);
		lis_vector_dotex_mmm(y,r,&tmpdot[2]);
		lis_vector_dotex_mmm(amr,y,&tmpdot[3]);
		lis_vector_dotex_mmm(amr,amr,&tmpdot[4]);
		if(iter==1)
		{
			lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[1].hi,(LIS_QUAD *)tmpdot[4].hi);
			eta.hi[0] = 0.0;
			eta.lo[0] = 0.0;
		}
		else
		{
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[0].hi);
			lis_quad_sqr((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[3].hi);
			lis_quad_sub((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)qsi.hi);

			lis_quad_mul((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[1].hi);
			lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[2].hi,(LIS_QUAD *)tmpdot[3].hi);
			lis_quad_sub((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)eta.hi);
			lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmp.hi);

			lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[2].hi);
			lis_quad_mul((LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[3].hi,(LIS_QUAD *)tmpdot[1].hi);
			lis_quad_sub((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[0].hi);
			lis_quad_div((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmp.hi);
		}

		/* u    = qsi*map + eta*my + eta*beta*u */
		/* au   = A * u                         */
		lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)beta.hi);
		lis_vector_scaleex_mm(tmp,u);
		lis_vector_axpyex_mmm(qsi,map,u);
		lis_vector_axpyex_mmm(eta,my,u);
		lis_matvec(A,u,au);

		/* z = qsi*mr + eta*z - alpha*u */
		lis_vector_scaleex_mm(eta,z);
		lis_vector_axpyex_mmm(qsi,mr,z);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,u,z);

		/* y  = qsi*amr + eta*y - alpha*au */
		/* my = M^-1 * y */
		lis_vector_scaleex_mm(eta,y);
		lis_vector_axpyex_mmm(qsi,amr,y);
		lis_vector_axpyex_mmm(alpha,au,y);
		time = lis_wtime();
		lis_psolve(solver, y, my);
		ptime += lis_wtime()-time;

		/* x = x + alpha*p + z */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,p,x);
		lis_quad_minus((LIS_QUAD *)one.hi);
		lis_vector_axpyex_mmm(one,z,x);
		
		/* r = r - alpha*ap - y */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_quad_minus((LIS_QUAD *)one.hi);
		lis_vector_axpyex_mmm(alpha,ap,r);
		lis_vector_axpyex_mmm(one,y,r);
		
		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* mr  = mr - alpha*map - my */
		/* amr = A * mr              */
		/* rho = <rtld,amr> */
		lis_vector_axpyex_mmm(alpha,map,mr);
		lis_vector_axpyex_mmm(one,my,mr);
		lis_matvec(A,mr,amr);
		lis_vector_dotex_mmm(rtld,amr,&rho);
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / qsi) */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)qsi.hi);
		lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmp.hi);


		/* p  = mr + beta*(p - u)    */
		/* ap = amr + beta*(ap - au) */
		lis_vector_axpyex_mmm(one,u,p);
		lis_vector_xpayex_mmm(mr,beta,p);
		lis_vector_axpyex_mmm(one,au,ap);
		lis_vector_xpayex_mmm(amr,beta,ap);

		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}