예제 #1
0
/*
 * Allocate the three arrays of a CSR matrix with n rows and nnz stored
 * entries: *ptr receives n+1 LIS_INT, *index receives nnz LIS_INT and
 * *value receives nnz LIS_SCALAR.
 *
 * Returns LIS_SUCCESS when all three allocations succeed.  If one of them
 * fails, the error is recorded with LIS_SETERR_MEM, everything allocated
 * so far is released, all three output pointers are reset to NULL and
 * LIS_OUT_OF_MEMORY is returned.
 *
 * The outputs are reset because callers commonly hand them to lis_free2()
 * again on their own error paths; leaving freed addresses behind would
 * turn that into a double free.
 */
LIS_INT lis_matrix_malloc_csr(LIS_INT n, LIS_INT nnz, LIS_INT **ptr, LIS_INT **index, LIS_SCALAR **value)
{
	LIS_DEBUG_FUNC_IN;

	*ptr     = NULL;
	*index   = NULL;
	*value   = NULL;

	*ptr = (LIS_INT *)lis_malloc( (n+1)*sizeof(LIS_INT),"lis_matrix_malloc_csr::ptr" );
	if( *ptr==NULL )
	{
		LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
		/* nothing has been allocated yet, outputs are already NULL */
		return LIS_OUT_OF_MEMORY;
	}
	*index = (LIS_INT *)lis_malloc( nnz*sizeof(LIS_INT),"lis_matrix_malloc_csr::index" );
	if( *index==NULL )
	{
		LIS_SETERR_MEM(nnz*sizeof(LIS_INT));
		lis_free2(3,*ptr,*index,*value);
		/* do not leave freed addresses visible to the caller */
		*ptr     = NULL;
		*index   = NULL;
		*value   = NULL;
		return LIS_OUT_OF_MEMORY;
	}
	*value = (LIS_SCALAR *)lis_malloc( nnz*sizeof(LIS_SCALAR),"lis_matrix_malloc_csr::value" );
	if( *value==NULL )
	{
		LIS_SETERR_MEM(nnz*sizeof(LIS_SCALAR));
		lis_free2(3,*ptr,*index,*value);
		/* do not leave freed addresses visible to the caller */
		*ptr     = NULL;
		*index   = NULL;
		*value   = NULL;
		return LIS_OUT_OF_MEMORY;
	}
	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Release an ILU matrix container and everything it owns.
 *
 * Nothing is done unless lis_is_malloc(A) is true.  The per-row storage is
 * laid out differently depending on whether A->bsz is set:
 *   - bsz set:     A->index[row] plus A->values[row][k] for every k below
 *                  A->nnz[row], and A->values[row] itself when nnz[row] > 0;
 *   - bsz not set: A->index[row] and A->value[row], only when nnz[row] > 0.
 * The top-level arrays and A itself are released afterwards.
 *
 * Always returns LIS_SUCCESS.
 */
LIS_INT lis_matrix_ilu_destroy(LIS_MATRIX_ILU A)
{
	LIS_INT row,blk;

	LIS_DEBUG_FUNC_IN;

	if( lis_is_malloc(A) )
	{
		if( !A->bsz )
		{
			/* scalar rows: index/value pairs exist only for non-empty rows */
			for(row=0;row<A->n;row++)
			{
				if( A->nnz[row]<=0 ) continue;
				free(A->index[row]);
				free(A->value[row]);
			}
			lis_free2(4,A->nnz,A->index,A->value,A->nnz_ma);
		}
		else
		{
			/* block rows: every entry of values[row] is its own allocation */
			for(row=0;row<A->n;row++)
			{
				free(A->index[row]);
				blk = 0;
				while( blk<A->nnz[row] )
				{
					free(A->values[row][blk]);
					blk++;
				}
				if( A->nnz[row]>0 )
				{
					free(A->values[row]);
				}
			}
			lis_free2(5,A->bsz,A->nnz,A->index,A->values,A->nnz_ma);
		}
		lis_free(A);
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
예제 #3
0
/*
 * Counting sort of the keys i1[is..ie-1] into descending order.
 *
 * Keys are bucketed by (maxnzr - key), so bucket 0 holds the largest key.
 * On return:
 *   - i2[p], for p in [is, ie), is the original position of the key that
 *     ends up at sorted position p (equal keys keep their relative order);
 *   - i1[is..ie-1] is overwritten with the keys in sorted order.
 *
 * Two work arrays of maxnzr+2 ints are allocated with lis_malloc and
 * released before returning; their allocation is not checked.
 */
void lis_sort_jds(int is, int ie, int maxnzr, int *i1, int *i2)
{
	int pos,bucket,nbucket;
	int *cursor,*first;

	nbucket = maxnzr+2;
	cursor  = (int *)lis_malloc(nbucket*sizeof(int),"lis_sort_jds::iw");
	first   = (int *)lis_malloc(nbucket*sizeof(int),"lis_sort_jds::iw2");

	#ifdef USE_VEC_COMP
	#pragma cdir nodep
	#endif
	for(bucket=0;bucket<nbucket;bucket++)
	{
		cursor[bucket] = 0;
	}

	/* histogram, shifted by one slot so the prefix sum yields start offsets */
	for(pos=is;pos<ie;pos++)
	{
		cursor[maxnzr - i1[pos] + 1]++;
	}

	/* offsets are absolute positions, so the first bucket starts at is */
	cursor[0] = is;
	for(bucket=1;bucket<nbucket;bucket++)
	{
		cursor[bucket] += cursor[bucket-1];
	}

	/* keep the untouched bucket boundaries for the final fill of i1 */
	#ifdef USE_VEC_COMP
	#pragma cdir nodep
	#endif
	for(bucket=0;bucket<nbucket;bucket++)
	{
		first[bucket] = cursor[bucket];
	}

	/* scatter original positions into their buckets */
	for(pos=is;pos<ie;pos++)
	{
		bucket = maxnzr - i1[pos];
		i2[cursor[bucket]] = pos;
		cursor[bucket]++;
	}

	/* rewrite i1 with the sorted keys, one bucket at a time */
	for(bucket=0;bucket<=maxnzr;bucket++)
	{
		#ifdef USE_VEC_COMP
		#pragma cdir nodep
		#endif
		for(pos=first[bucket];pos<first[bucket+1];pos++)
		{
			i1[pos] = maxnzr - bucket;
		}
	}
	lis_free2(2,cursor,first);
}
예제 #4
0
/*
 * Counting sort of the keys i1[is..ie-1] into descending order.
 *
 * Keys are bucketed by (maxnzr - key), so bucket 0 holds the largest key.
 * On return:
 *   - i2[p], for p in [is, ie), is the original position of the key that
 *     ends up at sorted position p (equal keys keep their relative order);
 *   - i1[is..ie-1] is overwritten with the keys in sorted order.
 *
 * Two work arrays of maxnzr+2 LIS_INT are allocated with lis_malloc and
 * released before returning; their allocation is not checked.
 */
void lis_sort_jad(LIS_INT is, LIS_INT ie, LIS_INT maxnzr, LIS_INT *i1, LIS_INT *i2)
{
  LIS_INT pos,bucket,nbucket;
  LIS_INT *cursor,*first;

  nbucket = maxnzr+2;
  cursor  = (LIS_INT *)lis_malloc(nbucket*sizeof(LIS_INT),"lis_sort_jad::iw");
  first   = (LIS_INT *)lis_malloc(nbucket*sizeof(LIS_INT),"lis_sort_jad::iw2");

  #ifdef USE_VEC_COMP
  #pragma cdir nodep
  #endif
  for(bucket=0;bucket<nbucket;bucket++)
  {
    cursor[bucket] = 0;
  }

  /* histogram, shifted by one slot so the prefix sum yields start offsets */
  for(pos=is;pos<ie;pos++)
  {
    cursor[maxnzr - i1[pos] + 1]++;
  }

  /* offsets are absolute positions, so the first bucket starts at is */
  cursor[0] = is;
  for(bucket=1;bucket<nbucket;bucket++)
  {
    cursor[bucket] += cursor[bucket-1];
  }

  /* keep the untouched bucket boundaries for the final fill of i1 */
  #ifdef USE_VEC_COMP
  #pragma cdir nodep
  #endif
  for(bucket=0;bucket<nbucket;bucket++)
  {
    first[bucket] = cursor[bucket];
  }

  /* scatter original positions into their buckets */
  for(pos=is;pos<ie;pos++)
  {
    bucket = maxnzr - i1[pos];
    i2[cursor[bucket]] = pos;
    cursor[bucket]++;
  }

  /* rewrite i1 with the sorted keys, one bucket at a time */
  for(bucket=0;bucket<=maxnzr;bucket++)
  {
    #ifdef USE_VEC_COMP
    #pragma cdir nodep
    #endif
    for(pos=first[bucket];pos<first[bucket+1];pos++)
    {
      i1[pos] = maxnzr - bucket;
    }
  }
  lis_free2(2,cursor,first);
}
/*
 * Allocate the index and value arrays used for DIA storage of a matrix
 * with n rows and nnd diagonals.  Both arrays are sized for n*nnd elements
 * (LIS_INT for *index, LIS_SCALAR for *value).
 *
 * Returns LIS_SUCCESS when both allocations succeed.  On failure the error
 * is recorded with LIS_SETERR_MEM, anything already allocated is released,
 * both output pointers are reset to NULL and LIS_OUT_OF_MEMORY is returned.
 *
 * The outputs are reset so that a caller which frees them again on its own
 * error path does not double-free an address released here.
 */
LIS_INT lis_matrix_malloc_dia(LIS_INT n, LIS_INT nnd, LIS_INT **index, LIS_SCALAR **value)
{
  LIS_DEBUG_FUNC_IN;

  *index   = NULL;
  *value   = NULL;

  *index = (LIS_INT *)lis_malloc( n*nnd*sizeof(LIS_INT),"lis_matrix_malloc_dia::index" );
  if( *index==NULL )
  {
    LIS_SETERR_MEM(n*nnd*sizeof(LIS_INT));
    /* nothing has been allocated yet, outputs are already NULL */
    return LIS_OUT_OF_MEMORY;
  }
  *value = (LIS_SCALAR *)lis_malloc( n*nnd*sizeof(LIS_SCALAR),"lis_matrix_malloc_dia::value" );
  if( *value==NULL )
  {
    LIS_SETERR_MEM(n*nnd*sizeof(LIS_SCALAR));
    lis_free2(2,*index,*value);
    /* do not leave freed addresses visible to the caller */
    *index   = NULL;
    *value   = NULL;
    return LIS_OUT_OF_MEMORY;
  }
  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}
예제 #6
0
/*
 * Free a circular, doubly linked argument list.
 *
 * args is the sentinel node: the list is walked from args->next until the
 * walk returns to args.  For every element the two strings arg1 and arg2
 * are released, the node is unlinked and then freed.  The sentinel itself
 * is freed last.
 *
 * A NULL args is accepted and treated as an empty list.
 *
 * Always returns LIS_SUCCESS.
 */
LIS_INT lis_args_free(LIS_ARGS args)
{
  LIS_ARGS arg,t;

  LIS_DEBUG_FUNC_IN;

  /* the NULL check must come before args->next is read */
  if( args==NULL )
  {
    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
  }

  arg = args->next;

  while( arg!=args )
  {
    t             = arg;
    arg           = arg->next;

    lis_free2(2,t->arg1,t->arg2);
    /* unlink before freeing so the remaining list stays consistent */
    t->next->prev = t->prev;
    t->prev->next = t->next;
    lis_free(t);
  }
  lis_free(args);

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}
예제 #7
0
/*
 * Convert the row-wise work storage of Ain (w_row / w_index / w_value)
 * into CSR arrays and install them in Aout.
 *
 * Ain->w_row[r] is the number of entries held for row r, and
 * Ain->w_index[r][c] / Ain->w_value[r][c] are their column indices and
 * values.  The entries are copied row by row, in their stored order.
 *
 * Returns LIS_SUCCESS, or the error code of the first helper that fails.
 * The new arrays are released if lis_matrix_set_csr fails; if
 * lis_matrix_assemble fails, lis_matrix_storage_destroy(Aout) is called.
 */
LIS_INT lis_matrix_convert_rco2csr(LIS_MATRIX Ain, LIS_MATRIX Aout)
{
	LIS_INT row,col,pos,n,nnz,err;
	LIS_INT *ptr,*index;
	LIS_SCALAR *value;

	LIS_DEBUG_FUNC_IN;

	ptr     = NULL;
	index   = NULL;
	value   = NULL;
	n       = Ain->n;

	/* total number of stored entries */
	nnz     = 0;
	#ifdef _OPENMP
	#pragma omp parallel for reduction(+:nnz) private(row)
	#endif
	for(row=0;row<n;row++)
	{
		nnz += Ain->w_row[row];
	}

	err = lis_matrix_malloc_csr(n,nnz,&ptr,&index,&value);
	if( err )
	{
		return err;
	}

	#ifdef _NUMA
		#pragma omp parallel for private(row)
		for(row=0;row<n+1;row++) ptr[row] = 0;
	#else
		ptr[0] = 0;
	#endif

	/* row pointers are the running sum of the per-row counts */
	for(row=0;row<n;row++)
	{
		ptr[row+1] = ptr[row] + Ain->w_row[row];
	}

	/* each row writes only its own [ptr[row], ptr[row+1]) slice */
	#ifdef _OPENMP
	#pragma omp parallel for private(row,col,pos)
	#endif
	for(row=0;row<n;row++)
	{
		pos = ptr[row];
		for(col=0;col<Ain->w_row[row];col++)
		{
			index[pos] = Ain->w_index[row][col];
			value[pos] = Ain->w_value[row][col];
			pos++;
		}
	}

	err = lis_matrix_set_csr(nnz,ptr,index,value,Aout);
	if( err )
	{
		lis_free2(3,ptr,index,value);
		return err;
	}

	err = lis_matrix_assemble(Aout);
	if( err )
	{
		lis_matrix_storage_destroy(Aout);
		return err;
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
예제 #8
0
/*
 * Build the factors of a sparse approximate inverse preconditioner for the
 * CSR matrix solver->A and store them in precon.
 *
 * Inputs read from solver:
 *   - solver->A : the matrix (its rows are sorted in place with
 *                 lis_matrix_sort_csr);
 *   - solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN] : drop tolerance tol;
 *   - solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN] : m, used only to
 *                 compute lfil, which the active code does not use.
 *
 * For every row i the code starts from l = u = e_i and subtracts the
 * contributions of all earlier rows j < i of W and Z (formulas are given in
 * the inline comments), skipping updates whose magnitude does not exceed
 * tol_dd = tol * max|A[i,:]|.  The resulting sparse vectors become row i of
 * W and Z, and d[i] = 1 / (W_i^T * A * Z_i).
 *
 * On success precon->L = W, precon->U = Z, precon->D = D and LIS_SUCCESS is
 * returned.  Otherwise the error code of the failing helper, or
 * LIS_OUT_OF_MEMORY, is returned.
 *
 * NOTE(review): none of the error returns below releases what was allocated
 * before it (W, Z, D, the work arrays, B).
 * NOTE(review): nnz, annz, lfil, len, cc, ic, pc and tmp are assigned or
 * allocated but not otherwise used by the active code; len, lfil and tmp
 * appear only in the commented-out section.
 */
LIS_INT lis_precon_create_sainv_csr(LIS_SOLVER solver, LIS_PRECON precon)
{
	LIS_INT	err;
	LIS_INT	i,j,k,ii,jj,len,lfil;
	LIS_INT	n,nnz,annz,cl,cu,cc,m;
	LIS_INT	*wu,*wl,*il,*iu,*ic,*pc;
	LIS_SCALAR t,v;
	LIS_REAL tol,tol_dd,nrm;
	LIS_SCALAR *d,*r,*c,*l,*u,*tmp;
	LIS_MATRIX A,B;
	LIS_MATRIX_ILU W,Z;
	LIS_VECTOR D;

	LIS_DEBUG_FUNC_IN;


	A      = solver->A;
	n      = A->n;
	nnz    = A->nnz;
	tol    = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];
	m      = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN];
	annz   = 10+A->nnz / A->n;
	lfil   = (LIS_INT)((double)A->nnz/(2.0*n))*m;

	W      = NULL;
	Z      = NULL;
	wu     = NULL;
	wl     = NULL;
	d      = NULL;
	l      = NULL;
	u      = NULL;
	il     = NULL;
	iu     = NULL;

	/* containers for the two factors and the diagonal */
	err = lis_matrix_ilu_create(n,1,&W);
	if( err ) return err;
	err = lis_matrix_ilu_create(n,1,&Z);
	if( err ) return err;
	err = lis_matrix_ilu_setCR(W);
	if( err ) return err;
	err = lis_matrix_ilu_setCR(Z);
	if( err ) return err;
	err = lis_vector_duplicate(A,&D);
	if( err ) return err;
	d = D->value;

	/* length-n work arrays: r, c, l, u hold values; il, iu list the      */
	/* positions in use in l and u; wl, wu flag those positions            */
	tmp   = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l");
	if( tmp==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		return LIS_OUT_OF_MEMORY;
	}
	r   = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l");
	if( r==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		return LIS_OUT_OF_MEMORY;
	}
	c   = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::u");
	if( c==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		return LIS_OUT_OF_MEMORY;
	}
	l   = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l");
	if( l==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		return LIS_OUT_OF_MEMORY;
	}
	u   = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::u");
	if( u==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		return LIS_OUT_OF_MEMORY;
	}
	il   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::il");
	if( il==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}
	iu   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::iu");
	if( iu==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}
	ic   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::iu");
	if( ic==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}
	wu   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::ww");
	if( wu==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}
	wl   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::ww");
	if( wl==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}
	pc   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::iu");
	if( pc==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}

	/* B receives the output of lis_matrix_convert_csr2csc(A,B); below,   */
	/* B->ptr[i]..B->ptr[i+1] is used as column i of A                     */
	lis_matrix_sort_csr(A);
	err = lis_matrix_duplicate(A,&B);
	if( err ) return err;
	err = lis_matrix_convert_csr2csc(A,B);
	if( err ) return err;

	for(i=0;i<n;i++)
	{
		wu[i] = 0;
		wl[i] = 0;
		pc[i] = A->ptr[i];
	}
	/* NOTE(review): r and c are not cleared here; they are written only  */
	/* at the positions of row/column i and reset at those same positions */
	/* at the end of each iteration, yet read at arbitrary Z/W indices.   */
	/* Confirm that lis_malloc returns zero-filled memory.                 */
	for(i=0; i<n; i++)
	{
		/* nrm_inf(A[i,:]) */
		nrm = 0.0;
		for(j=A->ptr[i];j<A->ptr[i+1];j++)
		{
			nrm = _max(nrm,fabs(A->value[j]));
		}
		tol_dd = nrm * tol;

		/* l = e_i  */
		/* u = e_i  */
		l[i]  = 1.0;
		u[i]  = 1.0;
		il[0] = i;
		iu[0] = i;
		cl    = 1;
		cu    = 1;
		wu[i] = 1;
		wl[i] = 1;
		cc    = 0;

		/* r = e_i^T*A */
		for(j=A->ptr[i];j<A->ptr[i+1];j++)
		{
			jj    = A->index[j];
			r[jj] = A->value[j];
		}
		/* c = A_i = A*e_i */
		for(j=B->ptr[i];j<B->ptr[i+1];j++)
		{
			jj    = B->index[j];
			c[jj] = B->value[j];
		}

	    /* W_i = W_i - (r*Z_j/D_jj)*W_j */
		for(j=0;j<i;j++)
		{
			t = 0.0;
			for(k=0;k<Z->nnz[j];k++)
			{
				t += r[Z->index[j][k]]*Z->value[j][k];
			}
			/* d[j] already holds the reciprocal 1/D_jj */
			t = t * d[j];
			if( fabs(t) > tol_dd )
			{
				for(k=0;k<W->nnz[j];k++)
				{
					v      = t * W->value[j][k];
					if( fabs(v) > tol_dd )
					{
						jj     = W->index[j][k];
						if( wl[jj]==1 )
						{
							l[jj] -= v;
						}
						else
						{
							/* first touch of position jj: record it */
							l[jj]    = -v;
							il[cl++] = jj;
							wl[jj]   = 1;
						}
					}
				}
			}
		}

		/* Z_i = Z_i - (W_j^T*c/D_jj)*Z_j */
		for(j=0;j<i;j++)
		{
			t = 0.0;
			for(k=0;k<W->nnz[j];k++)
			{
				t += c[W->index[j][k]]*W->value[j][k];
			}
			t = t * d[j];
			if( fabs(t) > tol_dd )
			{
				for(k=0;k<Z->nnz[j];k++)
				{
					v      = t * Z->value[j][k];
					if( fabs(v) > tol_dd )
					{
						jj     = Z->index[j][k];
						if( wu[jj]==1 )
						{
							u[jj] -= v;
						}
						else
						{
							/* first touch of position jj: record it */
							u[jj]    = -v;
							iu[cu++] = jj;
							wu[jj]   = 1;
						}
					}
				}
			}
		}
/*
		len = _min(lfil,cl);
		for(j=0;j<cl;j++) tmp[j] = fabs(l[il[j]]);
		lis_sort_di(0,cl-1,tmp,il);
		lis_sort_i(0,len-1,il);
		cl = len;
		*/
		/*
		k = cl;
		for(j=0;j<cl;j++)
		{
			if( fabs(l[il[j]])<= tol_dd )
			{
				wl[il[j]] = 0;
				il[j] = n;
				k--;
			}
		}
		lis_sort_i(0,cl-1,il);
		cl = k;
		

		k = cu;
		for(j=0;j<cu;j++)
		{
			if( fabs(u[iu[j]])<= tol_dd )
			{
				wu[iu[j]] = 0;
				iu[j] = n;
				k--;
			}
		}
		lis_sort_i(0,cu-1,iu);
		cu = k;
		*/

		/* store l as row i of W and u as row i of Z                      */
		/* NOTE(review): the malloc results below are not checked         */
		W->nnz[i] = cl;
		if( cl > 0 )
		{
			W->index[i] = (LIS_INT *)malloc(cl*sizeof(LIS_INT));
			W->value[i] = (LIS_SCALAR *)malloc(cl*sizeof(LIS_SCALAR));
			memcpy(W->index[i],il,cl*sizeof(LIS_INT));
			for(j=0;j<cl;j++)
			{
				W->value[i][j] = l[il[j]];
			}
		}
		Z->nnz[i] = cu;
		if( cu > 0 )
		{
			Z->index[i] = (LIS_INT *)malloc(cu*sizeof(LIS_INT));
			Z->value[i] = (LIS_SCALAR *)malloc(cu*sizeof(LIS_SCALAR));
			memcpy(Z->index[i],iu,cu*sizeof(LIS_INT));
			for(j=0;j<cu;j++)
			{
				Z->value[i][j] = u[iu[j]];
			}
		}

		/* reset the scratch state touched by this row; l is zeroed at    */
		/* the used positions because it is reused as accumulator below   */
		for(j=A->ptr[i];j<A->ptr[i+1];j++) r[A->index[j]] = 0.0;
		for(j=B->ptr[i];j<B->ptr[i+1];j++) c[B->index[j]] = 0.0;
		for(j=0;j<cl;j++)
		{
			wl[il[j]] = 0;
			l[il[j]] = 0.0;
		}
		for(j=0;j<cu;j++)
		{
			wu[iu[j]] = 0;
		}

		/* D_ii = W_i^T * A * Z_i */
		/* first l = A * Z_i, accumulated column by column from B */
		cl = 0;
		for(k=0;k<Z->nnz[i];k++)
		{
			ii = Z->index[i][k];
			for(j=B->ptr[ii];j<B->ptr[ii+1];j++)
			{
				jj     = B->index[j];
				if( wl[jj]==0 )
				{
					l[jj] = B->value[j]*Z->value[i][k];
					wl[jj]   = 1;
					il[cl++] = jj;
				}
				else
				{
					l[jj] += B->value[j]*Z->value[i][k];
				}
			}
		}
		/* then t = W_i . l; d[i] stores the reciprocal                   */
		/* NOTE(review): t == 0 is not guarded against                    */
		t = 0.0;
		for(j=0;j<W->nnz[i];j++)
		{
			k  = W->index[i][j];
			t += W->value[i][j] * l[k];
		}
		d[i] = 1.0 / t;
		for(j=0;j<cl;j++) wl[il[j]] = 0;

	}

	lis_matrix_destroy(B);
	lis_free2(11,r,c,il,l,wl,iu,u,wu,ic,pc,tmp);


	precon->L  = W;
	precon->U  = Z;
	precon->D  = D;

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
예제 #9
0
/*
 * Build the factors of a sparse approximate inverse preconditioner for the
 * CSR matrix solver->A and store them in precon.
 *
 * W and Z start with one entry per row (value 1.0 at column i).  For each
 * i in turn the code computes
 *   l = A * Z_i   (only entries with index > i are kept),
 *   u = W_i' * A  (all entries accumulated, indices > i are listed in iu),
 *   d[i] = 1 / (u . Z_i),
 * and then updates every later row j listed in il / iu:
 *   w_j = w_j - (l_j * d[i]) * w_i,   z_j = z_j - (u_j * d[i]) * z_i.
 * An update term t is applied only when fabs(t)*nrm > tol, where nrm is
 * 1 / max|A[i,:]| and tol is
 * solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN].
 *
 * On success precon->L = W, precon->U = Z, precon->D = D and LIS_SUCCESS is
 * returned.  Otherwise the error code of the failing helper, or
 * LIS_OUT_OF_MEMORY, is returned.
 *
 * NOTE(review): none of the error returns below releases what was allocated
 * before it (W, Z, D, the work arrays, B).
 * NOTE(review): annz = n/10 is 0 for n < 10; confirm that
 * lis_matrix_ilu_premalloc still provides room for the initial entry
 * written at position 0 of every row.
 */
LIS_INT lis_precon_create_sainv_csr(LIS_SOLVER solver, LIS_PRECON precon)
{
	LIS_INT	err;
	LIS_INT	i,j,k,ii,jj,ik,jk;
	LIS_INT	n,annz,cl,cu;
	LIS_INT	*ww,*il,*iu;
	LIS_SCALAR t,dd;
	LIS_REAL tol,nrm;
	LIS_SCALAR *d,*l,*u;
	LIS_MATRIX A,B;
	LIS_MATRIX_ILU W,Z;
	LIS_VECTOR D;

	LIS_DEBUG_FUNC_IN;


	A      = solver->A;
	n      = A->n;
	tol    = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];
	/* annz is both the initial per-row capacity request and the growth step */
	annz   = A->n / 10;

	W      = NULL;
	ww     = NULL;
	d      = NULL;
	l      = NULL;
	u      = NULL;
	il     = NULL;
	iu     = NULL;

	err = lis_matrix_ilu_create(n,1,&W);
	if( err ) return err;
	err = lis_matrix_ilu_create(n,1,&Z);
	if( err ) return err;
	err = lis_matrix_ilu_setCR(W);
	if( err ) return err;
	err = lis_matrix_ilu_setCR(Z);
	if( err ) return err;
	err = lis_vector_duplicate(A,&D);
	if( err ) return err;
	d = D->value;
	err = lis_matrix_ilu_premalloc(annz,W);
	if( err ) return err;
	err = lis_matrix_ilu_premalloc(annz,Z);
	if( err ) return err;
	/* length-n work arrays: l, u hold values; il, iu list the positions  */
	/* of interest; ww is a shared marker array that is always reset to 0 */
	l   = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l");
	if( l==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		return LIS_OUT_OF_MEMORY;
	}
	u   = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::u");
	if( u==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		return LIS_OUT_OF_MEMORY;
	}
	il   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::il");
	if( il==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}
	iu   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::iu");
	if( iu==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}
	ww   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::ww");
	if( ww==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}
	/* B receives the output of lis_matrix_convert_csr2csc(A,B); below,   */
	/* B->ptr[ii]..B->ptr[ii+1] is walked as column ii of A                */
	err = lis_matrix_duplicate(A,&B);
	if( err ) return err;
	err = lis_matrix_convert_csr2csc(A,B);
	if( err )
	{
		return err;
	}

	/* W = Z = identity: one entry per row, on the diagonal */
	for(i=0;i<n;i++) ww[i] = 0;
	for(i=0;i<n;i++)
	{
		W->value[i][0] = 1.0;
		W->index[i][0] = i;
		W->nnz[i]      = 1;
		Z->value[i][0] = 1.0;
		Z->index[i][0] = i;
		Z->nnz[i]      = 1;
	}
	for(i=0; i<n; i++)
	{
		/* nrm_inf(A[i,:]) */
		nrm = 0.0;
		for(j=A->ptr[i];j<A->ptr[i+1];j++)
		{
			nrm = _max(nrm,fabs(A->value[j]));
		}
		/* from here on nrm is the reciprocal of the row norm            */
		/* NOTE(review): a row with no non-zero value divides by zero    */
		nrm = 1.0/nrm;

		/* l = AZ_i */
		cl = 0;
		memset(l,0,n*sizeof(LIS_SCALAR));
		for(k=0;k<Z->nnz[i];k++)
		{
			ii = Z->index[i][k];
			for(j=B->ptr[ii];j<B->ptr[ii+1];j++)
			{
				jj     = B->index[j];
				if( jj>i )
				{
					l[jj] += B->value[j]*Z->value[i][k];
					if( ww[jj]==0 )
					{
						ww[jj]   = 1;
						il[cl++] = jj;
					}
				}
			}
		}
		/* clear the markers so ww can be reused for u */
		for(k=0;k<cl;k++) ww[il[k]] = 0;

		/* u = W_i'A */
		cu = 0;
		memset(u,0,n*sizeof(LIS_SCALAR));
		for(k=0;k<W->nnz[i];k++)
		{
			ii = W->index[i][k];
			for(j=A->ptr[ii];j<A->ptr[ii+1];j++)
			{
				jj     = A->index[j];
				#ifdef USE_MPI
					/* skip column indices outside 0..n-1 */
					if( jj>n-1 ) continue;
				#endif
				u[jj] += A->value[j]*W->value[i][k];
				if( jj>i && ww[jj]==0 )
				{
					ww[jj]   = 1;
					iu[cu++] = jj;
				}
			}
		}
		for(k=0;k<cu;k++) ww[iu[k]] = 0;

		/* d_ii = uZ_i or W_i'l  */
		/* d[i] stores the reciprocal; NOTE(review): t == 0 is not guarded */
		t = 0.0;
		for(k=0;k<Z->nnz[i];k++)
		{
			t += u[Z->index[i][k]]*Z->value[i][k];
		}
		d[i] = 1.0/t;

		/* for j>i, l_j!=0            */
		/* w_j = w_j - (l_j/d_ii)*w_i */
		for(jj=0;jj<cl;jj++)
		{
			j = il[jj];
			dd = l[j]*d[i];
			/* ww maps a column index to (position in row j)+1, 0 = absent */
			for(k=0;k<W->nnz[j];k++)
			{
				ww[W->index[j][k]] = k+1;
			}
			for(ik=0;ik<W->nnz[i];ik++)
			{
				jk = ww[W->index[i][ik]];
				if( jk!=0 )
				{
					/* column already present in row j: update in place */
					t = dd*W->value[i][ik];
					if( fabs(t)*nrm > tol )
					{
						W->value[j][jk-1] -= t;
					}
				}
				else
				{
					/* new column for row j: append, growing the row by annz */
					t = dd*W->value[i][ik];
					if( fabs(t)*nrm > tol )
					{
						if( W->nnz[j] == W->nnz_ma[j] )
						{
							W->nnz_ma[j] += annz;
							err = lis_matrix_ilu_realloc(j,W->nnz_ma[j],W);
							if( err ) return err;
						}
						jk                = W->nnz[j];
						W->index[j][jk] = W->index[i][ik];
						W->value[j][jk] = -t;
						W->nnz[j]++;
					}
				}
			}
			/* also clears markers of entries appended above */
			for(k=0;k<W->nnz[j];k++)
			{
				ww[W->index[j][k]] = 0;
			}
		}

		/* for j>i, u_j!=0            */
		/* z_j = z_j - (u_j/d_ii)*z_i */
		for(jj=0;jj<cu;jj++)
		{
			j = iu[jj];
			dd = u[j]*d[i];
			/* ww maps a column index to (position in row j)+1, 0 = absent */
			for(k=0;k<Z->nnz[j];k++)
			{
				ww[Z->index[j][k]] = k+1;
			}
			for(ik=0;ik<Z->nnz[i];ik++)
			{
				jk = ww[Z->index[i][ik]];
				if( jk!=0 )
				{
					/* column already present in row j: update in place */
					t = dd*Z->value[i][ik];
					if( fabs(t)*nrm > tol )
					{
						Z->value[j][jk-1] -= t;
					}
				}
				else
				{
					/* new column for row j: append, growing the row by annz */
					t = dd*Z->value[i][ik];
					if( fabs(t)*nrm > tol )
					{
						if( Z->nnz[j] == Z->nnz_ma[j] )
						{
							Z->nnz_ma[j] += annz;
							err = lis_matrix_ilu_realloc(j,Z->nnz_ma[j],Z);
							if( err ) return err;
						}
						jk                = Z->nnz[j];
						Z->index[j][jk] = Z->index[i][ik];
						Z->value[j][jk] = -t;
						Z->nnz[j]++;
					}
				}
			}
			/* also clears markers of entries appended above */
			for(k=0;k<Z->nnz[j];k++)
			{
				ww[Z->index[j][k]] = 0;
			}
		}
	}

	lis_matrix_destroy(B);
	lis_free2(5,l,u,ww,il,iu);


	precon->L  = W;
	precon->U  = Z;
	precon->D  = D;


	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
예제 #10
0
/*
 * Split the CSR matrix A into two CSR parts by column index: entries whose
 * column index is below n = A->n go to A->L, all other entries go to A->U.
 * Within each row the entries keep their original order.
 *
 * On success A->L and A->U own the new arrays, A->is_splited is set to
 * LIS_TRUE and LIS_SUCCESS is returned.  Otherwise LIS_OUT_OF_MEMORY or the
 * error code of the failing helper is returned.
 *
 * With OpenMP the per-row counts are gathered into two work arrays (liw,
 * uiw) and turned into row pointers by a prefix sum, so rows can be copied
 * independently.  These work arrays are released on every exit path.
 *
 * When the second lis_matrix_malloc_csr call fails only the L arrays are
 * released here: lis_matrix_malloc_csr releases its own partial
 * allocations before it returns an error.
 */
LIS_INT lis_matrix_split2_csr(LIS_MATRIX A)
{
	LIS_INT i,j,n;
	LIS_INT nnzl,nnzu;
	LIS_INT err;
	LIS_INT *lptr,*lindex,*uptr,*uindex;
	LIS_SCALAR *lvalue,*uvalue;
	#ifdef _OPENMP
		LIS_INT kl,ku;
		LIS_INT *liw,*uiw;
	#endif

	LIS_DEBUG_FUNC_IN;

	n        = A->n;
	nnzl     = 0;
	nnzu     = 0;
	lptr     = NULL;
	lindex   = NULL;
	lvalue   = NULL;
	uptr     = NULL;
	uindex   = NULL;
	uvalue   = NULL;

	/* pass 1: count the entries of each part */
	#ifdef _OPENMP
		liw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split2_csr::liw");
		if( liw==NULL )
		{
			LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
			return LIS_OUT_OF_MEMORY;
		}
		uiw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split2_csr::uiw");
		if( uiw==NULL )
		{
			LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
			lis_free(liw);
			return LIS_OUT_OF_MEMORY;
		}
		#pragma omp parallel for private(i)
		for(i=0;i<n+1;i++)
		{
			liw[i] = 0;
			uiw[i] = 0;
		}
		/* row i counts into slot i+1 so the prefix sum gives row starts */
		#pragma omp parallel for private(i,j)
		for(i=0;i<n;i++)
		{
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<n )
				{
					liw[i+1]++;
				}
				else
				{
					uiw[i+1]++;
				}
			}
		}
		for(i=0;i<n;i++)
		{
			liw[i+1] += liw[i];
			uiw[i+1] += uiw[i];
		}
		nnzl = liw[n];
		nnzu = uiw[n];
	#else
		for(i=0;i<n;i++)
		{
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<n )
				{
					nnzl++;
				}
				else
				{
					nnzu++;
				}
			}
		}
	#endif

	err = lis_matrix_LU_create(A);
	if( err )
	{
		#ifdef _OPENMP
			lis_free2(2,liw,uiw);
		#endif
		return err;
	}
	err = lis_matrix_malloc_csr(n,nnzl,&lptr,&lindex,&lvalue);
	if( err )
	{
		#ifdef _OPENMP
			lis_free2(2,liw,uiw);
		#endif
		return err;
	}
	err = lis_matrix_malloc_csr(n,nnzu,&uptr,&uindex,&uvalue);
	if( err )
	{
		/* the failed call already released its own arrays */
		lis_free2(3,lptr,lindex,lvalue);
		#ifdef _OPENMP
			lis_free2(2,liw,uiw);
		#endif
		return err;
	}

	/* pass 2: copy the entries into their part */
	#ifdef _OPENMP
		#pragma omp parallel for private(i)
		for(i=0;i<n+1;i++)
		{
			lptr[i] = liw[i];
			uptr[i] = uiw[i];
		}
		#pragma omp parallel for private(i,j,kl,ku)
		for(i=0;i<n;i++)
		{
			kl = lptr[i];
			ku = uptr[i];
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<n )
				{
					lindex[kl]   = A->index[j];
					lvalue[kl]   = A->value[j];
					kl++;
				}
				else
				{
					uindex[ku]   = A->index[j];
					uvalue[ku]   = A->value[j];
					ku++;
				}
			}
		}
		lis_free2(2,liw,uiw);
	#else
		/* the counters are reused as write cursors */
		nnzl = 0;
		nnzu = 0;
		lptr[0] = 0;
		uptr[0] = 0;
		for(i=0;i<n;i++)
		{
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<n )
				{
					lindex[nnzl]   = A->index[j];
					lvalue[nnzl]   = A->value[j];
					nnzl++;
				}
				else
				{
					uindex[nnzu]   = A->index[j];
					uvalue[nnzu]   = A->value[j];
					nnzu++;
				}
			}
			lptr[i+1] = nnzl;
			uptr[i+1] = nnzu;
		}
	#endif
	A->L->nnz     = nnzl;
	A->L->ptr     = lptr;
	A->L->index   = lindex;
	A->L->value   = lvalue;
	A->U->nnz     = nnzu;
	A->U->ptr     = uptr;
	A->U->index   = uindex;
	A->U->value   = uvalue;
	A->is_splited = LIS_TRUE;

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
예제 #11
0
/*
 * Copy the split parts of the CSR matrix Ain into three new objects:
 * *D receives a copy of Ain->D, *L a copy of Ain->L and *U a copy of
 * Ain->U.  All three outputs are set to NULL first.
 *
 * The function reads Ain->L, Ain->U and Ain->D unconditionally, so Ain
 * must already be split.
 *
 * Returns LIS_SUCCESS on success, LIS_OUT_OF_MEMORY when the diagonal
 * buffer cannot be allocated, or the error code of the failing helper.
 *
 * When the second lis_matrix_malloc_csr call fails only the L arrays are
 * released here: lis_matrix_malloc_csr releases its own partial
 * allocations before it returns an error.
 *
 * NOTE(review): *L, *U and *D created by the duplicate calls are not
 * destroyed on the later error returns; they stay with the caller.
 */
LIS_INT lis_matrix_copyDLU_csr(LIS_MATRIX Ain, LIS_MATRIX_DIAG *D, LIS_MATRIX *L, LIS_MATRIX *U)
{
	LIS_INT err;
	LIS_INT i,n,np,lnnz,unnz;
	LIS_INT *lptr,*lindex;
	LIS_INT *uptr,*uindex;
	LIS_SCALAR *lvalue,*uvalue,*diag;

	LIS_DEBUG_FUNC_IN;
	
	*D = NULL;
	*L = NULL;
	*U = NULL;

	err = lis_matrix_check(Ain,LIS_MATRIX_CHECK_ALL);
	if( err ) return err;

	n       = Ain->n;
	np      = Ain->np;

	err = lis_matrix_duplicate(Ain,L);
	if( err )
	{
		return err;
	}
	err = lis_matrix_duplicate(Ain,U);
	if( err )
	{
		lis_matrix_destroy(*L);
		return err;
	}
	err = lis_matrix_diag_duplicateM(Ain,D);
	if( err )
	{
		lis_matrix_destroy(*L);
		lis_matrix_destroy(*U);
		return err;
	}
	/* the value buffer is replaced by diag below; clear the pointer so   */
	/* an early error return does not leave a freed address in *D         */
	lis_free((*D)->value);
	(*D)->value = NULL;

	lnnz     = Ain->L->nnz;
	unnz     = Ain->U->nnz;
	lptr     = NULL;
	lindex   = NULL;
	lvalue   = NULL;
	uptr     = NULL;
	uindex   = NULL;
	uvalue   = NULL;
	diag     = NULL;

	err = lis_matrix_malloc_csr(n,lnnz,&lptr,&lindex,&lvalue);
	if( err )
	{
		return err;
	}
	err = lis_matrix_malloc_csr(n,unnz,&uptr,&uindex,&uvalue);
	if( err )
	{
		/* the failed call already released its own arrays */
		lis_free2(3,lptr,lindex,lvalue);
		return err;
	}
	diag = (LIS_SCALAR *)lis_malloc(np*sizeof(LIS_SCALAR),"lis_matrix_copyDLU_csr::diag");
	if( diag==NULL )
	{
		/* err is still 0 here, so report the failure explicitly */
		LIS_SETERR_MEM(np*sizeof(LIS_SCALAR));
		lis_free2(6,uptr,lptr,uindex,lindex,uvalue,lvalue);
		return LIS_OUT_OF_MEMORY;
	}

	/* diag is sized for np entries; the first n are copied from Ain->D */
	#ifdef _OPENMP
	#pragma omp parallel for private(i)
	#endif
	for(i=0;i<n;i++)
	{
		diag[i] = Ain->D->value[i];
	}
	lis_matrix_elements_copy_csr(n,Ain->L->ptr,Ain->L->index,Ain->L->value,lptr,lindex,lvalue);
	lis_matrix_elements_copy_csr(n,Ain->U->ptr,Ain->U->index,Ain->U->value,uptr,uindex,uvalue);

	(*D)->value = diag;
	err = lis_matrix_set_csr(lnnz,lptr,lindex,lvalue,*L);
	if( err )
	{
		lis_free2(7,diag,uptr,lptr,uindex,lindex,uvalue,lvalue);
		return err;
	}
	err = lis_matrix_set_csr(unnz,uptr,uindex,uvalue,*U);
	if( err )
	{
		lis_free2(7,diag,uptr,lptr,uindex,lindex,uvalue,lvalue);
		return err;
	}

	err = lis_matrix_assemble(*L);
	if( err )
	{
		return err;
	}
	err = lis_matrix_assemble(*U);
	if( err )
	{
		return err;
	}
	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
예제 #12
0
/*
 * Deep-copy the CSR (or CSC) storage of Ain into Aout.
 *
 *   - If Ain is split, its D, L and U parts are copied and installed with
 *     lis_matrix_setDLU_csr.
 *   - If Ain is not split, or is split with is_save set, the unsplit
 *     ptr/index/value arrays are copied and installed with
 *     lis_matrix_set_csr.
 *   - If Ain->matrix_type is LIS_MATRIX_CSC, Aout is marked as CSC before
 *     it is assembled.
 *
 * Returns LIS_SUCCESS on success, LIS_OUT_OF_MEMORY when the diagonal
 * buffer cannot be allocated, or the error code of the failing helper.
 * If lis_matrix_assemble fails, lis_matrix_storage_destroy(Aout) is
 * called.
 *
 * When the second lis_matrix_malloc_csr call fails only the L arrays are
 * released here: lis_matrix_malloc_csr releases its own partial
 * allocations before it returns an error.
 */
LIS_INT lis_matrix_copy_csr(LIS_MATRIX Ain, LIS_MATRIX Aout)
{
	LIS_INT err;
	LIS_INT i,n,nnz,lnnz,unnz;
	LIS_INT *ptr,*index;
	LIS_INT *lptr,*lindex;
	LIS_INT *uptr,*uindex;
	LIS_SCALAR *value,*lvalue,*uvalue,*diag;

	LIS_DEBUG_FUNC_IN;


	n       = Ain->n;

	if( Ain->is_splited )
	{
		lnnz     = Ain->L->nnz;
		unnz     = Ain->U->nnz;
		lptr     = NULL;
		lindex   = NULL;
		lvalue   = NULL;
		uptr     = NULL;
		uindex   = NULL;
		uvalue   = NULL;
		diag     = NULL;

		err = lis_matrix_malloc_csr(n,lnnz,&lptr,&lindex,&lvalue);
		if( err )
		{
			return err;
		}
		err = lis_matrix_malloc_csr(n,unnz,&uptr,&uindex,&uvalue);
		if( err )
		{
			/* the failed call already released its own arrays */
			lis_free2(3,lptr,lindex,lvalue);
			return err;
		}
		diag = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_matrix_copy_csr::diag");
		if( diag==NULL )
		{
			/* err is still 0 here, so report the failure explicitly */
			LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
			lis_free2(6,uptr,lptr,uindex,lindex,uvalue,lvalue);
			return LIS_OUT_OF_MEMORY;
		}

		#ifdef _OPENMP
		#pragma omp parallel for private(i)
		#endif
		for(i=0;i<n;i++)
		{
			diag[i] = Ain->D->value[i];
		}
		lis_matrix_elements_copy_csr(n,Ain->L->ptr,Ain->L->index,Ain->L->value,lptr,lindex,lvalue);
		lis_matrix_elements_copy_csr(n,Ain->U->ptr,Ain->U->index,Ain->U->value,uptr,uindex,uvalue);

		err = lis_matrix_setDLU_csr(lnnz,unnz,diag,lptr,lindex,lvalue,uptr,uindex,uvalue,Aout);
		if( err )
		{
			lis_free2(7,diag,uptr,lptr,uindex,lindex,uvalue,lvalue);
			return err;
		}
	}
	if( !Ain->is_splited || (Ain->is_splited && Ain->is_save) )
	{
		ptr     = NULL;
		index   = NULL;
		value   = NULL;
		nnz     = Ain->nnz;
		err = lis_matrix_malloc_csr(n,nnz,&ptr,&index,&value);
		if( err )
		{
			return err;
		}

		lis_matrix_elements_copy_csr(n,Ain->ptr,Ain->index,Ain->value,ptr,index,value);

		err = lis_matrix_set_csr(nnz,ptr,index,value,Aout);
		if( err )
		{
			lis_free2(3,ptr,index,value);
			return err;
		}
	}
	/* the arrays are shared between CSR and CSC; only the tag differs */
	if( Ain->matrix_type==LIS_MATRIX_CSC )
	{
		Aout->matrix_type = LIS_MATRIX_CSC;
		Aout->status = -LIS_MATRIX_CSC;
	}
	err = lis_matrix_assemble(Aout);
	if( err )
	{
		lis_matrix_storage_destroy(Aout);
		return err;
	}
	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Split the MSR matrix A into a diagonal D, a strictly lower part A->L and
 * a strictly upper part A->U.
 *
 * As used here, A->index[0..n] holds the row pointers of the off-diagonal
 * entries, A->index[j] / A->value[j] for j in a row's range hold their
 * column indices and values, and A->value[0..n-1] holds the diagonal.
 * Entries with column < row go to L, entries with column > row go to U.
 * Both parts use the same layout, so their row pointers start at n+1.
 *
 * On success A->L, A->U and A->D own the new storage, A->is_splited is set
 * to LIS_TRUE and LIS_SUCCESS is returned.  Otherwise LIS_OUT_OF_MEMORY or
 * the error code of the failing helper is returned.
 *
 * With OpenMP the per-row counts are gathered into two work arrays (liw,
 * uiw) and turned into row pointers by a prefix sum.  These work arrays
 * are released on every exit path.
 */
LIS_INT lis_matrix_split_msr(LIS_MATRIX A)
{
  LIS_INT        i,j,n;
  LIS_INT        lnnz,unnz;
  LIS_INT        lndz,undz;
  LIS_INT        err;
  LIS_INT        *lindex,*uindex;
  LIS_SCALAR    *lvalue,*uvalue;
  #ifdef _OPENMP
    LIS_INT      kl,ku;
    LIS_INT      *liw,*uiw;
  #endif
  LIS_MATRIX_DIAG  D;

  LIS_DEBUG_FUNC_IN;

  n        = A->n;
  lnnz     = 0;
  unnz     = 0;
  lndz     = n;
  undz     = n;
  D        = NULL;
  lindex   = NULL;
  lvalue   = NULL;
  uindex   = NULL;
  uvalue   = NULL;

  /* pass 1: count the entries of each part */
  #ifdef _OPENMP
    liw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split_msr::liw");
    if( liw==NULL )
    {
      LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
      return LIS_OUT_OF_MEMORY;
    }
    uiw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split_msr::uiw");
    if( uiw==NULL )
    {
      LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
      lis_free(liw);
      return LIS_OUT_OF_MEMORY;
    }
    #pragma omp parallel for private(i)
    for(i=0;i<n+1;i++)
    {
      liw[i] = 0;
      uiw[i] = 0;
    }
    /* row i counts into slot i+1 so the prefix sum gives row starts */
    #pragma omp parallel for private(i,j)
    for(i=0;i<n;i++)
    {
      for(j=A->index[i];j<A->index[i+1];j++)
      {
        if( A->index[j]<i )
        {
          liw[i+1]++;
        }
        else if( A->index[j]>i )
        {
          uiw[i+1]++;
        }
      }
    }
    /* off-diagonal entries are stored after the n+1 pointer slots */
    liw[0] = n+1;
    uiw[0] = n+1;
    for(i=0;i<n;i++)
    {
      liw[i+1] += liw[i];
      uiw[i+1] += uiw[i];
    }
    lnnz = liw[n];
    unnz = uiw[n];
  #else
    for(i=0;i<n;i++)
    {
      for(j=A->index[i];j<A->index[i+1];j++)
      {
        if( A->index[j]<i )
        {
          lnnz++;
        }
        else if( A->index[j]>i )
        {
          unnz++;
        }
      }
    }
  #endif

  err = lis_matrix_LU_create(A);
  if( err )
  {
    #ifdef _OPENMP
      lis_free2(2,liw,uiw);
    #endif
    return err;
  }
  err = lis_matrix_malloc_msr(n,lnnz,lndz,&lindex,&lvalue);
  if( err )
  {
    #ifdef _OPENMP
      lis_free2(2,liw,uiw);
    #endif
    return err;
  }
  err = lis_matrix_malloc_msr(n,unnz,undz,&uindex,&uvalue);
  if( err )
  {
    lis_free2(4,lindex,lvalue,uindex,uvalue);
    #ifdef _OPENMP
      lis_free2(2,liw,uiw);
    #endif
    return err;
  }
  err = lis_matrix_diag_duplicateM(A,&D);
  if( err )
  {
    lis_free2(4,lindex,lvalue,uindex,uvalue);
    #ifdef _OPENMP
      lis_free2(2,liw,uiw);
    #endif
    return err;
  }

  /* pass 2: copy the diagonal and distribute the off-diagonal entries */
  #ifdef _OPENMP
    #pragma omp parallel for private(i)
    for(i=0;i<n+1;i++)
    {
      lindex[i] = liw[i];
      uindex[i] = uiw[i];
    }
    #pragma omp parallel for private(i,j,kl,ku)
    for(i=0;i<n;i++)
    {
      kl = lindex[i];
      ku = uindex[i];
      D->value[i] = A->value[i];
      for(j=A->index[i];j<A->index[i+1];j++)
      {
        if( A->index[j]<i )
        {
          lindex[kl]   = A->index[j];
          lvalue[kl]   = A->value[j];
          kl++;
        }
        else if( A->index[j]>i )
        {
          uindex[ku]   = A->index[j];
          uvalue[ku]   = A->value[j];
          ku++;
        }
      }
    }
    lis_free2(2,liw,uiw);
  #else
    /* the counters are reused as write cursors, starting after the */
    /* n+1 pointer slots                                             */
    lnnz = n+1;
    unnz = n+1;
    lindex[0] = n+1;
    uindex[0] = n+1;
    for(i=0;i<n;i++)
    {
      D->value[i] = A->value[i];
      for(j=A->index[i];j<A->index[i+1];j++)
      {
        if( A->index[j]<i )
        {
          lindex[lnnz]   = A->index[j];
          lvalue[lnnz]   = A->value[j];
          lnnz++;
        }
        else if( A->index[j]>i )
        {
          uindex[unnz]   = A->index[j];
          uvalue[unnz]   = A->value[j];
          unnz++;
        }
      }
      lindex[i+1] = lnnz;
      uindex[i+1] = unnz;
    }
  #endif
  /* lnnz/unnz are end offsets here; subtract the pointer slots */
  A->L->nnz     = lnnz - (n+1);
  A->L->ndz     = lndz;
  A->L->index   = lindex;
  A->L->value   = lvalue;
  A->U->nnz     = unnz - (n+1);
  A->U->ndz     = undz;
  A->U->index   = uindex;
  A->U->value   = uvalue;
  A->D          = D;
  A->is_splited = LIS_TRUE;

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}
예제 #14
0
/*
 * Convert the row-wise work storage of Ain (w_row / w_index / w_value)
 * into CSC arrays and install them in Aout.
 *
 * The entries are first counted per column, the counts are turned into
 * column pointers, and then every entry is scattered to the next free slot
 * of its column.  Rows are visited in increasing order, so the row indices
 * inside each column come out ascending.
 *
 * Returns LIS_SUCCESS on success, LIS_OUT_OF_MEMORY if an allocation
 * fails, or the error code of lis_matrix_set_csc / lis_matrix_assemble.
 * The scratch array iw is released as soon as the scatter is finished, so
 * it cannot leak on the later error returns.
 */
LIS_INT lis_matrix_convert_rco2csc(LIS_MATRIX Ain, LIS_MATRIX Aout)
{
	LIS_INT i,j,k,l,n,nnz,err;
	LIS_INT *ptr,*index,*iw;
	LIS_SCALAR *value;

	LIS_DEBUG_FUNC_IN;

	ptr     = NULL;
	index   = NULL;
	value   = NULL;
	iw      = NULL;
	n       = Ain->n;


	iw = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_matrix_convert_rco2csc::iw");
	if( iw==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		lis_free2(4,ptr,index,value,iw);
		return LIS_OUT_OF_MEMORY;
	}
	ptr = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_convert_rco2csc::ptr");
	if( ptr==NULL )
	{
		LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
		lis_free2(4,ptr,index,value,iw);
		return LIS_OUT_OF_MEMORY;
	}

	/* count the entries of every column */
	for(i=0;i<n;i++) iw[i] = 0;
	for(i=0;i<n;i++)
	{
		for(j=0;j<Ain->w_row[i];j++)
		{
			iw[Ain->w_index[i][j]]++;
		}
	}
	/* column pointers; iw becomes the write cursor of each column */
	ptr[0] = 0;
	for(i=0;i<n;i++)
	{
		ptr[i+1] = ptr[i] + iw[i];
		iw[i]    = ptr[i];
	}
	nnz = ptr[n];

	index = (LIS_INT *)lis_malloc( nnz*sizeof(LIS_INT),"lis_matrix_convert_rco2csc::index" );
	if( index==NULL )
	{
		LIS_SETERR_MEM(nnz*sizeof(LIS_INT));
		lis_free2(4,ptr,index,value,iw);
		return LIS_OUT_OF_MEMORY;
	}
	value = (LIS_SCALAR *)lis_malloc( nnz*sizeof(LIS_SCALAR),"lis_matrix_convert_rco2csc::value" );
	if( value==NULL )
	{
		LIS_SETERR_MEM(nnz*sizeof(LIS_SCALAR));
		lis_free2(4,ptr,index,value,iw);
		return LIS_OUT_OF_MEMORY;
	}

	/* scatter every entry to the next free slot of its column */
	for(i=0;i<n;i++)
	{
		for(j=0;j<Ain->w_row[i];j++)
		{
			k        = Ain->w_index[i][j];
			l        = iw[k];
			value[l] = Ain->w_value[i][j];
			index[l] = i;
			iw[k]++;
		}
	}

	/* iw is not needed past this point */
	lis_free(iw);
	iw = NULL;

	err = lis_matrix_set_csc(nnz,ptr,index,value,Aout);
	if( err )
	{
		lis_free2(3,ptr,index,value);
		return err;
	}
	err = lis_matrix_assemble(Aout);
	if( err )
	{
		lis_matrix_storage_destroy(Aout);
		return err;
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Deep-copy the DIA storage of Ain into Aout.
 *
 *   - If Ain is split, its D, L and U parts are copied and installed with
 *     lis_matrix_setDLU_dia.
 *   - If Ain is not split, or is split with is_save set, the unsplit
 *     index/value arrays are copied and installed with lis_matrix_set_dia.
 *
 * Returns LIS_SUCCESS on success, LIS_OUT_OF_MEMORY when the diagonal
 * buffer cannot be allocated, or the error code of the failing helper.
 * If lis_matrix_assemble fails, lis_matrix_storage_destroy(Aout) is
 * called.
 *
 * When the second lis_matrix_malloc_dia call fails only the L arrays are
 * released here: lis_matrix_malloc_dia releases its own partial
 * allocations before it returns an error.
 */
LIS_INT lis_matrix_copy_dia(LIS_MATRIX Ain, LIS_MATRIX Aout)
{
  LIS_INT      err;
  LIS_INT      i,n,nnd,lnnd,unnd;
  LIS_INT      *index;
  LIS_INT      *lindex;
  LIS_INT      *uindex;
  LIS_SCALAR  *value,*lvalue,*uvalue,*diag;

  LIS_DEBUG_FUNC_IN;

  n       = Ain->n;

  if( Ain->is_splited )
  {
    lnnd     = Ain->L->nnd;
    unnd     = Ain->U->nnd;
    lindex   = NULL;
    lvalue   = NULL;
    uindex   = NULL;
    uvalue   = NULL;
    diag     = NULL;

    err = lis_matrix_malloc_dia(n,lnnd,&lindex,&lvalue);
    if( err )
    {
      return err;
    }
    err = lis_matrix_malloc_dia(n,unnd,&uindex,&uvalue);
    if( err )
    {
      /* the failed call already released its own arrays */
      lis_free2(2,lindex,lvalue);
      return err;
    }
    diag = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_matrix_copy_dia::diag");
    if( diag==NULL )
    {
      /* err is still 0 here, so report the failure explicitly */
      LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
      lis_free2(4,uindex,lindex,uvalue,lvalue);
      return LIS_OUT_OF_MEMORY;
    }

    #ifdef _OPENMP
    #pragma omp parallel for private(i)
    #endif
    for(i=0;i<n;i++)
    {
      diag[i] = Ain->D->value[i];
    }
    lis_matrix_elements_copy_dia(n,lnnd,Ain->L->index,Ain->L->value,lindex,lvalue);
    lis_matrix_elements_copy_dia(n,unnd,Ain->U->index,Ain->U->value,uindex,uvalue);

    err = lis_matrix_setDLU_dia(lnnd,unnd,diag,lindex,lvalue,uindex,uvalue,Aout);
    if( err )
    {
      lis_free2(5,diag,uindex,lindex,uvalue,lvalue);
      return err;
    }
  }
  if( !Ain->is_splited || (Ain->is_splited && Ain->is_save) )
  {
    index   = NULL;
    value   = NULL;
    nnd     = Ain->nnd;
    err = lis_matrix_malloc_dia(n,nnd,&index,&value);
    if( err )
    {
      return err;
    }

    lis_matrix_elements_copy_dia(n,nnd,Ain->index,Ain->value,index,value);

    err = lis_matrix_set_dia(nnd,index,value,Aout);
    if( err )
    {
      lis_free2(2,index,value);
      return err;
    }
  }

  err = lis_matrix_assemble(Aout);
  if( err )
  {
    lis_matrix_storage_destroy(Aout);
    return err;
  }
  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}
예제 #16
0
/*
 * Allocate the row-wise work storage of a matrix with n rows.
 *
 * Three arrays of length n are created: a row counter array (zero-filled
 * before returning), an array of per-row index pointers and an array of
 * per-row value pointers.  When nnz is not NULL, row i additionally gets
 * room for nnz[i] indices and values; rows with nnz[i]==0 get NULL
 * pointers.  When nnz is NULL the per-row pointers are left unset.
 *
 * On success the arrays are handed back through *row, *index and *value
 * and LIS_SUCCESS is returned.  On an allocation failure everything
 * allocated so far is released, the outputs are not written and
 * LIS_OUT_OF_MEMORY is returned.
 */
LIS_INT lis_matrix_malloc_rco(LIS_INT n, LIS_INT nnz[], LIS_INT **row, LIS_INT ***index, LIS_SCALAR ***value)
{
	LIS_INT	i,j;
	LIS_INT *w_row,**w_index;
	LIS_SCALAR **w_value;

	LIS_DEBUG_FUNC_IN;

	w_row     = NULL;
	w_index   = NULL;
	w_value   = NULL;

	w_row = (LIS_INT *)lis_malloc( n*sizeof(LIS_INT),"lis_matrix_malloc_rco::w_row" );
	if( w_row==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}
	w_index = (LIS_INT **)lis_malloc( n*sizeof(LIS_INT *),"lis_matrix_malloc_rco::w_index" );
	if( w_index==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT *));
		lis_free2(3,w_row,w_index,w_value);
		return LIS_OUT_OF_MEMORY;
	}
	w_value = (LIS_SCALAR **)lis_malloc( n*sizeof(LIS_SCALAR *),"lis_matrix_malloc_rco::w_value" );
	if( w_value==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR *));
		lis_free2(3,w_row,w_index,w_value);
		return LIS_OUT_OF_MEMORY;
	}
	if( nnz!=NULL )
	{
		for(i=0;i<n;i++)
		{
			w_index[i] = NULL;
			w_value[i] = NULL;
			if( nnz[i]==0 ) continue;
			w_index[i] = (LIS_INT *)lis_malloc( nnz[i]*sizeof(LIS_INT),"lis_matrix_malloc_rco::w_index[i]" );
			if( w_index[i]==NULL )
			{
				LIS_SETERR_MEM(nnz[i]*sizeof(LIS_INT));
				break;
			}
			w_value[i] = (LIS_SCALAR *)lis_malloc( nnz[i]*sizeof(LIS_SCALAR),"lis_matrix_malloc_rco::w_value[i]" );
			if( w_value[i]==NULL )
			{
				LIS_SETERR_MEM(nnz[i]*sizeof(LIS_SCALAR));
				break;
			}
		}
		if(i<n)
		{
			/* Row i failed part-way.  Rows 0..i all had their pointers   */
			/* set to NULL or to a live allocation, so release each row   */
			/* exactly once; row i is included because its index array    */
			/* may exist even though its value array does not.            */
			for(j=0;j<=i;j++)
			{
				if( w_index[j] ) lis_free(w_index[j]);
				if( w_value[j] ) lis_free(w_value[j]);
			}
			lis_free2(3,w_row,w_index,w_value);
			return LIS_OUT_OF_MEMORY;
		}
	}
	#ifdef _OPENMP
	#pragma omp parallel for private(i)
	#endif
	for(i=0;i<n;i++) w_row[i] = 0;
	*row   = w_row;
	*index = w_index;
	*value = w_value;

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Build a threshold-based incomplete LU factorisation of solver->A (CSR
 * arrays ptr/index/value) and store the factors in precon->L, precon->U and
 * precon->D.
 *
 * Parameters read from solver->params:
 *   LIS_PARAMS_DROP  drop tolerance; scaled per row by the mean absolute
 *                    value of the row entries
 *   LIS_PARAMS_RATE  multiplier applied to nnz/(2n) to obtain lfil, the
 *                    maximum number of entries kept per row in L and in U
 *
 * D->value[i] holds the reciprocal of the i-th pivot.
 *
 * With OpenMP each thread factorises only the diagonal block made of its own
 * row range [is,ie); entries whose column lies outside that range are
 * ignored.
 *
 * Returns LIS_SUCCESS, the error code of a failed helper, or
 * LIS_OUT_OF_MEMORY.
 *
 * NOTE(review): L, U and D are still not released on the error paths; the
 * state of partially built ILU objects is not visible from here, so only the
 * scratch buffers are freed.  The per-row malloc() results are unchecked.
 */
LIS_INT lis_precon_create_ilut_csr(LIS_SOLVER solver, LIS_PRECON precon)
{
#ifdef _OPENMP
  LIS_INT        err;
  LIS_INT        i,j,k,jj;
  LIS_INT        is,ie,my_rank,nprocs;
  LIS_INT        n,lfil,len;
  LIS_SCALAR    t,tol,m;
  LIS_MATRIX    A;
  LIS_MATRIX_ILU  L,U;
  LIS_VECTOR    D;

  LIS_SCALAR    tnorm, tolnorm;
  LIS_SCALAR    fact,lxu,*wn,*w;
  LIS_INT        lenu,lenl,col,jpos,jrow,upos;
  LIS_INT        *jbuf,*iw;

  LIS_DEBUG_FUNC_IN;


  A      = solver->A;
  n      = A->n;
  tol    = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];
  m      = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN];
  /* Truncate after scaling by m, exactly as the serial branch does. */
  lfil   = (LIS_INT)(((double)A->nnz/(2.0*n))*m);
  nprocs = omp_get_max_threads();

  L      = NULL;
  U      = NULL;
  w      = NULL;
  wn     = NULL;
  jbuf   = NULL;
  iw     = NULL;


  err = lis_matrix_ilu_create(n,1,&L);
  if( err ) return err;
  err = lis_matrix_ilu_create(n,1,&U);
  if( err ) return err;
  err = lis_matrix_ilu_setCR(L);
  if( err ) return err;
  err = lis_matrix_ilu_setCR(U);
  if( err ) return err;
  err = lis_vector_duplicate(A,&D);
  if( err )
  {
    return err;
  }

  /* Scratch space: one slab per thread, addressed as [my_rank*n + k]. */
  w   = (LIS_SCALAR *)lis_malloc(nprocs*(n+1)*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w");
  if( w==NULL )
  {
    LIS_SETERR_MEM(nprocs*(n+1)*sizeof(LIS_SCALAR));
    lis_free2(4,w,iw,wn,jbuf);
    return LIS_OUT_OF_MEMORY;
  }
  wn = (LIS_SCALAR *)lis_malloc(nprocs*n*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w");
  if( wn==NULL )
  {
    LIS_SETERR_MEM(nprocs*n*sizeof(LIS_SCALAR));
    lis_free2(4,w,iw,wn,jbuf);
    return LIS_OUT_OF_MEMORY;
  }

  jbuf   = (LIS_INT *)lis_malloc(nprocs*n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw");
  if( jbuf==NULL )
  {
    LIS_SETERR_MEM(nprocs*n*sizeof(LIS_INT));
    lis_free2(4,w,iw,wn,jbuf);
    return LIS_OUT_OF_MEMORY;
  }
  iw   = (LIS_INT *)lis_malloc(nprocs*n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw");
  if( iw==NULL )
  {
    LIS_SETERR_MEM(nprocs*n*sizeof(LIS_INT));
    lis_free2(4,w,iw,wn,jbuf);
    return LIS_OUT_OF_MEMORY;
  }


  #pragma omp parallel private(is,ie,my_rank,i,j,k,jj,tnorm,tolnorm,len,lenu,lenl,col,t,jpos,jrow,fact,lxu,upos)
  {
    my_rank  = omp_get_thread_num();
    LIS_GET_ISIE(my_rank,nprocs,n,is,ie);

    /* iw[col] is the position of column col in w/jbuf, -1 when absent. */
    for(i=is;i<ie;i++) iw[my_rank*n+i] = -1;

    for(i=is;i<ie;i++)
    {
      /* Mean absolute value of the entries of row i inside [is,ie).
         NOTE(review): k stays 0 for a row with no entry in the range, which
         makes the division below 0/0. */
      tnorm = 0;
      k = 0;
      for(j=A->ptr[i];j<A->ptr[i+1];j++)
      {
        jj = A->index[j];
        if( jj<is || jj>=ie ) continue;
        tnorm += fabs(A->value[j]);
        k++;
      }
      tnorm   = tnorm / (double)k;
      tolnorm = tol * tnorm;

      /* Scatter row i: lower part at [0,lenl), diagonal at i,
         upper part at i+1 .. i+lenu. */
      lenu = 0;
      lenl = 0;
      jbuf[my_rank*n+i] = i;
      w[my_rank*n+i] = 0;
      iw[my_rank*n+i] = i;

      for(j=A->ptr[i];j<A->ptr[i+1];j++)
      {
        col = A->index[j];
        if( col<is || col>=ie ) continue;
        t = A->value[j];
        if( col < i )
        {
          jbuf[my_rank*n+lenl] = col;
          iw[my_rank*n+col] = lenl;
          w[my_rank*n+lenl] = t;
          lenl++;
        }
        else if( col == i )
        {
          w[my_rank*n+i] = t;
        }
        else
        {
          lenu++;
          jpos = i + lenu;
          jbuf[my_rank*n+jpos] = col;
          iw[my_rank*n+col] = jpos;
          w[my_rank*n+jpos] = t;
        }
      }

      j = -1;
      len = 0;

      /* Eliminate the lower entries in increasing column order. */
      while( ++j < lenl )
      {
        /* Move the smallest remaining column index to position j. */
        jrow = jbuf[my_rank*n+j];
        jpos = j;
        for(k=j+1;k<lenl;k++)
        {
          if( jbuf[my_rank*n+k]<jrow )
          {
            jrow = jbuf[my_rank*n+k];
            jpos = k;
          }
        }
        if( jpos!=j )
        {
          col = jbuf[my_rank*n+j];
          jbuf[my_rank*n+j] = jbuf[my_rank*n+jpos];
          jbuf[my_rank*n+jpos] = col;
          iw[my_rank*n+jrow] = j;
          iw[my_rank*n+col] = jpos;
          t = w[my_rank*n+j];
          w[my_rank*n+j] = w[my_rank*n+jpos];
          w[my_rank*n+jpos] = t;
        }
        /* Multiplier = entry times the stored reciprocal pivot of row jrow. */
        fact = w[my_rank*n+j] * D->value[jrow];
        w[my_rank*n+j] = fact;
        iw[my_rank*n+jrow] = -1;

        /* Combine with row jrow of U. */
        for(k=0;k<U->nnz[jrow];k++)
        {
          col = U->index[jrow][k];
          jpos = iw[my_rank*n+col];
          lxu = -fact * U->value[jrow][k];

          /* Drop fill-in that is below the row tolerance. */
          if( fabs(lxu) < tolnorm && jpos==-1 ) continue;
          if( col >= i )
          {
            if( jpos == -1 )
            {
              lenu++;
              upos = i + lenu;
              jbuf[my_rank*n+upos] = col;
              iw[my_rank*n+col] = upos;
              w[my_rank*n+upos] = lxu;
            }
            else
            {
              w[my_rank*n+jpos] += lxu;
            }
          }
          else
          {
            if( jpos == -1 )
            {
              jbuf[my_rank*n+lenl] = col;
              iw[my_rank*n+col] = lenl;
              w[my_rank*n+lenl] = lxu;
              lenl++;
            }
            else
            {
              w[my_rank*n+jpos] += lxu;
            }
          }
        }
      }

      /* Clear the position markers of the diagonal and the upper part. */
      iw[my_rank*n+i] = -1;
      for(j=0;j<lenu;j++)
      {
        iw[ my_rank*n+jbuf[my_rank*n+i+j+1] ] = -1;
      }

      D->value[i] = 1.0 / w[my_rank*n+i];


      /* Keep at most lfil entries of the lower part.  iw is reused as a
         permutation: lis_sort_di presumably orders it by magnitude (confirm
         in lis_sort), lis_sort_i then sorts the kept positions. */
      len = _min(lfil,lenl);
      for(j=0;j<lenl;j++)
      {
        wn[my_rank*n+j] = fabs(w[my_rank*n+j]);
        iw[my_rank*n+j] = j;
      }
      lis_sort_di(0,lenl-1,&wn[my_rank*n],&iw[my_rank*n]);
      lis_sort_i(0,len-1,&iw[my_rank*n]);

      L->nnz[i] = len;
      if( len>0 )
      {
        L->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
        L->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR));
      }
      for(j=0;j<len;j++)
      {
        jpos = iw[my_rank*n+j];
        L->index[i][j] = jbuf[my_rank*n+jpos];
        L->value[i][j] = w[my_rank*n+jpos];
      }
      for(j=0;j<lenl;j++) iw[my_rank*n+j] = -1;

      /* Same selection for the upper part, stored at i+1 .. i+lenu. */
      len = _min(lfil,lenu);
      for(j=0;j<lenu;j++)
      {
        wn[my_rank*n+j] = fabs(w[my_rank*n+i+j+1]);
        iw[my_rank*n+j] = i+j+1;
      }
      lis_sort_di(0,lenu-1,&wn[my_rank*n],&iw[my_rank*n]);
      lis_sort_i(0,len-1,&iw[my_rank*n]);

      U->nnz[i] = len;
      if( len>0 )
      {
        U->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
        U->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR));
      }
      for(j=0;j<len;j++)
      {
        jpos = iw[my_rank*n+j];
        U->index[i][j] = jbuf[my_rank*n+jpos];
        U->value[i][j] = w[my_rank*n+jpos];
      }
      for(j=0;j<lenu;j++) iw[my_rank*n+j] = -1;
    }
  }

  precon->L  = L;
  precon->U  = U;
  precon->D  = D;

  lis_free2(4,w,iw,wn,jbuf);

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
#else
  LIS_INT        err;
  LIS_INT        i,j,k;
  LIS_INT        n,lfil,len;
  LIS_SCALAR    t,tol,m;
  LIS_MATRIX    A;
  LIS_MATRIX_ILU  L,U;
  LIS_VECTOR    D;

  LIS_SCALAR    tnorm, tolnorm;
  LIS_SCALAR    fact,lxu,*wn,*w;
  LIS_INT        lenu,lenl,col,jpos,jrow,upos;
  LIS_INT        *jbuf,*iw;

  LIS_DEBUG_FUNC_IN;


  A      = solver->A;
  n      = A->n;
  tol    = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];
  m      = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN];
  lfil   = (LIS_INT)(((double)A->nnz/(2.0*n))*m);

  L      = NULL;
  U      = NULL;
  w      = NULL;
  wn     = NULL;
  jbuf   = NULL;
  iw     = NULL;


  err = lis_matrix_ilu_create(n,1,&L);
  if( err ) return err;
  err = lis_matrix_ilu_create(n,1,&U);
  if( err ) return err;
  err = lis_matrix_ilu_setCR(L);
  if( err ) return err;
  err = lis_matrix_ilu_setCR(U);
  if( err ) return err;
  err = lis_vector_duplicate(A,&D);
  if( err )
  {
    return err;
  }

  w   = (LIS_SCALAR *)lis_malloc((n+1)*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w");
  if( w==NULL )
  {
    LIS_SETERR_MEM((n+1)*sizeof(LIS_SCALAR));
    lis_free2(4,w,iw,wn,jbuf);
    return LIS_OUT_OF_MEMORY;
  }
  wn = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w");
  if( wn==NULL )
  {
    LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
    lis_free2(4,w,iw,wn,jbuf);
    return LIS_OUT_OF_MEMORY;
  }

  jbuf   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw");
  if( jbuf==NULL )
  {
    LIS_SETERR_MEM(n*sizeof(LIS_INT));
    lis_free2(4,w,iw,wn,jbuf);
    return LIS_OUT_OF_MEMORY;
  }
  iw   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw");
  if( iw==NULL )
  {
    LIS_SETERR_MEM(n*sizeof(LIS_INT));
    lis_free2(4,w,iw,wn,jbuf);
    return LIS_OUT_OF_MEMORY;
  }


  /* iw[col] is the position of column col in w/jbuf, -1 when absent. */
  for(i=0;i<n;i++) iw[i] = -1;

  for(i=0;i<n;i++)
  {
    /* Mean absolute value of the stored entries of row i. */
    tnorm = 0;
    for(j=A->ptr[i];j<A->ptr[i+1];j++)
    {
      tnorm += fabs(A->value[j]);
    }
    tnorm   = tnorm / (double)(A->ptr[i+1]-A->ptr[i]);
    tolnorm = tol * tnorm;

    /* Scatter row i: lower part at [0,lenl), diagonal at i,
       upper part at i+1 .. i+lenu. */
    lenu = 0;
    lenl = 0;
    jbuf[i] = i;
    w[i] = 0;
    iw[i] = i;

    for(j=A->ptr[i];j<A->ptr[i+1];j++)
    {
      col = A->index[j];
      #ifdef USE_MPI
        if( col>n-1 ) continue;
      #endif
      t = A->value[j];
      if( col < i )
      {
        jbuf[lenl] = col;
        iw[col] = lenl;
        w[lenl] = t;
        lenl++;
      }
      else if( col == i )
      {
        w[i] = t;
      }
      else
      {
        lenu++;
        jpos = i + lenu;
        jbuf[jpos] = col;
        iw[col] = jpos;
        w[jpos] = t;
      }
    }

    j = -1;
    len = 0;

    /* Eliminate the lower entries in increasing column order. */
    while( ++j < lenl )
    {
      /* Move the smallest remaining column index to position j. */
      jrow = jbuf[j];
      jpos = j;
      for(k=j+1;k<lenl;k++)
      {
        if( jbuf[k]<jrow )
        {
          jrow = jbuf[k];
          jpos = k;
        }
      }
      if( jpos!=j )
      {
        col = jbuf[j];
        jbuf[j] = jbuf[jpos];
        jbuf[jpos] = col;
        iw[jrow] = j;
        iw[col] = jpos;
        t = w[j];
        w[j] = w[jpos];
        w[jpos] = t;
      }
      /* Multiplier = entry times the stored reciprocal pivot of row jrow. */
      fact = w[j] * D->value[jrow];
      w[j] = fact;
      iw[jrow] = -1;

      /* Combine with row jrow of U. */
      for(k=0;k<U->nnz[jrow];k++)
      {
        col = U->index[jrow][k];
        jpos = iw[col];
        lxu = -fact * U->value[jrow][k];

        /* Drop fill-in that is below the row tolerance. */
        if( fabs(lxu) < tolnorm && jpos==-1 ) continue;
        if( col >= i )
        {
          if( jpos == -1 )
          {
            lenu++;
            upos = i + lenu;
            jbuf[upos] = col;
            iw[col] = upos;
            w[upos] = lxu;
          }
          else
          {
            w[jpos] += lxu;
          }
        }
        else
        {
          if( jpos == -1 )
          {
            jbuf[lenl] = col;
            iw[col] = lenl;
            w[lenl] = lxu;
            lenl++;
          }
          else
          {
            w[jpos] += lxu;
          }
        }
      }
    }

    /* Clear the position markers of the diagonal and the upper part. */
    iw[i] = -1;
    for(j=0;j<lenu;j++)
    {
      iw[ jbuf[i+j+1] ] = -1;
    }

    D->value[i] = 1.0 / w[i];


    /* Keep at most lfil entries of the lower part.  iw is reused as a
       permutation: lis_sort_di presumably orders it by magnitude (confirm in
       lis_sort), lis_sort_i then sorts the kept positions. */
    len = _min(lfil,lenl);
    for(j=0;j<lenl;j++)
    {
      wn[j] = fabs(w[j]);
      iw[j] = j;
    }
    lis_sort_di(0,lenl-1,wn,iw);
    lis_sort_i(0,len-1,iw);

    L->nnz[i] = len;
    if( len>0 )
    {
      L->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
      L->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR));
    }
    for(j=0;j<len;j++)
    {
      jpos = iw[j];
      L->index[i][j] = jbuf[jpos];
      L->value[i][j] = w[jpos];
    }
    for(j=0;j<lenl;j++) iw[j] = -1;

    /* Same selection for the upper part, stored at i+1 .. i+lenu. */
    len = _min(lfil,lenu);
    for(j=0;j<lenu;j++)
    {
      wn[j] = fabs(w[i+j+1]);
      iw[j] = i+j+1;
    }
    lis_sort_di(0,lenu-1,wn,iw);
    lis_sort_i(0,len-1,iw);

    U->nnz[i] = len;
    if( len>0 )
    {
      U->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
      U->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR));
    }
    for(j=0;j<len;j++)
    {
      jpos = iw[j];
      U->index[i][j] = jbuf[jpos];
      U->value[i][j] = w[jpos];
    }
    for(j=0;j<lenu;j++) iw[j] = -1;
  }

  precon->L  = L;
  precon->U  = U;
  precon->D  = D;

  lis_free2(4,w,iw,wn,jbuf);

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
#endif
}
/*
 * Block variant of the threshold-based incomplete LU factorisation: works on
 * the bnr x bnr blocks of solver->A (arrays bptr/bindex/value) and stores the
 * factors in precon->L, precon->U and precon->WD.
 *
 * Parameters read from solver->params:
 *   LIS_PARAMS_DROP  drop tolerance; scaled per block row by the largest
 *                    block norm (lis_array_nrm2) of that row
 *   LIS_PARAMS_RATE  multiplier applied to bnnz/(2*nr) to obtain lfil
 *
 * Returns LIS_SUCCESS, the error code of a failed helper, or
 * LIS_OUT_OF_MEMORY.
 *
 * NOTE(review): buf_ns/buf_fact hold 16 scalars, so this assumes bnr<=4.
 * NOTE(review): L, U and D are still not released on the error paths; only
 * the scratch buffers are freed.  The per-row malloc() results are unchecked.
 */
LIS_INT lis_precon_create_ilut_bsr(LIS_SOLVER solver, LIS_PRECON precon)
{
  LIS_INT        err;
  LIS_INT        i,j,k,kk,bnr,bs;
  LIS_INT        n,nr,lfil,len;
  LIS_SCALAR    t,tol,m;
  LIS_MATRIX    A;
  LIS_MATRIX_ILU  L,U;
  LIS_MATRIX_DIAG  D;

  LIS_SCALAR    tnorm, tolnorm;
  LIS_SCALAR    buf_ns[16],buf_fact[16],*xnrm,*wn,*w;
  LIS_INT        lenu,lenl,col,jpos,jrow,upos,para;
  LIS_INT        *jbuf,*iw;

  LIS_DEBUG_FUNC_IN;


  A      = solver->A;
  n      = A->n;
  nr     = A->nr;
  bnr    = A->bnr;
  bs     = bnr*bnr;
  tol    = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];
  m      = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN];
  lfil   = (LIS_INT)(((double)A->bnnz/(2.0*nr))*m);

  L      = NULL;
  U      = NULL;
  w      = NULL;
  xnrm   = NULL;
  wn     = NULL;
  jbuf   = NULL;
  iw     = NULL;


  err = lis_matrix_ilu_create(nr,bnr,&L);
  if( err ) return err;
  err = lis_matrix_ilu_create(nr,bnr,&U);
  if( err ) return err;
  err = lis_matrix_ilu_setCR(L);
  if( err ) return err;
  err = lis_matrix_ilu_setCR(U);
  if( err ) return err;
  err = lis_matrix_diag_duplicateM(A,&D);
  if( err )
  {
    return err;
  }

  /* Scratch buffers; each failure path frees what was already obtained and
     reports the size that was actually requested. */
  w   = (LIS_SCALAR *)lis_malloc(bs*(nr+1)*sizeof(LIS_SCALAR),"lis_precon_create_iluc_csr::w");
  if( w==NULL )
  {
    LIS_SETERR_MEM(bs*(nr+1)*sizeof(LIS_SCALAR));
    lis_free2(5,w,iw,xnrm,wn,jbuf);
    return LIS_OUT_OF_MEMORY;
  }
  xnrm = (LIS_SCALAR *)lis_malloc(nr*sizeof(LIS_SCALAR),"lis_precon_create_iluc_csr::w");
  if( xnrm==NULL )
  {
    LIS_SETERR_MEM(nr*sizeof(LIS_SCALAR));
    lis_free2(5,w,iw,xnrm,wn,jbuf);
    return LIS_OUT_OF_MEMORY;
  }
  wn = (LIS_SCALAR *)lis_malloc(nr*sizeof(LIS_SCALAR),"lis_precon_create_iluc_csr::w");
  if( wn==NULL )
  {
    LIS_SETERR_MEM(nr*sizeof(LIS_SCALAR));
    lis_free2(5,w,iw,xnrm,wn,jbuf);
    return LIS_OUT_OF_MEMORY;
  }

  jbuf   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_iluc_csr::iw");
  if( jbuf==NULL )
  {
    LIS_SETERR_MEM(n*sizeof(LIS_INT));
    lis_free2(5,w,iw,xnrm,wn,jbuf);
    return LIS_OUT_OF_MEMORY;
  }
  iw   = (LIS_INT *)lis_malloc(nr*sizeof(LIS_INT),"lis_precon_create_iluc_csr::iw");
  if( iw==NULL )
  {
    LIS_SETERR_MEM(nr*sizeof(LIS_INT));
    lis_free2(5,w,iw,xnrm,wn,jbuf);
    return LIS_OUT_OF_MEMORY;
  }


  /* iw[col] is the position of block column col in w/jbuf, -1 when absent. */
  for(i=0;i<nr;i++) iw[i] = -1;

  for(i=0;i<nr;i++)
  {
    /* Largest block norm of block row i sets the row tolerance. */
    tnorm = 0;
    for(j=A->bptr[i];j<A->bptr[i+1];j++)
    {
      lis_array_nrm2(bs,&A->value[bs*j],&t);
      tnorm = _max(t,tnorm);
    }
    tolnorm = tol * tnorm;

    /* Scatter block row i: lower blocks at [0,lenl), diagonal at i, upper
       blocks at i+1 .. i+lenu-1 (lenu counts the diagonal as well). */
    lenu = 1;
    lenl = 0;
    jbuf[i] = i;
    memset(&w[bs*i],0,bs*sizeof(LIS_SCALAR));
    iw[i] = i;

    for(j=A->bptr[i];j<A->bptr[i+1];j++)
    {
      col = A->bindex[j];
      lis_array_nrm2(bs,&A->value[bs*j],&t);
      /* Small off-diagonal blocks are dropped before elimination. */
      if( t<tolnorm && col!=i ) continue;
      if( col < i )
      {
        jbuf[lenl] = col;
        iw[col] = lenl;
        memcpy(&w[bs*lenl],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
        lenl++;
      }
      else if( col == i )
      {
        memcpy(&w[bs*i],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
      }
      else
      {
        jpos = i + lenu;
        jbuf[jpos] = col;
        iw[col] = jpos;
        memcpy(&w[bs*jpos],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
        lenu++;
      }
    }

    j = -1;
    len = 0;

    /* Eliminate the lower blocks in increasing block-column order. */
    while( ++j < lenl )
    {
      /* Move the smallest remaining block column to position j. */
      jrow = jbuf[j];
      jpos = j;
      for(k=j+1;k<lenl;k++)
      {
        if( jbuf[k]<jrow )
        {
          jrow = jbuf[k];
          jpos = k;
        }
      }
      if( jpos!=j )
      {
        col = jbuf[j];
        jbuf[j] = jbuf[jpos];
        jbuf[jpos] = col;
        iw[jrow] = j;
        iw[col] = jpos;
        memcpy(buf_ns,&w[bs*j],bs*sizeof(LIS_SCALAR));
        memcpy(&w[bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR));
        memcpy(&w[bs*jpos],buf_ns,bs*sizeof(LIS_SCALAR));
      }
      /* buf_fact is computed from the factored diagonal block of row jrow
         and the current block; presumably a block solve - see
         lis_array_matinv. */
      lis_array_matinv(bnr,&D->value[bs*jrow],&w[bs*j],buf_fact);
      iw[jrow] = -1;

      /* Skip (and thereby drop) multipliers whose contribution is small. */
      lis_array_nrm2(bs,buf_fact,&t);
      if( t * xnrm[jrow] <= tolnorm ) continue;

      for(k=0;k<U->nnz[jrow];k++)
      {
        col = U->index[jrow][k];
        lis_array_matmat(bnr,buf_fact,&U->value[jrow][bs*k],buf_ns,LIS_INS_VALUE);
        jpos = iw[col];

        /* Drop fill-in blocks that are below the row tolerance. */
        lis_array_nrm2(bs,buf_ns,&t);
        if( t < tolnorm && jpos == -1 )
        {
          continue;
        }

        if( col >= i )
        {
          if( jpos == -1 )
          {
            upos = i + lenu;
            jbuf[upos] = col;
            iw[col] = upos;
            memcpy(&w[bs*upos],buf_ns,bs*sizeof(LIS_SCALAR));
            lenu++;
          }
          else
          {
            for(kk=0;kk<bs;kk++)
            {
              w[bs*jpos+kk] += buf_ns[kk];
            }
          }
        }
        else
        {
          if( jpos == -1 )
          {
            jbuf[lenl] = col;
            iw[col] = lenl;
            memcpy(&w[bs*lenl],buf_ns,bs*sizeof(LIS_SCALAR));
            lenl++;
          }
          else
          {
            for(kk=0;kk<bs;kk++)
            {
              w[bs*jpos+kk] += buf_ns[kk];
            }
          }
        }
      }
      /* Compact the retained multipliers at the front of w/jbuf. */
      for(kk=0;kk<bs;kk++)
      {
        w[bs*len+kk] = -buf_fact[kk];
      }
      jbuf[len] = jrow;
      len++;
    }

    /* Keep at most lfil blocks of the lower part; iw is reused as a
       permutation produced by lis_sort_di / lis_sort_i. */
    lenl = len;
    len = _min(lfil,lenl);
    for(j=0;j<lenl;j++)
    {
      lis_array_nrm2(bs,&w[bs*j],&wn[j]);
      iw[j] = j;
    }
    lis_sort_di(0,lenl-1,wn,iw);
    lis_sort_i(0,len-1,iw);

    L->nnz[i] = len;
    if( len>0 )
    {
      L->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
      L->value[i] = (LIS_SCALAR *)malloc(bs*len*sizeof(LIS_SCALAR));
    }
    for(j=0;j<len;j++)
    {
      jpos = iw[j];
      L->index[i][j] = jbuf[jpos];
      memcpy(&L->value[i][bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR));
    }
    for(j=0;j<lenl;j++) iw[j] = -1;

    /* Upper part: len counts the diagonal, so len-1 off-diagonal blocks
       are kept.
       NOTE(review): with lfil==0 this stores U->nnz[i] = -1. */
    len = _min(lfil,lenu);
    for(j=1;j<lenu;j++)
    {
      jpos = i+j;
      lis_array_nrm2(bs,&w[bs*jpos],&wn[j-1]);
      iw[j-1] = jpos;
    }
    para = lenu - 1;
    lis_sort_di(0,para-1,wn,iw);
    lis_sort_i(0,len-2,iw);

    U->nnz[i] = len-1;
    if( len>1 )
    {
      U->index[i] = (LIS_INT *)malloc((len-1)*sizeof(LIS_INT));
      U->value[i] = (LIS_SCALAR *)malloc(bs*(len-1)*sizeof(LIS_SCALAR));
    }
    /* t accumulates the largest of the diagonal norm and the wn[] values
       visited below; it is stored in xnrm[i] for later rows. */
    lis_array_nrm2(bs,&w[bs*i],&t);
    for(j=0;j<len-1;j++)
    {
      jpos = iw[j];
      U->index[i][j] = jbuf[jpos];
      memcpy(&U->value[i][bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR));
      t = _max(t,wn[j]);
    }
    for(j=0;j<lenu-1;j++) iw[j] = -1;

    xnrm[i] = t;

    memcpy(&D->value[bs*i],&w[bs*i],bs*sizeof(LIS_SCALAR));

    /* When n is not a multiple of bnr, put 1.0 on the padded diagonal
       entries of the last block before it is factored. */
    if( i==nr-1 )
    {
      switch(bnr)
      {
      case 2:
        if( n%2!=0 )
        {
          D->value[4*(nr-1)+3] = 1.0;
        }
        break;
      case 3:
        if( n%3==1 )
        {
          D->value[9*(nr-1)+4] = 1.0;
          D->value[9*(nr-1)+8] = 1.0;
        }
        else if( n%3==2 )
        {
          D->value[9*(nr-1)+8] = 1.0;
        }
        break;
      }
    }
    lis_array_LUdecomp(bnr,&D->value[bs*i]);

    /* Clear the position markers of the diagonal and the upper part. */
    for(j=0;j<lenu;j++)
    {
      iw[ jbuf[i+j] ] = -1;
    }
  }

  precon->L  = L;
  precon->U  = U;
  precon->WD  = D;

  lis_free2(5,w,iw,xnrm,wn,jbuf);

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}
예제 #19
0
파일: lis_input_mm.c 프로젝트: huahbo/lis
/*
 * Read a coordinate-format matrix from `file` and install it in A as CSR.
 *
 * The entries are read twice: the first pass counts the entries of every
 * locally owned row, the second pass (after rewinding and skipping the
 * header again) fills the index/value arrays.  For MM_SYMM input every
 * off-diagonal entry is also stored transposed.
 *
 *   A     matrix to fill; its size is set from the file header
 *   b, x  when both are non-NULL, lis_input_mm_vec is called for them
 *         after the matrix has been assembled
 *   file  open input stream positioned at the banner line
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, LIS_ERR_FILE_IO or the error code
 * of a failed helper.  Local buffers are released on every error path.
 */
LIS_INT lis_input_mm_csr(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, FILE *file)
{
	char buf[BUFSIZE];
	LIS_INT	nr,nc,nnz;
	LIS_INT	i,j,my_rank;
	LIS_INT	err;
	LIS_INT	mmtype,mode;
	LIS_INT	n,is,ie;
	LIS_INT	ridx,cidx;
	LIS_INT	*ptr, *index;
	LIS_INT	*work;
	LIS_INT	isb,isx,isbin;
	LIS_SCALAR val;
	LIS_SCALAR *value;
	LIS_MM_MATFMT matfmt;

	LIS_DEBUG_FUNC_IN;

	#ifdef USE_MPI
		my_rank = A->my_rank;
	#else
		my_rank = 0;
	#endif
	
	/* check banner */
	err = lis_input_mm_banner(file,&mmtype);
	if( err ) return err;

	/* check size */		
	err = lis_input_mm_size(file,&nr,&nc,&nnz,&isb,&isx,&isbin);
	if( err ) return err;

	err = lis_matrix_set_size(A,0,nr);
	if( err ) return err;

#ifdef _LONGLONG
	if( my_rank==0 ) printf("matrix size = %lld x %lld (%lld nonzero entries)\n\n",nr,nc,nnz);
#else
	if( my_rank==0 ) printf("matrix size = %d x %d (%d nonzero entries)\n\n",nr,nc,nnz);
#endif

	n      = A->n;
	ptr    = NULL;
	index  = NULL;
	value  = NULL;
	work   = NULL;


	/* [is,ie) is the range of global rows owned locally */
	lis_matrix_get_range(A,&is,&ie);

	ptr   = (LIS_INT *)lis_malloc( (n+1)*sizeof(LIS_INT),"lis_input_mm_csr::ptr" );
	if( ptr==NULL )
	{
		LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
		lis_free2(4,ptr,index,value,work);
		return LIS_OUT_OF_MEMORY;
	}
	work  = (LIS_INT *)lis_malloc( (n+1)*sizeof(LIS_INT),"lis_input_mm_csr::work" );
	if( work==NULL )
	{
		LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
		lis_free2(4,ptr,index,value,work);
		return LIS_OUT_OF_MEMORY;
	}

	#ifdef _OPENMP
	#pragma omp parallel for private(i)
	#endif
	for(i=0;i<n+1;i++)
	{
		ptr[i]  = 0;
		work[i]  = 0;
	}

	/* read data */
	/* The first byte of the integer 1 is 1 on a little-endian host and 0 on
	   a big-endian one; comparing it with isbin-1 decides whether binary
	   records must be byte-swapped (mode==LIS_TRUE). */
	mode = 1;
	mode = *(char *)&mode;
	if( mode!=(isbin-1) )
	{
		mode = LIS_TRUE;			
	}
	else
	{
		mode = LIS_FALSE;
	}

	/* pass 1: count entries per local row (indices are still 1-based) */
	for( i=0; i<nnz; i++ )
	{
		if( isbin )
		{
			if( fread(&matfmt, sizeof(matfmt), 1, file)!=1 )
			{
				LIS_SETERR_FIO;
				lis_free2(4,ptr,index,value,work);
				return LIS_ERR_FILE_IO;
			}
			ridx = matfmt.i;
			cidx = matfmt.j;
			if( mode )
			{
				lis_bswap_int(1,&ridx);
				lis_bswap_int(1,&cidx);
			}
		}
		else
		{
			if( fgets(buf, BUFSIZE, file)==NULL )
			{
				LIS_SETERR_FIO;
				lis_free2(4,ptr,index,value,work);
				return LIS_ERR_FILE_IO;
			}
#ifdef _LONGLONG
#ifdef _LONG__DOUBLE
			if( sscanf(buf, "%lld %lld %Lg", &ridx, &cidx, &val) != 3 )
#else
			if( sscanf(buf, "%lld %lld %lg", &ridx, &cidx, &val) != 3 )
#endif
#else
#ifdef _LONG__DOUBLE
			if( sscanf(buf, "%d %d %Lg", &ridx, &cidx, &val) != 3 )
#else
			if( sscanf(buf, "%d %d %lg", &ridx, &cidx, &val) != 3 )
#endif
#endif
			{
				LIS_SETERR_FIO;
				lis_free2(4,ptr,index,value,work);
				return LIS_ERR_FILE_IO;
			}
		}
		{
			/* transposed copy of an off-diagonal symmetric entry */
			if( mmtype==MM_SYMM && ridx!=cidx )
			{
				if( cidx>is && cidx<=ie ) work[cidx-is-1]++;
			}
			/* counts land in ptr[row+1] so the prefix sum below yields
			   row start offsets */
			if( ridx>is && ridx<=ie )
			{
				ptr[ridx-is]++;
			}
		}
	}


	/* turn the counts into row start offsets; work[] is reset so it can
	   serve as the per-row fill cursor in pass 2 */
	ptr[0] = 0;
	for( i=0; i<n; i++ )
	{
		if( mmtype==MM_SYMM )
		{
			ptr[i+1] += ptr[i] + work[i];
		}
		else
		{
			ptr[i+1] += ptr[i];
		}
		work[i] = 0;
	}

	index   = (LIS_INT *)lis_malloc( ptr[n]*sizeof(LIS_INT),"lis_input_mm_csr::index" );
	if( index==NULL )
	{
		LIS_SETERR_MEM(ptr[n]*sizeof(LIS_INT));
		lis_free2(4,ptr,index,value,work);
		return LIS_OUT_OF_MEMORY;
	}
	value   = (LIS_SCALAR *)lis_malloc( ptr[n]*sizeof(LIS_SCALAR),"lis_input_mm_csr::value" );
	if( value==NULL )
	{
		LIS_SETERR_MEM(ptr[n]*sizeof(LIS_SCALAR));
		lis_free2(4,ptr,index,value,work);
		return LIS_OUT_OF_MEMORY;
	}
	#ifdef _OPENMP
	#pragma omp parallel for private(i,j)
	#endif
	for(i=0;i<n;i++)
	{
		for(j=ptr[i];j<ptr[i+1];j++)
		{
			index[j] = 0;
			value[j] = 0.0;
		}
	}

	/* go back to the start: skip the banner line, then every '%' comment
	   line and the first line that is not a comment (the size line) */
	rewind(file);
	if( fgets(buf, BUFSIZE, file) == NULL )
	{
		LIS_SETERR_FIO;
		lis_free2(4,ptr,index,value,work);
		return LIS_ERR_FILE_IO;
	}
	do
	{
		if( fgets(buf, BUFSIZE, file) == NULL )
		{
			LIS_SETERR_FIO;
			lis_free2(4,ptr,index,value,work);
			return LIS_ERR_FILE_IO;
		}
	}while( buf[0]=='%' );

	/* pass 2: store the entries */
	for( i=0; i<nnz; i++ )
	{
		if( isbin )
		{
			if( fread(&matfmt, sizeof(matfmt), 1, file)!=1 )
			{
				LIS_SETERR_FIO;
				lis_free2(4,ptr,index,value,work);
				return LIS_ERR_FILE_IO;
			}
			ridx = matfmt.i;
			cidx = matfmt.j;
			val  = matfmt.value;
			if( mode )
			{
				lis_bswap_int(1,&ridx);
				lis_bswap_int(1,&cidx);
				lis_bswap_scalar(1,&val);
			}
		}
		else
		{
			if( fgets(buf, BUFSIZE, file) == NULL )
			{
				LIS_SETERR_FIO;
				lis_free2(4,ptr,index,value,work);
				return LIS_ERR_FILE_IO;
			}
#ifdef _LONGLONG
#ifdef _LONG__DOUBLE
			if( sscanf(buf, "%lld %lld %Lg", &ridx, &cidx, &val) != 3 )
#else
			if( sscanf(buf, "%lld %lld %lg", &ridx, &cidx, &val) != 3 )
#endif
#else
#ifdef _LONG__DOUBLE
			if( sscanf(buf, "%d %d %Lg", &ridx, &cidx, &val) != 3 )
#else
			if( sscanf(buf, "%d %d %lg", &ridx, &cidx, &val) != 3 )
#endif
#endif
			{
				LIS_SETERR_FIO;
				lis_free2(4,ptr,index,value,work);
				return LIS_ERR_FILE_IO;
			}
		}
		/* switch to 0-based indices */
		ridx--;
		cidx--;
		if( ridx==cidx && val==0.0 )
		{
#ifdef _LONGLONG
			printf("diagonal element is zero (i=%lld)\n",ridx);
#else
			printf("diagonal element is zero (i=%d)\n",ridx);
#endif
		}
		{
			if( mmtype==MM_SYMM && ridx!=cidx )
			{
				if( cidx>=is && cidx<ie )
				{
					value[ptr[cidx-is]+work[cidx-is]] = val;
					index[ptr[cidx-is]+work[cidx-is]] = ridx;
					work[cidx-is]++;
				}
			}
			if( ridx>=is && ridx<ie )
			{
				value[ptr[ridx-is]+work[ridx-is]] = val;
				index[ptr[ridx-is]+work[ridx-is]] = cidx;
				work[ridx-is]++;
			}
		}
	}
	#ifdef USE_MPI
		MPI_Barrier(A->comm);
	#endif

	err = lis_matrix_set_csr(ptr[n],ptr,index,value,A);
	if( err )
	{
		lis_free2(4,ptr,index,value,work);
		return err;
	}
	/* from here on the CSR arrays are released through
	   lis_matrix_storage_destroy(A); only work is still freed locally */
	err = lis_matrix_assemble(A);
	if( err )
	{
		lis_matrix_storage_destroy(A);
		lis_free(work);
		return err;
	}

	if( b!=NULL && x!=NULL )
	{
		err = lis_input_mm_vec(A,b,x,file,isb,isx,isbin);
		if( err )
		{
			/* return here: falling through would free work a second time
			   and report success */
			lis_matrix_storage_destroy(A);
			lis_free(work);
			return err;
		}
	}
	lis_free(work);

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Convert the CSR matrix Ain into MSR storage and install it in Aout.
 *
 * Layout produced here: value[0..n-1] holds the diagonal, index[0..n] holds
 * the start offsets of the off-diagonal rows (index[0] == n+1), and the
 * off-diagonal entries with their column numbers follow from position n+1.
 *
 * ndz is the number of rows of Ain that store no diagonal entry; it is
 * passed to lis_matrix_malloc_msr and lis_matrix_set_msr.
 *
 * Returns LIS_SUCCESS, LIS_ERR_OUT_OF_MEMORY or the error code of a failed
 * helper.
 */
LIS_INT lis_matrix_convert_csr2msr(LIS_MATRIX Ain, LIS_MATRIX Aout)
{
  LIS_INT      i,j,k,jj;
  LIS_INT      err;
  LIS_INT      n,nnz,ndz;
  LIS_INT      count;
  LIS_INT      *iw;
  LIS_INT      *index;
  LIS_SCALAR  *value;

  LIS_DEBUG_FUNC_IN;

  n       = Ain->n;
  nnz    = Ain->nnz;

  iw      = NULL;
  index   = NULL;
  value   = NULL;

  iw = (LIS_INT *)lis_malloc( (n+1)*sizeof(LIS_INT),"lis_matrix_convert_csr2msr::iw" );
  if( iw==NULL )
  {
    LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
    return LIS_ERR_OUT_OF_MEMORY;
  }

  /* check ndz */
  for(i=0;i<n+1;i++) iw[i] = 0;
  count = 0;
  #ifdef _OPENMP
  #pragma omp parallel private(i,j)
  #endif
  {
    /* iw[i+1] = 1 when row i stores a diagonal entry */
    #ifdef _OPENMP
    #pragma omp for
    #endif
    for(i=0;i<n;i++)
    {
      iw[i+1] = 0;
      for(j=Ain->ptr[i];j<Ain->ptr[i+1];j++)
      {
        if( i==Ain->index[j] )
        {
          iw[i+1] = 1;
        }
      }
    }
    /* count = number of rows that have a diagonal entry */
    #ifdef _OPENMP
    #pragma omp for reduction(+:count)
    #endif
    for(i=0;i<n;i++)
    {
      count += iw[i+1];
    }
    /* iw[i+1] = number of off-diagonal entries of row i */
    #ifdef _OPENMP
    #pragma omp for
    #endif
    for(i=0;i<n;i++)
    {
      iw[i+1] = Ain->ptr[i+1]-Ain->ptr[i]-iw[i+1];
    }
  }
  ndz = n - count;

  err = lis_matrix_malloc_msr(n,nnz,ndz,&index,&value);
  if( err )
  {
    lis_free2(3,index,value,iw);
    return err;
  }

  /* convert msr */
  /* prefix sum: iw[i] becomes the start offset of the off-diagonal part
     of row i */
  iw[0] = n+1;
  for(i=0;i<n;i++)
  {
    iw[i+1] = iw[i+1] + iw[i];
  }
  /* jj is written by every iteration of the loop below, so it has to be
     thread-private just like i, j and k */
  #ifdef _OPENMP
  #pragma omp parallel private(i,j,k,jj)
  #endif
  {
    #ifdef _OPENMP
    #pragma omp for
    #endif
    for(i=0;i<n+1;i++)
    {
      index[i] = iw[i];
    }
    #ifdef _OPENMP
    #pragma omp for
    #endif
    for(i=0;i<n;i++)
    {
      k = index[i];
      for(j=Ain->ptr[i];j<Ain->ptr[i+1];j++)
      {
        jj = Ain->index[j];
        if( jj==i )
        {
          value[i]   = Ain->value[j];
        }
        else
        {
          value[k]   = Ain->value[j];
          index[k]   = Ain->index[j];
          k++;
        }
      }
    }
  }

  err = lis_matrix_set_msr(nnz,ndz,index,value,Aout);
  if( err )
  {
    lis_free2(3,index,value,iw);
    return err;
  }
  /* index/value are released through lis_matrix_storage_destroy(Aout)
     from here on */
  err = lis_matrix_assemble(Aout);
  if( err )
  {
    lis_free(iw);
    lis_matrix_storage_destroy(Aout);
    return err;
  }

  lis_free(iw);
  LIS_DEBUG_FUNC_OUT;

  return LIS_SUCCESS;
}
예제 #21
0
/*
 * Convert the RCO work arrays of Ain (w_row / w_index / w_value) into BSR
 * storage with bnr x bnc blocks (Ain->conv_bnr, Ain->conv_bnc) and install
 * the result in Aout.
 *
 * Pass 1 counts the distinct block columns touched by every block row,
 * a prefix sum turns the counts into bptr, and pass 2 fills bindex/value.
 * Within a block, the scalar at local row ii and local column j is stored
 * at offset j*bnr + ii.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY or the error code of a failed
 * helper.
 *
 * NOTE(review): the per-thread iw/iw2 allocations inside the parallel
 * regions are not checked before use.
 */
LIS_INT lis_matrix_convert_rco2bsr(LIS_MATRIX Ain, LIS_MATRIX Aout)
{
	LIS_INT i,j,k,n,gn,nnz,bnnz,nr,nc,bnr,bnc,err;
	LIS_INT ii,jj,kk,bj,jpos,ij,kv,bi;
	LIS_INT *iw,*iw2;
	LIS_INT *bptr,*bindex;
	LIS_SCALAR *value;

	LIS_DEBUG_FUNC_IN;

	bnr     = Ain->conv_bnr;
	bnc     = Ain->conv_bnc;
	n       = Ain->n;
	gn      = Ain->gn;
	/* number of block rows / block columns, rounded up */
	nr      = 1 + (n-1)/bnr;
	nc      = 1 + (gn-1)/bnc;
	bptr    = NULL;
	bindex  = NULL;
	value   = NULL;
	iw      = NULL;
	iw2     = NULL;


	bptr = (LIS_INT *)lis_malloc( (nr+1)*sizeof(LIS_INT),"lis_matrix_convert_rco2bsr::bptr" );
	if( bptr==NULL )
	{
		LIS_SETERR_MEM((nr+1)*sizeof(LIS_INT));
		lis_free2(5,bptr,bindex,value,iw,iw2);
		return LIS_OUT_OF_MEMORY;
	}

	/* pass 1: count the blocks of every block row */
	#ifdef _OPENMP
	#pragma omp parallel private(i,k,ii,j,bj,kk,ij,jj,iw,iw2,kv,jpos)
	#endif
	{
		/* iw[bj]!=0 marks block column bj as seen in the current block
		   row; iw2 lists the marked columns so they can be cleared */
		iw    = (LIS_INT *)lis_malloc( nc*sizeof(LIS_INT),"lis_matrix_convert_rco2bsr::iw" );
		iw2   = (LIS_INT *)lis_malloc( nc*sizeof(LIS_INT),"lis_matrix_convert_rco2bsr::iw2" );
		memset(iw,0,nc*sizeof(LIS_INT));

		#ifdef _OPENMP
		#pragma omp for
		#endif
		for(i=0;i<nr;i++)
		{
			k = 0;
			kk   = bnr*i;
			jj   = 0;
			for(ii=0;ii+kk<n&&ii<bnr;ii++)
			{
				for(j=0;j<Ain->w_row[kk+ii];j++)
				{
					bj   = Ain->w_index[kk+ii][j]/bnc;
					jpos = iw[bj];
					if( jpos==0 )
					{
						iw[bj] = 1;
						iw2[jj] = bj;
						jj++;
					}
				}
			}
			for(bj=0;bj<jj;bj++)
			{
				k++;
				ii = iw2[bj];
				iw[ii]=0;
			}
			bptr[i+1] = k;
		}
		lis_free(iw);
		lis_free(iw2);
	}

	bptr[0] = 0;
	for(i=0;i<nr;i++)
	{
		bptr[i+1] += bptr[i];
	}
	bnnz = bptr[nr];
	nnz  = bnnz*bnr*bnc;
	
	bindex = (LIS_INT *)lis_malloc( bnnz*sizeof(LIS_INT),"lis_matrix_convert_rco2bsr::bindex" );
	if( bindex==NULL )
	{
		/* report the size that was actually requested */
		LIS_SETERR_MEM(bnnz*sizeof(LIS_INT));
		lis_free2(3,bptr,bindex,value);
		return LIS_OUT_OF_MEMORY;
	}
	value = (LIS_SCALAR *)lis_malloc( nnz*sizeof(LIS_SCALAR),"lis_matrix_convert_rco2bsr::value" );
	if( value==NULL )
	{
		LIS_SETERR_MEM(nnz*sizeof(LIS_SCALAR));
		lis_free2(3,bptr,bindex,value);
		return LIS_OUT_OF_MEMORY;
	}

	/* convert bsr */
	#ifdef _OPENMP
	#pragma omp parallel private(bi,i,ii,k,j,bj,jpos,kv,kk,ij,jj,iw)
	#endif
	{
		/* iw[bj] is 0 for an unseen block column, otherwise 1 + the offset
		   of that block inside value[] */
		iw = (LIS_INT *)lis_malloc( nc*sizeof(LIS_INT),"lis_matrix_convert_rco2bsr::iw" );
		memset(iw,0,nc*sizeof(LIS_INT));

		#ifdef _OPENMP
		#pragma omp for
		#endif
		for(bi=0;bi<nr;bi++)
		{
			i  = bi*bnr;
			ii = 0;
			kk = bptr[bi];
			while( i+ii<n && ii<=bnr-1 )
			{
				for( k=0;k<Ain->w_row[i+ii];k++)
				{
					j    = Ain->w_index[i+ii][k];
					bj   = j/bnc;
					j    = j%bnc;
					jpos = iw[bj];
					if( jpos==0 )
					{
						/* first entry of this block: open it, zero it */
						kv     = kk * bnr * bnc;
						iw[bj] = kv+1;
						bindex[kk]  = bj;
						for(jj=0;jj<bnr*bnc;jj++) value[kv+jj] = 0.0;
						ij = j*bnr + ii;
						value[kv+ij]   = Ain->w_value[i+ii][k];
						kk = kk+1;
					}
					else
					{
						ij = j*bnr + ii;
						value[jpos+ij-1]   = Ain->w_value[i+ii][k];
					}
				}
				ii = ii+1;
			}
			/* clear the markers used by this block row */
			for(j=bptr[bi];j<bptr[bi+1];j++)
			{
				iw[bindex[j]] = 0;
			}
		}
		lis_free(iw);
	}

	err = lis_matrix_set_bsr(bnr,bnc,bnnz,bptr,bindex,value,Aout);
	if( err )
	{
		lis_free2(3,bptr,bindex,value);
		return err;
	}
	err = lis_matrix_assemble(Aout);
	if( err )
	{
		lis_matrix_storage_destroy(Aout);
		return err;
	}
	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * lis_idrs - run the IDR(s)-type iteration configured on `solver`.
 *
 * Inputs taken from `solver`:
 *   A, b, x                      system matrix, right-hand side, iterate
 *   options[LIS_OPTIONS_MAXITER]       iteration limit
 *   options[LIS_OPTIONS_OUTPUT]        LIS_PRINT_MEM / LIS_PRINT_OUT flags
 *   options[LIS_OPTIONS_CONV_COND]     index into lis_solver_get_residual[]
 *   options[LIS_OPTIONS_IDRS_RESTART]  s, the number of dX/dR/P vectors
 *   work[0..3]                   r, t, v, av
 *   work[4 .. 4+s-1]             dX[0..s-1]
 *   work[4+s .. 4+2s-1]          P[0..s-1]
 *   work[4+2s .. 4+3s-1]         dR[0..s-1]
 *
 * Scratch arrays m, c (length s) and M, MM (length s*s) are allocated here
 * and released on every exit path.
 *
 * Returns:
 *   LIS_SUCCESS        residual norm dropped to solver->tol or below, or
 *                      lis_solver_get_initial_residual() returned non-zero
 *   LIS_MAXITER        the main loop ended without meeting the tolerance
 *   LIS_OUT_OF_MEMORY  a scratch allocation failed
 *
 * NOTE: the preconditioner solves and the matrix-vector products of the
 * start-up phase and of the "(iter%(s+1))==s" step are compiled only when
 * PRE_RIGHT is defined.
 */
LIS_INT lis_idrs(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,t,v,av,*dX,*dR,*P;
	LIS_SCALAR om, h;
	LIS_SCALAR *M,*m,*c,*MM;
	LIS_REAL   bnrm2, nrm2, tol;
	LIS_INT i,j,k,s,oldest;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes,tim;
	/* fixed seed so that the P vectors are reproducible from run to run */
	unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	s       = solver->options[LIS_OPTIONS_IDRS_RESTART];
	ptimes  = 0.0;

	r       = solver->work[0];
	t       = solver->work[1];
	v       = solver->work[2];
	av      = solver->work[3];
	dX      = &solver->work[4];
	P       = &solver->work[4+s];
	dR      = &solver->work[4+2*s];

	m  = (LIS_SCALAR *)lis_malloc(s*sizeof(LIS_SCALAR), "lis_idrs::m");
	c  = (LIS_SCALAR *)lis_malloc(s*sizeof(LIS_SCALAR), "lis_idrs::c");
	M  = (LIS_SCALAR *)lis_malloc(s*s*sizeof(LIS_SCALAR), "lis_idrs::M");
	MM = (LIS_SCALAR *)lis_malloc(s*s*sizeof(LIS_SCALAR), "lis_idrs::MM");
	if( m==NULL || c==NULL || M==NULL || MM==NULL )
	{
		/* Any of the four may have failed; release whatever was obtained
		   (lis_free2 is already called with NULL entries elsewhere in
		   this file) and report the total size that was requested. */
		LIS_SETERR_MEM((2*s+2*s*s)*sizeof(LIS_SCALAR));
		lis_free2(4,m,c,M,MM);
		solver->retcode = LIS_OUT_OF_MEMORY;
		LIS_DEBUG_FUNC_OUT;
		return LIS_OUT_OF_MEMORY;
	}

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		/* non-zero result: nothing left to iterate on, leave successfully */
		lis_free2(4,m,c,M,MM);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	/* Fill P with pseudo-random entries, then hand it to lis_idrs_orth
	   (presumably an orthogonalisation of the s vectors - confirm there). */
	init_by_array(init, length);
	for(k=0;k<s;k++)
	{
		for(i=0;i<n;i++)
		{
			P[k]->value[i] = genrand_real1();
		}
	}
	lis_idrs_orth(s,P);

	/* Start-up phase: s steps that build dX[k], dR[k] and column k of M. */
	for( k=0; k<s; k++ )
	{
		#ifdef PRE_RIGHT
			times = lis_wtime();
			lis_psolve(solver, r, dX[k]);
			ptimes += lis_wtime()-times;
			LIS_MATVEC(A,dX[k],dR[k]);
		#endif

		/* om = <dR[k],r> / <dR[k],dR[k]> */
		lis_vector_dot(dR[k],dR[k],&h);
		lis_vector_dot(dR[k],r,&om);
		om = om / h;
		lis_vector_scale(om,dX[k]);
		lis_vector_scale(-om,dR[k]);

		lis_vector_axpy(1.0,dX[k],x);
		lis_vector_axpy(1.0,dR[k],r);


		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);

		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[k+1] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", k+1, nrm2);
		}

		if( tol >= nrm2 )
		{
			lis_free2(4,m,c,M,MM);

			solver->retcode    = LIS_SUCCESS;
			solver->iter       = k+1;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* M[k*s+i] = <P[i], dR[k]> */
		for(i=0;i<s;i++)
		{
			lis_vector_dot(P[i],dR[k],&M[k*s+i]);
		}
	}

	iter = s;
	oldest = 0;
	/* m[i] = <P[i], r> */
	for(i=0;i<s;i++)
	{
		lis_vector_dot(P[i],r,&m[i]);
	}

	while( iter<=maxiter )
	{
		/* tim only feeds the disabled printf at the bottom of the loop */
		tim = lis_wtime();
		lis_array_solve(s,M,m,c,MM); /* solve Mc=m */

		/* v = r - sum_j c[j]*dR[j] */
		lis_vector_copy(r,v);
		for(j=0;j<s;j++)
		{
			lis_vector_axpy(-c[j],dR[j],v);
		}

		/* iter starts at s, so the first pass always takes this branch and
		   om is refreshed before the else-branch ever reads it. */
		if( (iter%(s+1))==s )
		{
			#ifdef PRE_RIGHT
				times = lis_wtime();
				lis_psolve(solver, v, av);
				ptimes += lis_wtime()-times;
				LIS_MATVEC(A,av,t);
			#endif

			/* om = <t,v> / <t,t> */
			lis_vector_dot(t,t,&h);
			lis_vector_dot(t,v,&om);
			om = om / h;
			#if 0
				lis_vector_scale(-om,t);
				for(j=0;j<s;j++)
				{
					lis_vector_axpy(-c[j],dR[j],t);
				}
				lis_vector_copy(t,dR[oldest]);
				lis_vector_scale(om,av);
				for(j=0;j<s;j++)
				{
					lis_vector_axpy(-c[j],dX[j],av);
				}
				lis_vector_copy(av,dX[oldest]);
			#else
				/* dX[oldest] = om*av - sum_j c[j]*dX[j], computed element
				   by element; h is reused as the accumulator. */
				for(i=0;i<n;i++)
				{
					h = om*av->value[i];
					for(j=0;j<s;j++)
					{
						h -= dX[j]->value[i] * c[j];
					}
					dX[oldest]->value[i] = h;
				}
				/* dR[oldest] = -om*t - sum_j c[j]*dR[j] */
				for(i=0;i<n;i++)
				{
					h = -om*t->value[i];
					for(j=0;j<s;j++)
					{
						h -= dR[j]->value[i] * c[j];
					}
					dR[oldest]->value[i] = h;
				}
			#endif
		}
		else
		{
			#ifdef PRE_RIGHT
				times = lis_wtime();
				lis_psolve(solver, v, av);
				ptimes += lis_wtime()-times;
			#endif

			#if 0
				lis_vector_scale(om,av);
				for(j=0;j<s;j++)
				{
					lis_vector_axpy(-c[j],dX[j],av);
				}
				lis_vector_copy(av,dX[oldest]);
			#else
				/* dX[oldest] = om*av - sum_j c[j]*dX[j], with om carried
				   over from the most recent "(iter%(s+1))==s" step. */
				for(i=0;i<n;i++)
				{
					h = om*av->value[i];
					for(j=0;j<s;j++)
					{
						h -= dX[j]->value[i] * c[j];
					}
					dX[oldest]->value[i] = h;
				}
			#endif

			/* dR[oldest] = -A*dX[oldest] */
			LIS_MATVEC(A,dX[oldest],dR[oldest]);
			lis_vector_scale(-1.0,dR[oldest]);
		}

		lis_vector_axpy(1.0,dR[oldest],r);
		lis_vector_axpy(1.0,dX[oldest],x);

		iter++;

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);

		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
		}

		if( tol >= nrm2 )
		{
			lis_free2(4,m,c,M,MM);

			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* Refresh column `oldest` of M and update m incrementally with the
		   same dot products <P[i], dR[oldest]>. */
		for(i=0;i<s;i++)
		{
			lis_vector_dot(P[i],dR[oldest],&h);
			m[i] += h;
			M[oldest*s+i] = h;
		}

		/* advance the slot that will be overwritten next, wrapping at s */
		oldest++;
		if( oldest==s ) oldest = 0;
		tim = lis_wtime() - tim;
		/*
		printf("update m,M: %e\n",tim);
		*/
	}
	lis_free2(4,m,c,M,MM);
	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_matrix_convert_msr2csr - build CSR arrays from Ain and attach them
 * to Aout.
 *
 * How Ain is read:
 *   Ain->value[i], 0 <= i < n         entry (i,i); copied only if non-zero
 *   Ain->index[i] .. Ain->index[i+1]  range of the remaining entries of
 *                                     row i; for each position p in that
 *                                     range the column is Ain->index[p]
 *                                     and the value is Ain->value[p]
 *
 * In every output row the (i,i) entry, when present, is stored first and
 * is followed by the other entries in their source order.
 *
 * Returns LIS_SUCCESS, or the error code of the first helper that fails.
 * The freshly allocated arrays are freed if lis_matrix_set_csr fails;
 * Aout's storage is destroyed if lis_matrix_assemble fails.
 */
LIS_INT lis_matrix_convert_msr2csr(LIS_MATRIX Ain, LIS_MATRIX Aout)
{
  LIS_INT      row,src,dst;
  LIS_INT      status;
  LIS_INT      nrows,nnz;
  LIS_INT      *ptr   = NULL;
  LIS_INT      *index = NULL;
  LIS_SCALAR   *value = NULL;

  LIS_DEBUG_FUNC_IN;

  nrows = Ain->n;
  nnz   = Ain->nnz;

  status = lis_matrix_malloc_csr(nrows,nnz,&ptr,&index,&value);
  if( status ) return status;

  /* Pass 1: entries per row go into ptr[row+1]; ptr[0] anchors the sum. */
  ptr[0] = 0;
  #ifdef _OPENMP
  #pragma omp parallel for private(row)
  #endif
  for(row=0;row<nrows;row++)
  {
    ptr[row+1] = (Ain->index[row+1] - Ain->index[row])
               + ((Ain->value[row]!=0.0) ? 1 : 0);
  }

  /* Running sum turns the per-row counts into row start offsets. */
  for(row=1;row<=nrows;row++)
  {
    ptr[row] += ptr[row-1];
  }

  /* Pass 2: copy the entries of each row to its slot range. */
  #ifdef _OPENMP
  #pragma omp parallel for private(row,src,dst)
  #endif
  for(row=0;row<nrows;row++)
  {
    dst = ptr[row];
    if( Ain->value[row]!=(LIS_SCALAR)0.0 )
    {
      value[dst] = Ain->value[row];
      index[dst] = row;
      dst++;
    }
    for(src=Ain->index[row];src<Ain->index[row+1];src++,dst++)
    {
      value[dst] = Ain->value[src];
      index[dst] = Ain->index[src];
    }
  }

  status = lis_matrix_set_csr(nnz,ptr,index,value,Aout);
  if( status )
  {
    lis_free2(3,ptr,index,value);
    return status;
  }

  status = lis_matrix_assemble(Aout);
  if( status )
  {
    lis_matrix_storage_destroy(Aout);
    return status;
  }

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}