Exemplo n.º 1
0
/*
 * Build the factors of a "sainv" (sparse approximate inverse) preconditioner
 * for the CSR matrix solver->A.
 *
 * Row by row, two sparse vectors are built starting from the unit vector e_i:
 *   W_i = e_i - sum_{j<i} (A(i,:) * Z_j * d_j) * W_j
 *   Z_i = e_i - sum_{j<i} (W_j . A(:,i) * d_j) * Z_j
 * where d_j is the stored reciprocal of W_j^T * A * Z_j.  A whole update, or
 * a single term of it, is skipped when its magnitude does not exceed
 * tol * max|A(i,:)|.
 *
 * solver : provides the matrix (solver->A) and the drop tolerance
 *          (solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN]).
 * precon : on success precon->L receives W, precon->U receives Z and
 *          precon->D receives the vector of reciprocals d.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY if a work array or a stored row
 * cannot be allocated, or the error code of a failing helper call.
 * NOTE(review): objects created before a failure are not released on the
 * error paths.
 */
LIS_INT lis_precon_create_sainv_csr(LIS_SOLVER solver, LIS_PRECON precon)
{
	LIS_INT	err;
	LIS_INT	i,j,k,ii,jj;
	LIS_INT	n,cl,cu;
	LIS_INT	*wu,*wl,*il,*iu;
	LIS_SCALAR t,v;
	LIS_REAL tol,tol_dd,nrm;
	LIS_SCALAR *d,*r,*c,*l,*u;
	LIS_MATRIX A,B;
	LIS_MATRIX_ILU W,Z;
	LIS_VECTOR D;

	LIS_DEBUG_FUNC_IN;


	A      = solver->A;
	n      = A->n;
	tol    = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];

	W      = NULL;
	Z      = NULL;
	wu     = NULL;
	wl     = NULL;
	d      = NULL;
	l      = NULL;
	u      = NULL;
	il     = NULL;
	iu     = NULL;

	err = lis_matrix_ilu_create(n,1,&W);
	if( err ) return err;
	err = lis_matrix_ilu_create(n,1,&Z);
	if( err ) return err;
	err = lis_matrix_ilu_setCR(W);
	if( err ) return err;
	err = lis_matrix_ilu_setCR(Z);
	if( err ) return err;
	err = lis_vector_duplicate(A,&D);
	if( err ) return err;
	d = D->value;

	/* r, c : dense copies of row i / column i of A                    */
	/* l, u : dense accumulators for the current W_i / Z_i             */
	/* il,iu: lists of the positions currently occupied in l / u       */
	/* wl,wu: 0/1 flags telling whether a position is already listed   */
	r   = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l");
	if( r==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		return LIS_OUT_OF_MEMORY;
	}
	c   = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::u");
	if( c==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		return LIS_OUT_OF_MEMORY;
	}
	l   = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l");
	if( l==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		return LIS_OUT_OF_MEMORY;
	}
	u   = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::u");
	if( u==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		return LIS_OUT_OF_MEMORY;
	}
	il   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::il");
	if( il==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}
	iu   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::iu");
	if( iu==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}
	wu   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::ww");
	if( wu==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}
	wl   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::ww");
	if( wl==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}

	/* B is filled from A by the CSR->CSC conversion, so B->ptr/B->index */
	/* give access to A by columns.                                      */
	lis_matrix_sort_csr(A);
	err = lis_matrix_duplicate(A,&B);
	if( err ) return err;
	err = lis_matrix_convert_csr2csc(A,B);
	if( err ) return err;

	/* r and c are read at arbitrary positions by the inner products    */
	/* below, while only the entries of the current row/column are ever */
	/* written, so they must start out as zero vectors.                 */
	for(i=0;i<n;i++)
	{
		wu[i] = 0;
		wl[i] = 0;
		r[i]  = 0.0;
		c[i]  = 0.0;
	}
	for(i=0; i<n; i++)
	{
		/* nrm_inf(A[i,:]) */
		nrm = 0.0;
		for(j=A->ptr[i];j<A->ptr[i+1];j++)
		{
			nrm = _max(nrm,fabs(A->value[j]));
		}
		tol_dd = nrm * tol;

		/* l = e_i  */
		/* u = e_i  */
		l[i]  = 1.0;
		u[i]  = 1.0;
		il[0] = i;
		iu[0] = i;
		cl    = 1;
		cu    = 1;
		wu[i] = 1;
		wl[i] = 1;

		/* r = e_i^T*A */
		for(j=A->ptr[i];j<A->ptr[i+1];j++)
		{
			jj    = A->index[j];
			r[jj] = A->value[j];
		}
		/* c = A_i = A*e_i */
		for(j=B->ptr[i];j<B->ptr[i+1];j++)
		{
			jj    = B->index[j];
			c[jj] = B->value[j];
		}

		/* W_i = W_i - (r*Z_j/D_jj)*W_j   (d[j] already holds 1/D_jj) */
		for(j=0;j<i;j++)
		{
			t = 0.0;
			for(k=0;k<Z->nnz[j];k++)
			{
				t += r[Z->index[j][k]]*Z->value[j][k];
			}
			t = t * d[j];
			if( fabs(t) > tol_dd )
			{
				for(k=0;k<W->nnz[j];k++)
				{
					v      = t * W->value[j][k];
					if( fabs(v) > tol_dd )
					{
						jj     = W->index[j][k];
						if( wl[jj]==1 )
						{
							l[jj] -= v;
						}
						else
						{
							/* first contribution at this position */
							l[jj]    = -v;
							il[cl++] = jj;
							wl[jj]   = 1;
						}
					}
				}
			}
		}

		/* Z_i = Z_i - (W_j^T*c/D_jj)*Z_j */
		for(j=0;j<i;j++)
		{
			t = 0.0;
			for(k=0;k<W->nnz[j];k++)
			{
				t += c[W->index[j][k]]*W->value[j][k];
			}
			t = t * d[j];
			if( fabs(t) > tol_dd )
			{
				for(k=0;k<Z->nnz[j];k++)
				{
					v      = t * Z->value[j][k];
					if( fabs(v) > tol_dd )
					{
						jj     = Z->index[j][k];
						if( wu[jj]==1 )
						{
							u[jj] -= v;
						}
						else
						{
							u[jj]    = -v;
							iu[cu++] = jj;
							wu[jj]   = 1;
						}
					}
				}
			}
		}

		/* Store the gathered entries as row i of W and of Z. */
		W->nnz[i] = cl;
		if( cl > 0 )
		{
			W->index[i] = (LIS_INT *)malloc(cl*sizeof(LIS_INT));
			W->value[i] = (LIS_SCALAR *)malloc(cl*sizeof(LIS_SCALAR));
			if( W->index[i]==NULL || W->value[i]==NULL )
			{
				LIS_SETERR_MEM(cl*sizeof(LIS_SCALAR));
				return LIS_OUT_OF_MEMORY;
			}
			memcpy(W->index[i],il,cl*sizeof(LIS_INT));
			for(j=0;j<cl;j++)
			{
				W->value[i][j] = l[il[j]];
			}
		}
		Z->nnz[i] = cu;
		if( cu > 0 )
		{
			Z->index[i] = (LIS_INT *)malloc(cu*sizeof(LIS_INT));
			Z->value[i] = (LIS_SCALAR *)malloc(cu*sizeof(LIS_SCALAR));
			if( Z->index[i]==NULL || Z->value[i]==NULL )
			{
				LIS_SETERR_MEM(cu*sizeof(LIS_SCALAR));
				return LIS_OUT_OF_MEMORY;
			}
			memcpy(Z->index[i],iu,cu*sizeof(LIS_INT));
			for(j=0;j<cu;j++)
			{
				Z->value[i][j] = u[iu[j]];
			}
		}

		/* Return the scratch vectors to their clean state. */
		for(j=A->ptr[i];j<A->ptr[i+1];j++) r[A->index[j]] = 0.0;
		for(j=B->ptr[i];j<B->ptr[i+1];j++) c[B->index[j]] = 0.0;
		for(j=0;j<cl;j++)
		{
			wl[il[j]] = 0;
			l[il[j]] = 0.0;
		}
		for(j=0;j<cu;j++)
		{
			wu[iu[j]] = 0;
		}

		/* D_ii = W_i^T * A * Z_i                                    */
		/* l, il and wl are reused here to accumulate A*Z_i from the */
		/* columns of A selected by the nonzeros of Z_i.             */
		cl = 0;
		for(k=0;k<Z->nnz[i];k++)
		{
			ii = Z->index[i][k];
			for(j=B->ptr[ii];j<B->ptr[ii+1];j++)
			{
				jj     = B->index[j];
				if( wl[jj]==0 )
				{
					l[jj] = B->value[j]*Z->value[i][k];
					wl[jj]   = 1;
					il[cl++] = jj;
				}
				else
				{
					l[jj] += B->value[j]*Z->value[i][k];
				}
			}
		}
		t = 0.0;
		for(j=0;j<W->nnz[i];j++)
		{
			k  = W->index[i][j];
			t += W->value[i][j] * l[k];
		}
		/* the reciprocal is stored so that later rows multiply by d[j] */
		d[i] = 1.0 / t;
		for(j=0;j<cl;j++) wl[il[j]] = 0;

	}

	lis_matrix_destroy(B);
	lis_free2(8,r,c,il,l,wl,iu,u,wu);


	precon->L  = W;
	precon->U  = Z;
	precon->D  = D;

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Build a threshold-based incomplete LU factorization of the CSR matrix
 * solver->A.
 *
 * For each row i the entries are scattered into a work row: strictly lower
 * entries are packed from position 0, the diagonal sits at position i and
 * strictly upper entries follow from position i+1.  The lower entries are
 * then eliminated in increasing column order using the already computed
 * rows of U and the stored reciprocal pivots in D.  Fill-in smaller than
 * tol * (mean |a_ij| of the row) is dropped unless the position already
 * exists.  Finally at most lfil entries of the L part and at most lfil
 * entries of the U part are stored (lis_sort_di presumably orders them by
 * decreasing magnitude and lis_sort_i restores increasing order of the
 * kept positions -- confirm against those helpers).
 *
 * lfil = (LIS_INT)((nnz/(2*n)) * rate) in both build variants.
 *
 * With _OPENMP defined every thread factorizes its own contiguous range of
 * rows [is,ie) and ignores all columns outside that range, so the result is
 * a block-diagonal factorization whose blocks depend on the thread count.
 *
 * solver : provides A, the drop tolerance (LIS_PARAMS_DROP) and the fill
 *          rate (LIS_PARAMS_RATE).
 * precon : on success precon->L, precon->U and precon->D receive the
 *          strictly lower rows, the strictly upper rows and the reciprocal
 *          pivots.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY or the error of a failing helper.
 * NOTE(review): the per-row malloc results are not checked and nothing is
 * released on the error paths.
 */
LIS_INT lis_precon_create_ilut_csr(LIS_SOLVER solver, LIS_PRECON precon)
{
#ifdef _OPENMP
  LIS_INT        err;
  LIS_INT        i,j,k,jj;
  LIS_INT        is,ie,my_rank,nprocs;
  LIS_INT        n,lfil,len;
  LIS_SCALAR    t,tol,m;
  LIS_MATRIX    A;
  LIS_MATRIX_ILU  L,U;
  LIS_VECTOR    D;

  LIS_SCALAR    tnorm, tolnorm;
  LIS_SCALAR    fact,lxu,*wn,*w;
  LIS_INT        lenu,lenl,col,jpos,jrow,upos;
  LIS_INT        *jbuf,*iw;

  LIS_DEBUG_FUNC_IN;


  A      = solver->A;
  n      = A->n;
  tol    = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];
  m      = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN];
  /* scale first, truncate afterwards: same fill limit as the serial build */
  lfil   = (LIS_INT)(((double)A->nnz/(2.0*n))*m);
  nprocs = omp_get_max_threads();

  L      = NULL;
  U      = NULL;


  err = lis_matrix_ilu_create(n,1,&L);
  if( err ) return err;
  err = lis_matrix_ilu_create(n,1,&U);
  if( err ) return err;
  err = lis_matrix_ilu_setCR(L);
  if( err ) return err;
  err = lis_matrix_ilu_setCR(U);
  if( err ) return err;
  err = lis_vector_duplicate(A,&D);
  if( err )
  {
    return err;
  }

  /* One slice of n entries per thread in every work array; thread r uses */
  /* the slice starting at r*n.                                           */
  w   = (LIS_SCALAR *)lis_malloc(nprocs*(n+1)*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w");
  if( w==NULL )
  {
    LIS_SETERR_MEM(nprocs*(n+1)*sizeof(LIS_SCALAR));
    return LIS_OUT_OF_MEMORY;
  }
  wn = (LIS_SCALAR *)lis_malloc(nprocs*n*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w");
  if( wn==NULL )
  {
    LIS_SETERR_MEM(nprocs*n*sizeof(LIS_SCALAR));
    return LIS_OUT_OF_MEMORY;
  }

  jbuf   = (LIS_INT *)lis_malloc(nprocs*n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw");
  if( jbuf==NULL )
  {
    LIS_SETERR_MEM(nprocs*n*sizeof(LIS_INT));
    return LIS_OUT_OF_MEMORY;
  }
  iw   = (LIS_INT *)lis_malloc(nprocs*n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw");
  if( iw==NULL )
  {
    LIS_SETERR_MEM(nprocs*n*sizeof(LIS_INT));
    return LIS_OUT_OF_MEMORY;
  }


  #pragma omp parallel private(is,ie,my_rank,i,j,k,jj,tnorm,tolnorm,len,lenu,lenl,col,t,jpos,jrow,fact,lxu,upos)
  {
    my_rank  = omp_get_thread_num();
    /* presumably yields this thread's row range [is,ie) -- see LIS_GET_ISIE */
    LIS_GET_ISIE(my_rank,nprocs,n,is,ie);

    /* iw[col] = position of column col in the work row, -1 if absent */
    for(i=is;i<ie;i++) iw[my_rank*n+i] = -1;

    for(i=is;i<ie;i++)
    {
      /* drop threshold: tol times the mean magnitude of the local entries */
      tnorm = 0;
      k = 0;
      for(j=A->ptr[i];j<A->ptr[i+1];j++)
      {
        jj = A->index[j];
        if( jj<is || jj>=ie ) continue;
        tnorm += fabs(A->value[j]);
        k++;
      }
      tnorm   = tnorm / (double)k;
      tolnorm = tol * tnorm;

      lenu = 0;
      lenl = 0;
      jbuf[my_rank*n+i] = i;
      w[my_rank*n+i] = 0;
      iw[my_rank*n+i] = i;

      /* scatter row i: lower part at 0.., diagonal at i, upper part at i+1.. */
      for(j=A->ptr[i];j<A->ptr[i+1];j++)
      {
        col = A->index[j];
        if( col<is || col>=ie ) continue;
        t = A->value[j];
        if( col < i )
        {
          jbuf[my_rank*n+lenl] = col;
          iw[my_rank*n+col] = lenl;
          w[my_rank*n+lenl] = t;
          lenl++;
        }
        else if( col == i )
        {
          w[my_rank*n+i] = t;
        }
        else
        {
          lenu++;
          jpos = i + lenu;
          jbuf[my_rank*n+jpos] = col;
          iw[my_rank*n+col] = jpos;
          w[my_rank*n+jpos] = t;
        }
      }

      j = -1;
      len = 0;

      while( ++j < lenl )
      {
        /* select the smallest remaining column and swap it to position j */
        jrow = jbuf[my_rank*n+j];
        jpos = j;
        for(k=j+1;k<lenl;k++)
        {
          if( jbuf[my_rank*n+k]<jrow )
          {
            jrow = jbuf[my_rank*n+k];
            jpos = k;
          }
        }
        if( jpos!=j )
        {
          col = jbuf[my_rank*n+j];
          jbuf[my_rank*n+j] = jbuf[my_rank*n+jpos];
          jbuf[my_rank*n+jpos] = col;
          iw[my_rank*n+jrow] = j;
          iw[my_rank*n+col] = jpos;
          t = w[my_rank*n+j];
          w[my_rank*n+j] = w[my_rank*n+jpos];
          w[my_rank*n+jpos] = t;
        }
        /* multiplier: D holds the reciprocal pivot of row jrow */
        fact = w[my_rank*n+j] * D->value[jrow];
        w[my_rank*n+j] = fact;
        iw[my_rank*n+jrow] = -1;

        /* subtract fact * (row jrow of U) from the work row */
        for(k=0;k<U->nnz[jrow];k++)
        {
          col = U->index[jrow][k];
          jpos = iw[my_rank*n+col];
          lxu = -fact * U->value[jrow][k];

          /* small fill-in at a new position is dropped */
          if( fabs(lxu) < tolnorm && jpos==-1 ) continue;
          if( col >= i )
          {
            if( jpos == -1 )
            {
              lenu++;
              upos = i + lenu;
              jbuf[my_rank*n+upos] = col;
              iw[my_rank*n+col] = upos;
              w[my_rank*n+upos] = lxu;
            }
            else
            {
              w[my_rank*n+jpos] += lxu;
            }
          }
          else
          {
            if( jpos == -1 )
            {
              jbuf[my_rank*n+lenl] = col;
              iw[my_rank*n+col] = lenl;
              w[my_rank*n+lenl] = lxu;
              lenl++;
            }
            else
            {
              w[my_rank*n+jpos] += lxu;
            }
          }
        }
      }

      /* clear the position markers of the diagonal and the upper part */
      iw[my_rank*n+i] = -1;
      for(j=0;j<lenu;j++)
      {
        iw[ my_rank*n+jbuf[my_rank*n+i+j+1] ] = -1;
      }

      D->value[i] = 1.0 / w[my_rank*n+i];


      /* L part: keep at most lfil entries; iw doubles as the sort permutation */
      len = _min(lfil,lenl);
      for(j=0;j<lenl;j++)
      {
        wn[my_rank*n+j] = fabs(w[my_rank*n+j]);
        iw[my_rank*n+j] = j;
      }
      lis_sort_di(0,lenl-1,&wn[my_rank*n],&iw[my_rank*n]);
      lis_sort_i(0,len-1,&iw[my_rank*n]);
      
      L->nnz[i] = len;
      if( len>0 )
      {
        L->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
        L->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR));
      }
      for(j=0;j<len;j++)
      {
        jpos = iw[my_rank*n+j];
        L->index[i][j] = jbuf[my_rank*n+jpos];
        L->value[i][j] = w[my_rank*n+jpos];
      }
      for(j=0;j<lenl;j++) iw[my_rank*n+j] = -1;

      /* U part: same selection on positions i+1 .. i+lenu */
      len = _min(lfil,lenu);
      for(j=0;j<lenu;j++)
      {
        wn[my_rank*n+j] = fabs(w[my_rank*n+i+j+1]);
        iw[my_rank*n+j] = i+j+1;
      }
      lis_sort_di(0,lenu-1,&wn[my_rank*n],&iw[my_rank*n]);
      lis_sort_i(0,len-1,&iw[my_rank*n]);
      
      U->nnz[i] = len;
      if( len>0 )
      {
        U->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
        U->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR));
      }
      for(j=0;j<len;j++)
      {
        jpos = iw[my_rank*n+j];
        U->index[i][j] = jbuf[my_rank*n+jpos];
        U->value[i][j] = w[my_rank*n+jpos];
      }
      for(j=0;j<lenu;j++) iw[my_rank*n+j] = -1;
    }
  }

  precon->L  = L;
  precon->U  = U;
  precon->D  = D;

  lis_free2(4,w,iw,wn,jbuf);

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
#else
  LIS_INT        err;
  LIS_INT        i,j,k;
  LIS_INT        n,lfil,len;
  LIS_SCALAR    t,tol,m;
  LIS_MATRIX    A;
  LIS_MATRIX_ILU  L,U;
  LIS_VECTOR    D;

  LIS_SCALAR    tnorm, tolnorm;
  LIS_SCALAR    fact,lxu,*wn,*w;
  LIS_INT        lenu,lenl,col,jpos,jrow,upos;
  LIS_INT        *jbuf,*iw;

  LIS_DEBUG_FUNC_IN;


  A      = solver->A;
  n      = A->n;
  tol    = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];
  m      = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN];
  lfil   = (LIS_INT)(((double)A->nnz/(2.0*n))*m);

  L      = NULL;
  U      = NULL;


  err = lis_matrix_ilu_create(n,1,&L);
  if( err ) return err;
  err = lis_matrix_ilu_create(n,1,&U);
  if( err ) return err;
  err = lis_matrix_ilu_setCR(L);
  if( err ) return err;
  err = lis_matrix_ilu_setCR(U);
  if( err ) return err;
  err = lis_vector_duplicate(A,&D);
  if( err )
  {
    return err;
  }

  w   = (LIS_SCALAR *)lis_malloc((n+1)*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w");
  if( w==NULL )
  {
    LIS_SETERR_MEM((n+1)*sizeof(LIS_SCALAR));
    return LIS_OUT_OF_MEMORY;
  }
  wn = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_ilut_csr::w");
  if( wn==NULL )
  {
    LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
    return LIS_OUT_OF_MEMORY;
  }

  jbuf   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw");
  if( jbuf==NULL )
  {
    LIS_SETERR_MEM(n*sizeof(LIS_INT));
    return LIS_OUT_OF_MEMORY;
  }
  iw   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_ilut_csr::iw");
  if( iw==NULL )
  {
    LIS_SETERR_MEM(n*sizeof(LIS_INT));
    return LIS_OUT_OF_MEMORY;
  }


  /* iw[col] = position of column col in the work row, -1 if absent */
  for(i=0;i<n;i++) iw[i] = -1;

  for(i=0;i<n;i++)
  {
    /* drop threshold: tol times the mean magnitude of the row entries */
    tnorm = 0;
    for(j=A->ptr[i];j<A->ptr[i+1];j++)
    {
      tnorm += fabs(A->value[j]);
    }
    tnorm   = tnorm / (double)(A->ptr[i+1]-A->ptr[i]);
    tolnorm = tol * tnorm;

    lenu = 0;
    lenl = 0;
    jbuf[i] = i;
    w[i] = 0;
    iw[i] = i;

    /* scatter row i: lower part at 0.., diagonal at i, upper part at i+1.. */
    for(j=A->ptr[i];j<A->ptr[i+1];j++)
    {
      col = A->index[j];
      #ifdef USE_MPI
        if( col>n-1 ) continue;
      #endif
      t = A->value[j];
      if( col < i )
      {
        jbuf[lenl] = col;
        iw[col] = lenl;
        w[lenl] = t;
        lenl++;
      }
      else if( col == i )
      {
        w[i] = t;
      }
      else
      {
        lenu++;
        jpos = i + lenu;
        jbuf[jpos] = col;
        iw[col] = jpos;
        w[jpos] = t;
      }
    }

    j = -1;
    len = 0;

    while( ++j < lenl )
    {
      /* select the smallest remaining column and swap it to position j */
      jrow = jbuf[j];
      jpos = j;
      for(k=j+1;k<lenl;k++)
      {
        if( jbuf[k]<jrow )
        {
          jrow = jbuf[k];
          jpos = k;
        }
      }
      if( jpos!=j )
      {
        col = jbuf[j];
        jbuf[j] = jbuf[jpos];
        jbuf[jpos] = col;
        iw[jrow] = j;
        iw[col] = jpos;
        t = w[j];
        w[j] = w[jpos];
        w[jpos] = t;
      }
      /* multiplier: D holds the reciprocal pivot of row jrow */
      fact = w[j] * D->value[jrow];
      w[j] = fact;
      iw[jrow] = -1;

      /* subtract fact * (row jrow of U) from the work row */
      for(k=0;k<U->nnz[jrow];k++)
      {
        col = U->index[jrow][k];
        jpos = iw[col];
        lxu = -fact * U->value[jrow][k];

        /* small fill-in at a new position is dropped */
        if( fabs(lxu) < tolnorm && jpos==-1 ) continue;
        if( col >= i )
        {
          if( jpos == -1 )
          {
            lenu++;
            upos = i + lenu;
            jbuf[upos] = col;
            iw[col] = upos;
            w[upos] = lxu;
          }
          else
          {
            w[jpos] += lxu;
          }
        }
        else
        {
          if( jpos == -1 )
          {
            jbuf[lenl] = col;
            iw[col] = lenl;
            w[lenl] = lxu;
            lenl++;
          }
          else
          {
            w[jpos] += lxu;
          }
        }
      }
    }

    /* clear the position markers of the diagonal and the upper part */
    iw[i] = -1;
    for(j=0;j<lenu;j++)
    {
      iw[ jbuf[i+j+1] ] = -1;
    }

    D->value[i] = 1.0 / w[i];


    /* L part: keep at most lfil entries; iw doubles as the sort permutation */
    len = _min(lfil,lenl);
    for(j=0;j<lenl;j++)
    {
      wn[j] = fabs(w[j]);
      iw[j] = j;
    }
    lis_sort_di(0,lenl-1,wn,iw);
    lis_sort_i(0,len-1,iw);
    
    L->nnz[i] = len;
    if( len>0 )
    {
      L->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
      L->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR));
    }
    for(j=0;j<len;j++)
    {
      jpos = iw[j];
      L->index[i][j] = jbuf[jpos];
      L->value[i][j] = w[jpos];
    }
    for(j=0;j<lenl;j++) iw[j] = -1;

    /* U part: same selection on positions i+1 .. i+lenu */
    len = _min(lfil,lenu);
    for(j=0;j<lenu;j++)
    {
      wn[j] = fabs(w[i+j+1]);
      iw[j] = i+j+1;
    }
    lis_sort_di(0,lenu-1,wn,iw);
    lis_sort_i(0,len-1,iw);
    
    U->nnz[i] = len;
    if( len>0 )
    {
      U->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
      U->value[i] = (LIS_SCALAR *)malloc(len*sizeof(LIS_SCALAR));
    }
    for(j=0;j<len;j++)
    {
      jpos = iw[j];
      U->index[i][j] = jbuf[jpos];
      U->value[i][j] = w[jpos];
    }
    for(j=0;j<lenu;j++) iw[j] = -1;
  }

  precon->L  = L;
  precon->U  = U;
  precon->D  = D;

  lis_free2(4,w,iw,wn,jbuf);

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
#endif
}
Exemplo n.º 3
0
/*
 * Build the factors of a "sainv" (sparse approximate inverse) preconditioner
 * for the CSR matrix solver->A, updating the not-yet-processed rows in place.
 *
 * W and Z both start as the identity (one entry per row).  For each i:
 *   l     = A * Z_i        (only positions j > i are recorded)
 *   u     = W_i^T * A      (only positions j > i are recorded)
 *   d[i]  = 1 / (u . Z_i)
 *   W_j  -= (l_j * d[i]) * W_i   for every recorded j
 *   Z_j  -= (u_j * d[i]) * Z_i   for every recorded j
 * A single update term t is applied only if |t| / max|A(i,:)| > tol.
 *
 * solver : provides A and the drop tolerance (LIS_PARAMS_DROP).
 * precon : on success precon->L receives W, precon->U receives Z and
 *          precon->D receives the vector d.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY or the error of a failing helper.
 * NOTE(review): objects created before a failure are not released on the
 * error paths.
 */
LIS_INT lis_precon_create_sainv_csr(LIS_SOLVER solver, LIS_PRECON precon)
{
	LIS_INT	err;
	LIS_INT	i,j,k,ii,jj,ik,jk;
	LIS_INT	n,annz,cl,cu;
	LIS_INT	*ww,*il,*iu;
	LIS_SCALAR t,dd;
	LIS_REAL tol,nrm;
	LIS_SCALAR *d,*l,*u;
	LIS_MATRIX A,B;
	LIS_MATRIX_ILU W,Z;
	LIS_VECTOR D;

	LIS_DEBUG_FUNC_IN;


	A      = solver->A;
	n      = A->n;
	tol    = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];
	/* annz is both the initial row capacity and the growth step of W and */
	/* Z.  Slot 0 of every row is written below and rows only grow by     */
	/* annz, so it must never be 0 (which n/10 is for n < 10).            */
	annz   = A->n / 10;
	if( annz < 1 ) annz = 1;

	W      = NULL;
	Z      = NULL;
	ww     = NULL;
	d      = NULL;
	l      = NULL;
	u      = NULL;
	il     = NULL;
	iu     = NULL;

	err = lis_matrix_ilu_create(n,1,&W);
	if( err ) return err;
	err = lis_matrix_ilu_create(n,1,&Z);
	if( err ) return err;
	err = lis_matrix_ilu_setCR(W);
	if( err ) return err;
	err = lis_matrix_ilu_setCR(Z);
	if( err ) return err;
	err = lis_vector_duplicate(A,&D);
	if( err ) return err;
	d = D->value;
	err = lis_matrix_ilu_premalloc(annz,W);
	if( err ) return err;
	err = lis_matrix_ilu_premalloc(annz,Z);
	if( err ) return err;
	/* l, u : dense accumulators; il, iu : recorded positions (> i);  */
	/* ww   : marker array, all zero between uses                     */
	l   = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::l");
	if( l==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		return LIS_OUT_OF_MEMORY;
	}
	u   = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_precon_create_sainv_csr::u");
	if( u==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_SCALAR));
		return LIS_OUT_OF_MEMORY;
	}
	il   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::il");
	if( il==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}
	iu   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::iu");
	if( iu==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}
	ww   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_sainv_csr::ww");
	if( ww==NULL )
	{
		LIS_SETERR_MEM(n*sizeof(LIS_INT));
		return LIS_OUT_OF_MEMORY;
	}
	/* B is filled from A by the CSR->CSC conversion: column access to A */
	err = lis_matrix_duplicate(A,&B);
	if( err ) return err;
	err = lis_matrix_convert_csr2csc(A,B);
	if( err )
	{
		return err;
	}

	for(i=0;i<n;i++) ww[i] = 0;
	/* W = Z = I */
	for(i=0;i<n;i++)
	{
		W->value[i][0] = 1.0;
		W->index[i][0] = i;
		W->nnz[i]      = 1;
		Z->value[i][0] = 1.0;
		Z->index[i][0] = i;
		Z->nnz[i]      = 1;
	}
	for(i=0; i<n; i++)
	{
		/* nrm_inf(A[i,:]) */
		nrm = 0.0;
		for(j=A->ptr[i];j<A->ptr[i+1];j++)
		{
			nrm = _max(nrm,fabs(A->value[j]));
		}
		/* kept as a reciprocal: the drop test below is |t|*nrm > tol */
		nrm = 1.0/nrm;

		/* l = AZ_i */
		cl = 0;
		memset(l,0,n*sizeof(LIS_SCALAR));
		for(k=0;k<Z->nnz[i];k++)
		{
			ii = Z->index[i][k];
			for(j=B->ptr[ii];j<B->ptr[ii+1];j++)
			{
				jj     = B->index[j];
				if( jj>i )
				{
					l[jj] += B->value[j]*Z->value[i][k];
					if( ww[jj]==0 )
					{
						ww[jj]   = 1;
						il[cl++] = jj;
					}
				}
			}
		}
		for(k=0;k<cl;k++) ww[il[k]] = 0;

		/* u = W_i'A */
		cu = 0;
		memset(u,0,n*sizeof(LIS_SCALAR));
		for(k=0;k<W->nnz[i];k++)
		{
			ii = W->index[i][k];
			for(j=A->ptr[ii];j<A->ptr[ii+1];j++)
			{
				jj     = A->index[j];
				#ifdef USE_MPI
					if( jj>n-1 ) continue;
				#endif
				/* every position is accumulated (d_ii needs them), */
				/* but only positions beyond i are recorded in iu   */
				u[jj] += A->value[j]*W->value[i][k];
				if( jj>i && ww[jj]==0 )
				{
					ww[jj]   = 1;
					iu[cu++] = jj;
				}
			}
		}
		for(k=0;k<cu;k++) ww[iu[k]] = 0;

		/* d_ii = uZ_i or W_i'l  */
		t = 0.0;
		for(k=0;k<Z->nnz[i];k++)
		{
			t += u[Z->index[i][k]]*Z->value[i][k];
		}
		d[i] = 1.0/t;

		/* for j>i, l_j!=0            */
		/* w_j = w_j - (l_j/d_ii)*w_i */
		for(jj=0;jj<cl;jj++)
		{
			j = il[jj];
			dd = l[j]*d[i];
			/* ww[col] = 1 + slot of col inside row j of W, 0 if absent */
			for(k=0;k<W->nnz[j];k++)
			{
				ww[W->index[j][k]] = k+1;
			}
			for(ik=0;ik<W->nnz[i];ik++)
			{
				jk = ww[W->index[i][ik]];
				if( jk!=0 )
				{
					t = dd*W->value[i][ik];
					if( fabs(t)*nrm > tol )
					{
						W->value[j][jk-1] -= t;
					}
				}
				else
				{
					t = dd*W->value[i][ik];
					if( fabs(t)*nrm > tol )
					{
						/* row j is full: enlarge it by annz slots */
						if( W->nnz[j] == W->nnz_ma[j] )
						{
							W->nnz_ma[j] += annz;
							err = lis_matrix_ilu_realloc(j,W->nnz_ma[j],W);
							if( err ) return err;
						}
						jk                = W->nnz[j];
						W->index[j][jk] = W->index[i][ik];
						W->value[j][jk] = -t;
						W->nnz[j]++;
					}
				}
			}
			for(k=0;k<W->nnz[j];k++)
			{
				ww[W->index[j][k]] = 0;
			}
		}

		/* for j>i, u_j!=0            */
		/* z_j = z_j - (u_j/d_ii)*z_i */
		for(jj=0;jj<cu;jj++)
		{
			j = iu[jj];
			dd = u[j]*d[i];
			for(k=0;k<Z->nnz[j];k++)
			{
				ww[Z->index[j][k]] = k+1;
			}
			for(ik=0;ik<Z->nnz[i];ik++)
			{
				jk = ww[Z->index[i][ik]];
				if( jk!=0 )
				{
					t = dd*Z->value[i][ik];
					if( fabs(t)*nrm > tol )
					{
						Z->value[j][jk-1] -= t;
					}
				}
				else
				{
					t = dd*Z->value[i][ik];
					if( fabs(t)*nrm > tol )
					{
						if( Z->nnz[j] == Z->nnz_ma[j] )
						{
							Z->nnz_ma[j] += annz;
							err = lis_matrix_ilu_realloc(j,Z->nnz_ma[j],Z);
							if( err ) return err;
						}
						jk                = Z->nnz[j];
						Z->index[j][jk] = Z->index[i][ik];
						Z->value[j][jk] = -t;
						Z->nnz[j]++;
					}
				}
			}
			for(k=0;k<Z->nnz[j];k++)
			{
				ww[Z->index[j][k]] = 0;
			}
		}
	}

	lis_matrix_destroy(B);
	lis_free2(5,l,u,ww,il,iu);


	precon->L  = W;
	precon->U  = Z;
	precon->D  = D;


	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * Build a threshold-based incomplete block LU factorization of the BSR
 * matrix solver->A (nr block rows, blocks of bnr x bnr = bs scalars).
 *
 * For each block row i the blocks are scattered into a work row: strictly
 * lower blocks are packed from position 0, the diagonal block sits at
 * position i and strictly upper blocks follow from position i+1.  Blocks of
 * A whose norm is below tol * (largest block norm of the row) are skipped,
 * except the diagonal.  The lower blocks are eliminated in increasing block
 * column order; a multiplier is discarded when its norm times xnrm[jrow]
 * does not exceed the threshold, and small fill-in at new positions is
 * dropped.  At most lfil blocks are stored in L; lenu and the U-side limit
 * both count the diagonal block, so U stores min(lfil,lenu)-1 blocks.
 *
 * The scratch blocks buf_ns / buf_fact hold 16 scalars.
 * NOTE(review): this presumes bnr <= 4 -- confirm with the callers.
 *
 * solver : provides A, the drop tolerance (LIS_PARAMS_DROP) and the fill
 *          rate (LIS_PARAMS_RATE).
 * precon : on success precon->L and precon->U receive the block rows and
 *          precon->WD receives the diagonal blocks after lis_array_LUdecomp.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY or the error of a failing helper.
 * NOTE(review): nothing is released on the error paths.
 */
LIS_INT lis_precon_create_ilut_bsr(LIS_SOLVER solver, LIS_PRECON precon)
{
  LIS_INT        err;
  LIS_INT        i,j,k,kk,bnr,bs;
  LIS_INT        n,nr,lfil,len;
  LIS_SCALAR    t,tol,m;
  LIS_MATRIX    A;
  LIS_MATRIX_ILU  L,U;
  LIS_MATRIX_DIAG  D;

  LIS_SCALAR    tnorm, tolnorm;
  LIS_SCALAR    buf_ns[16],buf_fact[16],*xnrm,*wn,*w;
  LIS_INT        lenu,lenl,col,jpos,jrow,upos,para;
  LIS_INT        *jbuf,*iw;

  LIS_DEBUG_FUNC_IN;


  A      = solver->A;
  n      = A->n;
  nr     = A->nr;
  bnr    = A->bnr;
  bs     = bnr*bnr;
  tol    = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];
  m      = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN];
  lfil   = (LIS_INT)(((double)A->bnnz/(2.0*nr))*m);

  L      = NULL;
  U      = NULL;


  err = lis_matrix_ilu_create(nr,bnr,&L);
  if( err ) return err;
  err = lis_matrix_ilu_create(nr,bnr,&U);
  if( err ) return err;
  err = lis_matrix_ilu_setCR(L);
  if( err ) return err;
  err = lis_matrix_ilu_setCR(U);
  if( err ) return err;
  err = lis_matrix_diag_duplicateM(A,&D);
  if( err )
  {
    return err;
  }

  /* w    : work row of blocks        xnrm : per-row norm used in the   */
  /* wn   : block norms for sorting          multiplier drop test       */
  /* jbuf : block column per position iw   : position per block column  */
  w   = (LIS_SCALAR *)lis_malloc(bs*(nr+1)*sizeof(LIS_SCALAR),"lis_precon_create_iluc_csr::w");
  if( w==NULL )
  {
    LIS_SETERR_MEM(bs*(nr+1)*sizeof(LIS_SCALAR));
    return LIS_OUT_OF_MEMORY;
  }
  xnrm = (LIS_SCALAR *)lis_malloc(nr*sizeof(LIS_SCALAR),"lis_precon_create_iluc_csr::w");
  if( xnrm==NULL )
  {
    LIS_SETERR_MEM(nr*sizeof(LIS_SCALAR));
    return LIS_OUT_OF_MEMORY;
  }
  wn = (LIS_SCALAR *)lis_malloc(nr*sizeof(LIS_SCALAR),"lis_precon_create_iluc_csr::w");
  if( wn==NULL )
  {
    LIS_SETERR_MEM(nr*sizeof(LIS_SCALAR));
    return LIS_OUT_OF_MEMORY;
  }

  jbuf   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_iluc_csr::iw");
  if( jbuf==NULL )
  {
    LIS_SETERR_MEM(n*sizeof(LIS_INT));
    return LIS_OUT_OF_MEMORY;
  }
  iw   = (LIS_INT *)lis_malloc(nr*sizeof(LIS_INT),"lis_precon_create_iluc_csr::iw");
  if( iw==NULL )
  {
    LIS_SETERR_MEM(nr*sizeof(LIS_INT));
    return LIS_OUT_OF_MEMORY;
  }


  for(i=0;i<nr;i++) iw[i] = -1;

  for(i=0;i<nr;i++)
  {
    /* drop threshold: tol times the largest block norm of the row */
    tnorm = 0;
    for(j=A->bptr[i];j<A->bptr[i+1];j++)
    {
      lis_array_nrm2(bs,&A->value[bs*j],&t);
      tnorm = _max(t,tnorm);
    }
    tolnorm = tol * tnorm;

    /* lenu starts at 1: it counts the diagonal block at position i */
    lenu = 1;
    lenl = 0;
    jbuf[i] = i;
    memset(&w[bs*i],0,bs*sizeof(LIS_SCALAR));
    iw[i] = i;

    /* scatter block row i, skipping small off-diagonal blocks */
    for(j=A->bptr[i];j<A->bptr[i+1];j++)
    {
      col = A->bindex[j];
      lis_array_nrm2(bs,&A->value[bs*j],&t);
      if( t<tolnorm && col!=i ) continue;
      if( col < i )
      {
        jbuf[lenl] = col;
        iw[col] = lenl;
        memcpy(&w[bs*lenl],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
        lenl++;
      }
      else if( col == i )
      {
        memcpy(&w[bs*i],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
      }
      else
      {
        jpos = i + lenu;
        jbuf[jpos] = col;
        iw[col] = jpos;
        memcpy(&w[bs*jpos],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
        lenu++;
      }
    }

    j = -1;
    len = 0;

    while( ++j < lenl )
    {
      /* select the smallest remaining block column and swap it to j */
      jrow = jbuf[j];
      jpos = j;
      for(k=j+1;k<lenl;k++)
      {
        if( jbuf[k]<jrow )
        {
          jrow = jbuf[k];
          jpos = k;
        }
      }
      if( jpos!=j )
      {
        col = jbuf[j];
        jbuf[j] = jbuf[jpos];
        jbuf[jpos] = col;
        iw[jrow] = j;
        iw[col] = jpos;
        memcpy(buf_ns,&w[bs*j],bs*sizeof(LIS_SCALAR));
        memcpy(&w[bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR));
        memcpy(&w[bs*jpos],buf_ns,bs*sizeof(LIS_SCALAR));
      }
      /* multiplier block from the LU-decomposed diagonal block of row   */
      /* jrow (exact operand convention: see lis_array_matinv)           */
      lis_array_matinv(bnr,&D->value[bs*jrow],&w[bs*j],buf_fact);
      iw[jrow] = -1;

      /* a negligible multiplier is discarded altogether */
      lis_array_nrm2(bs,buf_fact,&t);
      if( t * xnrm[jrow] <= tolnorm ) continue;

      for(k=0;k<U->nnz[jrow];k++)
      {
        col = U->index[jrow][k];
        lis_array_matmat(bnr,buf_fact,&U->value[jrow][bs*k],buf_ns,LIS_INS_VALUE);
        jpos = iw[col];

        /* small fill-in at a new position is dropped */
        lis_array_nrm2(bs,buf_ns,&t);
        if( t < tolnorm && jpos == -1 )
        {
          continue;
        }

        if( col >= i )
        {
          if( jpos == -1 )
          {
            upos = i + lenu;
            jbuf[upos] = col;
            iw[col] = upos;
            memcpy(&w[bs*upos],buf_ns,bs*sizeof(LIS_SCALAR));
            lenu++;
          }
          else
          {
            for(kk=0;kk<bs;kk++)
            {
              w[bs*jpos+kk] += buf_ns[kk];
            }
          }
        }
        else
        {
          if( jpos == -1 )
          {
            jbuf[lenl] = col;
            iw[col] = lenl;
            memcpy(&w[bs*lenl],buf_ns,bs*sizeof(LIS_SCALAR));
            lenl++;
          }
          else
          {
            for(kk=0;kk<bs;kk++)
            {
              w[bs*jpos+kk] += buf_ns[kk];
            }
          }
        }
      }
      /* kept multipliers are compacted to the front (len <= j) */
      for(kk=0;kk<bs;kk++)
      {
        w[bs*len+kk] = -buf_fact[kk];
      }
      jbuf[len] = jrow;
      len++;
    }
    /* L part: keep at most lfil of the surviving multiplier blocks */
    lenl = len;
    len = _min(lfil,lenl);
    for(j=0;j<lenl;j++)
    {
      lis_array_nrm2(bs,&w[bs*j],&wn[j]);
      iw[j] = j;
    }
    lis_sort_di(0,lenl-1,wn,iw);
    lis_sort_i(0,len-1,iw);
    
    L->nnz[i] = len;
    if( len>0 )
    {
      L->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
      L->value[i] = (LIS_SCALAR *)malloc(bs*len*sizeof(LIS_SCALAR));
      if( L->index[i]==NULL || L->value[i]==NULL )
      {
        LIS_SETERR_MEM(bs*len*sizeof(LIS_SCALAR));
        return LIS_OUT_OF_MEMORY;
      }
    }
    for(j=0;j<len;j++)
    {
      jpos = iw[j];
      L->index[i][j] = jbuf[jpos];
      memcpy(&L->value[i][bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR));
    }
    for(j=0;j<lenl;j++) iw[j] = -1;

    /* U part: len counts the diagonal block, so len-1 off-diagonal     */
    /* blocks are stored.  Clamp to 1 so that lfil == 0 yields an empty */
    /* row instead of a block count of -1.                              */
    len = _min(lfil,lenu);
    if( len<1 ) len = 1;
    for(j=1;j<lenu;j++)
    {
      jpos = i+j;
      lis_array_nrm2(bs,&w[bs*jpos],&wn[j-1]);
      iw[j-1] = jpos;
    }
    para = lenu - 1;
    lis_sort_di(0,para-1,wn,iw);
    lis_sort_i(0,len-2,iw);
    
    U->nnz[i] = len-1;
    if( len>1 )
    {
      U->index[i] = (LIS_INT *)malloc((len-1)*sizeof(LIS_INT));
      U->value[i] = (LIS_SCALAR *)malloc(bs*(len-1)*sizeof(LIS_SCALAR));
      if( U->index[i]==NULL || U->value[i]==NULL )
      {
        LIS_SETERR_MEM(bs*(len-1)*sizeof(LIS_SCALAR));
        return LIS_OUT_OF_MEMORY;
      }
    }
    /* t: max of the diagonal block norm and the first len-1 entries of wn */
    lis_array_nrm2(bs,&w[bs*i],&t);
    for(j=0;j<len-1;j++)
    {
      jpos = iw[j];
      U->index[i][j] = jbuf[jpos];
      memcpy(&U->value[i][bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR));
      t = _max(t,wn[j]);
    }
    for(j=0;j<lenu-1;j++) iw[j] = -1;

    xnrm[i] = t;

    memcpy(&D->value[bs*i],&w[bs*i],bs*sizeof(LIS_SCALAR));

    /* Last block row with n not a multiple of bnr: 1.0 is written on the */
    /* trailing diagonal entries of the block (presumably padding rows,   */
    /* so that the decomposition below does not meet a zero pivot).       */
    if( i==nr-1 )
    {
      switch(bnr)
      {
      case 2:
        if( n%2!=0 )
        {
          D->value[4*(nr-1)+3] = 1.0;
        }
        break;
      case 3:
        if( n%3==1 )
        {
          D->value[9*(nr-1)+4] = 1.0;
          D->value[9*(nr-1)+8] = 1.0;
        }
        else if( n%3==2 )
        {
          D->value[9*(nr-1)+8] = 1.0;
        }
        break;
      }
    }
    /* the diagonal block is kept in LU-decomposed form */
    lis_array_LUdecomp(bnr,&D->value[bs*i]);

    /* clear the position markers of the diagonal and the upper part */
    for(j=0;j<lenu;j++)
    {
      iw[ jbuf[i+j] ] = -1;
    }
  }

  precon->L  = L;
  precon->U  = U;
  precon->WD  = D;

  lis_free2(5,w,iw,xnrm,wn,jbuf);

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}