Beispiel #1
0
/*
 * lis_matrix_split_csr
 *
 * Splits the CSR arrays of A (A->ptr, A->index, A->value) into three parts:
 *   - entries with column index <  row index go to A->L (CSR),
 *   - entries with column index >  row index go to A->U (CSR),
 *   - entries with column index == row index go to A->D->value[row].
 * On success A->L, A->U and A->D are filled in and A->is_splited is set
 * to LIS_TRUE.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the error code of the helper
 * that failed.  Temporary buffers owned by this function are released on
 * every failure path.
 *
 * NOTE(review): when a helper fails after lis_matrix_LU_create() succeeded,
 * whatever that call attached to A is left in place; no matching teardown
 * routine is visible from this function.
 */
LIS_INT lis_matrix_split_csr(LIS_MATRIX A)
{
	LIS_INT i,j,n;
	LIS_INT nnzl,nnzu;
	LIS_INT err;
	LIS_INT *lptr,*lindex,*uptr,*uindex;
	LIS_SCALAR *lvalue,*uvalue;
	LIS_MATRIX_DIAG	D;
	#ifdef _OPENMP
		LIS_INT kl,ku;
		LIS_INT *liw,*uiw;
	#endif

	LIS_DEBUG_FUNC_IN;

	n        = A->n;
	nnzl     = 0;
	nnzu     = 0;
	D        = NULL;
	lptr     = NULL;
	lindex   = NULL;
	lvalue   = NULL;
	uptr     = NULL;
	uindex   = NULL;
	uvalue   = NULL;

	#ifdef _OPENMP
		/* liw/uiw become the row-pointer arrays of L and U: per-row counts
		   are gathered in parallel and then turned into offsets by a
		   sequential prefix sum. */
		liw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split_csr::liw");
		if( liw==NULL )
		{
			LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
			return LIS_OUT_OF_MEMORY;
		}
		uiw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split_csr::uiw");
		if( uiw==NULL )
		{
			LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
			lis_free(liw);
			return LIS_OUT_OF_MEMORY;
		}
		#pragma omp parallel for private(i)
		for(i=0;i<n+1;i++)
		{
			liw[i] = 0;
			uiw[i] = 0;
		}
		/* Each iteration only touches slot i+1, so rows do not conflict. */
		#pragma omp parallel for private(i,j)
		for(i=0;i<n;i++)
		{
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<i )
				{
					liw[i+1]++;
				}
				else if( A->index[j]>i )
				{
					uiw[i+1]++;
				}
			}
		}
		for(i=0;i<n;i++)
		{
			liw[i+1] += liw[i];
			uiw[i+1] += uiw[i];
		}
		nnzl = liw[n];
		nnzu = uiw[n];
	#else
		/* Sequential build: a first pass only counts the entries. */
		for(i=0;i<n;i++)
		{
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<i )
				{
					nnzl++;
				}
				else if( A->index[j]>i )
				{
					nnzu++;
				}
			}
		}
	#endif

	err = lis_matrix_LU_create(A);
	if( err )
	{
		#ifdef _OPENMP
			lis_free2(2,liw,uiw);
		#endif
		return err;
	}
	err = lis_matrix_malloc_csr(n,nnzl,&lptr,&lindex,&lvalue);
	if( err )
	{
		#ifdef _OPENMP
			lis_free2(2,liw,uiw);
		#endif
		return err;
	}
	err = lis_matrix_malloc_csr(n,nnzu,&uptr,&uindex,&uvalue);
	if( err )
	{
		lis_free2(6,lptr,lindex,lvalue,uptr,uindex,uvalue);
		#ifdef _OPENMP
			lis_free2(2,liw,uiw);
		#endif
		return err;
	}
	err = lis_matrix_diag_duplicateM(A,&D);
	if( err )
	{
		lis_free2(6,lptr,lindex,lvalue,uptr,uindex,uvalue);
		#ifdef _OPENMP
			lis_free2(2,liw,uiw);
		#endif
		return err;
	}

	#ifdef _OPENMP
		#pragma omp parallel for private(i)
		for(i=0;i<n+1;i++)
		{
			lptr[i] = liw[i];
			uptr[i] = uiw[i];
		}
		/* Row i writes only into [lptr[i],lptr[i+1]) and [uptr[i],uptr[i+1]),
		   so the rows can be scattered independently. */
		#pragma omp parallel for private(i,j,kl,ku)
		for(i=0;i<n;i++)
		{
			kl = lptr[i];
			ku = uptr[i];
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<i )
				{
					lindex[kl]   = A->index[j];
					lvalue[kl]   = A->value[j];
					kl++;
				}
				else if( A->index[j]>i )
				{
					uindex[ku]   = A->index[j];
					uvalue[ku]   = A->value[j];
					ku++;
				}
				else
				{
					D->value[i] = A->value[j];
				}
			}
		}
		lis_free2(2,liw,uiw);
	#else
		/* Second pass: nnzl/nnzu are reused as running write cursors and end
		   up holding the final entry counts again. */
		nnzl = 0;
		nnzu = 0;
		lptr[0] = 0;
		uptr[0] = 0;
		for(i=0;i<n;i++)
		{
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<i )
				{
					lindex[nnzl]   = A->index[j];
					lvalue[nnzl]   = A->value[j];
					nnzl++;
				}
				else if( A->index[j]>i )
				{
					uindex[nnzu]   = A->index[j];
					uvalue[nnzu]   = A->value[j];
					nnzu++;
				}
				else
				{
					D->value[i] = A->value[j];
				}
			}
			lptr[i+1] = nnzl;
			uptr[i+1] = nnzu;
		}
	#endif
	A->L->nnz     = nnzl;
	A->L->ptr     = lptr;
	A->L->index   = lindex;
	A->L->value   = lvalue;
	A->U->nnz     = nnzu;
	A->U->ptr     = uptr;
	A->U->index   = uindex;
	A->U->value   = uvalue;
	A->D          = D;
	A->is_splited = LIS_TRUE;
	
	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * lis_precon_create_ilut_bsr
 *
 * Builds an incomplete block LU factorisation of solver->A (block rows
 * described by A->bptr / A->bindex / A->value, block size bnr x bnr) using a
 * drop tolerance and a per-row fill limit, and stores the result in
 * precon->L, precon->U and precon->WD.
 *
 * Parameters read from solver->params:
 *   LIS_PARAMS_DROP  - relative drop tolerance (scaled by the largest block
 *                      norm of the current row),
 *   LIS_PARAMS_RATE  - multiplier applied to the average half-row block
 *                      count to obtain the fill limit lfil.
 *
 * Working storage for row i (all slots are blocks of bs = bnr*bnr scalars):
 *   w[0 .. lenl)        lower part of the working row,
 *   w[i]                diagonal block,
 *   w[i+1 .. i+lenu)    upper part of the working row,
 *   jbuf[slot]          block column stored in that slot,
 *   iw[col]             slot of block column col, or -1 if not present.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the error code of a failing
 * helper.  The scratch buffers are released on every allocation failure.
 *
 * NOTE(review): buf_ns/buf_fact hold 16 scalars, so this assumes bnr <= 4.
 * NOTE(review): L, U and D are not released on failure paths; no destructor
 * for them is visible from this function.
 * NOTE(review): if lfil evaluates to 0, U->nnz[i] becomes -1 below; this
 * behaviour is kept unchanged.
 */
LIS_INT lis_precon_create_ilut_bsr(LIS_SOLVER solver, LIS_PRECON precon)
{
  LIS_INT        err;
  LIS_INT        i,j,k,kk,bnr,bs;
  LIS_INT        n,nr,lfil,len;
  LIS_SCALAR    t,tol,m;
  LIS_MATRIX    A;
  LIS_MATRIX_ILU  L,U;
  LIS_MATRIX_DIAG  D;

  LIS_SCALAR    tnorm, tolnorm;
  LIS_SCALAR    buf_ns[16],buf_fact[16],*xnrm,*wn,*w;
  LIS_INT        lenu,lenl,col,jpos,jrow,upos,para;
  LIS_INT        *jbuf,*iw;

  LIS_DEBUG_FUNC_IN;


  A      = solver->A;
  n      = A->n;
  nr     = A->nr;
  bnr    = A->bnr;
  bs     = bnr*bnr;
  tol    = solver->params[LIS_PARAMS_DROP-LIS_OPTIONS_LEN];
  m      = solver->params[LIS_PARAMS_RATE-LIS_OPTIONS_LEN];
  lfil   = (LIS_INT)(((double)A->bnnz/(2.0*nr))*m);

  L      = NULL;
  U      = NULL;

  /* Scratch buffers start as NULL so that one lis_free2() call can release
     whatever has been allocated so far. */
  w      = NULL;
  xnrm   = NULL;
  wn     = NULL;
  jbuf   = NULL;
  iw     = NULL;


  err = lis_matrix_ilu_create(nr,bnr,&L);
  if( err ) return err;
  err = lis_matrix_ilu_create(nr,bnr,&U);
  if( err ) return err;
  err = lis_matrix_ilu_setCR(L);
  if( err ) return err;
  err = lis_matrix_ilu_setCR(U);
  if( err ) return err;
  err = lis_matrix_diag_duplicateM(A,&D);
  if( err )
  {
    return err;
  }

  w   = (LIS_SCALAR *)lis_malloc(bs*(nr+1)*sizeof(LIS_SCALAR),"lis_precon_create_ilut_bsr::w");
  if( w==NULL )
  {
    LIS_SETERR_MEM(bs*(nr+1)*sizeof(LIS_SCALAR));
    return LIS_OUT_OF_MEMORY;
  }
  xnrm = (LIS_SCALAR *)lis_malloc(nr*sizeof(LIS_SCALAR),"lis_precon_create_ilut_bsr::xnrm");
  if( xnrm==NULL )
  {
    LIS_SETERR_MEM(nr*sizeof(LIS_SCALAR));
    lis_free2(5,w,iw,xnrm,wn,jbuf);
    return LIS_OUT_OF_MEMORY;
  }
  wn = (LIS_SCALAR *)lis_malloc(nr*sizeof(LIS_SCALAR),"lis_precon_create_ilut_bsr::wn");
  if( wn==NULL )
  {
    LIS_SETERR_MEM(nr*sizeof(LIS_SCALAR));
    lis_free2(5,w,iw,xnrm,wn,jbuf);
    return LIS_OUT_OF_MEMORY;
  }

  jbuf   = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_precon_create_ilut_bsr::jbuf");
  if( jbuf==NULL )
  {
    LIS_SETERR_MEM(n*sizeof(LIS_INT));
    lis_free2(5,w,iw,xnrm,wn,jbuf);
    return LIS_OUT_OF_MEMORY;
  }
  iw   = (LIS_INT *)lis_malloc(nr*sizeof(LIS_INT),"lis_precon_create_ilut_bsr::iw");
  if( iw==NULL )
  {
    LIS_SETERR_MEM(nr*sizeof(LIS_INT));
    lis_free2(5,w,iw,xnrm,wn,jbuf);
    return LIS_OUT_OF_MEMORY;
  }


  for(i=0;i<nr;i++) iw[i] = -1;

  for(i=0;i<nr;i++)
  {
    /* Row-relative drop threshold: tol times the largest block norm. */
    tnorm = 0;
    for(j=A->bptr[i];j<A->bptr[i+1];j++)
    {
      lis_array_nrm2(bs,&A->value[bs*j],&t);
      tnorm = _max(t,tnorm);
    }
    tolnorm = tol * tnorm;

    /* The diagonal slot always exists, hence lenu starts at 1. */
    lenu = 1;
    lenl = 0;
    jbuf[i] = i;
    memset(&w[bs*i],0,bs*sizeof(LIS_SCALAR));
    iw[i] = i;

    /* Scatter block row i of A into the working row, dropping small
       off-diagonal blocks. */
    for(j=A->bptr[i];j<A->bptr[i+1];j++)
    {
      col = A->bindex[j];
      lis_array_nrm2(bs,&A->value[bs*j],&t);
      if( t<tolnorm && col!=i ) continue;
      if( col < i )
      {
        jbuf[lenl] = col;
        iw[col] = lenl;
        memcpy(&w[bs*lenl],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
        lenl++;
      }
      else if( col == i )
      {
        memcpy(&w[bs*i],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
      }
      else
      {
        jpos = i + lenu;
        jbuf[jpos] = col;
        iw[col] = jpos;
        memcpy(&w[bs*jpos],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
        lenu++;
      }
    }

    j = -1;
    len = 0;

    /* Eliminate the lower part in increasing block-column order.  New fill
       may be appended to the lower part while the loop is running. */
    while( ++j < lenl )
    {
      /* Select the smallest remaining block column and move it to slot j. */
      jrow = jbuf[j];
      jpos = j;
      for(k=j+1;k<lenl;k++)
      {
        if( jbuf[k]<jrow )
        {
          jrow = jbuf[k];
          jpos = k;
        }
      }
      if( jpos!=j )
      {
        col = jbuf[j];
        jbuf[j] = jbuf[jpos];
        jbuf[jpos] = col;
        iw[jrow] = j;
        iw[col] = jpos;
        memcpy(buf_ns,&w[bs*j],bs*sizeof(LIS_SCALAR));
        memcpy(&w[bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR));
        memcpy(&w[bs*jpos],buf_ns,bs*sizeof(LIS_SCALAR));
      }
      /* Block multiplier for pivot row jrow, computed from the stored
         (already factored) diagonal block of that row. */
      lis_array_matinv(bnr,&D->value[bs*jrow],&w[bs*j],buf_fact);
      iw[jrow] = -1;

      /* Drop the multiplier if it is negligible relative to row jrow. */
      lis_array_nrm2(bs,buf_fact,&t);
      if( t * xnrm[jrow] <= tolnorm ) continue;

      /* Combine the working row with row jrow of U. */
      for(k=0;k<U->nnz[jrow];k++)
      {
        col = U->index[jrow][k];
        lis_array_matmat(bnr,buf_fact,&U->value[jrow][bs*k],buf_ns,LIS_INS_VALUE);
        jpos = iw[col];

        /* Small contributions never create new fill. */
        lis_array_nrm2(bs,buf_ns,&t);
        if( t < tolnorm && jpos == -1 )
        {
          continue;
        }

        if( col >= i )
        {
          if( jpos == -1 )
          {
            upos = i + lenu;
            jbuf[upos] = col;
            iw[col] = upos;
            memcpy(&w[bs*upos],buf_ns,bs*sizeof(LIS_SCALAR));
            lenu++;
          }
          else
          {
            for(kk=0;kk<bs;kk++)
            {
              w[bs*jpos+kk] += buf_ns[kk];
            }
          }
        }
        else
        {
          if( jpos == -1 )
          {
            jbuf[lenl] = col;
            iw[col] = lenl;
            memcpy(&w[bs*lenl],buf_ns,bs*sizeof(LIS_SCALAR));
            lenl++;
          }
          else
          {
            for(kk=0;kk<bs;kk++)
            {
              w[bs*jpos+kk] += buf_ns[kk];
            }
          }
        }
      }
      /* Kept multipliers are compacted to the front of w/jbuf (len <= j). */
      for(kk=0;kk<bs;kk++)
      {
        w[bs*len+kk] = -buf_fact[kk];
      }
      jbuf[len] = jrow;
      len++;
    }

    /* Store at most lfil blocks of the lower part in L.  iw is reused here
       as a permutation of slot numbers for the two sort helpers. */
    lenl = len;
    len = _min(lfil,lenl);
    for(j=0;j<lenl;j++)
    {
      lis_array_nrm2(bs,&w[bs*j],&wn[j]);
      iw[j] = j;
    }
    lis_sort_di(0,lenl-1,wn,iw);
    lis_sort_i(0,len-1,iw);
    
    L->nnz[i] = len;
    if( len>0 )
    {
      L->index[i] = (LIS_INT *)malloc(len*sizeof(LIS_INT));
      L->value[i] = (LIS_SCALAR *)malloc(bs*len*sizeof(LIS_SCALAR));
      if( L->index[i]==NULL || L->value[i]==NULL )
      {
        /* Leave row i of L empty rather than half allocated. */
        LIS_SETERR_MEM(bs*len*sizeof(LIS_SCALAR));
        free(L->index[i]);
        free(L->value[i]);
        L->index[i] = NULL;
        L->value[i] = NULL;
        L->nnz[i]   = 0;
        lis_free2(5,w,iw,xnrm,wn,jbuf);
        return LIS_OUT_OF_MEMORY;
      }
    }
    for(j=0;j<len;j++)
    {
      jpos = iw[j];
      L->index[i][j] = jbuf[jpos];
      memcpy(&L->value[i][bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR));
    }
    for(j=0;j<lenl;j++) iw[j] = -1;

    /* Same selection for the upper part; the diagonal slot is excluded. */
    len = _min(lfil,lenu);
    for(j=1;j<lenu;j++)
    {
      jpos = i+j;
      lis_array_nrm2(bs,&w[bs*jpos],&wn[j-1]);
      iw[j-1] = jpos;
    }
    para = lenu - 1;
    lis_sort_di(0,para-1,wn,iw);
    lis_sort_i(0,len-2,iw);
    
    U->nnz[i] = len-1;
    if( len>1 )
    {
      U->index[i] = (LIS_INT *)malloc((len-1)*sizeof(LIS_INT));
      U->value[i] = (LIS_SCALAR *)malloc(bs*(len-1)*sizeof(LIS_SCALAR));
      if( U->index[i]==NULL || U->value[i]==NULL )
      {
        /* Leave row i of U empty rather than half allocated. */
        LIS_SETERR_MEM(bs*(len-1)*sizeof(LIS_SCALAR));
        free(U->index[i]);
        free(U->value[i]);
        U->index[i] = NULL;
        U->value[i] = NULL;
        U->nnz[i]   = 0;
        lis_free2(5,w,iw,xnrm,wn,jbuf);
        return LIS_OUT_OF_MEMORY;
      }
    }
    /* xnrm[i] records the largest block norm among the diagonal and the
       kept upper blocks; later rows use it in the multiplier drop test. */
    lis_array_nrm2(bs,&w[bs*i],&t);
    for(j=0;j<len-1;j++)
    {
      jpos = iw[j];
      U->index[i][j] = jbuf[jpos];
      memcpy(&U->value[i][bs*j],&w[bs*jpos],bs*sizeof(LIS_SCALAR));
      t = _max(t,wn[j]);
    }
    for(j=0;j<lenu-1;j++) iw[j] = -1;

    xnrm[i] = t;

    memcpy(&D->value[bs*i],&w[bs*i],bs*sizeof(LIS_SCALAR));

    /* When n is not a multiple of bnr, put 1.0 on the padding diagonal
       entries of the last block before it is factored. */
    if( i==nr-1 )
    {
      switch(bnr)
      {
      case 2:
        if( n%2!=0 )
        {
          D->value[4*(nr-1)+3] = 1.0;
        }
        break;
      case 3:
        if( n%3==1 )
        {
          D->value[9*(nr-1)+4] = 1.0;
          D->value[9*(nr-1)+8] = 1.0;
        }
        else if( n%3==2 )
        {
          D->value[9*(nr-1)+8] = 1.0;
        }
        break;
      }
    }
    lis_array_LUdecomp(bnr,&D->value[bs*i]);

    /* Reset the column->slot map for the diagonal and upper slots. */
    for(j=0;j<lenu;j++)
    {
      iw[ jbuf[i+j] ] = -1;
    }
  }

  precon->L  = L;
  precon->U  = U;
  precon->WD  = D;

  lis_free2(5,w,iw,xnrm,wn,jbuf);

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}
Beispiel #3
0
/*
 * lis_matrix_copyDLU_csr
 *
 * Copies the split parts of Ain (Ain->D, Ain->L, Ain->U, CSR storage) into
 * three newly created objects returned through D, L and U.
 *
 *   Ain  source matrix; its L/U/D members are dereferenced unconditionally.
 *        NOTE(review): the original code tested Ain->is_splited but took no
 *        action, so callers presumably must split the matrix first - confirm.
 *   D    receives a diagonal object whose value array is a copy of
 *        Ain->D->value[0..n).
 *   L,U  receive matrices duplicated from Ain that own copies of the
 *        lower / upper CSR arrays and have been assembled.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the error code of the helper
 * that failed.  On failure *L and *U are destroyed and reset to NULL, and
 * every raw buffer that was not yet handed to an object is freed.
 *
 * NOTE(review): *D is left allocated (in a consistent state) on failure
 * after it was created; no diagonal destructor is visible from this
 * function.
 */
LIS_INT lis_matrix_copyDLU_csr(LIS_MATRIX Ain, LIS_MATRIX_DIAG *D, LIS_MATRIX *L, LIS_MATRIX *U)
{
	LIS_INT err;
	LIS_INT i,n,np,lnnz,unnz;
	LIS_INT *lptr,*lindex;
	LIS_INT *uptr,*uindex;
	LIS_SCALAR *lvalue,*uvalue,*diag;

	LIS_DEBUG_FUNC_IN;
	
	*D = NULL;
	*L = NULL;
	*U = NULL;

	err = lis_matrix_check(Ain,LIS_MATRIX_CHECK_ALL);
	if( err ) return err;

	n       = Ain->n;
	np      = Ain->np;

	err = lis_matrix_duplicate(Ain,L);
	if( err )
	{
		return err;
	}
	err = lis_matrix_duplicate(Ain,U);
	if( err )
	{
		lis_matrix_destroy(*L);
		*L = NULL;
		return err;
	}
	err = lis_matrix_diag_duplicateM(Ain,D);
	if( err )
	{
		goto fail_objects;
	}

	lnnz     = Ain->L->nnz;
	unnz     = Ain->U->nnz;
	lptr     = NULL;
	lindex   = NULL;
	lvalue   = NULL;
	uptr     = NULL;
	uindex   = NULL;
	uvalue   = NULL;
	diag     = NULL;

	err = lis_matrix_malloc_csr(n,lnnz,&lptr,&lindex,&lvalue);
	if( err )
	{
		/* As before, the output pointers are not freed after this first
		   failed call. */
		goto fail_objects;
	}
	err = lis_matrix_malloc_csr(n,unnz,&uptr,&uindex,&uvalue);
	if( err )
	{
		goto fail_buffers;
	}
	diag = (LIS_SCALAR *)lis_malloc(np*sizeof(LIS_SCALAR),"lis_matrix_copyDLU_csr::diag");
	if( diag==NULL )
	{
		/* err still holds the success code of the previous call here, so
		   it has to be set explicitly. */
		LIS_SETERR_MEM(np*sizeof(LIS_SCALAR));
		err = LIS_OUT_OF_MEMORY;
		goto fail_buffers;
	}

	/* diag has np entries; only the first n are copied. */
	#ifdef _OPENMP
	#pragma omp parallel for private(i)
	#endif
	for(i=0;i<n;i++)
	{
		diag[i] = Ain->D->value[i];
	}
	lis_matrix_elements_copy_csr(n,Ain->L->ptr,Ain->L->index,Ain->L->value,lptr,lindex,lvalue);
	lis_matrix_elements_copy_csr(n,Ain->U->ptr,Ain->U->index,Ain->U->value,uptr,uindex,uvalue);

	err = lis_matrix_set_csr(lnnz,lptr,lindex,lvalue,*L);
	if( err )
	{
		goto fail_buffers;
	}
	/* *L now refers to these arrays; forget them locally so the failure
	   path cannot free them a second time. */
	lptr     = NULL;
	lindex   = NULL;
	lvalue   = NULL;

	err = lis_matrix_set_csr(unnz,uptr,uindex,uvalue,*U);
	if( err )
	{
		goto fail_buffers;
	}
	uptr     = NULL;
	uindex   = NULL;
	uvalue   = NULL;

	/* Swap in the copied diagonal only once nothing can fail before the
	   hand-over, so (*D)->value never dangles. */
	lis_free((*D)->value);
	(*D)->value = diag;
	diag        = NULL;

	err = lis_matrix_assemble(*L);
	if( err )
	{
		goto fail_objects;
	}
	err = lis_matrix_assemble(*U);
	if( err )
	{
		goto fail_objects;
	}
	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;

fail_buffers:
	/* Entries already handed over (or never allocated) are NULL here. */
	lis_free2(7,diag,uptr,lptr,uindex,lindex,uvalue,lvalue);
fail_objects:
	lis_matrix_destroy(*L);
	lis_matrix_destroy(*U);
	*L = NULL;
	*U = NULL;
	return err;
}
/*
 * lis_matrix_split_msr
 *
 * Splits the MSR arrays of A into a strictly lower part (A->L), a strictly
 * upper part (A->U) and the diagonal (A->D).
 *
 * Layout as used by this function:
 *   A->index[0..n]   row pointers into the off-diagonal section,
 *   A->value[0..n)   diagonal entries,
 *   A->index[j] / A->value[j] for j in [A->index[i], A->index[i+1])
 *                    column index and value of the off-diagonals of row i.
 * The L and U arrays use the same layout: their first n+1 index slots are
 * row pointers and the off-diagonal entries start at offset n+1.
 *
 * On success A->L, A->U and A->D are filled in and A->is_splited is set to
 * LIS_TRUE.  Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY, or the error code of
 * the helper that failed.  Temporary buffers owned by this function are
 * released on every failure path.
 *
 * NOTE(review): when a helper fails after lis_matrix_LU_create() succeeded,
 * whatever that call attached to A is left in place; no matching teardown
 * routine is visible from this function.
 */
LIS_INT lis_matrix_split_msr(LIS_MATRIX A)
{
  LIS_INT        i,j,n;
  LIS_INT        lnnz,unnz;
  LIS_INT        lndz,undz;
  LIS_INT        err;
  LIS_INT        *lindex,*uindex;
  LIS_SCALAR    *lvalue,*uvalue;
  #ifdef _OPENMP
    LIS_INT      kl,ku;
    LIS_INT      *liw,*uiw;
  #endif
  LIS_MATRIX_DIAG  D;

  LIS_DEBUG_FUNC_IN;

  n        = A->n;
  lnnz     = 0;
  unnz     = 0;
  lndz     = n;
  undz     = n;
  D        = NULL;
  lindex   = NULL;
  lvalue   = NULL;
  uindex   = NULL;
  uvalue   = NULL;

  #ifdef _OPENMP
    /* liw/uiw become the row-pointer sections of L and U: per-row counts
       are gathered in parallel, then turned into offsets that start at
       n+1 by a sequential prefix sum. */
    liw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split_msr::liw");
    if( liw==NULL )
    {
      LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
      return LIS_OUT_OF_MEMORY;
    }
    uiw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split_msr::uiw");
    if( uiw==NULL )
    {
      LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
      lis_free(liw);
      return LIS_OUT_OF_MEMORY;
    }
    #pragma omp parallel for private(i)
    for(i=0;i<n+1;i++)
    {
      liw[i] = 0;
      uiw[i] = 0;
    }
    /* Each iteration only touches slot i+1, so rows do not conflict. */
    #pragma omp parallel for private(i,j)
    for(i=0;i<n;i++)
    {
      for(j=A->index[i];j<A->index[i+1];j++)
      {
        if( A->index[j]<i )
        {
          liw[i+1]++;
        }
        else if( A->index[j]>i )
        {
          uiw[i+1]++;
        }
      }
    }
    liw[0] = n+1;
    uiw[0] = n+1;
    for(i=0;i<n;i++)
    {
      liw[i+1] += liw[i];
      uiw[i+1] += uiw[i];
    }
    /* These totals include the n+1 offset; it is subtracted again when
       the final counts are stored. */
    lnnz = liw[n];
    unnz = uiw[n];
  #else
    /* Sequential build: a first pass only counts the entries. */
    for(i=0;i<n;i++)
    {
      for(j=A->index[i];j<A->index[i+1];j++)
      {
        if( A->index[j]<i )
        {
          lnnz++;
        }
        else if( A->index[j]>i )
        {
          unnz++;
        }
      }
    }
  #endif

  err = lis_matrix_LU_create(A);
  if( err )
  {
    #ifdef _OPENMP
      lis_free2(2,liw,uiw);
    #endif
    return err;
  }
  err = lis_matrix_malloc_msr(n,lnnz,lndz,&lindex,&lvalue);
  if( err )
  {
    #ifdef _OPENMP
      lis_free2(2,liw,uiw);
    #endif
    return err;
  }
  err = lis_matrix_malloc_msr(n,unnz,undz,&uindex,&uvalue);
  if( err )
  {
    lis_free2(4,lindex,lvalue,uindex,uvalue);
    #ifdef _OPENMP
      lis_free2(2,liw,uiw);
    #endif
    return err;
  }
  err = lis_matrix_diag_duplicateM(A,&D);
  if( err )
  {
    lis_free2(4,lindex,lvalue,uindex,uvalue);
    #ifdef _OPENMP
      lis_free2(2,liw,uiw);
    #endif
    return err;
  }

  #ifdef _OPENMP
    #pragma omp parallel for private(i)
    for(i=0;i<n+1;i++)
    {
      lindex[i] = liw[i];
      uindex[i] = uiw[i];
    }
    /* Row i writes only into its own [index[i],index[i+1]) range, so the
       rows can be scattered independently. */
    #pragma omp parallel for private(i,j,kl,ku)
    for(i=0;i<n;i++)
    {
      kl = lindex[i];
      ku = uindex[i];
      D->value[i] = A->value[i];
      for(j=A->index[i];j<A->index[i+1];j++)
      {
        if( A->index[j]<i )
        {
          lindex[kl]   = A->index[j];
          lvalue[kl]   = A->value[j];
          kl++;
        }
        else if( A->index[j]>i )
        {
          uindex[ku]   = A->index[j];
          uvalue[ku]   = A->value[j];
          ku++;
        }
      }
    }
    lis_free2(2,liw,uiw);
  #else
    /* Second pass: lnnz/unnz are reused as write cursors that start just
       behind the n+1 row-pointer slots. */
    lnnz = n+1;
    unnz = n+1;
    lindex[0] = n+1;
    uindex[0] = n+1;
    for(i=0;i<n;i++)
    {
      D->value[i] = A->value[i];
      for(j=A->index[i];j<A->index[i+1];j++)
      {
        if( A->index[j]<i )
        {
          lindex[lnnz]   = A->index[j];
          lvalue[lnnz]   = A->value[j];
          lnnz++;
        }
        else if( A->index[j]>i )
        {
          uindex[unnz]   = A->index[j];
          uvalue[unnz]   = A->value[j];
          unnz++;
        }
      }
      lindex[i+1] = lnnz;
      uindex[i+1] = unnz;
    }
  #endif
  /* In both builds lnnz/unnz are end offsets including the n+1 header. */
  A->L->nnz     = lnnz - (n+1);
  A->L->ndz     = lndz;
  A->L->index   = lindex;
  A->L->value   = lvalue;
  A->U->nnz     = unnz - (n+1);
  A->U->ndz     = undz;
  A->U->index   = uindex;
  A->U->value   = uvalue;
  A->D          = D;
  A->is_splited = LIS_TRUE;

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}