void lis_matrix_malloc_msr_f(LIS_INT *n, LIS_INT *nnz, LIS_INT *ndz, LIS_INT *index, LIS_SCALAR *value, LIS_INT *ierr)
{
	LIS_DEBUG_FUNC_IN;

	*ierr = lis_matrix_malloc_msr(*n, *nnz, *ndz, &index, &value);

	LIS_DEBUG_FUNC_OUT;
	return;
}
LIS_INT lis_matrix_convert_csr2msr(LIS_MATRIX Ain, LIS_MATRIX Aout)
{
  LIS_INT      i,j,k,jj;
  LIS_INT      err;
  LIS_INT      n,nnz,ndz;
  LIS_INT      count;
  LIS_INT      *iw;
  LIS_INT      *index;
  LIS_SCALAR  *value;

  LIS_DEBUG_FUNC_IN;

  n       = Ain->n;
  nnz    = Ain->nnz;

  iw      = NULL;
  index   = NULL;
  value   = NULL;

  iw = (LIS_INT *)lis_malloc( (n+1)*sizeof(LIS_INT),"lis_matrix_convert_csr2msr::iw" );
  if( iw==NULL )
  {
    LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
    return LIS_ERR_OUT_OF_MEMORY;
  }

  /* check ndz */
  for(i=0;i<n+1;i++) iw[i] = 0;
  count = 0;
  #ifdef _OPENMP
  #pragma omp parallel private(i,j)
  #endif
  {
    #ifdef _OPENMP
    #pragma omp for
    #endif
    for(i=0;i<n;i++)
    {
      iw[i+1] = 0;
      for(j=Ain->ptr[i];j<Ain->ptr[i+1];j++)
      {
        if( i==Ain->index[j] )
        {
          iw[i+1] = 1;
        }
      }
    }
    #ifdef _OPENMP
    #pragma omp for reduction(+:count)
    #endif
    for(i=0;i<n;i++)
    {
      count += iw[i+1];
    }
    #ifdef _OPENMP
    #pragma omp for
    #endif
    for(i=0;i<n;i++)
    {
      iw[i+1] = Ain->ptr[i+1]-Ain->ptr[i]-iw[i+1];
    }
  }
  ndz = n - count;

  err = lis_matrix_malloc_msr(n,nnz,ndz,&index,&value);
  if( err )
  {
    lis_free2(3,index,value,iw);
    return err;
  }

  /* convert msr */
  iw[0] = n+1;
  for(i=0;i<n;i++)
  {
    iw[i+1] = iw[i+1] + iw[i];
  }
  #ifdef _OPENMP
  #pragma omp parallel private(i,j,k)
  #endif
  {
    #ifdef _OPENMP
    #pragma omp for
    #endif
    for(i=0;i<n+1;i++)
    {
      index[i] = iw[i];
    }
    #ifdef _OPENMP
    #pragma omp for
    #endif
    for(i=0;i<n;i++)
    {
      k = index[i];
      for(j=Ain->ptr[i];j<Ain->ptr[i+1];j++)
      {
        jj = Ain->index[j];
        if( jj==i )
        {
          value[i]   = Ain->value[j];
        }
        else
        {
          value[k]   = Ain->value[j];
          index[k]   = Ain->index[j];
          k++;
        }
      }
    }
  }

  err = lis_matrix_set_msr(nnz,ndz,index,value,Aout);
  if( err )
  {
    lis_free2(3,index,value,iw);
    return err;
  }
  err = lis_matrix_assemble(Aout);
  if( err )
  {
    lis_free(iw);
    lis_matrix_storage_destroy(Aout);
    return err;
  }

  lis_free(iw);
  LIS_DEBUG_FUNC_OUT;

  return LIS_SUCCESS;
}
LIS_INT lis_matrix_merge_msr(LIS_MATRIX A)
{
  LIS_INT        i,j,n,is;
  LIS_INT        nnz,ndz;
  LIS_INT        err;
  LIS_INT        *index;
  LIS_SCALAR    *value;

  LIS_DEBUG_FUNC_IN;


  n       = A->n;
  nnz     = 0;
  ndz     = 0;
  is      = A->is;
  index   = NULL;
  value   = NULL;
  nnz     = A->L->nnz + A->U->nnz + n;

  err = lis_matrix_malloc_msr(n,nnz,ndz,&index,&value);
  if( err )
  {
    return err;
  }

  nnz      = n+1;
  index[0] = n+1;
  if( A->matrix_type==LIS_MATRIX_MSR )
  {
    for(i=0;i<n;i++)
    {
      value[i]   = A->D->value[i];
      for(j=A->L->index[i];j<A->L->index[i+1];j++)
      {
        index[nnz]   = A->L->index[j];
        value[nnz]   = A->L->value[j];
        nnz++;
      }
      for(j=A->U->index[i];j<A->U->index[i+1];j++)
      {
        index[nnz]   = A->U->index[j];
        value[nnz]   = A->U->value[j];
        nnz++;
      }
      index[i+1] = nnz;
    }
  }
  else
  {
    for(i=0;i<n;i++)
    {
      value[i]   = A->D->value[i];
      for(j=A->U->index[i];j<A->U->index[i+1];j++)
      {
        index[nnz]   = A->U->index[j];
        value[nnz]   = A->U->value[j];
        nnz++;
      }
      for(j=A->L->index[i];j<A->L->index[i+1];j++)
      {
        index[nnz]   = A->L->index[j];
        value[nnz]   = A->L->value[j];
        nnz++;
      }
      index[i+1] = nnz;
    }
  }

  A->nnz        = nnz;
  A->ndz        = ndz;
  A->value      = value;
  A->index      = index;

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}
LIS_INT lis_matrix_split_msr(LIS_MATRIX A)
{
  LIS_INT        i,j,n;
  LIS_INT        lnnz,unnz;
  LIS_INT        lndz,undz;
  LIS_INT        err;
  LIS_INT        *lindex,*uindex;
  LIS_SCALAR    *lvalue,*uvalue;
  #ifdef _OPENMP
    LIS_INT      kl,ku;
    LIS_INT      *liw,*uiw;
  #endif
  LIS_MATRIX_DIAG  D;

  LIS_DEBUG_FUNC_IN;

  n        = A->n;
  lnnz     = 0;
  unnz     = 0;
  lndz     = n;
  undz     = n;
  D        = NULL;
  lindex   = NULL;
  lvalue   = NULL;
  uindex   = NULL;
  uvalue   = NULL;

  #ifdef _OPENMP
    liw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split_msr::liw");
    if( liw==NULL )
    {
      LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
      return LIS_OUT_OF_MEMORY;
    }
    uiw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split_msr::uiw");
    if( uiw==NULL )
    {
      LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
      lis_free(liw);
      return LIS_OUT_OF_MEMORY;
    }
    #pragma omp parallel for private(i)
    for(i=0;i<n+1;i++)
    {
      liw[i] = 0;
      uiw[i] = 0;
    }
    #pragma omp parallel for private(i,j)
    for(i=0;i<n;i++)
    {
      for(j=A->index[i];j<A->index[i+1];j++)
      {
        if( A->index[j]<i )
        {
          liw[i+1]++;
        }
        else if( A->index[j]>i )
        {
          uiw[i+1]++;
        }
      }
    }
    liw[0] = n+1;
    uiw[0] = n+1;
    for(i=0;i<n;i++)
    {
      liw[i+1] += liw[i];
      uiw[i+1] += uiw[i];
    }
    lnnz = liw[n];
    unnz = uiw[n];
  #else
    for(i=0;i<n;i++)
    {
      for(j=A->index[i];j<A->index[i+1];j++)
      {
        if( A->index[j]<i )
        {
          lnnz++;
        }
        else if( A->index[j]>i )
        {
          unnz++;
        }
      }
    }
  #endif

  err = lis_matrix_LU_create(A);
  if( err )
  {
    return err;
  }
  err = lis_matrix_malloc_msr(n,lnnz,lndz,&lindex,&lvalue);
  if( err )
  {
    return err;
  }
  err = lis_matrix_malloc_msr(n,unnz,undz,&uindex,&uvalue);
  if( err )
  {
    lis_free2(4,lindex,lvalue,uindex,uvalue);
    return err;
  }
  err = lis_matrix_diag_duplicateM(A,&D);
  if( err )
  {
    lis_free2(4,lindex,lvalue,uindex,uvalue);
    return err;
  }

  #ifdef _OPENMP
    #pragma omp parallel for private(i)
    for(i=0;i<n+1;i++)
    {
      lindex[i] = liw[i];
      uindex[i] = uiw[i];
    }
    #pragma omp parallel for private(i,j,kl,ku)
    for(i=0;i<n;i++)
    {
      kl = lindex[i];
      ku = uindex[i];
      D->value[i] = A->value[i];
      for(j=A->index[i];j<A->index[i+1];j++)
      {
        if( A->index[j]<i )
        {
          lindex[kl]   = A->index[j];
          lvalue[kl]   = A->value[j];
          kl++;
        }
        else if( A->index[j]>i )
        {
          uindex[ku]   = A->index[j];
          uvalue[ku]   = A->value[j];
          ku++;
        }
      }
    }
    lis_free2(2,liw,uiw);
  #else
    lnnz = n+1;
    unnz = n+1;
    lindex[0] = n+1;
    uindex[0] = n+1;
    for(i=0;i<n;i++)
    {
      D->value[i] = A->value[i];
      for(j=A->index[i];j<A->index[i+1];j++)
      {
        if( A->index[j]<i )
        {
          lindex[lnnz]   = A->index[j];
          lvalue[lnnz]   = A->value[j];
          lnnz++;
        }
        else if( A->index[j]>i )
        {
          uindex[unnz]   = A->index[j];
          uvalue[unnz]   = A->value[j];
          unnz++;
        }
      }
      lindex[i+1] = lnnz;
      uindex[i+1] = unnz;
    }
  #endif
  A->L->nnz     = lnnz - (n+1);
  A->L->ndz     = lndz;
  A->L->index   = lindex;
  A->L->value   = lvalue;
  A->U->nnz     = unnz - (n+1);
  A->U->ndz     = undz;
  A->U->index   = uindex;
  A->U->value   = uvalue;
  A->D          = D;
  A->is_splited = LIS_TRUE;

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}
LIS_INT lis_matrix_copy_msr(LIS_MATRIX Ain, LIS_MATRIX Aout)
{
  LIS_INT      err;
  LIS_INT      i,n,nnz,ndz,lnnz,unnz,lndz,undz;
  LIS_INT      *index;
  LIS_INT      *lindex;
  LIS_INT      *uindex;
  LIS_SCALAR  *value,*lvalue,*uvalue,*diag;

  LIS_DEBUG_FUNC_IN;

  n       = Ain->n;

  if( Ain->is_splited )
  {
    lnnz     = Ain->L->nnz;
    unnz     = Ain->U->nnz;
    lndz     = Ain->L->ndz;
    undz     = Ain->U->ndz;
    lindex   = NULL;
    uindex   = NULL;
    diag     = NULL;

    err = lis_matrix_malloc_msr(n,lnnz,lndz,&lindex,&lvalue);
    if( err )
    {
      return err;
    }
    err = lis_matrix_malloc_msr(n,unnz,undz,&uindex,&uvalue);
    if( err )
    {
      lis_free2(5,diag,uindex,lindex,uvalue,lvalue);
      return err;
    }
    diag = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_matrix_copy_msr::diag");
    if( diag==NULL )
    {
      lis_free2(5,diag,uindex,lindex,uvalue,lvalue);
      return err;
    }

    #ifdef _OPENMP
    #pragma omp parallel for private(i)
    #endif
    for(i=0;i<n;i++)
    {
      diag[i] = Ain->D->value[i];
    }
    lis_matrix_elements_copy_msr(n,Ain->L->index,Ain->L->value,lindex,lvalue);
    lis_matrix_elements_copy_msr(n,Ain->U->index,Ain->U->value,uindex,uvalue);

    err = lis_matrix_setDLU_msr(lnnz,unnz,lndz,undz,diag,lindex,lvalue,uindex,uvalue,Aout);
    if( err )
    {
      lis_free2(5,diag,uindex,lindex,uvalue,lvalue);
      return err;
    }
  }
  if( !Ain->is_splited || (Ain->is_splited && Ain->is_save) )
  {
    index   = NULL;
    value   = NULL;
    nnz     = Ain->nnz;
    ndz     = Ain->ndz;
    err = lis_matrix_malloc_msr(n,nnz,ndz,&index,&value);
    if( err )
    {
      return err;
    }

    lis_matrix_elements_copy_msr(n,Ain->index,Ain->value,index,value);

    err = lis_matrix_set_msr(nnz,ndz,index,value,Aout);
    if( err )
    {
      lis_free2(2,index,value);
      return err;
    }
  }

  err = lis_matrix_assemble(Aout);
  if( err )
  {
    lis_matrix_storage_destroy(Aout);
    return err;
  }
  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}