AmesosBTF_CrsMatrix::NewTypeRef
AmesosBTF_CrsMatrix::
operator()( OriginalTypeRef orig )
{
  origObj_ = &orig;
  const Epetra_BlockMap & OldRowMap = orig.RowMap();
  const Epetra_BlockMap & OldColMap = orig.ColMap();
  
  // Check if the matrix is on one processor.
  int myMatProc = -1, matProc = -1;
  int myPID = orig.Comm().MyPID();
  for (int proc=0; proc<orig.Comm().NumProc(); proc++) 
  {
    if (orig.NumGlobalNonzeros() == orig.NumMyNonzeros())
      myMatProc = myPID;
  }
  orig.Comm().MaxAll( &myMatProc, &matProc, 1 );
  
  if( orig.RowMap().DistributedGlobal() && matProc == -1)
    { cout << "FAIL for Global!\n"; abort(); }
  if( orig.IndicesAreGlobal() && matProc == -1)
    { cout << "FAIL for Global Indices!\n"; abort(); }
 
  int nGlobal = orig.NumGlobalRows(); 
  int n = orig.NumMyRows();
  int nnz = orig.NumMyNonzeros();
  
  if( debug_ )
  {
    cout << "Orig Matrix:\n";
    cout << orig << endl;
  }

  // Create std CRS format (without elements above the threshold)
  vector<int> ia(n+1,0);
  int maxEntries = orig.MaxNumEntries();
  vector<int> ja(nnz), ja_tmp(nnz);
  vector<double> jva_tmp(maxEntries);
  int cnt;

  Epetra_CrsGraph strippedGraph( Copy, OldRowMap, OldColMap, 0 );

  for( int i = 0; i < n; ++i )
  {
    orig.ExtractMyRowCopy( i, maxEntries, cnt, &jva_tmp[0], &ja_tmp[0] );
    ia[i+1] = ia[i];
    for( int j = 0; j < cnt; ++j )
      if( fabs(jva_tmp[j]) > threshold_ )
        ja[ ia[i+1]++ ] = ja_tmp[j];

    int new_cnt = ia[i+1] - ia[i];
    strippedGraph.InsertMyIndices( i, new_cnt, &ja[ ia[i] ] );
  }
  nnz = ia[n];
  strippedGraph.FillComplete();
  
  if( debug_ )
  {
    cout << "Stripped Graph\n";
    cout << strippedGraph;
  }

  // Compute the BTF permutation only on the processor that has the graph.
  if ( matProc == myPID ) {
    
    if( debug_ )
      {
	cout << "-----------------------------------------\n";
	cout << "CRS Format Graph (stripped) \n";
	cout << "-----------------------------------------\n";
	for( int i = 0; i < n; ++i )
	  {
	    cout << ia[i] << " - " << ia[i+1] << " : ";
	    for( int j = ia[i]; j<ia[i+1]; ++j )
	      cout << " " << ja[j];
	    cout << endl;
	  }
	cout << "-----------------------------------------\n";
      }
  
    // Transpose the graph, not the values
    int j=0, next=0;
    vector<int> ia_tmp(n+1,0);

    // Compute row lengths
    for (int i = 0; i < n; i++)
        for (int k = ia[i]; k < ia[i+1]; k++)
            ++ia_tmp[ ja[k]+1 ];

    // Compute pointers from row lengths
    ia_tmp[0] = 0;
    for (int i = 0; i < n; i++)
        ia_tmp[i+1] += ia_tmp[i];

    // Copy over indices
    for (int i = 0; i < n; i++) {
        for (int k = ia[i]; k < ia[i+1]; k++) {
            j = ja[k];
            next = ia_tmp[j];
            ja_tmp[next] = i;
            ia_tmp[j] = next + 1;
        }
    }

    // Reshift ia_tmp
    for (int i=n-1; i >= 0; i--) ia_tmp[i+1] = ia_tmp[i];
    ia_tmp[0] = 0;

    // Transformation information
    int numMatch = 0;       // number of nonzeros on diagonal after permutation.
    double maxWork =  0.0;  // no limit on how much work to perform in max-trans.
    double workPerf = 0.0;  // how much work was performed in max-trans.
    
    // Create a work vector for the BTF code.
    vector<int> work(5*n);
    
    // Storage for the row and column permutations.
    vector<int> rowperm(n);
    vector<int> colperm(n);
    vector<int> blockptr(n+1);
    
    // NOTE:  The permutations are sent in backwards since the matrix is transposed.
    // On output, rowperm and colperm are the row and column permutations of A, where 
    // i = BTF_UNFLIP(rowperm[k]) if row i of A is the kth row of P*A*Q, and j = colperm[k] 
    // if column j of A is the kth column of P*A*Q.  If rowperm[k] < 0, then the 
    // (k,k)th entry in P*A*Q is structurally zero.
    
    numBlocks_ = amesos_btf_order( n, &ia_tmp[0], &ja_tmp[0], maxWork, &workPerf,
			    &rowperm[0], &colperm[0], &blockptr[0], 
			    &numMatch, &work[0] );
    
    // Reverse ordering of permutation to get upper triangular form, if necessary.
    rowPerm_.resize( n );
    colPerm_.resize( n ); 
    blockptr.resize( numBlocks_+1 );
    blockPtr_.resize( numBlocks_+1 );
    if (!upperTri_) {
      for( int i = 0; i < n; ++i )
	{
	  rowPerm_[i] = BTF_UNFLIP(rowperm[(n-1)-i]);
	  colPerm_[i] = colperm[(n-1)-i];
	}
      for( int i = 0; i < numBlocks_+1; ++i ) 
	{
	  blockPtr_[i] = n - blockptr[numBlocks_-i];
	}
    }
    else {
      colPerm_ = colperm;
      blockPtr_ = blockptr;
      for( int i = 0; i < n; ++i )
	{
	  rowPerm_[i] = BTF_UNFLIP(rowperm[i]);
	}
    }
    
    if( debug_ ) {
      cout << "-----------------------------------------\n";
      cout << "BTF Output (n = " << n << ")\n";
      cout << "-----------------------------------------\n";
      cout << "Num Blocks: " << numBlocks_ << endl;
      cout << "Num NNZ Diags: " << numMatch << endl;
      cout << "RowPerm and ColPerm \n";
      for( int i = 0; i<n; ++i )
	cout << rowPerm_[i] << "\t" << colPerm_[i] << endl;
      cout << "-----------------------------------------\n";
    }  
  }

  // Broadcast the BTF permutation information to all processors.
  rowPerm_.resize( nGlobal );
  colPerm_.resize( nGlobal );

  orig.Comm().Broadcast(&rowPerm_[0], nGlobal, matProc);
  orig.Comm().Broadcast(&colPerm_[0], nGlobal, matProc);
  orig.Comm().Broadcast(&numBlocks_, 1, matProc);

  blockPtr_.resize( numBlocks_+1 );
  orig.Comm().Broadcast(&blockPtr_[0], numBlocks_+1, matProc);
  
  //Generate New Domain and Range Maps
  //for now, assume they start out as identical
  vector<int> myElements( n );
  OldRowMap.MyGlobalElements( &myElements[0] );
  
  vector<int> newDomainElements( n );
  vector<int> newRangeElements( n );
  for( int i = 0; i < n; ++i )
  {
    newRangeElements[ i ] = myElements[ rowPerm_[i] ];
    newDomainElements[ i ] = myElements[ colPerm_[i] ];
  }

  NewRowMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, n, &newRangeElements[0], OldRowMap.IndexBase(), OldRowMap.Comm() ) );
  NewColMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, n, &newDomainElements[0], OldColMap.IndexBase(), OldColMap.Comm() ) );

  if( debug_ )
  {
    cout << "New Row Map\n";
    cout << *NewRowMap_ << endl;
    cout << "New Col Map\n";
    cout << *NewColMap_ << endl;
  }

  //Generate New Graph
  NewGraph_ = Teuchos::rcp( new Epetra_CrsGraph( Copy, *NewRowMap_, *NewColMap_, 0 ) );
  Importer_ = Teuchos::rcp( new Epetra_Import( *NewRowMap_, OldRowMap ) );
  NewGraph_->Import( strippedGraph, *Importer_, Insert );
  NewGraph_->FillComplete();

  if( debug_ )
  {
    cout << "NewGraph\n";
    cout << *NewGraph_;
  }

  NewMatrix_ = Teuchos::rcp( new Epetra_CrsMatrix( Copy, *NewGraph_ ) );
  NewMatrix_->Import( orig, *Importer_, Insert );
  NewMatrix_->FillComplete();

  if( debug_ )
  {
    cout << "New CrsMatrix\n";
    cout << *NewMatrix_ << endl;
  }

  newObj_ = &*NewMatrix_;

  return *NewMatrix_;
}
Exemple #2
0
static KLU_symbolic *order_and_analyze  /* returns NULL if error, or a valid
                                           KLU_symbolic object if successful */
(
    /* inputs, not modified */
    Int n,              /* A is n-by-n */
    Int Ap [ ],         /* size n+1, column pointers */
    Int Ai [ ],         /* size nz, row indices */
    /* --------------------- */
    KLU_common *Common
)
{
    double work ;
    KLU_symbolic *Symbolic ;
    double *Lnz ;
    Int *Qbtf, *Cp, *Ci, *Pinv, *Pblk, *Pbtf, *P, *Q, *R ;
    Int nblocks, nz, block, maxblock, k1, k2, nk, do_btf, ordering, k, Cilen,
        *Work ;

    /* ---------------------------------------------------------------------- */
    /* allocate the Symbolic object, and check input matrix */
    /* ---------------------------------------------------------------------- */

    Symbolic = KLU_alloc_symbolic (n, Ap, Ai, Common) ;
    if (Symbolic == NULL)
    {
        return (NULL) ;
    }
    P = Symbolic->P ;
    Q = Symbolic->Q ;
    R = Symbolic->R ;
    Lnz = Symbolic->Lnz ;
    nz = Symbolic->nz ;

    ordering = Common->ordering ;
    if (ordering == 1)
    {
        /* COLAMD */
        Cilen = COLAMD_recommended (nz, n, n) ;
    }
    else if (ordering == 0 || (ordering == 3 && Common->user_order != NULL))
    {
        /* AMD or user ordering function */
        Cilen = nz+1 ;
    }
    else
    {
        /* invalid ordering */
        Common->status = KLU_INVALID ;
        KLU_free_symbolic (&Symbolic, Common) ;
        return (NULL) ;
    }

    /* AMD memory management routines */
    amd_malloc  = Common->malloc_memory ;
    amd_free    = Common->free_memory ;
    amd_calloc  = Common->calloc_memory ;
    amd_realloc = Common->realloc_memory ;

    /* ---------------------------------------------------------------------- */
    /* allocate workspace for BTF permutation */
    /* ---------------------------------------------------------------------- */

    Pbtf = KLU_malloc (n, sizeof (Int), Common) ;
    Qbtf = KLU_malloc (n, sizeof (Int), Common) ;
    if (Common->status < KLU_OK)
    {
        KLU_free (Pbtf, n, sizeof (Int), Common) ;
        KLU_free (Qbtf, n, sizeof (Int), Common) ;
        KLU_free_symbolic (&Symbolic, Common) ;
        return (NULL) ;
    }

    /* ---------------------------------------------------------------------- */
    /* get the common parameters for BTF and ordering method */
    /* ---------------------------------------------------------------------- */

    do_btf = Common->btf ;
    do_btf = (do_btf) ? TRUE : FALSE ;
    Symbolic->ordering = ordering ;
    Symbolic->do_btf = do_btf ;
    Symbolic->structural_rank = EMPTY ;

    /* ---------------------------------------------------------------------- */
    /* find the block triangular form (if requested) */
    /* ---------------------------------------------------------------------- */

    Common->work = 0 ;

    if (do_btf)
    {
        Work = KLU_malloc (5*n, sizeof (Int), Common) ;
        if (Common->status < KLU_OK)
        {
            /* out of memory */
            KLU_free (Pbtf, n, sizeof (Int), Common) ;
            KLU_free (Qbtf, n, sizeof (Int), Common) ;
            KLU_free_symbolic (&Symbolic, Common) ;
            return (NULL) ;
        }

        nblocks = BTF_order (n, Ap, Ai, Common->maxwork, &work, Pbtf, Qbtf, R,
                &(Symbolic->structural_rank), Work) ;
        Common->structural_rank = Symbolic->structural_rank ;
        Common->work += work ;

        KLU_free (Work, 5*n, sizeof (Int), Common) ;

        /* unflip Qbtf if the matrix does not have full structural rank */
        if (Symbolic->structural_rank < n)
        {
            for (k = 0 ; k < n ; k++)
            {
                Qbtf [k] = BTF_UNFLIP (Qbtf [k]) ;
            }
        }

        /* find the size of the largest block */
        maxblock = 1 ;
        for (block = 0 ; block < nblocks ; block++)
        {
            k1 = R [block] ;
            k2 = R [block+1] ;
            nk = k2 - k1 ;
            PRINTF (("block %d size %d\n", block, nk)) ;
            maxblock = MAX (maxblock, nk) ;
        }
    }
    else
    {
        /* BTF not requested */
        nblocks = 1 ;
        maxblock = n ;
        R [0] = 0 ;
        R [1] = n ;
        for (k = 0 ; k < n ; k++)
        {
            Pbtf [k] = k ;
            Qbtf [k] = k ;
        }
    }

    Symbolic->nblocks = nblocks ;

    PRINTF (("maxblock size %d\n", maxblock)) ;
    Symbolic->maxblock = maxblock ;

    /* ---------------------------------------------------------------------- */
    /* allocate more workspace, for analyze_worker */
    /* ---------------------------------------------------------------------- */

    Pblk = KLU_malloc (maxblock, sizeof (Int), Common) ;
    Cp   = KLU_malloc (maxblock + 1, sizeof (Int), Common) ;
    Ci   = KLU_malloc (MAX (Cilen, nz+1), sizeof (Int), Common) ;
    Pinv = KLU_malloc (n, sizeof (Int), Common) ;

    /* ---------------------------------------------------------------------- */
    /* order each block of the BTF ordering, and a fill-reducing ordering */
    /* ---------------------------------------------------------------------- */

    if (Common->status == KLU_OK)
    {
        PRINTF (("calling analyze_worker\n")) ;
        Common->status = analyze_worker (n, Ap, Ai, nblocks, Pbtf, Qbtf, R,
            ordering, P, Q, Lnz, Pblk, Cp, Ci, Cilen, Pinv, Symbolic, Common) ;
        PRINTF (("analyze_worker done\n")) ;
    }

    /* ---------------------------------------------------------------------- */
    /* free all workspace */
    /* ---------------------------------------------------------------------- */

    KLU_free (Pblk, maxblock, sizeof (Int), Common) ;
    KLU_free (Cp, maxblock+1, sizeof (Int), Common) ;
    KLU_free (Ci, MAX (Cilen, nz+1), sizeof (Int), Common) ;
    KLU_free (Pinv, n, sizeof (Int), Common) ;
    KLU_free (Pbtf, n, sizeof (Int), Common) ;
    KLU_free (Qbtf, n, sizeof (Int), Common) ;

    /* ---------------------------------------------------------------------- */
    /* return the symbolic object */
    /* ---------------------------------------------------------------------- */

    if (Common->status < KLU_OK)
    {
        KLU_free_symbolic (&Symbolic, Common) ;
    }
    return (Symbolic) ;
}
Exemple #3
0
void mexFunction
(
    int nargout,
    mxArray *pargout[],
    int nargin,
    const mxArray *pargin[]
)
{
    Long b, n, i, k, j, *Ap, *Ai, *P, *R, nblocks, *Work, *Q, jj ;
    double *Px, *Rx, *Qx ;

    /* ---------------------------------------------------------------------- */
    /* get inputs and allocate workspace */
    /* ---------------------------------------------------------------------- */

    if (!((nargin == 1 && nargout <= 2) || (nargin == 2 && nargout <= 3)))
    {
        mexErrMsgTxt ("Usage: [p,r] = strongcomp (A)"
                      " or [p,q,r] = strongcomp (A,qin)") ;
    }
    n = mxGetM (pargin [0]) ;
    if (!mxIsSparse (pargin [0]) || n != mxGetN (pargin [0]))
    {
        mexErrMsgTxt ("strongcomp: A must be sparse, square, and non-empty") ;
    }

    /* get sparse matrix A */
    Ap = (Long *) mxGetJc (pargin [0]) ;
    Ai = (Long *) mxGetIr (pargin [0]) ;

    /* get output arrays */
    P = mxMalloc (n * sizeof (Long)) ;
    R = mxMalloc ((n+1) * sizeof (Long)) ;

    /* get workspace of size 4n (recursive code only needs 2n) */
    Work = mxMalloc (4*n * sizeof (Long)) ;

    /* get the input column permutation Q */
    if (nargin == 2)
    {
        if (mxGetNumberOfElements (pargin [1]) != n)
        {
            mexErrMsgTxt
                ("strongcomp: qin must be a permutation vector of size n") ;
        }
        Qx = mxGetPr (pargin [1]) ;
        Q = mxMalloc (n * sizeof (Long)) ;
        /* connvert Qin to 0-based and check validity */
        for (i = 0 ; i < n ; i++)
        {
            Work [i] = 0 ;
        }
        for (k = 0 ; k < n ; k++)
        {
            j = Qx [k] - 1 ;    /* convert to 0-based */
            jj = BTF_UNFLIP (j) ;
            if (jj < 0 || jj >= n || Work [jj] == 1)
            {
                mexErrMsgTxt
                    ("strongcomp: qin must be a permutation vector of size n") ;
            }
            Work [jj] = 1 ;
            Q [k] = j ;
        }
    }
    else
    {
        /* no input column permutation */
        Q = (Long *) NULL ;
    }

    /* ---------------------------------------------------------------------- */
    /* find the strongly-connected components of A */
    /* ---------------------------------------------------------------------- */

    nblocks = btf_l_strongcomp (n, Ap, Ai, Q, P, R, Work) ;

    /* ---------------------------------------------------------------------- */
    /* create outputs and free workspace */
    /* ---------------------------------------------------------------------- */

    /* create P */
    pargout [0] = mxCreateDoubleMatrix (1, n, mxREAL) ;
    Px = mxGetPr (pargout [0]) ;
    for (k = 0 ; k < n ; k++)
    {
        Px [k] = P [k] + 1 ;            /* convert to 1-based */
    }

    /* create Q */
    if (nargin == 2 && nargout > 1)
    {
        pargout [1] = mxCreateDoubleMatrix (1, n, mxREAL) ;
        Qx = mxGetPr (pargout [1]) ;
        for (k = 0 ; k < n ; k++)
        {
            Qx [k] = Q [k] + 1 ;        /* convert to 1-based */
        }
    }

    /* create R */
    if (nargout == nargin + 1)
    {
        pargout [nargin] = mxCreateDoubleMatrix (1, nblocks+1, mxREAL) ;
        Rx = mxGetPr (pargout [nargin]) ;
        for (b = 0 ; b <= nblocks ; b++)
        {
            Rx [b] = R [b] + 1 ;                /* convert to 1-based */
        }
    }

    mxFree (P) ;
    mxFree (R) ;
    mxFree (Work) ;
    if (nargin == 2)
    {
        mxFree (Q) ;
    }
}