C++ (Cpp) GB_NNZ Exemples

Exemple #1

0

Afficher le fichier

int64_t GB_nvec_nonempty        // return # of non-empty vectors
(
    const GrB_Matrix A          // input matrix to examine
)
{

    // Counts the vectors of A that hold at least one entry.  The count is
    // returned to the caller; A itself is only read.

    ASSERT (A != NULL) ;

    //--------------------------------------------------------------------------
    // quick exit: a matrix with no entries has no non-empty vectors
    //--------------------------------------------------------------------------

    if (GB_NNZ (A) == 0)
    { 
        return (0) ;
    }

    //--------------------------------------------------------------------------
    // walk the vectors of A and tally those with at least one entry
    //--------------------------------------------------------------------------

    int64_t nonempty = 0 ;

    GB_for_each_vector (A)
    { 
        // entries of the current vector j lie in positions p ... pend-1
        int64_t GBI1_initj (Iter, j, p, pend) ;
        if (pend > p)
        { 
            nonempty++ ;
        }
    }

    // the tally can never exceed the number of vectors in A
    ASSERT (nonempty >= 0 && nonempty <= A->vdim) ;

    return (nonempty) ;
}

Exemple #2

0

Afficher le fichier

//------------------------------------------------------------------------------
// GB_wait: finish all pending computations on a single matrix
//------------------------------------------------------------------------------

// Steps performed, in order:
//
//  (1) If A has zombies, delete them in place (no memory is allocated).
//  (2) If A has no pending tuples: remove A from the queue, trim its space
//      with GB_ix_resize, and return the result of GB_to_hyper_conform.
//  (3) Otherwise build a matrix T from the pending tuples with GB_builder,
//      free the pending tuples, and remove A from the queue.
//  (4) If A has no entries, transplant T into A (GB_transplant_conform).
//  (5) Otherwise grow A as needed and merge T into A in place, working from
//      the last vector to the first, then return GB_to_hyper_conform.
//
// Returns the GrB_Info of the final conform/transplant step on success, or
// the failing GrB_Info from GB_builder, GB_ix_resize, GB_to_nonhyper, or
// GB_hyper_realloc.  T is freed on every path that returns after it has been
// successfully built.

GrB_Info GB_wait                // finish all pending computations
(
    GrB_Matrix A,               // matrix with pending computations
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    ASSERT (A != NULL) ;

    // The matrix A might have pending operations but not be in the queue.
    // GB_check expects the matrix to be in the queue.  As a result, GB_check
    // can report an inconsistency, and thus this assert must be made
    // with a negative pr.
    ASSERT_OK (GB_check (A, "A to wait", GB_FLIP (GB0))) ;

    //--------------------------------------------------------------------------
    // delete zombies
    //--------------------------------------------------------------------------

    // A zombie is an entry A(i,j) in the matrix that has been marked for
    // deletion, but hasn't been deleted yet.  It is marked by "negating" its
    // index, that is, by replacing i with GB_FLIP(i).  Zombies are simple to
    // delete via an in-place algorithm.  No memory is allocated so this step
    // always succeeds.  Pending tuples are ignored, so A can have pending
    // tuples.

    GrB_Info info = GrB_SUCCESS ;
    int64_t anz = GB_NNZ (A) ;
    // remember the original entry count and capacity for the asserts below
    int64_t anz_orig = anz ;
    int64_t anzmax_orig = A->nzmax ;
    ASSERT (anz_orig <= anzmax_orig) ;

    int64_t nzombies = A->nzombies ;

    if (nzombies > 0)
    { 

        // There are zombies that will now be deleted.
        ASSERT (GB_ZOMBIES_OK (A)) ;
        ASSERT (GB_ZOMBIES (A)) ;

        // This step tolerates pending tuples
        // since pending tuples and zombies do not intersect
        ASSERT (GB_PENDING_OK (A)) ;

        //----------------------------------------------------------------------
        // zombies exist in the matrix: delete them all
        //----------------------------------------------------------------------

        // compare with the pruning phase of GB_resize
        // GB_PRUNE is defined just before the template is included.  The
        // template is not shown here; presumably it applies GB_PRUNE to each
        // entry (index i) to skip zombies, and updates anz and
        // A->nvec_nonempty, since the asserts below rely on both.
        #define GB_PRUNE if (GB_IS_ZOMBIE (i)) continue ;
        #include "GB_prune_inplace.c"

        //----------------------------------------------------------------------
        // all zombies have been deleted
        //----------------------------------------------------------------------

        // exactly A->nzombies have been deleted from A
        ASSERT (A->nzombies == (anz_orig - anz)) ;

        // at least one zombie has been deleted
        ASSERT (anz < anz_orig) ;

        // no more zombies; pending tuples may still exist
        A->nzombies = 0 ;
        ASSERT (GB_PENDING_OK (A)) ;

        // A->nvec_nonempty has been updated
        ASSERT (A->nvec_nonempty == GB_nvec_nonempty (A)) ;
    }

    ASSERT (anz == GB_NNZ (A)) ;

    //--------------------------------------------------------------------------
    // check for pending tuples
    //--------------------------------------------------------------------------

    // all the zombies are gone
    ASSERT (!GB_ZOMBIES (A)) ;

    if (!GB_PENDING (A))
    { 
        // nothing more to do; remove the matrix from the queue
        ASSERT (!GB_PENDING (A)) ;
        GB_CRITICAL (GB_queue_remove (A)) ;
        ASSERT (!(A->enqueued)) ;

        // trim any significant extra space from the matrix, but allow for some
        // future insertions.  do not increase the size of the matrix;
        // zombies have been deleted but no pending tuples added.  This is
        // guaranteed not to fail.
        ASSERT (anz <= anz_orig) ;
        info = GB_ix_resize (A, anz, Context) ;
        ASSERT (info == GrB_SUCCESS) ;

        // conform A to its desired hypersparsity
        return (GB_to_hyper_conform (A, Context)) ;
    }

    // There are pending tuples that will now be assembled.
    ASSERT (GB_PENDING (A)) ;

    //--------------------------------------------------------------------------
    // construct a new hypersparse matrix T with just the pending tuples
    //--------------------------------------------------------------------------

    // If anz > 0, T is always hypersparse.  Otherwise T can be returned as
    // non-hypersparse, and it is then transplanted as-is into the final A.

    // T has the same type as A->type, which can differ from the type of the
    // pending tuples, A->type_pending.  This is OK since the build process
    // assembles the tuples in the order they were inserted into the matrix.
    // The A->operator_pending can be NULL (an implicit SECOND function), or it
    // can be any accum operator.  The z=accum(x,y) operator can have any
    // types, and it does not have to be associative.

    GrB_Matrix T ;
    info = GB_builder (&T, A->type, A->vlen, A->vdim, A->is_csc,
        &(A->i_pending), &(A->j_pending), A->sorted_pending, A->s_pending,
        A->n_pending, A->max_n_pending, A->operator_pending,
        A->type_pending->code, Context) ;

    // NOTE: info is deliberately not tested yet; the pending tuples must be
    // freed and A removed from the queue whether or not the builder succeeded.

    //--------------------------------------------------------------------------
    // free pending tuples
    //--------------------------------------------------------------------------

    // The tuples have been converted to T, which is more compact, and
    // duplicates have been removed.

    // This work needs to be done even if the builder fails.

    // GB_builder frees A->j_pending.  If successful, A->i_pending is now T->i.
    // Otherwise A->i_pending is freed.  In both cases, it has been set to NULL.
    ASSERT (A->i_pending == NULL && A->j_pending == NULL) ;

    // pending tuples are now free; so A->s_pending can be freed as well
    // FUTURE: GB_builder could modify A->s_pending in place to save memory,
    // but it can't do that for the user's S array for GrB_*_build.
    GB_pending_free (A) ;

    //--------------------------------------------------------------------------
    // remove the matrix from the queue
    //--------------------------------------------------------------------------

    ASSERT (!GB_PENDING (A)) ;
    ASSERT (!GB_ZOMBIES (A)) ;
    GB_CRITICAL (GB_queue_remove (A)) ;

    // No pending operations on A, and A is not in the queue, so GB_check can
    // now see the conditions it expects.
    ASSERT (!(A->enqueued)) ;
    ASSERT_OK (GB_check (A, "A after moving pending tuples to T", GB0)) ;

    //--------------------------------------------------------------------------
    // check the status of the builder
    //--------------------------------------------------------------------------

    // Finally check the status of the builder.  The pending tuples, just freed
    // above, must be freed whether or not the builder is successful.
    if (info != GrB_SUCCESS)
    { 
        // out of memory
        GB_CONTENT_FREE (A) ;
        ASSERT (T == NULL) ;
        return (info) ;
    }

    ASSERT_OK (GB_check (T, "T = matrix of pending tuples", GB0)) ;
    ASSERT (!GB_PENDING (T)) ;
    ASSERT (!GB_ZOMBIES (T)) ;
    ASSERT (GB_NNZ (T) > 0) ;
    ASSERT (T->is_hyper) ;
    ASSERT (T->nvec == T->nvec_nonempty) ;

    //--------------------------------------------------------------------------
    // check for quick transplant
    //--------------------------------------------------------------------------

    if (anz == 0)
    { 
        // A has no entries so just transplant T into A, then free T and
        // conform A to its desired hypersparsity.
        return (GB_transplant_conform (A, A->type, &T, Context)) ;
    }

    //--------------------------------------------------------------------------
    // reallocate A to hold the tuples
    //--------------------------------------------------------------------------

    // make A->nzmax larger to accommodate future tuples, but only
    // allocate new space if the old A->nzmax is insufficient.

    int64_t anz_new = anz + GB_NNZ (T) ;  // must have at least this space

    info = GB_ix_resize (A, anz_new, Context) ;
    if (info != GrB_SUCCESS)
    { 
        // out of memory
        GB_MATRIX_FREE (&T) ;
        return (info) ;
    }

    //--------------------------------------------------------------------------
    // if A is hypersparse, ensure A->plen is sufficient for A=A+T
    //--------------------------------------------------------------------------

    // If anz > 0, T is hypersparse, even if A is a GrB_Vector
    ASSERT (T->is_hyper) ;

    // No addition is done since the nonzero patterns of A and T are disjoint.

    // Local aliases for the arrays of A.  These are read after GB_ix_resize,
    // so they refer to the arrays as they are after that call.
    int64_t *restrict Ah = A->h ;
    int64_t *restrict Ap = A->p ;
    int64_t *restrict Ai = A->i ;
    GB_void *restrict Ax = A->x ;
    int64_t anvec = A->nvec ;
    int64_t anvec_new = anvec ;

    // read-only aliases for the arrays of T
    const int64_t *restrict Th = T->h ;
    const int64_t *restrict Tp = T->p ;
    const int64_t *restrict Ti = T->i ;
    const GB_void *restrict Tx = T->x ;
    int64_t tnvec = T->nvec ;

    int64_t ak, tk ;

    if (A->is_hyper)
    {

        // 2-way merge of A->h and T->h
        // This pass only counts: anvec_new becomes the number of vectors in
        // the union of the two hyperlists.  Nothing is moved yet.
        for (ak = 0, tk = 0 ; ak < anvec && tk < tnvec ; )
        {
            int64_t ja = Ah [ak] ;
            int64_t jt = Th [tk] ;
            if (jt == ja)
            { 
                // vector jt appears in both A and T
                ak++ ;
                tk++ ;
            }
            else if (ja < jt)
            { 
                // vector ja appears in A but not T
                ak++ ;
            }
            else // jt < ja
            { 
                // vector jt appears in T but not A
                tk++ ;
                anvec_new++ ;
            }
        }

        // count the vectors not yet seen in T
        if (tk < tnvec)
        { 
            anvec_new += (tnvec - tk) ;
        }

        // reallocate A->p and A->h, if needed
        if (anvec_new > A->plen)
        {
            if (GB_to_nonhyper_test (A, anvec_new, A->vdim))
            { 
                // convert to non-hypersparse if anvec_new will become too large
                info = GB_to_nonhyper (A, Context) ;
            }
            else
            { 
                // increase the size of A->p and A->h.  The size must be at
                // least anvec_new, but add some slack for future growth.
                // The new length is capped at A->vdim.
                int64_t aplen_new = 2 * (anvec_new + 1) ;
                aplen_new = GB_IMIN (aplen_new, A->vdim) ;
                info = GB_hyper_realloc (A, aplen_new, Context) ;
            }
            if (info != GrB_SUCCESS)
            { 
                // out of memory; all content of A has been freed
                ASSERT (A->magic == GB_MAGIC2) ;
                GB_MATRIX_FREE (&T) ;
                return (info) ;
            }
            // A->h and A->p may have changed; refresh the local aliases
            Ah = A->h ;
            Ap = A->p ;
        }
    }

    ASSERT_OK (GB_check (A, "A after increasing A->h", GB0)) ;
    ASSERT_OK (GB_check (T, "T to fold in", GB0)) ;

    //--------------------------------------------------------------------------
    // A = A + T ; in place by folding in the tuples in reverse order
    //--------------------------------------------------------------------------

    // Merge in the tuples into each vector, in reverse order.  Note that Ap
    // [k+1] or Ap [j+1] is changed during the iteration.  The bottom of the
    // new A is treated like a stack, where entries are placed on top of the Ai
    // and Ax stack, and vector indices are placed on top of the Ah stack
    // if A is hypersparse.

    // T is always hypersparse, even if A and T are typecasted GrB_Vector
    // objects.  A can be non-hypersparse or hypersparse.  If A is hypersparse
    // then this step does not take O(A->vdim) time.  It takes at most
    // O(nnz(T)+nnz(A)) time, regardless of the vector dimension of A and T,
    // A->vdim and T->vdim.
    // NOTE(review): the original comment said nnz(Z); no Z exists in this
    // function, so T is presumably what was meant.

    // Re-read is_hyper here: A may have been converted to non-hypersparse
    // by GB_to_nonhyper above.
    bool A_is_hyper = A->is_hyper ;

    // size in bytes of one entry of A, used for the memcpy's below
    int64_t asize = A->type->size ;

    // pdest points to the top of the stack at the end of the A matrix;
    // this is also the total number of nonzeros in A+T.  Since the stack
    // is empty, pdest points to one past the position where the last entry
    // in A will appear.
    int64_t pdest = anz_new ;

    // pdest-1 must be within the size of A->i and A->x
    ASSERT (pdest <= A->nzmax) ;

    // start at the last vector of T
    tk = tnvec - 1 ;

    // ak_dest points to the top of the hyperlist stack, also currently empty.
    int64_t ak_dest ;
    if (A_is_hyper)
    { 
        // Ah [ak] is the rightmost non-empty vector in the hypersparse A.
        // It will be moved to Ah [anvec_new-1].
        ak = A->nvec - 1 ;
        ak_dest = anvec_new ;
    }
    else
    { 
        // ak is the rightmost vector in the non-hypersparse A
        ak = A->vdim - 1 ;
        ak_dest = A->vdim ;
        ASSERT (A->nvec == A->vdim) ;
    }

    // count the number of non-empty vectors (again, if hypersparse, but for
    // the first time if non-hypersparse)
    // From here on anvec_new is reused as a running count of non-empty
    // vectors, starting from the current count for A.
    anvec_new = A->nvec_nonempty ;

    // while T has non-empty vectors
    while (tk >= 0)
    {

        // When T is exhausted, the while loop can stop.  Let j1 be the
        // leftmost non-empty vector of the hypersparse T.  A(:,0:j1-1) is
        // not affected by the merge.  Only vectors A(:,j1:n-1) need to be
        // shifted (where n == A->vdim).

        // If the vectors of A are exhausted, ak becomes -1 and stays there.
        ASSERT (ak >= -1) ;

        //----------------------------------------------------------------------
        // get vectors A(:,j) and T(:,j)
        //----------------------------------------------------------------------

        int64_t j, ja, jt, pa, pa_end, pt, pt_end ;
        if (A_is_hyper)
        { 
            // get the next non-empty vector ja in the prior hypersparse A
            // (ja is -1 once the hyperlist of A is exhausted)
            ja = (ak >= 0) ? Ah [ak] : -1 ;
        }
        else
        { 
            // ja always appears in the non-hypersparse A
            ja = ak ;
        }

        // jt is the next non-empty vector in the hypersparse T
        jt = Th [tk] ;

        ASSERT (jt >= 0) ;
        ASSERT (ja >= -1) ;

        // pa and pt are set to the LAST entry of each vector, and pa_end and
        // pt_end to one position BEFORE the first entry, since the merge
        // below walks each vector backwards.  An absent vector gets the
        // empty range (-1,-1).
        if (ja == jt)
        { 
            // vector j appears in both A(:,j) and T(:,j)
            ASSERT (ak >= 0) ;
            j = ja ;
            pa_end = Ap [ak  ] - 1 ;
            pa     = Ap [ak+1] - 1 ;
            pt_end = Tp [tk  ] - 1 ;
            pt     = Tp [tk+1] - 1 ;
        }
        else if (ja > jt)
        { 
            // vector j appears in A(:,j) but not T(:,j)
            ASSERT (ak >= 0) ;
            j = ja ;
            pa_end = Ap [ak  ] - 1 ;
            pa     = Ap [ak+1] - 1 ;
            pt_end = -1 ;
            pt     = -1 ;
        }
        else // jt > ja
        { 
            // vector j appears in T(:,j) but not A(:,j)
            ASSERT (ak >= -1) ;
            j = jt ;
            pa_end = -1 ;
            pa     = -1 ;
            pt_end = Tp [tk  ] - 1 ;
            pt     = Tp [tk+1] - 1 ;
        }

        ASSERT (j >= 0 && j < A->vdim) ;

        // A (:,j) is in Ai,Ax [pa_end+1 ... pa]
        // T (:,j) is in Ti,Tx [pt_end+1 ... pt]

        //----------------------------------------------------------------------
        // count the number of non-empty vectors in the new A
        //----------------------------------------------------------------------

        if (!(pa > pa_end) && (pt > pt_end))
        { 
            // A(:,j) is empty but T(:,j) is not; count one more non-empty
            // vector in A
            anvec_new++ ;
        }

        //----------------------------------------------------------------------
        // log the new end of A(:,j)
        //----------------------------------------------------------------------

        // get the next free slot on the hyperlist stack
        ASSERT (ak < ak_dest) ;
        ASSERT (GB_IMPLIES (!A_is_hyper, ak_dest == ak+1)) ;
        --ak_dest ;
        ASSERT (ak <= ak_dest) ;
        ASSERT (ak_dest >= 0) ;

        if (A_is_hyper)
        { 
            // push j onto the stack for the new hyperlist for A
            Ah [ak_dest] = j ;
        }

        // record where vector j ends in the new A (one past its last entry)
        ASSERT (GB_IMPLIES (!A_is_hyper, ak_dest == ak && j == ak)) ;
        Ap [ak_dest+1] = pdest ;

        //----------------------------------------------------------------------
        // merge while entries exist in both A (:,j) and T (:,j) (reverse order)
        //----------------------------------------------------------------------

        while (pa > pa_end && pt > pt_end)
        {
            // entries exist in both A (:,j) and T (:,j); take the biggest one
            int64_t ia = Ai [pa] ;
            int64_t it = Ti [pt] ;

            // no entries are both in A and T
            ASSERT (ia != it) ;

            // get next free slot on the top of the stack of the entries of A
            --pdest ;
            ASSERT (pa < pdest) ;

            if (ia > it)
            { 
                // push Ai,Ax [pa] onto the stack
                Ai [pdest] = ia ;
                // Ax [pdest] = Ax [pa]
                memcpy (Ax +(pdest*asize), Ax +(pa*asize), asize) ;
                --pa ;
            }
            else // it > ia
            { 
                // push Ti,Tx [pt] onto the stack
                ASSERT (it > ia) ;
                Ai [pdest] = it ;
                // Ax [pdest] = Tx [pt]
                memcpy (Ax +(pdest*asize), Tx +(pt*asize), asize) ;
                --pt ;
            }
        }

        //----------------------------------------------------------------------
        // merge the remainder
        //----------------------------------------------------------------------

        // Either A (:,j) or T (:,j) is exhausted; but the other one can have
        // entries that still need to be shifted down.

        // FUTURE: can use two memmove's here, for Ai and Ax, with no while
        // loop, since the source and destination can overlap
        while (pa > pa_end)
        {
            // entries still exist in A (:,j); shift downwards
            int64_t ia = Ai [pa] ;

            // get next free slot on the top of the stack of the entries of A
            --pdest ;
            ASSERT (pa <= pdest) ;

            // push Ai,Ax [pa] onto the stack
            // (skipped when the entry is already in its final position, which
            // also avoids a memcpy whose source and destination coincide)
            if (pa != pdest)
            { 
                Ai [pdest] = ia ;
                // Ax [pdest] = Ax [pa]
                memcpy (Ax +(pdest*asize), Ax +(pa*asize), asize) ;
            }
            --pa ;
        }

        // FUTURE: can use two memcpy's here, for Ai and Ax, with no while loop
        while (pt > pt_end)
        { 
            // entries still exist in T (:,j); shift downwards
            int64_t it = Ti [pt] ;

            // get next free slot on the top of the stack of the entries of A
            --pdest ;

            // push Ti,Tx [pt] onto the stack
            Ai [pdest] = it ;
            // Ax [pdest] = Tx [pt]
            memcpy (Ax +(pdest*asize), Tx +(pt*asize), asize) ;
            --pt ;
        }

        //----------------------------------------------------------------------
        // advance to the next vector (right to left)
        //----------------------------------------------------------------------

        if (ja == jt)
        { 
            // vector j appears in both A(:,j) and T(:,j)
            --ak ;
            --tk ;
        }
        else if (ja > jt)
        { 
            // vector j appears in A(:,j) but not T(:,j)
            --ak ;
        }
        else // jt > ja
        { 
            // vector j appears in T(:,j) but not A(:,j)
            --tk ;
        }
    }

    // update the count of non-empty vectors in A
    A->nvec_nonempty = anvec_new ;

    // all vectors have been merged into A
    if (A->is_hyper)
    { 
        A->nvec = A->nvec_nonempty ;
    }

    // end condition: no need to log the end of A(:,-1) since Ap[0]=0
    // already holds.
    ASSERT (Ap [0] == 0) ;

    //--------------------------------------------------------------------------
    // tuples have now been assembled into the matrix
    //--------------------------------------------------------------------------

    GB_MATRIX_FREE (&T) ;
    ASSERT_OK (GB_check (A, "A after assembling pending tuples", GB0)) ;

    // conform A to its desired hypersparsity
    return (GB_to_hyper_conform (A, Context)) ;
}

Exemple #3

0

Afficher le fichier

GrB_Info GB_matvec_check    // check a GraphBLAS matrix or vector
(
    const GrB_Matrix A,     // GraphBLAS matrix to print and check
    const char *name,       // name of the matrix, optional
    int pr,                 // 0: print nothing, 1: print header and errors,
                            // 2: print brief, 3: print all
                            // if negative, ignore queue conditions
                            // and use GB_FLIP(pr) for diagnostic printing.
    FILE *f,                // file for output
    const char *kind,       // "matrix" or "vector"
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    bool ignore_queue = false ;
    if (pr < 0)
    { 
        // -2: print nothing (pr = 0)
        // -3: print header  (pr = 1)
        // -4: print brief   (pr = 2)
        // -5: print all     (pr = 3)
        pr = GB_FLIP (pr) ;
        ignore_queue = true ;
    }

    if (pr > 0) GBPR ("\nGraphBLAS %s: %s ", kind, GB_NAME) ;

    if (A == NULL)
    { 
        // GrB_error status not modified since this may be an optional argument
        if (pr > 0) GBPR ("NULL\n") ;
        return (GrB_NULL_POINTER) ;
    }

    //--------------------------------------------------------------------------
    // check the object
    //--------------------------------------------------------------------------

    GB_CHECK_MAGIC (A, kind) ;
    ASSERT (A->magic == GB_MAGIC) ;    // A is now a valid initialized object

    if (pr > 0)
    { 
        GBPR ("\nnrows: "GBd" ncols: "GBd" max # entries: "GBd"\n",
            GB_NROWS (A), GB_NCOLS (A), A->nzmax) ;
        GBPR ("format: %s %s",
            A->is_hyper ? "hypersparse" : "standard",
            A->is_csc ?   "CSC" : "CSR") ;
        GBPR (" vlen: "GBd" nvec_nonempty: "GBd" nvec: "GBd" plen: "
            GBd " vdim: "GBd"\n",
            A->vlen, A->nvec_nonempty, A->nvec, A->plen, A->vdim) ;
        GBPR ("hyper_ratio %g\n", A->hyper_ratio) ;
    }

    if (A->vlen < 0 || A->vlen > GB_INDEX_MAX ||
        A->vdim < 0 || A->vdim > GB_INDEX_MAX ||
        A->nzmax < 0 || A->nzmax > GB_INDEX_MAX)
    { 
        if (pr > 0) GBPR ("invalid %s dimensions\n", kind) ;
        return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
            "%s invalid : nrows, ncols, or nzmax out of range: [%s]",
            kind, GB_NAME))) ;
    }

    if (A->is_hyper)
    {
        if (! (A->nvec >= 0 && A->nvec <= A->plen && A->plen <= A->vdim &&
               A->nvec == A->nvec_nonempty))
        { 
            if (pr > 0) GBPR ("invalid %s hypersparse structure\n", kind) ;
            return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
                "%s invalid hypersparse structure [%s]", kind, GB_NAME))) ;
        }
    }
    else
    {
        if (! (A->nvec == A->plen && A->plen == A->vdim))
        { 
            if (pr > 0) GBPR ("invalid %s standard structure\n", kind) ;
            return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
                "%s invalid structure [%s]", kind, GB_NAME))) ;
        }
    }

    // a matrix contains 1 to 8 different malloc'd blocks
    int64_t nallocs = 1 +                       // header
        (A->h != NULL && !A->h_shallow) +       // A->h, if not shallow
        (A->p != NULL && !A->p_shallow) +       // A->p, if not shallow
        (A->i != NULL && !A->i_shallow) +       // A->i, if not shallow
        (A->x != NULL && !A->x_shallow) +       // A->x, if not shallow
        (A->i_pending != NULL) +                // A->i_pending if tuples
        (A->j_pending != NULL) +                // A->j_pending if tuples
        (A->s_pending != NULL) ;                // A->s_pending if tuples

    #ifdef GB_DEVELOPER
    if (pr > 1) GBPR ("A %p magic "GBd"\n", A, A->magic) ;
    if (pr > 1) GBPR ("number of memory blocks: "GBd"\n", nallocs) ;
    #endif

    GrB_Info info = GB_Type_check (A->type, "", pr, f, Context) ;
    if (info != GrB_SUCCESS || (A->type->size != A->type_size))
    { 
        if (pr > 0) GBPR ("%s has an invalid type\n", kind) ;
        return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
            "%s has an invalid type: [%s]", kind, GB_NAME))) ;
    }

    if (A->Sauna != NULL)
    {
        if (pr > 1) GBPR ("Sauna: n: "GBd" entry size: %zu\n",
            A->Sauna->Sauna_n, A->Sauna->Sauna_size) ;
    }

    if (pr > 1 && A->AxB_method_used != GxB_DEFAULT)
    {
        GBPR ("last method used for GrB_mxm, vxm, or mxv: ") ;
        switch (A->AxB_method_used)
        {
            case GxB_AxB_GUSTAVSON : GBPR ("Gustavson") ; break ;
            case GxB_AxB_HEAP      : GBPR ("heap")      ; break ;
            case GxB_AxB_DOT       : GBPR ("dot")       ; break ;
            default: ;
        }
        GBPR ("\n") ;
    }

    #ifdef GB_DEVELOPER
    if (pr > 1) GBPR ("->h: %p shallow: %d\n", A->h, A->h_shallow) ;
    if (pr > 1) GBPR ("->p: %p shallow: %d\n", A->p, A->p_shallow) ;
    if (pr > 1) GBPR ("->i: %p shallow: %d\n", A->i, A->i_shallow) ;
    if (pr > 1) GBPR ("->x: %p shallow: %d\n", A->x, A->x_shallow) ;
    #endif

    if (A->p == NULL)
    { 
        if (pr > 0) GBPR ("->p is NULL, invalid %s\n", kind) ;
        return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
            "%s contains a NULL A->p pointer: [%s]", kind, GB_NAME))) ;
    }

    if (A->is_hyper)
    {
        if (A->h == NULL)
        { 
            if (pr > 0) GBPR ("->h is NULL, invalid hypersparse %s\n",
                kind) ;
            return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
                "hypersparse %s contains a NULL A->h pointer: [%s]",
                kind, GB_NAME))) ;
        }
    }
    else
    {
        if (A->h != NULL)
        { 
            if (pr > 0) GBPR ("->h is not NULL, invalid non-hypersparse %s\n",
                kind) ;
            return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
                "non-hypersparse %s contains a non-NULL A->h pointer: [%s]",
                kind, GB_NAME))) ;
        }
    }

    bool A_empty = (A->nzmax == 0) ;

    if (A_empty)
    {
        // A->x and A->i pointers must be NULL and shallow must be false
        if (A->i != NULL || A->i_shallow || A->x_shallow)
        { 
            if (pr > 0) GBPR ("invalid empty %s\n", kind) ;
            return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
                "%s is an invalid empty object: [%s]", kind, GB_NAME))) ;
        }

        // check the vector pointers
        for (int64_t j = 0 ; j <= A->nvec ; j++)
        {
            if (A->p [j] != 0)
            { 
                if (pr > 0) GBPR ("->p ["GBd"] = "GBd" invalid\n", j,A->p[j]);
                return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
                    "%s ->p ["GBd"] = "GBd" invalid: [%s]",
                    kind, j, A->p[j], GB_NAME))) ;
            }
        }
        if (pr > 0) GBPR ("empty\n") ;
    }

    if (!A_empty && A->i == NULL)
    { 
        if (pr > 0) GBPR ("->i is NULL, invalid %s\n", kind) ;
        return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
            "%s contains a NULL A->i pointer: [%s]", kind, GB_NAME))) ;
    }

    //--------------------------------------------------------------------------
    // check the vector pointers
    //--------------------------------------------------------------------------

    if (A->p [0] != 0)
    { 
        if (pr > 0) GBPR ("->p [0] = "GBd" invalid\n", A->p [0]) ;
        return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
            "%s A->p [0] = "GBd" invalid: [%s]", kind, A->p [0], GB_NAME))) ;
    }

    for (int64_t j = 0 ; j < A->nvec ; j++)
    {
        if (A->p [j+1] < A->p [j] || A->p [j+1] > A->nzmax)
        { 
            if (pr > 0) GBPR ("->p ["GBd"] = "GBd" invalid\n",
                j+1, A->p [j+1]) ;
            return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
                "%s A->p ["GBd"] = "GBd" invalid: [%s]",
                kind, j+1, A->p [j+1], GB_NAME))) ;
        }
    }

    if (A->is_hyper)
    {
        int64_t jlast = -1 ;
        for (int64_t k = 0 ; k < A->nvec ; k++)
        {
            int64_t j = A->h [k] ;
            if (jlast >= j || j < 0 || j >= A->vdim)
            { 
                if (pr > 0) GBPR ("->h ["GBd"] = "GBd" invalid\n",
                    k, A->h [k]) ;
                return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
                    "%s A->h ["GBd"] = "GBd" invalid: [%s]",
                    kind, k, A->h [k], GB_NAME))) ;
            }
            jlast = j ;
        }
    }

    int64_t anz = GB_NNZ (A) ;
    if (pr > 0) GBPR ("number of entries: "GBd" ", anz) ;

    if (pr > 0) GBPR ("\n") ;

    //--------------------------------------------------------------------------
    // report the number of pending tuples and number of zombies
    //--------------------------------------------------------------------------

    if (A->n_pending != 0 || A->nzombies != 0)
    { 
        if (pr > 0) GBPR ("pending tuples: "GBd" max pending: "GBd
            " zombies: "GBd"\n", A->n_pending, A->max_n_pending, A->nzombies) ;
    }

    if (A->nzombies < 0 || A->nzombies > anz)
    { 
        if (pr > 0) GBPR ("invalid number of zombies: "GBd" "
            "must be >= 0 and <= # entries ("GBd")\n", A->nzombies, anz) ;
        return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
            "%s invalid number of zombies: "GBd"\n"
            "must be >= 0 and <= # entries ("GBd") [%s]",
            kind, A->nzombies, anz, GB_NAME))) ;
    }

    //--------------------------------------------------------------------------
    // check and print the row indices and numerical values
    //--------------------------------------------------------------------------

    #define GB_NBRIEF 10
    #define GB_NZBRIEF 30

    bool jumbled = false ;
    int64_t nzombies = 0 ;
    int64_t jcount = 0 ;

    GB_for_each_vector (A)
    {
        int64_t ilast = -1 ;
        GB_for_each_entry (j, p, pend)
        {
            bool prcol = ((pr > 1 && jcount < GB_NBRIEF) || pr > 2) ;
            if (ilast == -1)
            {
                // print the header for vector j
                if (prcol)
                { 
                    GBPR ("%s: "GBd" : "GBd" entries ["GBd":"GBd"]\n",
                        A->is_csc ? "column" : "row", j, pend - p, p, pend-1) ;
                }
                else if (pr == 2 && jcount == GB_NBRIEF)
                { 
                    GBPR ("...\n") ;
                }
                jcount++ ;      // count # of vectors printed so far
            }
            int64_t i = A->i [p] ;
            bool is_zombie = GB_IS_ZOMBIE (i) ;
            i = GB_UNFLIP (i) ;
            if (is_zombie) nzombies++ ;
            if (prcol)
            { 
                if ((pr > 1 && p < GB_NZBRIEF) || pr > 2)
                { 
                    GBPR ("    %s "GBd": ", A->is_csc ? "row" : "column", i) ;
                }
                else if (pr == 2 && (ilast == -1 || p == GB_NZBRIEF))
                { 
                    GBPR ("    ...\n") ;
                }
            }
            int64_t row = A->is_csc ? i : j ;
            int64_t col = A->is_csc ? j : i ;
            if (i < 0 || i >= A->vlen)
            { 
                if (pr > 0) GBPR ("index ("GBd","GBd") out of range\n",
                    row, col) ;
                return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
                    "%s index ("GBd","GBd") out of range: [%s]",
                    kind, row, col, GB_NAME))) ;
            }

            // print the value
            bool print_value = prcol && ((pr > 1 && p < GB_NZBRIEF) || pr > 2) ;
            if (print_value)
            { 
                if (is_zombie)
                { 
                    GBPR ("zombie") ;
                }
                else if (A->x != NULL)
                { 
                    GB_void *Ax = A->x ;
                    info = GB_entry_check (A->type,
                        Ax +(p * (A->type->size)), f, Context) ;
                    if (info != GrB_SUCCESS) return (info) ;
                }
            }

            if (i <= ilast)
            { 
                // indices unsorted, or duplicates present
                if (pr > 0) GBPR (" index ("GBd","GBd") jumbled", row, col) ;
                jumbled = true ;
                print_value = (pr > 0) ;
            }

            if (print_value)
            { 
                GBPR ("\n") ;
            }
            ilast = i ;
        }
    }

Exemple #4

0

Afficher le fichier

//------------------------------------------------------------------------------
// GB_emult: element-wise "multiply", C = A.*B
//------------------------------------------------------------------------------

// Creates a new matrix C whose pattern in each vector j is the set
// intersection of the patterns of A(:,j) and B(:,j).
//
// Inputs:
//   Chandle   set to NULL on entry; C is allocated here with the dimensions
//             of A and an entry capacity of min (nnz (A), nnz (B)).
//   ctype     type of C; must be compatible with op->ztype.
//   C_is_csc  CSR/CSC format flag passed to GB_CREATE for C.
//   A, B      inputs; asserted to have no pending tuples and no zombies, and
//             to have identical vlen and vdim.
//   op        binary operator; A is typecast to op->xtype and B to op->ytype
//             when their types differ from the operator's.
//
// Returns the GrB_Info reported by GB_CREATE if the allocation fails.
//
// NOTE(review): GB_EMULT, GB_for_each_vector2 and GBI2_initj are macros
// defined outside this excerpt.  They presumably refer to the locals declared
// below by name (xwork, ywork, zwork, the cast_* functions, fmult, the *size
// variables, Ci, Cx, cnz, pa, pb, ...), so those names must not be changed
// without checking the macro definitions.
//
// NOTE: this listing ends after the per-vector loop; the remainder of the
// function is not part of this excerpt.

GrB_Info GB_emult           // C = A.*B
(
    GrB_Matrix *Chandle,    // output matrix (unallocated on input)
    const GrB_Type ctype,   // type of output matrix C
    const bool C_is_csc,    // format of output matrix C
    const GrB_Matrix A,     // input A matrix
    const GrB_Matrix B,     // input B matrix
    const GrB_BinaryOp op,  // op to perform C = op (A,B)
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    ASSERT (Chandle != NULL) ;
    ASSERT_OK (GB_check (A, "A for C=A.*B", GB0)) ;
    ASSERT_OK (GB_check (B, "B for C=A.*B", GB0)) ;
    ASSERT (!GB_PENDING (A)) ; ASSERT (!GB_ZOMBIES (A)) ;
    ASSERT (!GB_PENDING (B)) ; ASSERT (!GB_ZOMBIES (B)) ;
    ASSERT_OK (GB_check (op, "op for C=A.*B", GB0)) ;
    // A and B must have the same dimensions (both vdim and vlen)
    ASSERT (B->vdim == A->vdim && B->vlen == A->vlen) ;

    ASSERT (GB_Type_compatible (ctype,   op->ztype)) ;
    ASSERT (GB_Type_compatible (A->type, op->xtype)) ;
    ASSERT (GB_Type_compatible (B->type, op->ytype)) ;

    (*Chandle) = NULL ;

    //--------------------------------------------------------------------------
    // allocate the output matrix C
    //--------------------------------------------------------------------------

    // C is hypersparse if A or B are hypersparse (contrast with GB_add)
    bool C_is_hyper = (A->is_hyper || B->is_hyper) && (A->vdim > 1) ;

    // [ allocate the result C; C->p is malloc'd
    // worst case nnz (C) is min (nnz (A), nnz (B))
    GrB_Info info ;
    GrB_Matrix C = NULL ;           // allocate a new header for C
    GB_CREATE (&C, ctype, A->vlen, A->vdim, GB_Ap_malloc, C_is_csc,
        GB_SAME_HYPER_AS (C_is_hyper), B->hyper_ratio,
        GB_IMIN (A->nvec_nonempty, B->nvec_nonempty),
        GB_IMIN (GB_NNZ (A), GB_NNZ (B)), true) ;
    if (info != GrB_SUCCESS)
    { 
        return (info) ;
    }

    //--------------------------------------------------------------------------
    // get functions and type sizes
    //--------------------------------------------------------------------------

    GB_cast_function cast_A_to_X, cast_B_to_Y, cast_Z_to_C ;

    cast_A_to_X = GB_cast_factory (op->xtype->code, A->type->code) ;
    cast_B_to_Y = GB_cast_factory (op->ytype->code, B->type->code) ;
    cast_Z_to_C = GB_cast_factory (C->type->code,   op->ztype->code) ;

    // If types are user-defined, the cast* function is just
    // GB_copy_user_user, which requires the size of the type.  No typecast is
    // done.

    GxB_binary_function fmult = op->function ;

    size_t xsize = op->xtype->size ;
    size_t ysize = op->ytype->size ;
    size_t zsize = op->ztype->size ;
    size_t asize = A->type->size ;
    size_t bsize = B->type->size ;
    size_t csize = C->type->size ;

    // no typecasting needed if all the types match the operator
    bool nocasting =
        (A->type->code == op->xtype->code) &&
        (B->type->code == op->ytype->code) &&
        (C->type->code == op->ztype->code) ;

    // scalar workspace; only a 1-byte placeholder when no typecasting is done
    char xwork [nocasting ? 1 : xsize] ;
    char ywork [nocasting ? 1 : ysize] ;
    char zwork [nocasting ? 1 : zsize] ;

    //--------------------------------------------------------------------------
    // C = A .* B, where .* is defined by z=fmult(x,y)
    //--------------------------------------------------------------------------

    int64_t *Ci = C->i ;
    GB_void *Cx = C->x ;

    int64_t jlast, cnz, cnz_last ;
    GB_jstartup (C, &jlast, &cnz, &cnz_last) ;

    const int64_t *Ai = A->i, *Bi = B->i ;
    const GB_void *Ax = A->x, *Bx = B->x ;

    GB_for_each_vector2 (A, B)
    {

        //----------------------------------------------------------------------
        // get the next column, A (:,j) and B (:,j)
        //----------------------------------------------------------------------

        int64_t GBI2_initj (Iter, j, pa, pa_end, pb, pb_end) ;
        int64_t ajnz = pa_end - pa ;
        int64_t bjnz = pb_end - pb ;

        //----------------------------------------------------------------------
        // compute C (:,j): pattern is the set intersection
        //----------------------------------------------------------------------

        if (ajnz == 0 || bjnz == 0)
        { 

            // one or both columns are empty; set intersection is empty
            ;

        }
        else if (Ai [pa_end-1] < Bi [pb])
        { 

            // all entries in A are in lower row indices than all the
            // entries in B; set intersection is empty
            ;

        }
        else if (Bi [pb_end-1] < Ai [pa])
        { 
            // all entries in B are in lower row indices than all the
            // entries in A; set intersection is empty
            ;

        }
        else if (ajnz > 256 * bjnz)
        {

            //------------------------------------------------------------------
            // A (:,j) has many more nonzeros than B (:,j)
            //------------------------------------------------------------------

            // step through B one entry at a time, and use a binary search to
            // skip over runs of A that cannot match

            for ( ; pa < pa_end && pb < pb_end ; )
            {
                int64_t ia = Ai [pa] ;
                int64_t ib = Bi [pb] ;
                if (ia < ib)
                { 
                    // A (ia,j) appears before B (ib,j)
                    // discard all entries A (ia:ib-1,j)
                    int64_t pleft = pa + 1 ;
                    int64_t pright = pa_end ;
                    GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ;
                    ASSERT (pleft > pa) ;
                    pa = pleft ;
                }
                else if (ia > ib)
                { 
                    // B (ib,j) appears before A (ia,j)
                    pb++ ;
                }
                else // ia == ib
                { 
                    // A (i,j) and B (i,j) match
                    GB_EMULT ;
                }
            }

        }
        else if (bjnz > 256 * ajnz)
        {

            //------------------------------------------------------------------
            // B (:,j) has many more nonzeros than A (:,j)
            //------------------------------------------------------------------

            // step through A one entry at a time, and use a binary search to
            // skip over runs of B that cannot match

            for ( ; pa < pa_end && pb < pb_end ; )
            {
                int64_t ia = Ai [pa] ;
                int64_t ib = Bi [pb] ;
                if (ia < ib)
                { 
                    // A (ia,j) appears before B (ib,j)
                    pa++ ;
                }
                else if (ia > ib)
                { 
                    // B (ib,j) appears before A (ia,j)
                    // discard all entries B (ib:ia-1,j)
                    int64_t pleft = pb + 1 ;
                    int64_t pright = pb_end ;
                    GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ;
                    ASSERT (pleft > pb) ;
                    pb = pleft ;
                }
                else // ia == ib
                { 
                    // A (i,j) and B (i,j) match
                    GB_EMULT ;
                }
            }

        }
        else
        {

            //------------------------------------------------------------------
            // A (:,j) and B (:,j) have about the same number of entries
            //------------------------------------------------------------------

            // plain two-pointer merge of the two index lists

            for ( ; pa < pa_end && pb < pb_end ; )
            {
                int64_t ia = Ai [pa] ;
                int64_t ib = Bi [pb] ;
                if (ia < ib)
                { 
                    // A (ia,j) appears before B (ib,j)
                    pa++ ;
                }
                else if (ia > ib)
                { 
                    // B (ib,j) appears before A (ia,j)
                    pb++ ;
                }
                else // ia == ib
                { 
                    // A (i,j) and B (i,j) match
                    GB_EMULT ;
                }
            }
        }

        //----------------------------------------------------------------------
        // finalize C(:,j)
        //----------------------------------------------------------------------

        // this cannot fail since C->plen is the upper bound: min of the
        // non-empty vectors of A and B
        info = GB_jappend (C, j, &jlast, cnz, &cnz_last, Context) ;
        ASSERT (info == GrB_SUCCESS) ;

        #if 0
        // if it could fail, do this:
        if (info != GrB_SUCCESS) { GB_MATRIX_FREE (&C) ; return (info) ; }
        #endif
    }

Exemple #5

0

Afficher le fichier

//------------------------------------------------------------------------------
// GB_AxB_meta: C<M>=A*B meta algorithm
//------------------------------------------------------------------------------

// Reduces every combination of CSR/CSC formats and transpose flags to a
// product of matrices treated as CSC, then dispatches to either the dot
// product method (GB_AxB_dot) or the saxpy method (GB_AxB_saxpy).
//
// Outputs:
//   (*Chandle)          set to NULL on entry; on success it holds the result C
//                       and C->is_csc has been assigned.
//   (*MT_handle)        if MT_handle is not NULL, receives MT (the explicit
//                       transpose of the mask), or NULL if MT was not
//                       computed.  If MT_handle is NULL, MT is freed here.
//   (*mask_applied)     false on entry; on success, true iff a mask was
//                       present.
//   (*AxB_method_used)  GxB_DEFAULT on entry; set to GxB_AxB_DOT when the dot
//                       product method is used, otherwise it is passed to
//                       GB_AxB_saxpy.
//
// Returns GrB_SUCCESS when the end of the function is reached.
//
// NOTE(review): GB_OK is a macro defined outside this excerpt; presumably it
// returns the error code (after freeing workspace) when the wrapped call
// fails -- confirm against its definition.  It may refer to the locals info,
// AT, BT, MT and Chandle by name.

GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
(
    GrB_Matrix *Chandle,            // output matrix C
    const bool C_is_csc,            // desired CSR/CSC format of C
    GrB_Matrix *MT_handle,          // return MT = M' to caller, if computed
    const GrB_Matrix M_in,          // mask for C<M> (not complemented)
    const GrB_Matrix A_in,          // input matrix
    const GrB_Matrix B_in,          // input matrix
    const GrB_Semiring semiring,    // semiring that defines C=A*B
    bool A_transpose,               // if true, use A', else A
    bool B_transpose,               // if true, use B', else B
    bool flipxy,                    // if true, do z=fmult(b,a) vs fmult(a,b)
    bool *mask_applied,             // if true, mask was applied
    const GrB_Desc_Value AxB_method,// for auto vs user selection of methods
    GrB_Desc_Value *AxB_method_used,// method selected
    GB_Sauna *Sauna_Handle,         // handle to sparse accumulator
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    ASSERT_OK_OR_NULL (GB_check (M_in, "M for meta A*B", GB0)) ;
    ASSERT_OK (GB_check (A_in, "A_in for meta A*B", GB0)) ;
    ASSERT_OK (GB_check (B_in, "B_in for meta A*B", GB0)) ;
    ASSERT (!GB_PENDING (M_in)) ; ASSERT (!GB_ZOMBIES (M_in)) ;
    ASSERT (!GB_PENDING (A_in)) ; ASSERT (!GB_ZOMBIES (A_in)) ;
    ASSERT (!GB_PENDING (B_in)) ; ASSERT (!GB_ZOMBIES (B_in)) ;
    ASSERT_OK (GB_check (semiring, "semiring for numeric A*B", GB0)) ;
    ASSERT (mask_applied != NULL) ;
    ASSERT (AxB_method_used != NULL) ;
    ASSERT (Sauna_Handle != NULL) ;

    // clear the outputs before any work is done
    (*Chandle) = NULL ;
    if (MT_handle != NULL)
    { 
        (*MT_handle) = NULL ;
    }

    GrB_Info info ;

    // temporary transposes; NULL unless created below
    GrB_Matrix AT = NULL ;
    GrB_Matrix BT = NULL ;
    GrB_Matrix MT = NULL ;

    (*mask_applied) = false ;
    (*AxB_method_used) = GxB_DEFAULT ;

    //--------------------------------------------------------------------------
    // handle the CSR/CSC formats of C, M, A, and B
    //--------------------------------------------------------------------------

    // On input, A and/or B can be transposed, and all four matrices can be in
    // either CSR or CSC format, in any combination.  This gives a total of 64
    // possible combinations.  However, a CSR matrix that is transposed is just
    // the same as a non-transposed CSC matrix.

    // Use transpose to handle the CSR/CSC format.  If C is desired in CSR
    // format, treat it as if it were in format CSC but transposed.
    bool C_transpose = !C_is_csc ;

    // If the mask is not present, then treat it as having the same CSR/CSC
    // format as C.
    bool M_is_csc = (M_in == NULL) ? C_is_csc : M_in->is_csc ;

    // Treat M just like C.  If M is in CSR format, treat it as if it were CSC
    // but transposed, since there are no descriptors that transpose C or M.
    bool M_transpose = !M_is_csc ;

    // A can be transposed, and can also be in CSR or CSC format.  If A is in
    // CSR, treat it as A' in CSC, and if A' is in CSR, treat it as A in CSC.
    if (!A_in->is_csc)
    { 
        // Flip the sense of A_transpose
        A_transpose = !A_transpose ;
    }

    // B is treated just like A
    if (!B_in->is_csc)
    { 
        // Flip the sense of B_transpose
        B_transpose = !B_transpose ;
    }

        // Now all matrices C, M_in, A_in, and B_in, can be treated as if they
        // were all in CSC format, except any of them can be transposed.  There
        // are now 16 cases to handle, where M, A, and B are M_in, A_in, and
        // B_in and all matrices are CSR/CSC agnostic, and where C has not yet
        // been created.

        //      C <M > = A  * B
        //      C <M'> = A  * B
        //      C'<M > = A  * B
        //      C'<M'> = A  * B

        //      C <M > = A  * B'
        //      C <M'> = A  * B'
        //      C'<M > = A  * B'
        //      C'<M'> = A  * B'

        //      C <M > = A' * B
        //      C <M'> = A' * B
        //      C'<M > = A' * B
        //      C'<M'> = A' * B

        //      C <M > = A' * B'
        //      C <M'> = A' * B'
        //      C'<M > = A' * B'
        //      C'<M'> = A' * B'

    //--------------------------------------------------------------------------
    // swap_rule: remove the transpose of C
    //--------------------------------------------------------------------------

    // It is also possible to compute and return C' from this function, and to
    // set C->is_csc as the negation of the desired format C_is_csc.  This
    // ensures that GB_accum_mask will transpose C when this function is done.

    // FUTURE: give user control over the swap_rule.  Any test here will work,
    // with no other changes to the code below.  The decision will only affect
    // the performance, not the result.

    bool swap_rule = (C_transpose) ;

    GrB_Matrix A, B ;
    bool atrans, btrans ;

    if (swap_rule)
    { 
        // Replace C'=A*B with C=B'*A', and so on.  Swap A and B and transpose
        // them, negate flipxy, and transpose M and C.
        A = B_in ; atrans = !B_transpose ;
        B = A_in ; btrans = !A_transpose ;
        flipxy = !flipxy ;
        M_transpose = !M_transpose ;
        C_transpose = !C_transpose ;
    }
    else
    { 
        // use the input matrices as-is
        A = A_in ; atrans = A_transpose ;
        B = B_in ; btrans = B_transpose ;
    }

    // Assuming the swap_rule == C_transpose, C no longer needs to be
    // transposed, but the following assertion only holds if swap_rule ==
    // C_transpose.
    ASSERT (!C_transpose) ;

    ASSERT_OK (GB_check (A, "final A for A*B", GB0)) ;
    ASSERT_OK (GB_check (B, "final B for A*B", GB0)) ;

    //--------------------------------------------------------------------------
    // explicitly transpose the mask
    //--------------------------------------------------------------------------

    // all uses of GB_transpose below:
    // transpose: typecast, no op, not in place

    GrB_Matrix M ;

    if (M_transpose && M_in != NULL)
    { 
        // MT = M_in' also typecasting to boolean.  It is not freed here
        // unless an error occurs, but is returned to the caller.
        GB_OK (GB_transpose (&MT, GrB_BOOL, C_is_csc, M_in, NULL, Context)) ;
        M = MT ;
    }
    else
    { 
        // M_in can be used as-is; it may be NULL
        M = M_in ;
    }

    ASSERT_OK_OR_NULL (GB_check (M, "final M for A*B", GB0)) ;

    //--------------------------------------------------------------------------
    // typecast A and B when transposing them, if needed
    //--------------------------------------------------------------------------

    // these types are only used as the target type of the explicit transposes
    // of A and B below
    GrB_Type atype_required, btype_required ;
    if (flipxy)
    { 
        // A is passed as y, and B as x, in z = mult(x,y)
        atype_required = semiring->multiply->ytype ;
        btype_required = semiring->multiply->xtype ;
    }
    else
    { 
        // A is passed as x, and B as y, in z = mult(x,y)
        atype_required = semiring->multiply->xtype ;
        btype_required = semiring->multiply->ytype ;
    }

    //--------------------------------------------------------------------------
    // select the algorithm
    //--------------------------------------------------------------------------

    if (atrans)
    {

        //----------------------------------------------------------------------
        // C<M> = A'*B' or A'*B
        //----------------------------------------------------------------------

        // explicitly transpose B
        if (btrans)
        {
            // B = B'
            GB_OK (GB_transpose (&BT, btype_required, true, B, NULL, Context)) ;
            B = BT ;
        }

        //----------------------------------------------------------------------
        // C<M> = A'*B
        //----------------------------------------------------------------------

        // A'*B is being computed: use the dot product without computing A'
        // or use the saxpy (heap or gather/scatter) method

        // If the mask is present, only entries for which M(i,j)=1 are
        // computed, which makes this method very efficient when the mask is
        // very sparse (triangle counting, for example).  Each entry C(i,j) for
        // which M(i,j)=1 is computed via a dot product, C(i,j) =
        // A(:,i)'*B(:,j).  If the mask is not present, the dot-product method
        // is very slow in general, and thus the saxpy method is usually used
        // instead (via gather/scatter or heap).

        bool use_adotb ;

        if (AxB_method == GxB_DEFAULT)
        {
            // auto selection for A'*B
            if (M != NULL)
            { 
                // C<M> = A'*B always uses the dot product method
                use_adotb = true ;
            }
            else if (A->vdim == 1 || B->vdim == 1)
            { 
                // C=A'*B uses dot product method if C is a 1-by-n or n-by-1
                use_adotb = true ;
            }
            else
            { 
                // when C is a matrix, C=A'*B uses the dot product method if A
                // or B are dense, since the dot product method requires no
                // workspace in that case and can exploit dense vectors of A
                // and/or B.
                // A matrix counts as dense here only if vlen*vdim is computed
                // successfully by GB_Index_multiply and equals its nnz.
                GrB_Index bnzmax, anzmax ;
                bool A_is_dense = GB_Index_multiply (&anzmax, A->vlen, A->vdim)
                                  && (anzmax == GB_NNZ (A)) ;
                bool B_is_dense = GB_Index_multiply (&bnzmax, B->vlen, B->vdim)
                                  && (bnzmax == GB_NNZ (B)) ;
                use_adotb = A_is_dense || B_is_dense ;
            }
        }
        else
        { 
            // user selection for A'*B
            use_adotb = (AxB_method == GxB_AxB_DOT) ;
        }

        if (use_adotb)
        { 
            // C<M> = A'*B via dot product method
            (*AxB_method_used) = GxB_AxB_DOT ;
            GB_OK (GB_AxB_dot (Chandle, M, A, B, semiring, flipxy, Context)) ;
        }
        else
        { 
            // C<M> = A'*B via saxpy: gather/scatter or heap method
            GB_OK (GB_transpose (&AT, atype_required, true, A, NULL, Context)) ;
            GB_OK (GB_AxB_saxpy (Chandle, M, AT, B, semiring, flipxy,
                AxB_method, AxB_method_used, Sauna_Handle, Context)) ;
        }

    }
    else if (btrans)
    {

        //----------------------------------------------------------------------
        // C<M> = A*B'
        //----------------------------------------------------------------------

        if (AxB_method == GxB_AxB_DOT)
        { 
            // C<M> = A*B' via dot product
            // GB_AxB_dot is given AT = A' and BT = B'
            (*AxB_method_used) = GxB_AxB_DOT ;
            GB_OK (GB_transpose (&AT, atype_required, true, A, NULL, Context)) ;
            GB_OK (GB_transpose (&BT, btype_required, true, B, NULL, Context)) ;
            GB_OK (GB_AxB_dot (Chandle, M, AT, BT, semiring, flipxy, Context)) ;
        }
        else
        { 
            // C<M> = A*B' via saxpy: gather/scatter or heap method
            GB_OK (GB_transpose (&BT, btype_required, true, B, NULL, Context)) ;
            GB_OK (GB_AxB_saxpy (Chandle, M, A, BT, semiring, flipxy,
                AxB_method, AxB_method_used, Sauna_Handle, Context)) ;
        }

    }
    else
    {

        //----------------------------------------------------------------------
        // C<M> = A*B
        //----------------------------------------------------------------------

        if (AxB_method == GxB_AxB_DOT)
        { 
            // C<M> = A*B via dot product
            // GB_AxB_dot is given AT = A' and B
            (*AxB_method_used) = GxB_AxB_DOT ;
            GB_OK (GB_transpose (&AT, atype_required, true, A, NULL, Context)) ;
            GB_OK (GB_AxB_dot (Chandle, M, AT, B, semiring, flipxy, Context)) ;
        }
        else
        { 
            // C<M> = A*B via saxpy: gather/scatter or heap method
            GB_OK (GB_AxB_saxpy (Chandle, M, A, B, semiring, flipxy,
                AxB_method, AxB_method_used, Sauna_Handle, Context)) ;
        }
    }

    //--------------------------------------------------------------------------
    // handle C_transpose and assign the CSR/CSC format
    //--------------------------------------------------------------------------

    // If C_transpose is true, then C' has been computed.  In this case, negate
    // the desired C_is_csc so that GB_accum_mask transposes the result before
    // applying the accum operator and/or writing the result back to the user's
    // C.  If swap_rule == C_transpose, then C_transpose is always false here,
    // but this could change in the future.  The following code will adapt to
    // any swap_rule, so it does not change if the swap_rule changes.

    GrB_Matrix C = (*Chandle) ;
    ASSERT (C != NULL) ;
    C->is_csc = C_transpose ? !C_is_csc : C_is_csc ;

    //--------------------------------------------------------------------------
    // free workspace and return result
    //--------------------------------------------------------------------------

    // AT and BT are temporaries; MT is either returned or freed below
    GB_MATRIX_FREE (&AT) ;
    GB_MATRIX_FREE (&BT) ;
    ASSERT_OK (GB_check (C, "C output for all C=A*B", GB0)) ;
    ASSERT_OK_OR_NULL (GB_check (MT, "MT if computed", GB0)) ;

    (*mask_applied) = (M != NULL) ;
    if (MT_handle != NULL)
    { 
        // return MT to the caller, if computed and the caller wants it
        (*MT_handle) = MT ;
    }
    else
    { 
        // otherwise, free it
        GB_MATRIX_FREE (&MT) ;
    }

    return (GrB_SUCCESS) ;
}