Example #1
void GB_transpose_ix        // transpose the pattern and values of a matrix
(
    int64_t *Rp,            // size m+1, input: row pointers, shifted on output
    int64_t *Ri,            // size cnz, output column indices
    GB_void *Rx,            // size cnz, output numerical values, type R_type
    const GrB_Type R_type,  // type of output R (do typecasting into R)
    const GrB_Matrix A      // input matrix
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    ASSERT (A != NULL) ;
    ASSERT (R_type != NULL) ;
    ASSERT (Rp != NULL && Ri != NULL && Rx != NULL) ;
    ASSERT (GB_Type_compatible (A->type, R_type)) ;
    ASSERT (!GB_ZOMBIES (A)) ;

    //--------------------------------------------------------------------------
    // get the input matrix
    //--------------------------------------------------------------------------

    const int64_t *Ai = A->i ;
    const GB_void *Ax = A->x ;

    //--------------------------------------------------------------------------
    // define the worker for the switch factory
    //--------------------------------------------------------------------------

    #define GB_WORKER(rtype,atype)                              \
    {                                                           \
        rtype *rx = (rtype *) Rx ;                              \
        atype *ax = (atype *) Ax ;                              \
        GB_for_each_vector (A)                                  \
        {                                                       \
            GB_for_each_entry (j, p, pend)                      \
            {                                                   \
                int64_t q = Rp [Ai [p]]++ ;                     \
                Ri [q] = j ;                                    \
                /* rx [q] = ax [p], type casting */             \
                GB_CAST (rx [q], ax [p]) ;                      \
            }                                                   \
        }                                                       \
        return ;                                                \
    }
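
    // As an illustration (this instantiation is hypothetical; the real ones
    // are generated by the switch factory below): GB_WORKER (double, int32_t)
    // expands to a loop that, for each entry A(i,j), pops the next free slot
    // q in row i via Rp [Ai [p]]++, records the column index Ri [q] = j, and
    // typecasts the value ax [p] from int32_t to double via GB_CAST.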

    //--------------------------------------------------------------------------
    // launch the switch factory
    //--------------------------------------------------------------------------

    // The switch factory cannot be disabled by #ifndef GBCOMPACT
    // because the generic worker does no typecasting.

    // switch factory for two types, controlled by code1 and code2
    GB_Type_code code1 = R_type->code ;         // defines rtype
    GB_Type_code code2 = A->type->code ;        // defines atype
    ASSERT (code1 <= GB_UDT_code) ;
    ASSERT (code2 <= GB_UDT_code) ;
    #include "GB_2type_template.c"
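
    // GB_2type_template.c (not shown here) presumably expands to a nested
    // switch on (code1, code2) that calls GB_WORKER (rtype, atype) for each
    // pair of built-in types.  Each worker returns, so control reaches the
    // generic worker below only for user-defined types.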

    //--------------------------------------------------------------------------
    // generic worker
    //--------------------------------------------------------------------------

    // The switch factory above handles all built-in types; user-defined
    // types fall through it to this generic worker, which does no
    // typecasting.  That is why the switch factory cannot be disabled.
    ASSERT (A->type == R_type && A->type->code > GB_FP64_code) ;

    int64_t asize = A->type->size ;
    GB_for_each_vector (A)
    {
        GB_for_each_entry (j, p, pend)
        { 
            int64_t q = Rp [Ai [p]]++ ;
            Ri [q] = j ;
            memcpy (Rx +(q*asize), Ax +(p*asize), asize) ;
        }
    }
}
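
A note on the calling contract: Rp must arrive holding the cumulative count of
entries in each row of A, so that Rp [i] is the offset of the first entry of
row i in the output.  In the library that setup is done by the caller
(GB_transpose_bucket); the helper below is only a hypothetical sketch of that
step, not the library's code.

#include <stdint.h>
#include <string.h>

// Build the bucket pointers Rp [0..m] for an m-by-n matrix with anz entries
// and row indices Ai, by counting the entries in each row and taking a
// cumulative sum.  After GB_transpose_ix runs, each Rp [i] has been shifted
// to the end of row i, which is why the header above says "shifted on
// output".
static void build_row_pointers
(
    int64_t *Rp,            // size m+1, output
    const int64_t *Ai,      // size anz, row indices of A
    int64_t m,              // number of rows of A
    int64_t anz             // number of entries in A
)
{
    memset (Rp, 0, (m+1) * sizeof (int64_t)) ;
    for (int64_t p = 0 ; p < anz ; p++)
    {
        Rp [Ai [p] + 1]++ ;         // count the entries in row Ai [p]
    }
    for (int64_t i = 0 ; i < m ; i++)
    {
        Rp [i+1] += Rp [i] ;        // cumulative sum; Rp [0] == 0
    }
}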
Example #2
GrB_Info GB_emult           // C = A.*B
(
    GrB_Matrix *Chandle,    // output matrix (unallocated on input)
    const GrB_Type ctype,   // type of output matrix C
    const bool C_is_csc,    // format of output matrix C
    const GrB_Matrix A,     // input A matrix
    const GrB_Matrix B,     // input B matrix
    const GrB_BinaryOp op,  // op to perform C = op (A,B)
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    ASSERT (Chandle != NULL) ;
    ASSERT_OK (GB_check (A, "A for C=A.*B", GB0)) ;
    ASSERT_OK (GB_check (B, "B for C=A.*B", GB0)) ;
    ASSERT (!GB_PENDING (A)) ; ASSERT (!GB_ZOMBIES (A)) ;
    ASSERT (!GB_PENDING (B)) ; ASSERT (!GB_ZOMBIES (B)) ;
    ASSERT_OK (GB_check (op, "op for C=A.*B", GB0)) ;
    ASSERT (A->vdim == B->vdim && A->vlen == B->vlen) ;

    ASSERT (GB_Type_compatible (ctype,   op->ztype)) ;
    ASSERT (GB_Type_compatible (A->type, op->xtype)) ;
    ASSERT (GB_Type_compatible (B->type, op->ytype)) ;

    (*Chandle) = NULL ;

    //--------------------------------------------------------------------------
    // allocate the output matrix C
    //--------------------------------------------------------------------------

    // C is hypersparse if A or B is hypersparse (contrast with GB_add)
    bool C_is_hyper = (A->is_hyper || B->is_hyper) && (A->vdim > 1) ;

    // [ allocate the result C; C->p is malloc'd
    // worst case nnz (C) is min (nnz (A), nnz (B))
    GrB_Info info ;
    GrB_Matrix C = NULL ;           // allocate a new header for C
    GB_CREATE (&C, ctype, A->vlen, A->vdim, GB_Ap_malloc, C_is_csc,
        GB_SAME_HYPER_AS (C_is_hyper), B->hyper_ratio,
        GB_IMIN (A->nvec_nonempty, B->nvec_nonempty),
        GB_IMIN (GB_NNZ (A), GB_NNZ (B)), true) ;
    if (info != GrB_SUCCESS)
    { 
        return (info) ;
    }

    //--------------------------------------------------------------------------
    // get functions and type sizes
    //--------------------------------------------------------------------------

    GB_cast_function cast_A_to_X, cast_B_to_Y, cast_Z_to_C ;

    cast_A_to_X = GB_cast_factory (op->xtype->code, A->type->code) ;
    cast_B_to_Y = GB_cast_factory (op->ytype->code, B->type->code) ;
    cast_Z_to_C = GB_cast_factory (C->type->code,   op->ztype->code) ;

    // If types are user-defined, the cast* function is just
    // GB_copy_user_user, which requires the size of the type.  No typecast is
    // done.

    GxB_binary_function fmult = op->function ;

    size_t xsize = op->xtype->size ;
    size_t ysize = op->ytype->size ;
    size_t zsize = op->ztype->size ;
    size_t asize = A->type->size ;
    size_t bsize = B->type->size ;
    size_t csize = C->type->size ;

    // no typecasting needed if all the types match the operator
    bool nocasting =
        (A->type->code == op->xtype->code) &&
        (B->type->code == op->ytype->code) &&
        (C->type->code == op->ztype->code) ;

    // scalar workspace
    char xwork [nocasting ? 1 : xsize] ;
    char ywork [nocasting ? 1 : ysize] ;
    char zwork [nocasting ? 1 : zsize] ;

    //--------------------------------------------------------------------------
    // C = A .* B, where .* is defined by z=fmult(x,y)
    //--------------------------------------------------------------------------

    int64_t *Ci = C->i ;
    GB_void *Cx = C->x ;

    int64_t jlast, cnz, cnz_last ;
    GB_jstartup (C, &jlast, &cnz, &cnz_last) ;

    const int64_t *Ai = A->i, *Bi = B->i ;
    const GB_void *Ax = A->x, *Bx = B->x ;
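
    // GB_EMULT is defined earlier in the source (not shown in this excerpt).
    // Given the cast functions set up above, it presumably typecasts Ax [pa]
    // and Bx [pb] into xwork and ywork when needed, applies fmult into
    // zwork, typecasts the result into Cx [cnz], records Ci [cnz] = ia, and
    // advances pa, pb, and cnz.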

    GB_for_each_vector2 (A, B)
    {

        //----------------------------------------------------------------------
        // get the next column, A (:,j) and B (:,j)
        //----------------------------------------------------------------------

        int64_t GBI2_initj (Iter, j, pa, pa_end, pb, pb_end) ;
        int64_t ajnz = pa_end - pa ;
        int64_t bjnz = pb_end - pb ;

        //----------------------------------------------------------------------
        // compute C (:,j): pattern is the set intersection
        //----------------------------------------------------------------------

        if (ajnz == 0 || bjnz == 0)
        { 

            // one or both columns are empty; set intersection is empty
            ;

        }
        else if (Ai [pa_end-1] < Bi [pb])
        { 

            // all entries in A are in lower row indices than all the
            // entries in B; set intersection is empty
            ;

        }
        else if (Bi [pb_end-1] < Ai [pa])
        { 
            // all entries in B are in lower row indices than all the
            // entries in A; set intersection is empty
            ;

        }
        else if (ajnz > 256 * bjnz)
        {

            //------------------------------------------------------------------
            // A (:,j) has many more nonzeros than B (:,j)
            //------------------------------------------------------------------

            for ( ; pa < pa_end && pb < pb_end ; )
            {
                int64_t ia = Ai [pa] ;
                int64_t ib = Bi [pb] ;
                if (ia < ib)
                { 
                    // A (ia,j) appears before B (ib,j)
                    // discard all entries A (ia:ib-1,j)
                    int64_t pleft = pa + 1 ;
                    int64_t pright = pa_end ;
                    GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ;
                    ASSERT (pleft > pa) ;
                    pa = pleft ;
                }
                else if (ia > ib)
                { 
                    // B (ib,j) appears before A (ia,j)
                    pb++ ;
                }
                else // ia == ib
                { 
                    // A (i,j) and B (i,j) match
                    GB_EMULT ;
                }
            }

        }
        else if (bjnz > 256 * ajnz)
        {

            //------------------------------------------------------------------
            // B (:,j) has many more nonzeros than A (:,j)
            //------------------------------------------------------------------

            for ( ; pa < pa_end && pb < pb_end ; )
            {
                int64_t ia = Ai [pa] ;
                int64_t ib = Bi [pb] ;
                if (ia < ib)
                { 
                    // A (ia,j) appears before B (ib,j)
                    pa++ ;
                }
                else if (ia > ib)
                { 
                    // B (ib,j) appears before A (ia,j)
                    // discard all entries B (ib:ia-1,j)
                    int64_t pleft = pb + 1 ;
                    int64_t pright = pb_end ;
                    GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ;
                    ASSERT (pleft > pb) ;
                    pb = pleft ;
                }
                else // ia == ib
                { 
                    // A (i,j) and B (i,j) match
                    GB_EMULT ;
                }
            }

        }
        else
        {

            //------------------------------------------------------------------
            // A (:,j) and B (:,j) have about the same number of entries
            //------------------------------------------------------------------

            for ( ; pa < pa_end && pb < pb_end ; )
            {
                int64_t ia = Ai [pa] ;
                int64_t ib = Bi [pb] ;
                if (ia < ib)
                { 
                    // A (ia,j) appears before B (ib,j)
                    pa++ ;
                }
                else if (ia > ib)
                { 
                    // B (ib,j) appears before A (ia,j)
                    pb++ ;
                }
                else // ia == ib
                { 
                    // A (i,j) and B (i,j) match
                    GB_EMULT ;
                }
            }
        }

        //----------------------------------------------------------------------
        // finalize C(:,j)
        //----------------------------------------------------------------------

        // this cannot fail since C->plen is the upper bound: min of the
        // non-empty vectors of A and B
        info = GB_jappend (C, j, &jlast, cnz, &cnz_last, Context) ;
        ASSERT (info == GrB_SUCCESS) ;

        #if 0
        // if it could fail, do this:
        if (info != GrB_SUCCESS) { GB_MATRIX_FREE (&C) ; return (info) ; }
        #endif
    }

    //--------------------------------------------------------------------------
    // finalize C and return result
    //--------------------------------------------------------------------------

    // (assumed completion of this truncated excerpt: GB_jwrapup finalizes
    // C->p, closing the "[" bracket noted where C was allocated)
    GB_jwrapup (C, jlast, cnz) ;    // C->p is now defined ]

    (*Chandle) = C ;
    return (GrB_SUCCESS) ;
}
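
Stripped of typecasting and the GB_EMULT macro, the pattern computation above
is a two-pointer walk over two sorted index lists; the 256x ratio tests merely
switch to a binary trim search when one list is far denser than the other.
The sketch below is a minimal, hypothetical distillation of the balanced case
(the final else branch), not library code.

#include <stdint.h>

// Intersect two sorted, duplicate-free index lists Ai [0..anz-1] and
// Bi [0..bnz-1]; write the common indices to Ci and return their count.
static int64_t intersect
(
    int64_t *Ci,            // output, size >= min (anz, bnz)
    const int64_t *Ai,      // size anz, sorted
    int64_t anz,
    const int64_t *Bi,      // size bnz, sorted
    int64_t bnz
)
{
    int64_t pa = 0, pb = 0, cnz = 0 ;
    while (pa < anz && pb < bnz)
    {
        int64_t ia = Ai [pa] ;
        int64_t ib = Bi [pb] ;
        if (ia < ib)
        {
            pa++ ;                  // ia is not in B
        }
        else if (ib < ia)
        {
            pb++ ;                  // ib is not in A
        }
        else
        {
            Ci [cnz++] = ia ;       // ia == ib: in the intersection
            pa++ ;
            pb++ ;
        }
    }
    return (cnz) ;
}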
Example #3
GrB_Info GB_wait                // finish all pending computations
(
    GrB_Matrix A,               // matrix with pending computations
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    ASSERT (A != NULL) ;

    // The matrix A might have pending operations but not be in the queue.
    // GB_check expects the matrix to be in the queue.  As a result, GB_check
    // can report an inconsistency, and thus this assert must be made
    // with a negative pr.
    ASSERT_OK (GB_check (A, "A to wait", GB_FLIP (GB0))) ;

    //--------------------------------------------------------------------------
    // delete zombies
    //--------------------------------------------------------------------------

    // A zombie is an entry A(i,j) in the matrix that has been marked for
    // deletion but has not yet been deleted.  It is marked by "negating" its
    // index i, replacing it with GB_FLIP(i).  Zombies are simple to delete
    // via an in-place algorithm.  No memory is allocated, so this step
    // always succeeds.  Pending tuples are ignored, so A can have pending
    // tuples.
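
    // An aside on the encoding (an assumption about GB_FLIP, which is
    // defined elsewhere): one natural choice is GB_FLIP (i) = (-(i)-2), so
    // that every flipped index is negative, index 0 flips to -2, and the
    // operation is its own inverse: GB_FLIP (GB_FLIP (i)) == i.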

    GrB_Info info = GrB_SUCCESS ;
    int64_t anz = GB_NNZ (A) ;
    int64_t anz_orig = anz ;
    int64_t anzmax_orig = A->nzmax ;
    ASSERT (anz_orig <= anzmax_orig) ;

    int64_t nzombies = A->nzombies ;

    if (nzombies > 0)
    { 

        // There are zombies that will now be deleted.
        ASSERT (GB_ZOMBIES_OK (A)) ;
        ASSERT (GB_ZOMBIES (A)) ;

        // This step tolerates pending tuples
        // since pending tuples and zombies do not intersect
        ASSERT (GB_PENDING_OK (A)) ;

        //----------------------------------------------------------------------
        // zombies exist in the matrix: delete them all
        //----------------------------------------------------------------------

        // compare with the pruning phase of GB_resize
        #define GB_PRUNE if (GB_IS_ZOMBIE (i)) continue ;
        #include "GB_prune_inplace.c"

        //----------------------------------------------------------------------
        // all zombies have been deleted
        //----------------------------------------------------------------------

        // exactly A->nzombies have been deleted from A
        ASSERT (A->nzombies == (anz_orig - anz)) ;

        // at least one zombie has been deleted
        ASSERT (anz < anz_orig) ;

        // no more zombies; pending tuples may still exist
        A->nzombies = 0 ;
        ASSERT (GB_PENDING_OK (A)) ;

        // A->nvec_nonempty has been updated
        ASSERT (A->nvec_nonempty == GB_nvec_nonempty (A)) ;
    }

    ASSERT (anz == GB_NNZ (A)) ;

    //--------------------------------------------------------------------------
    // check for pending tuples
    //--------------------------------------------------------------------------

    // all the zombies are gone
    ASSERT (!GB_ZOMBIES (A)) ;

    if (!GB_PENDING (A))
    { 
        // nothing more to do; remove the matrix from the queue
        ASSERT (!GB_PENDING (A)) ;
        GB_CRITICAL (GB_queue_remove (A)) ;
        ASSERT (!(A->enqueued)) ;

        // Trim any significant extra space from the matrix, but allow for
        // some future insertions.  Do not increase the size of the matrix;
        // zombies have been deleted but no pending tuples added.  This is
        // guaranteed not to fail.
        ASSERT (anz <= anz_orig) ;
        info = GB_ix_resize (A, anz, Context) ;
        ASSERT (info == GrB_SUCCESS) ;

        // conform A to its desired hypersparsity
        return (GB_to_hyper_conform (A, Context)) ;
    }

    // There are pending tuples that will now be assembled.
    ASSERT (GB_PENDING (A)) ;

    //--------------------------------------------------------------------------
    // construct a new hypersparse matrix T with just the pending tuples
    //--------------------------------------------------------------------------

    // If anz > 0, T is always hypersparse.  Otherwise T can be returned as
    // non-hypersparse, and it is then transplanted as-is into the final A.

    // T has the same type as A->type, which can differ from the type of the
    // pending tuples, A->type_pending.  This is OK since the build process
    // assembles the tuples in the order they were inserted into the matrix.
    // The A->operator_pending can be NULL (an implicit SECOND function), or it
    // can be any accum operator.  The z=accum(x,y) operator can have any
    // types, and it does not have to be associative.
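    // For example, with the implicit SECOND operator, duplicate tuples
    // (i,j,x1) and (i,j,x2) inserted in that order assemble to A(i,j) = x2,
    // the most recently inserted value.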

    GrB_Matrix T ;
    info = GB_builder (&T, A->type, A->vlen, A->vdim, A->is_csc,
        &(A->i_pending), &(A->j_pending), A->sorted_pending, A->s_pending,
        A->n_pending, A->max_n_pending, A->operator_pending,
        A->type_pending->code, Context) ;

    //--------------------------------------------------------------------------
    // free pending tuples
    //--------------------------------------------------------------------------

    // The tuples have been converted to T, which is more compact, and
    // duplicates have been removed.

    // This work needs to be done even if the builder fails.

    // GB_builder frees A->j_pending.  If successful, A->i_pending is now T->i.
    // Otherwise A->i_pending is freed.  In both cases, it has been set to NULL.
    ASSERT (A->i_pending == NULL && A->j_pending == NULL) ;

    // pending tuples are now free; so A->s_pending can be freed as well
    // FUTURE: GB_builder could modify A->s_pending in place to save memory,
    // but it can't do that for the user's S array for GrB_*_build.
    GB_pending_free (A) ;

    //--------------------------------------------------------------------------
    // remove the matrix from the queue
    //--------------------------------------------------------------------------

    ASSERT (!GB_PENDING (A)) ;
    ASSERT (!GB_ZOMBIES (A)) ;
    GB_CRITICAL (GB_queue_remove (A)) ;

    // No pending operations on A, and A is not in the queue, so GB_check can
    // now see the conditions it expects.
    ASSERT (!(A->enqueued)) ;
    ASSERT_OK (GB_check (A, "A after moving pending tuples to T", GB0)) ;

    //--------------------------------------------------------------------------
    // check the status of the builder
    //--------------------------------------------------------------------------

    // Finally check the status of the builder.  The pending tuples, just
    // freed above, must be freed whether or not the builder is successful.
    if (info != GrB_SUCCESS)
    { 
        // out of memory
        GB_CONTENT_FREE (A) ;
        ASSERT (T == NULL) ;
        return (info) ;
    }

    ASSERT_OK (GB_check (T, "T = matrix of pending tuples", GB0)) ;
    ASSERT (!GB_PENDING (T)) ;
    ASSERT (!GB_ZOMBIES (T)) ;
    ASSERT (GB_NNZ (T) > 0) ;
    ASSERT (T->is_hyper) ;
    ASSERT (T->nvec == T->nvec_nonempty) ;

    //--------------------------------------------------------------------------
    // check for quick transplant
    //--------------------------------------------------------------------------

    if (anz == 0)
    { 
        // A has no entries so just transplant T into A, then free T and
        // conform A to its desired hypersparsity.
        return (GB_transplant_conform (A, A->type, &T, Context)) ;
    }

    //--------------------------------------------------------------------------
    // reallocate A to hold the tuples
    //--------------------------------------------------------------------------

    // make A->nzmax larger to accommodate future tuples, but only
    // allocate new space if the old A->nzmax is insufficient.

    int64_t anz_new = anz + GB_NNZ (T) ;  // must have at least this space

    info = GB_ix_resize (A, anz_new, Context) ;
    if (info != GrB_SUCCESS)
    { 
        // out of memory
        GB_MATRIX_FREE (&T) ;
        return (info) ;
    }

    //--------------------------------------------------------------------------
    // if A is hypersparse, ensure A->plen is sufficient for A=A+T
    //--------------------------------------------------------------------------

    // If anz > 0, T is hypersparse, even if A is a GrB_Vector
    ASSERT (T->is_hyper) ;

    // No addition is done since the nonzero patterns of A and T are disjoint.

    int64_t *restrict Ah = A->h ;
    int64_t *restrict Ap = A->p ;
    int64_t *restrict Ai = A->i ;
    GB_void *restrict Ax = A->x ;
    int64_t anvec = A->nvec ;
    int64_t anvec_new = anvec ;

    const int64_t *restrict Th = T->h ;
    const int64_t *restrict Tp = T->p ;
    const int64_t *restrict Ti = T->i ;
    const GB_void *restrict Tx = T->x ;
    int64_t tnvec = T->nvec ;

    int64_t ak, tk ;

    if (A->is_hyper)
    {

        // 2-way merge of A->h and T->h
        for (ak = 0, tk = 0 ; ak < anvec && tk < tnvec ; )
        {
            int64_t ja = Ah [ak] ;
            int64_t jt = Th [tk] ;
            if (jt == ja)
            { 
                // vector jt appears in both A and T
                ak++ ;
                tk++ ;
            }
            else if (ja < jt)
            { 
                // vector ja appears in A but not T
                ak++ ;
            }
            else // jt < ja
            { 
                // vector jt appears in T but not A
                tk++ ;
                anvec_new++ ;
            }
        }

        // count the vectors not yet seen in T
        if (tk < tnvec)
        { 
            anvec_new += (tnvec - tk) ;
        }

        // reallocate A->p and A->h, if needed
        if (anvec_new > A->plen)
        {
            if (GB_to_nonhyper_test (A, anvec_new, A->vdim))
            { 
                // convert to non-hypersparse if anvec_new will become too large
                info = GB_to_nonhyper (A, Context) ;
            }
            else
            { 
                // increase the size of A->p and A->h.  The size must be at
                // least anvec_new, but add some slack for future growth.
                int64_t aplen_new = 2 * (anvec_new + 1) ;
                aplen_new = GB_IMIN (aplen_new, A->vdim) ;
                info = GB_hyper_realloc (A, aplen_new, Context) ;
            }
            if (info != GrB_SUCCESS)
            { 
                // out of memory; all content of A has been freed
                ASSERT (A->magic == GB_MAGIC2) ;
                GB_MATRIX_FREE (&T) ;
                return (info) ;
            }
            Ah = A->h ;
            Ap = A->p ;
        }
    }

    ASSERT_OK (GB_check (A, "A after increasing A->h", GB0)) ;
    ASSERT_OK (GB_check (T, "T to fold in", GB0)) ;

    //--------------------------------------------------------------------------
    // A = A + T ; in place by folding in the tuples in reverse order
    //--------------------------------------------------------------------------

    // Merge in the tuples into each vector, in reverse order.  Note that Ap
    // [k+1] or Ap [j+1] is changed during the iteration.  The bottom of the
    // new A is treated like a stack, where entries are placed on top of the Ai
    // and Ax stack, and vector indices are placed on top of the Ah stack
    // if A is hypersparse.

    // T is always hypersparse, even if A and T are typecasted GrB_Vector
    // objects.  A can be non-hypersparse or hypersparse.  If A is hypersparse
    // then this step does not take O(A->vdim) time.  It takes at most
    // O(nnz(T)+nnz(A)) time, regardless of the vector dimensions of A and T,
    // A->vdim and T->vdim.

    bool A_is_hyper = A->is_hyper ;

    int64_t asize = A->type->size ;

    // pdest points to the top of the stack at the end of the A matrix;
    // this is also the total number of nonzeros in A+T.  Since the stack
    // is empty, pdest points to one past the position where the last entry
    // in A will appear.
    int64_t pdest = anz_new ;

    // pdest-1 must be within the size of A->i and A->x
    ASSERT (pdest <= A->nzmax) ;

    tk = tnvec - 1 ;

    // ak_dest points to the top of the hyperlist stack, also currently empty.
    int64_t ak_dest ;
    if (A_is_hyper)
    { 
        // Ah [ak] is the rightmost non-empty vector in the hypersparse A.
        // It will be moved to Ah [anvec_new-1].
        ak = A->nvec - 1 ;
        ak_dest = anvec_new ;
    }
    else
    { 
        // ak is the rightmost vector in the non-hypersparse A
        ak = A->vdim - 1 ;
        ak_dest = A->vdim ;
        ASSERT (A->nvec == A->vdim) ;
    }

    // count the number of non-empty vectors (again, if hypersparse, but for
    // the first time if non-hypersparse)
    anvec_new = A->nvec_nonempty ;

    // while T has non-empty vectors
    while (tk >= 0)
    {

        // When T is exhausted, the while loop can stop.  Let j1 be the
        // leftmost non-empty vector of the hypersparse T.  A(:,0:j1-1) is
        // not affected by the merge.  Only vectors A(:,j1:n-1) need to be
        // shifted (where n == A->vdim).

        // If the vectors of A are exhausted, ak becomes -1 and stays there.
        ASSERT (ak >= -1) ;

        //----------------------------------------------------------------------
        // get vectors A(:,j) and T(:,j)
        //----------------------------------------------------------------------

        int64_t j, ja, jt, pa, pa_end, pt, pt_end ;
        if (A_is_hyper)
        { 
            // get the next non-empty vector ja in the prior hypersparse A
            ja = (ak >= 0) ? Ah [ak] : -1 ;
        }
        else
        { 
            // ja always appears in the non-hypersparse A
            ja = ak ;
        }

        // jt is the next non-empty vector in the hypersparse T
        jt = Th [tk] ;

        ASSERT (jt >= 0) ;
        ASSERT (ja >= -1) ;

        if (ja == jt)
        { 
            // vector j appears in both A(:,j) and T(:,j)
            ASSERT (ak >= 0) ;
            j = ja ;
            pa_end = Ap [ak  ] - 1 ;
            pa     = Ap [ak+1] - 1 ;
            pt_end = Tp [tk  ] - 1 ;
            pt     = Tp [tk+1] - 1 ;
        }
        else if (ja > jt)
        { 
            // vector j appears in A(:,j) but not T(:,j)
            ASSERT (ak >= 0) ;
            j = ja ;
            pa_end = Ap [ak  ] - 1 ;
            pa     = Ap [ak+1] - 1 ;
            pt_end = -1 ;
            pt     = -1 ;
        }
        else // jt > ja
        { 
            // vector j appears in T(:,j) but not A(:,j)
            ASSERT (ak >= -1) ;
            j = jt ;
            pa_end = -1 ;
            pa     = -1 ;
            pt_end = Tp [tk  ] - 1 ;
            pt     = Tp [tk+1] - 1 ;
        }

        ASSERT (j >= 0 && j < A->vdim) ;

        // A (:,j) is in Ai,Ax [pa_end+1 ... pa]
        // T (:,j) is in Ti,Tx [pt_end+1 ... pt]

        //----------------------------------------------------------------------
        // count the number of non-empty vectors in the new A
        //----------------------------------------------------------------------

        if (!(pa > pa_end) && (pt > pt_end))
        { 
            // A(:,j) is empty but T(:,j) is not; count one more non-empty
            // vector in A
            anvec_new++ ;
        }

        //----------------------------------------------------------------------
        // log the new end of A(:,j)
        //----------------------------------------------------------------------

        // get the next free slot on the hyperlist stack
        ASSERT (ak < ak_dest) ;
        ASSERT (GB_IMPLIES (!A_is_hyper, ak_dest == ak+1)) ;
        --ak_dest ;
        ASSERT (ak <= ak_dest) ;
        ASSERT (ak_dest >= 0) ;

        if (A_is_hyper)
        { 
            // push j onto the stack for the new hyperlist for A
            Ah [ak_dest] = j ;
        }

        ASSERT (GB_IMPLIES (!A_is_hyper, ak_dest == ak && j == ak)) ;
        Ap [ak_dest+1] = pdest ;

        //----------------------------------------------------------------------
        // merge while entries exist in both A (:,j) and T (:,j) (reverse order)
        //----------------------------------------------------------------------

        while (pa > pa_end && pt > pt_end)
        {
            // entries exist in both A (:,j) and T (:,j); take the biggest one
            int64_t ia = Ai [pa] ;
            int64_t it = Ti [pt] ;

            // no entries are both in A and T
            ASSERT (ia != it) ;

            // get next free slot on the top of the stack of the entries of A
            --pdest ;
            ASSERT (pa < pdest) ;

            if (ia > it)
            { 
                // push Ai,Ax [pa] onto the stack
                Ai [pdest] = ia ;
                // Ax [pdest] = Ax [pa]
                memcpy (Ax +(pdest*asize), Ax +(pa*asize), asize) ;
                --pa ;
            }
            else // it > ia
            { 
                // push Ti,Tx [pt] onto the stack
                ASSERT (it > ia) ;
                Ai [pdest] = it ;
                // Ax [pdest] = Tx [pt]
                memcpy (Ax +(pdest*asize), Tx +(pt*asize), asize) ;
                --pt ;
            }
        }

        //----------------------------------------------------------------------
        // merge the remainder
        //----------------------------------------------------------------------

        // Either A (:,j) or T (:,j) is exhausted; but the other one can have
        // entries that still need to be shifted down.

        // FUTURE: can use two memmove's here, for Ai and Ax, with no while
        // loop, since the source and destination can overlap
        while (pa > pa_end)
        {
            // entries still exist in A (:,j); shift downwards
            int64_t ia = Ai [pa] ;

            // get next free slot on the top of the stack of the entries of A
            --pdest ;
            ASSERT (pa <= pdest) ;

            // push Ai,Ax [pa] onto the stack
            if (pa != pdest)
            { 
                Ai [pdest] = ia ;
                // Ax [pdest] = Ax [pa]
                memcpy (Ax +(pdest*asize), Ax +(pa*asize), asize) ;
            }
            --pa ;
        }

        // FUTURE: can use two memcpy's here, for Ai and Ax, with no while loop
        while (pt > pt_end)
        { 
            // entries still exist in T (:,j); shift downwards
            int64_t it = Ti [pt] ;

            // get next free slot on the top of the stack of the entries of A
            --pdest ;

            // push Ti,Tx [pt] onto the stack
            Ai [pdest] = it ;
            // Ax [pdest] = Tx [pt]
            memcpy (Ax +(pdest*asize), Tx +(pt*asize), asize) ;
            --pt ;
        }

        //----------------------------------------------------------------------
        // advance to the next vector (right to left)
        //----------------------------------------------------------------------

        if (ja == jt)
        { 
            // vector j appears in both A(:,j) and T(:,j)
            --ak ;
            --tk ;
        }
        else if (ja > jt)
        { 
            // vector j appears in A(:,j) but not T(:,j)
            --ak ;
        }
        else // jt > ja
        { 
            // vector j appears in T(:,j) but not A(:,j)
            --tk ;
        }
    }

    // update the count of non-empty vectors in A
    A->nvec_nonempty = anvec_new ;

    // all vectors have been merged into A
    if (A->is_hyper)
    { 
        A->nvec = A->nvec_nonempty ;
    }

    // end condition: no need to log the end of A(:,-1) since Ap[0]=0
    // already holds.
    ASSERT (Ap [0] == 0) ;

    //--------------------------------------------------------------------------
    // tuples have now been assembled into the matrix
    //--------------------------------------------------------------------------

    GB_MATRIX_FREE (&T) ;
    ASSERT_OK (GB_check (A, "A after assembling pending tuples", GB0)) ;

    // conform A to its desired hypersparsity
    return (GB_to_hyper_conform (A, Context)) ;
}
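
The heart of the merge above is a reusable technique: two sorted arrays that
share one buffer can be merged in place, from right to left, because the
destination pointer never overtakes the source pointers.  The sketch below is
a minimal, hypothetical distillation (plain int64_t keys, no hyperlist, no
values, no typecasting), under the same assumption as GB_wait that the two
index sets are disjoint.

#include <assert.h>
#include <stdint.h>

// Merge sorted T [0..tnz-1] into sorted A [0..anz-1], in place and in
// reverse.  A must have room for anz+tnz entries.  Once T is exhausted the
// loop stops: the remaining entries of A are already in their final place.
static void merge_in_place
(
    int64_t *A,             // size anz+tnz; first anz entries are sorted
    int64_t anz,
    const int64_t *T,       // size tnz, sorted, disjoint from A
    int64_t tnz
)
{
    int64_t pa = anz - 1 ;          // last entry of A
    int64_t pt = tnz - 1 ;          // last entry of T
    int64_t pdest = anz + tnz ;     // top of the (empty) stack

    while (pt >= 0)
    {
        --pdest ;                   // next free slot on top of the stack
        if (pa >= 0 && A [pa] > T [pt])
        {
            A [pdest] = A [pa--] ;  // shift the larger entry of A down
        }
        else
        {
            A [pdest] = T [pt--] ;  // fold in the next entry of T
        }
        assert (pdest > pa) ;       // the stack never overtakes the source
    }
}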
Example #4
GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
(
    GrB_Matrix *Chandle,            // output matrix C
    const bool C_is_csc,            // desired CSR/CSC format of C
    GrB_Matrix *MT_handle,          // return MT = M' to caller, if computed
    const GrB_Matrix M_in,          // mask for C<M> (not complemented)
    const GrB_Matrix A_in,          // input matrix
    const GrB_Matrix B_in,          // input matrix
    const GrB_Semiring semiring,    // semiring that defines C=A*B
    bool A_transpose,               // if true, use A', else A
    bool B_transpose,               // if true, use B', else B
    bool flipxy,                    // if true, do z=fmult(b,a) vs fmult(a,b)
    bool *mask_applied,             // if true, mask was applied
    const GrB_Desc_Value AxB_method,// for auto vs user selection of methods
    GrB_Desc_Value *AxB_method_used,// method selected
    GB_Sauna *Sauna_Handle,         // handle to sparse accumulator
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    ASSERT_OK_OR_NULL (GB_check (M_in, "M for meta A*B", GB0)) ;
    ASSERT_OK (GB_check (A_in, "A_in for meta A*B", GB0)) ;
    ASSERT_OK (GB_check (B_in, "B_in for meta A*B", GB0)) ;
    ASSERT (!GB_PENDING (M_in)) ; ASSERT (!GB_ZOMBIES (M_in)) ;
    ASSERT (!GB_PENDING (A_in)) ; ASSERT (!GB_ZOMBIES (A_in)) ;
    ASSERT (!GB_PENDING (B_in)) ; ASSERT (!GB_ZOMBIES (B_in)) ;
    ASSERT_OK (GB_check (semiring, "semiring for numeric A*B", GB0)) ;
    ASSERT (mask_applied != NULL) ;
    ASSERT (AxB_method_used != NULL) ;
    ASSERT (Sauna_Handle != NULL) ;

    (*Chandle) = NULL ;
    if (MT_handle != NULL)
    { 
        (*MT_handle) = NULL ;
    }

    GrB_Info info ;

    GrB_Matrix AT = NULL ;
    GrB_Matrix BT = NULL ;
    GrB_Matrix MT = NULL ;

    (*mask_applied) = false ;
    (*AxB_method_used) = GxB_DEFAULT ;

    //--------------------------------------------------------------------------
    // handle the CSR/CSC formats of C, M, A, and B
    //--------------------------------------------------------------------------

    // On input, A and/or B can be transposed, and all four matrices can be in
    // either CSR or CSC format, in any combination.  This gives a total of 64
    // possible combinations.  However, a CSR matrix that is transposed is just
    // the same as a non-transposed CSC matrix.
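
    // For example, the arrays that store A in CSR form (row pointers, column
    // indices, values) are exactly the CSC representation of A', so a CSR
    // matrix with A_transpose true can be treated as a CSC matrix with
    // A_transpose false, and vice versa (an illustration of the rule coded
    // below).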

    // Use transpose to handle the CSR/CSC format.  If C is desired in CSR
    // format, treat it as if it were in format CSC but transposed.
    bool C_transpose = !C_is_csc ;

    // If the mask is not present, then treat it as having the same CSR/CSC
    // format as C.
    bool M_is_csc = (M_in == NULL) ? C_is_csc : M_in->is_csc ;

    // Treat M just like C.  If M is in CSR format, treat it as if it were CSC
    // but transposed, since there are no descriptors that transpose C or M.
    bool M_transpose = !M_is_csc ;

    // A can be transposed, and can also be in CSR or CSC format.  If A is in
    // CSR, treat it as A' in CSC, and if A' is in CSR, treat it as A in CSC.
    if (!A_in->is_csc)
    { 
        // Flip the sense of A_transpose
        A_transpose = !A_transpose ;
    }

    // B is treated just like A
    if (!B_in->is_csc)
    { 
        // Flip the sense of B_transpose
        B_transpose = !B_transpose ;
    }

    // Now all matrices C, M_in, A_in, and B_in can be treated as if they
    // were all in CSC format, except that any of them can be transposed.
    // There are now 16 cases to handle, where M, A, and B are M_in, A_in,
    // and B_in, all matrices are CSR/CSC agnostic, and C has not yet been
    // created.

    //      C <M > = A  * B
    //      C <M'> = A  * B
    //      C'<M > = A  * B
    //      C'<M'> = A  * B

    //      C <M > = A  * B'
    //      C <M'> = A  * B'
    //      C'<M > = A  * B'
    //      C'<M'> = A  * B'

    //      C <M > = A' * B
    //      C <M'> = A' * B
    //      C'<M > = A' * B
    //      C'<M'> = A' * B

    //      C <M > = A' * B'
    //      C <M'> = A' * B'
    //      C'<M > = A' * B'
    //      C'<M'> = A' * B'

    //--------------------------------------------------------------------------
    // swap_rule: remove the transpose of C
    //--------------------------------------------------------------------------

    // It is also possible to compute and return C' from this function, and to
    // set C->is_csc as the negation of the desired format C_is_csc.  This
    // ensures that GB_accum_mask will transpose C when this function is done.

    // FUTURE: give user control over the swap_rule.  Any test here will work,
    // with no other changes to the code below.  The decision will only affect
    // the performance, not the result.

    bool swap_rule = (C_transpose) ;
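
    // The swap rests on the transpose identity (A*B)' = B'*A' : if C' = A*B
    // is requested, compute C = B'*A' instead.  Swapping A and B also swaps
    // the roles of x and y in z = fmult (x,y), hence the flipxy negation in
    // the branch below.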

    GrB_Matrix A, B ;
    bool atrans, btrans ;

    if (swap_rule)
    { 
        // Replace C'=A*B with C=B'*A', and so on.  Swap A and B and
        // transpose them, negate flipxy, and flip the transpose senses of
        // both M and C.
        A = B_in ; atrans = !B_transpose ;
        B = A_in ; btrans = !A_transpose ;
        flipxy = !flipxy ;
        M_transpose = !M_transpose ;
        C_transpose = !C_transpose ;
    }
    else
    { 
        // use the input matrices as-is
        A = A_in ; atrans = A_transpose ;
        B = B_in ; btrans = B_transpose ;
    }

    // Since swap_rule == C_transpose here, C no longer needs to be
    // transposed.  The assertion below holds only because of that choice of
    // swap_rule.
    ASSERT (!C_transpose) ;

    ASSERT_OK (GB_check (A, "final A for A*B", GB0)) ;
    ASSERT_OK (GB_check (B, "final B for A*B", GB0)) ;

    //--------------------------------------------------------------------------
    // explicitly transpose the mask
    //--------------------------------------------------------------------------

    // all uses of GB_transpose below:
    // transpose: typecast, no op, not in place

    GrB_Matrix M ;

    if (M_transpose && M_in != NULL)
    { 
        // MT = M_in', typecast to boolean.  MT is not freed here unless an
        // error occurs; otherwise it is returned to the caller.
        GB_OK (GB_transpose (&MT, GrB_BOOL, C_is_csc, M_in, NULL, Context)) ;
        M = MT ;
    }
    else
    { 
        // M_in can be used as-is; it may be NULL
        M = M_in ;
    }

    ASSERT_OK_OR_NULL (GB_check (M, "final M for A*B", GB0)) ;

    //--------------------------------------------------------------------------
    // typecast A and B when transposing them, if needed
    //--------------------------------------------------------------------------

    GrB_Type atype_required, btype_required ;
    if (flipxy)
    { 
        // A is passed as y, and B as x, in z = mult(x,y)
        atype_required = semiring->multiply->ytype ;
        btype_required = semiring->multiply->xtype ;
    }
    else
    { 
        // A is passed as x, and B as y, in z = mult(x,y)
        atype_required = semiring->multiply->xtype ;
        btype_required = semiring->multiply->ytype ;
    }

    //--------------------------------------------------------------------------
    // select the algorithm
    //--------------------------------------------------------------------------

    if (atrans)
    {

        //----------------------------------------------------------------------
        // C<M> = A'*B' or A'*B
        //----------------------------------------------------------------------

        // explicitly transpose B
        if (btrans)
        {
            // B = B'
            GB_OK (GB_transpose (&BT, btype_required, true, B, NULL, Context)) ;
            B = BT ;
        }

        //----------------------------------------------------------------------
        // C<M> = A'*B
        //----------------------------------------------------------------------

        // A'*B is being computed: use the dot product without computing A'
        // or use the saxpy (heap or gather/scatter) method

        // If the mask is present, only entries for which M(i,j)=1 are
        // computed, which makes this method very efficient when the mask is
        // very sparse (triangle counting, for example).  Each entry C(i,j) for
        // which M(i,j)=1 is computed via a dot product, C(i,j) =
        // A(:,i)'*B(:,j).  If the mask is not present, the dot-product method
        // is very slow in general, and thus the saxpy method is usually used
        // instead (via gather/scatter or heap).

        bool use_adotb ;

        if (AxB_method == GxB_DEFAULT)
        {
            // auto selection for A'*B
            if (M != NULL)
            { 
                // C<M> = A'*B always uses the dot product method
                use_adotb = true ;
            }
            else if (A->vdim == 1 || B->vdim == 1)
            { 
                // C=A'*B uses the dot product method if C is 1-by-n or n-by-1
                use_adotb = true ;
            }
            else
            { 
                // when C is a matrix, C=A'*B uses the dot product method if A
                // or B are dense, since the dot product method requires no
                // workspace in that case and can exploit dense vectors of A
                // and/or B.
                GrB_Index bnzmax, anzmax ;
                bool A_is_dense = GB_Index_multiply (&anzmax, A->vlen, A->vdim)
                                  && (anzmax == GB_NNZ (A)) ;
                bool B_is_dense = GB_Index_multiply (&bnzmax, B->vlen, B->vdim)
                                  && (bnzmax == GB_NNZ (B)) ;
                use_adotb = A_is_dense || B_is_dense ;
            }
        }
        else
        { 
            // user selection for A'*B
            use_adotb = (AxB_method == GxB_AxB_DOT) ;
        }

        if (use_adotb)
        { 
            // C<M> = A'*B via dot product method
            (*AxB_method_used) = GxB_AxB_DOT ;
            GB_OK (GB_AxB_dot (Chandle, M, A, B, semiring, flipxy, Context)) ;
        }
        else
        { 
            // C<M> = A'*B via saxpy: gather/scatter or heap method
            GB_OK (GB_transpose (&AT, atype_required, true, A, NULL, Context)) ;
            GB_OK (GB_AxB_saxpy (Chandle, M, AT, B, semiring, flipxy,
                AxB_method, AxB_method_used, Sauna_Handle, Context)) ;
        }

    }
    else if (btrans)
    {

        //----------------------------------------------------------------------
        // C<M> = A*B'
        //----------------------------------------------------------------------

        if (AxB_method == GxB_AxB_DOT)
        { 
            // C<M> = A*B' via dot product
            (*AxB_method_used) = GxB_AxB_DOT ;
            GB_OK (GB_transpose (&AT, atype_required, true, A, NULL, Context)) ;
            GB_OK (GB_transpose (&BT, btype_required, true, B, NULL, Context)) ;
            GB_OK (GB_AxB_dot (Chandle, M, AT, BT, semiring, flipxy, Context)) ;
        }
        else
        { 
            // C<M> = A*B' via saxpy: gather/scatter or heap method
            GB_OK (GB_transpose (&BT, btype_required, true, B, NULL, Context)) ;
            GB_OK (GB_AxB_saxpy (Chandle, M, A, BT, semiring, flipxy,
                AxB_method, AxB_method_used, Sauna_Handle, Context)) ;
        }

    }
    else
    {

        //----------------------------------------------------------------------
        // C<M> = A*B
        //----------------------------------------------------------------------

        if (AxB_method == GxB_AxB_DOT)
        { 
            // C<M> = A*B via dot product
            (*AxB_method_used) = GxB_AxB_DOT ;
            GB_OK (GB_transpose (&AT, atype_required, true, A, NULL, Context)) ;
            GB_OK (GB_AxB_dot (Chandle, M, AT, B, semiring, flipxy, Context)) ;
        }
        else
        { 
            // C<M> = A*B via saxpy: gather/scatter or heap method
            GB_OK (GB_AxB_saxpy (Chandle, M, A, B, semiring, flipxy,
                AxB_method, AxB_method_used, Sauna_Handle, Context)) ;
        }
    }

    //--------------------------------------------------------------------------
    // handle C_transpose and assign the CSR/CSC format
    //--------------------------------------------------------------------------

    // If C_transpose is true, then C' has been computed.  In this case, negate
    // the desired C_is_csc so that GB_accum_mask transposes the result before
    // applying the accum operator and/or writing the result back to the user's
    // C.  If swap_rule == C_transpose, then C_transpose is always false here,
    // but this could change in the future.  The following code will adapt to
    // any swap_rule, so it does not change if the swap_rule changes.

    GrB_Matrix C = (*Chandle) ;
    ASSERT (C != NULL) ;
    C->is_csc = C_transpose ? !C_is_csc : C_is_csc ;

    //--------------------------------------------------------------------------
    // free workspace and return result
    //--------------------------------------------------------------------------

    GB_MATRIX_FREE (&AT) ;
    GB_MATRIX_FREE (&BT) ;
    ASSERT_OK (GB_check (C, "C output for all C=A*B", GB0)) ;
    ASSERT_OK_OR_NULL (GB_check (MT, "MT if computed", GB0)) ;

    (*mask_applied) = (M != NULL) ;
    if (MT_handle != NULL)
    { 
        // return MT to the caller, if computed and the caller wants it
        (*MT_handle) = MT ;
    }
    else
    { 
        // otherwise, free it
        GB_MATRIX_FREE (&MT) ;
    }

    return (GrB_SUCCESS) ;
}