Ejemplo n.º 1
0
/** Transpose a matrix in place.
 *
 *  Each stored entry (i, c) with c > i is paired with entry (c, i): if
 *  row c already stores column i the two values are exchanged, otherwise
 *  the entry is appended to row c and the stored count of row i is
 *  decremented. Nothing is returned; the arrays referenced by A are
 *  modified even though the handle itself is const-qualified.
 *
 *  \ingroup transpose_group
 *
 *  \param A The matrix to be transposed
 */
void TYPED_FUNC(
    bml_transpose_ellpack) (
    const bml_matrix_ellpack_t * A)
{
    int N = A->N;
    int M = A->M;

    REAL_T *A_value = (REAL_T *) A->value;
    int *A_index = A->index;
    int *A_nnz = A->nnz;

#pragma omp parallel for default(none) shared(N, M, A_value, A_index, A_nnz)
    for (int i = 0; i < N; i++)
    {
        // Walk the row from its last stored entry down to the first.
        for (int j = A_nnz[i] - 1; j >= 0; j--)
        {
            // Only entries above the diagonal trigger an exchange or a move.
            if (A_index[ROWMAJOR(i, j, N, M)] > i)
            {
                int ind = A_index[ROWMAJOR(i, j, N, M)];
                int exchangeDone = 0;
                for (int k = 0; k < A_nnz[ind]; k++)
                {
                    // Existing corresponding value for transpose - exchange
                    if (A_index[ROWMAJOR(ind, k, N, M)] == i)
                    {
#pragma omp critical
                        {
                            REAL_T tmp = A_value[ROWMAJOR(i, j, N, M)];

                            A_value[ROWMAJOR(i, j, N, M)] =
                                A_value[ROWMAJOR(ind, k, N, M)];
                            A_value[ROWMAJOR(ind, k, N, M)] = tmp;
                        }
                        exchangeDone = 1;
                        break;
                    }
                }

                // If no match add to end of row
                if (!exchangeDone)
                {
#pragma omp critical
                    {
                        // The append slot must be read under the lock:
                        // several threads can append to the same row ind,
                        // and a slot read outside the critical section
                        // could be handed out twice.
                        int jind = A_nnz[ind];

                        A_index[ROWMAJOR(ind, jind, N, M)] = i;
                        A_value[ROWMAJOR(ind, jind, N, M)] =
                            A_value[ROWMAJOR(i, j, N, M)];
                        A_nnz[ind]++;
                        // NOTE(review): this shortens row i by one instead
                        // of deleting slot j; the two are only equivalent
                        // when slot j is the last stored slot - confirm.
                        // NOTE(review): jind is not checked against M.
                        A_nnz[i]--;
                    }
                }
            }
        }
    }

}
Ejemplo n.º 2
0
/** Calculate Gershgorin bounds for a dense matrix.
 *
 *  For every row the sum of absolute values of the off-diagonal elements
 *  (the radius) is combined with the diagonal element; the largest
 *  "diagonal + radius" and the smallest "diagonal - radius" over all rows
 *  are tracked.
 *
 *  \ingroup gershgorin_group
 *
 *  \param A The matrix
 *  \return Array of two doubles obtained from bml_allocate_memory():
 *  element 0 is the calculated max value, element 1 is the calculated
 *  max-min value
 */
void *TYPED_FUNC(
    bml_gershgorin_dense) (
    const bml_matrix_dense_t * A)
{
    int N = A->N;
    REAL_T *A_matrix = A->matrix;

    // Starting values for the min/max reductions.
    double emin = 100000000000.0;
    double emax = -100000000000.0;

    double *eval = bml_allocate_memory(sizeof(double) * 2);

#pragma omp parallel for default(none) shared(N, A_matrix) reduction(max:emax) reduction(min:emin)
    for (int row = 0; row < N; row++)
    {
        // Sum of absolute values across the whole row ...
        REAL_T radius = 0.0;
        for (int col = 0; col < N; col++)
        {
            REAL_T absham = ABS(A_matrix[ROWMAJOR(row, col, N, N)]);
            radius += (double) absham;
        }

        // ... minus the diagonal contribution.
        REAL_T dvalue = A_matrix[ROWMAJOR(row, row, N, N)];
        radius -= ABS(dvalue);

        double upper = REAL_PART(dvalue + radius);
        double lower = REAL_PART(dvalue - radius);

        emax = (emax > upper ? emax : upper);
        emin = (emin < lower ? emin : lower);
    }

    eval[0] = emax;
    eval[1] = emax - emin;

    return eval;
}
Ejemplo n.º 3
0
/** Transpose a matrix into a new matrix.
 *
 *  A is left untouched; the result is written into a matrix created with
 *  bml_zero_matrix_dense().
 *
 *  \ingroup transpose_group
 *
 *  \param A The matrix to be transposed
 *  \return A new matrix holding the transpose of A
 */
bml_matrix_dense_t *TYPED_FUNC(
    bml_transpose_new_dense) (
    const bml_matrix_dense_t * A)
{
    int N = A->N;

    bml_matrix_dense_t *B = TYPED_FUNC(bml_zero_matrix_dense) (N);
    REAL_T *src = A->matrix;
    REAL_T *dst = B->matrix;

    // Element (row, col) of the result is element (col, row) of the input.
#pragma omp parallel for default(none) shared(N, src, dst)
    for (int row = 0; row < N; row++)
    {
        for (int col = 0; col < N; col++)
        {
            dst[ROWMAJOR(row, col, N, N)] = src[ROWMAJOR(col, row, N, N)];
        }
    }

    return B;
}
Ejemplo n.º 4
0
/** Add a scaled identity matrix.
 *
 * \f$ A = A + \beta \mathrm{Id} \f$
 *
 * Only the diagonal elements of A are touched.
 *
 *  \ingroup add_group
 *
 *  \param A Matrix A (modified in place)
 *  \param beta Scalar factor multiplied by the identity
 */
void TYPED_FUNC(
    bml_add_identity_dense) (
    bml_matrix_dense_t * A,
    const double beta)
{
    const int N = A->N;
    REAL_T *elements = (REAL_T *) A->matrix;
    const REAL_T shift = beta;

    for (int d = 0; d < N; d++)
    {
        elements[ROWMAJOR(d, d, N, N)] += shift;
    }
}
Ejemplo n.º 5
0
/** Transpose a matrix into a new matrix.
 *
 *  Every stored entry (row, col) of A is appended to row col of a matrix
 *  created with bml_zero_matrix_ellpack(). A itself is not modified.
 *
 *  \ingroup transpose_group
 *
 *  \param A The matrix to be transposed
 *  \return A new matrix holding the transpose of A
 */
bml_matrix_ellpack_t *TYPED_FUNC(
    bml_transpose_new_ellpack) (
    const bml_matrix_ellpack_t * A)
{
    int N = A->N;
    int M = A->M;

    bml_matrix_ellpack_t *B = TYPED_FUNC(bml_zero_matrix_ellpack) (N, M);

    int *A_nnz = A->nnz;
    int *A_index = A->index;
    REAL_T *A_value = (REAL_T *) A->value;

    int *B_nnz = B->nnz;
    int *B_index = B->index;
    REAL_T *B_value = (REAL_T *) B->value;

    // Scatter each stored entry of A into its transposed position in B.
#pragma omp parallel for default(none) shared(N, M, B_index, B_value, B_nnz, A_index, A_value, A_nnz)
    for (int src_row = 0; src_row < N; src_row++)
    {
        const int stored = A_nnz[src_row];

        for (int pos = 0; pos < stored; pos++)
        {
            int dest_row = A_index[ROWMAJOR(src_row, pos, N, M)];

            // Appending to a row of B is serialized because different
            // source rows can target the same destination row.
#pragma omp critical
            {
                int slot = B_nnz[dest_row];

                B_nnz[dest_row] = slot + 1;
                B_value[ROWMAJOR(dest_row, slot, N, M)] =
                    A_value[ROWMAJOR(src_row, pos, N, M)];
                B_index[ROWMAJOR(dest_row, slot, N, M)] = src_row;
            }
        }
    }

    return B;
}
Ejemplo n.º 6
0
/** Transpose a matrix in place.
 *
 *  Every element strictly above the diagonal is swapped with its mirror
 *  element below the diagonal; the diagonal itself is not touched.
 *  Nothing is returned.
 *
 *  \ingroup transpose_group
 *
 *  \param A The matrix to be transposed (modified in place)
 */
void TYPED_FUNC(
    bml_transpose_dense) (
    bml_matrix_dense_t * A)
{
    int N = A->N;

    REAL_T *A_matrix = A->matrix;

#pragma omp parallel for default(none) shared(N, A_matrix)
    for (int row = 0; row < N - 1; row++)
    {
        // col starts past the diagonal, so row and col always differ.
        for (int col = row + 1; col < N; col++)
        {
            REAL_T upper = A_matrix[ROWMAJOR(row, col, N, N)];

            A_matrix[ROWMAJOR(row, col, N, N)] =
                A_matrix[ROWMAJOR(col, row, N, N)];
            A_matrix[ROWMAJOR(col, row, N, N)] = upper;
        }
    }
}
Ejemplo n.º 7
0
/** Calculate the trace of a matrix.
 *
 *  The diagonal elements are summed with an OpenMP reduction and the real
 *  part of the sum is returned.
 *
 *  \ingroup trace_group
 *
 *  \param A The matrix to calculate a trace for
 *  \return The trace of A
 */
double TYPED_FUNC(
    bml_trace_dense) (
    const bml_matrix_dense_t * A)
{
    int N = A->N;
    REAL_T *A_matrix = A->matrix;

    REAL_T diag_sum = 0.0;

#pragma omp parallel for default(none) shared(N, A_matrix) reduction(+:diag_sum)
    for (int d = 0; d < N; d++)
    {
        diag_sum += A_matrix[ROWMAJOR(d, d, N, N)];
    }

    return (double) REAL_PART(diag_sum);
}
Ejemplo n.º 8
0
/** Matrix addition.
 *
 * \f$ A = \alpha A + \beta B \f$
 *
 * The sum is built one row at a time in a dense scratch vector and then
 * written back into A; entries for which is_above_threshold() is false are
 * dropped. The arrays referenced by A are modified even though the handle
 * itself is const-qualified.
 *
 * \ingroup add_group
 *
 * \param A Matrix A (overwritten with the result)
 * \param B Matrix B
 * \param alpha Scalar factor multiplied by A
 * \param beta Scalar factor multiplied by B
 * \param threshold Threshold for matrix addition
 */
void TYPED_FUNC(
    bml_add_ellpack) (
    const bml_matrix_ellpack_t * A,
    const bml_matrix_ellpack_t * B,
    const double alpha,
    const double beta,
    const double threshold)
{
    int N = A->N;
    int A_M = A->M;
    int B_M = B->M;
    int ix[N];                  // non-zero while a column is present in the current row
    int *A_nnz = A->nnz;
    int *A_index = A->index;
    int *B_nnz = B->nnz;
    int *B_index = B->index;
    REAL_T x[N];                // dense accumulator, indexed by column
    REAL_T *A_value = (REAL_T *) A->value;
    REAL_T *B_value = (REAL_T *) B->value;

    memset(ix, 0, N * sizeof(int));
    memset(x, 0, N * sizeof(REAL_T));

    // alpha, beta and threshold are const-qualified; with default(none)
    // they must appear in a data-sharing clause under OpenMP 4.0 and later.
    // firstprivate is also accepted by compilers following the older rule.
#pragma omp parallel for default(none) \
    firstprivate(x, ix, alpha, beta, threshold) \
    shared(N, A_M, B_M, A_index, A_value, A_nnz, B_index, B_value, B_nnz)
    for (int i = 0; i < N; i++)
    {
        // l counts the distinct columns collected for row i.
        // NOTE(review): l is never checked against A_M before it is used
        // as a slot in row i of A_index.
        int l = 0;

        // Accumulate alpha * A(i, :).
        for (int jp = 0; jp < A_nnz[i]; jp++)
        {
            int k = A_index[ROWMAJOR(i, jp, N, A_M)];
            if (ix[k] == 0)
            {
                x[k] = 0.0;
                ix[k] = i + 1;
                A_index[ROWMAJOR(i, l, N, A_M)] = k;
                l++;
            }
            x[k] = x[k] + alpha * A_value[ROWMAJOR(i, jp, N, A_M)];
        }

        // Accumulate beta * B(i, :), adding columns not yet present.
        for (int jp = 0; jp < B_nnz[i]; jp++)
        {
            int k = B_index[ROWMAJOR(i, jp, N, B_M)];
            if (ix[k] == 0)
            {
                x[k] = 0.0;
                ix[k] = i + 1;
                A_index[ROWMAJOR(i, l, N, A_M)] = k;
                l++;
            }
            x[k] = x[k] + beta * B_value[ROWMAJOR(i, jp, N, B_M)];
        }

        // Compact the row: keep entries above the threshold and reset the
        // scratch arrays for the next row handled by this thread.
        int ll = 0;
        for (int jp = 0; jp < l; jp++)
        {
            REAL_T xTmp = x[A_index[ROWMAJOR(i, jp, N, A_M)]];
            if (is_above_threshold(xTmp, threshold))
            {
                A_value[ROWMAJOR(i, ll, N, A_M)] = xTmp;
                A_index[ROWMAJOR(i, ll, N, A_M)] =
                    A_index[ROWMAJOR(i, jp, N, A_M)];
                ll++;
            }
            x[A_index[ROWMAJOR(i, jp, N, A_M)]] = 0.0;
            ix[A_index[ROWMAJOR(i, jp, N, A_M)]] = 0;
        }
        A_nnz[i] = ll;
    }
}
Ejemplo n.º 9
0
/** Matrix multiply.
 *
 * \f$ X^{2} \leftarrow X \, X \f$
 *
 * Each row of the product is accumulated in a dense scratch vector and
 * then written to X2; off-diagonal entries for which is_above_threshold()
 * is false are dropped. The traces of X and of the stored X2 are
 * accumulated along the way.
 *
 * \ingroup multiply_group
 *
 * \param X Matrix X
 * \param X2 Matrix X2
 * \param threshold Used for sparse multiply
 * \return Array of two doubles obtained from bml_allocate_memory():
 * element 0 is the trace of X, element 1 is the trace of X2
 */
void *TYPED_FUNC(
    bml_multiply_x2_ellpack) (
    const bml_matrix_ellpack_t * X,
    bml_matrix_ellpack_t * X2,
    const double threshold)
{
    int X_N = X->N;
    int X_M = X->M;
    int *X_index = X->index;
    int *X_nnz = X->nnz;

    int X2_N = X2->N;
    int X2_M = X2->M;
    int *X2_index = X2->index;
    int *X2_nnz = X2->nnz;

    // ix: non-zero while a column is present in the current row
    // jx: list of the columns present in the current row
    // x:  dense accumulator, indexed by column
    int ix[X_N], jx[X_N];
    REAL_T x[X_N];

    REAL_T traceX = 0.0;
    REAL_T traceX2 = 0.0;
    REAL_T *X_value = (REAL_T *) X->value;
    REAL_T *X2_value = (REAL_T *) X2->value;

    double *trace = bml_allocate_memory(sizeof(double) * 2);

    memset(ix, 0, X_N * sizeof(int));
    memset(jx, 0, X_N * sizeof(int));
    memset(x, 0, X_N * sizeof(REAL_T));

    // threshold is const-qualified; with default(none) it must appear in a
    // data-sharing clause under OpenMP 4.0 and later. firstprivate is also
    // accepted by compilers following the older rule.
#pragma omp parallel for \
    default(none) \
    firstprivate(ix, jx, x, threshold) \
    shared(X_N, X_M, X_index, X_nnz, X_value) \
    shared(X2_N, X2_M, X2_index, X2_nnz, X2_value) \
    reduction(+: traceX, traceX2)
    for (int i = 0; i < X_N; i++)       // CALCULATES THRESHOLDED X^2
    {
        // l counts the distinct columns collected for row i.
        int l = 0;
        for (int jp = 0; jp < X_nnz[i]; jp++)
        {
            REAL_T a = X_value[ROWMAJOR(i, jp, X_N, X_M)];
            int j = X_index[ROWMAJOR(i, jp, X_N, X_M)];
            if (j == i)
            {
                traceX = traceX + a;
            }
            for (int kp = 0; kp < X_nnz[j]; kp++)
            {
                int k = X_index[ROWMAJOR(j, kp, X_N, X_M)];
                if (ix[k] == 0)
                {
                    x[k] = 0.0;
                    jx[l] = k;
                    ix[k] = i + 1;
                    l++;
                }
                // TEMPORARY STORAGE VECTOR LENGTH FULL N
                x[k] = x[k] + a * X_value[ROWMAJOR(j, kp, X_N, X_M)];
            }
        }

        // Check for number of non-zeroes per row exceeded
        // NOTE(review): execution continues past this point unless
        // LOG_ERROR itself stops the program - confirm.
        if (l > X2_M)
        {
            LOG_ERROR("Number of non-zeroes per row > M, Increase M\n");
        }

        // Store the row and reset the scratch arrays for the next row
        // handled by this thread.
        int ll = 0;
        for (int j = 0; j < l; j++)
        {
            int jp = jx[j];
            REAL_T xtmp = x[jp];
            // The diagonal element is always kept, whatever its magnitude,
            // and contributes to the trace of X2.
            if (jp == i)
            {
                traceX2 = traceX2 + xtmp;
                X2_value[ROWMAJOR(i, ll, X2_N, X2_M)] = xtmp;
                X2_index[ROWMAJOR(i, ll, X2_N, X2_M)] = jp;
                ll++;
            }
            else if (is_above_threshold(xtmp, threshold))
            {
                X2_value[ROWMAJOR(i, ll, X2_N, X2_M)] = xtmp;
                X2_index[ROWMAJOR(i, ll, X2_N, X2_M)] = jp;
                ll++;
            }
            ix[jp] = 0;
            x[jp] = 0.0;
        }
        X2_nnz[i] = ll;
    }

    trace[0] = traceX;
    trace[1] = traceX2;

    return trace;
}
Ejemplo n.º 10
0
/** Matrix multiply with threshold adjustment.
 *
 * \f$ C \leftarrow A \, B \f$
 *
 * Each row of the product is accumulated in a dense scratch vector and
 * then written to C, dropping off-diagonal entries for which
 * is_above_threshold() is false. If any row would need more than C->M
 * stored entries, the threshold is doubled and the whole product is
 * recomputed.
 *
 * \ingroup multiply_group
 *
 * \param A Matrix A
 * \param B Matrix B
 * \param C Matrix C
 * \param threshold Used for sparse multiply
 */
void TYPED_FUNC(
    bml_multiply_adjust_AB_ellpack) (
    const bml_matrix_ellpack_t * A,
    const bml_matrix_ellpack_t * B,
    bml_matrix_ellpack_t * C,
    const double threshold)
{
    int A_N = A->N;
    int A_M = A->M;
    int *A_nnz = A->nnz;
    int *A_index = A->index;

    int B_N = B->N;
    int B_M = B->M;
    int *B_nnz = B->nnz;
    int *B_index = B->index;

    int C_N = C->N;
    int C_M = C->M;
    int *C_nnz = C->nnz;
    int *C_index = C->index;

    // ix: non-zero while a column is present in the current row
    // jx: list of the columns present in the current row
    // x:  dense accumulator, indexed by column
    int ix[C->N], jx[C->N];
    int aflag = 1;
    REAL_T x[C->N];

    REAL_T *A_value = (REAL_T *) A->value;
    REAL_T *B_value = (REAL_T *) B->value;
    REAL_T *C_value = (REAL_T *) C->value;

    REAL_T adjust_threshold = (REAL_T) threshold;

    memset(ix, 0, C->N * sizeof(int));
    memset(jx, 0, C->N * sizeof(int));
    memset(x, 0, C->N * sizeof(REAL_T));

    // Repeat with a larger threshold until every row fits into C_M slots.
    while (aflag > 0)
    {
        aflag = 0;

#pragma omp parallel for \
    default(none) \
    firstprivate(ix, jx, x) \
    shared(A_N, A_M, A_nnz, A_index, A_value) \
    shared(B_N, B_M, B_nnz, B_index, B_value) \
    shared(C_N, C_M, C_nnz, C_index, C_value) \
    shared(adjust_threshold) \
    reduction(+:aflag)
        for (int i = 0; i < A_N; i++)
        {
            // l counts the distinct columns collected for row i.
            int l = 0;
            for (int jp = 0; jp < A_nnz[i]; jp++)
            {
                REAL_T a = A_value[ROWMAJOR(i, jp, A_N, A_M)];
                int j = A_index[ROWMAJOR(i, jp, A_N, A_M)];

                for (int kp = 0; kp < B_nnz[j]; kp++)
                {
                    int k = B_index[ROWMAJOR(j, kp, B_N, B_M)];
                    if (ix[k] == 0)
                    {
                        x[k] = 0.0;
                        jx[l] = k;
                        ix[k] = i + 1;
                        l++;
                    }
                    // TEMPORARY STORAGE VECTOR LENGTH FULL N
                    x[k] = x[k] + a * B_value[ROWMAJOR(j, kp, B_N, B_M)];
                }
            }

            // ll counts the entries that survive thresholding. Only the
            // first C_M of them are stored so a row never spills into
            // the storage of the next row.
            int ll = 0;
            for (int j = 0; j < l; j++)
            {
                int jp = jx[j];
                REAL_T xtmp = x[jp];
                // The diagonal element is always kept; everything else
                // has to pass the current threshold.
                if (jp == i || is_above_threshold(xtmp, adjust_threshold))
                {
                    if (ll < C_M)
                    {
                        C_value[ROWMAJOR(i, ll, C_N, C_M)] = xtmp;
                        C_index[ROWMAJOR(i, ll, C_N, C_M)] = jp;
                    }
                    ll++;
                }
                ix[jp] = 0;
                x[jp] = 0.0;
            }

            // Check for number of non-zeroes per row exceeded
            // Need to adjust threshold. The test uses the thresholded
            // count: the raw count l does not depend on the threshold,
            // so testing l could never let the retry loop finish.
            if (ll > C_M)
            {
                aflag = 1;
                ll = C_M;
            }
            C_nnz[i] = ll;
        }

        adjust_threshold *= (REAL_T) 2.0;
    }
}