示例#1
0
/***************************************************************************//**
 *  Main thread control
 *
 *  Body of a worker thread. The worker binds itself to the core listed for
 *  its rank in plasma->thread_bind, synchronizes once with the other threads,
 *  and then repeats the following cycle:
 *    1. sleep on plasma->action_condt until plasma->action is no longer
 *       PLASMA_ACT_STAND_BY;
 *    2. pass a barrier;
 *    3. run the requested action (static parallel function, QUARK worker
 *       loop, or termination);
 *    4. pass a second barrier (skipped when terminating).
 *
 *  The thread releases its affinity binding before it exits, both on
 *  PLASMA_ACT_FINALIZE and after an undefined action has been reported.
 *
 * @param[in] plasma_ptr
 *         Pointer to the plasma_context_t driving this worker, passed as
 *         void* so the function can be used as a pthread start routine.
 *
 * @return Always NULL.
 **/
void *plasma_parallel_section(void *plasma_ptr)
{
    plasma_context_t *plasma = (plasma_context_t*)(plasma_ptr);
    PLASMA_enum action;
    int running = 1;

    /* Set thread affinity for the worker */
    plasma_setaffinity(plasma->thread_bind[plasma_rank(plasma)]);

    plasma_barrier(plasma);
    while (running) {
        /* Wait until an action other than stand-by is posted. The inner loop
         * re-reads the action after every wake-up, so spurious wake-ups of
         * pthread_cond_wait are harmless. */
        pthread_mutex_lock(&plasma->action_mutex);
        while ((action = plasma->action) == PLASMA_ACT_STAND_BY)
            pthread_cond_wait(&plasma->action_condt, &plasma->action_mutex);
        pthread_mutex_unlock(&plasma->action_mutex);
        plasma_barrier(plasma);

        switch (action) {
            case PLASMA_ACT_PARALLEL:
                plasma->parallel_func_ptr(plasma);
                break;
            case PLASMA_ACT_DYNAMIC:
                QUARK_Worker_Loop(plasma->quark, plasma_rank(plasma));
                break;
            case PLASMA_ACT_FINALIZE:
                /* Leave the loop instead of returning directly so that the
                 * affinity cleanup below is actually executed. */
                running = 0;
                break;
            default:
                plasma_fatal_error("plasma_parallel_section", "undefined action");
                running = 0;
                break;
        }

        /* The closing barrier only applies to actions that keep the worker
         * alive; a terminating worker must not wait here. */
        if (running)
            plasma_barrier(plasma);
    }

    plasma_unsetaffinity();
    return NULL;
}
示例#2
0
/** ****************************************************************************
 *
 * @ingroup InPlaceTransformation
 *
 * plasma_pzpack packs all extra elements at the end of the matrix
 *
 *      +---------------+
 *      |               |
 *      |               |
 *      |     A11       |
 *      |               |
 *      |               |
 *      +---------------+
 *      |     A21       |
 *      +---------------+
 *
 * This matrix is initially stored as follows (column-major example; row-major
 * is the same, just consider the transposed matrix):
 *  A11(:,0), A21(:,0), A11(:,1), A21(:,1), ...
 *
 * On exit, it is stored as follows:
 *  A11(:,:), A21(:,:)
 *
 * The routine is executed by every thread of the static scheduler. The
 * arguments documented below are not C parameters: they are unpacked from the
 * context with plasma_unpack_args_6.
 *
 * Algorithm, with m1 = m - m0 the number of rows of A11:
 *  - each thread saves the A21 pieces of its share of columns 0..n-2 in a
 *    private buffer (the A21 piece of column n-1 is already at its final
 *    position, since (n-1)*m + m1 == m1*n + (n-1)*m0);
 *  - columns 1..n-1 of A11 are moved from offset i*m to offset i*m1, one
 *    column per thread and per round, with a barrier between the read and the
 *    write of each round (column 0 of A11 is already in place);
 *  - each thread writes its saved A21 pieces behind the packed A11.
 *
 *******************************************************************************
 *
 * @param[in] plasma
 *         Plasma context
 *
 * @param[in] m
 *         Number of rows in matrix A
 *
 * @param[in] n
 *         Number of columns in matrix A
 *
 * @param[in,out] A
 *         Matrix A to pack. (see above for entry and exit format)
 *
 * @param[in] m0
 *         Number of rows of A21
 *
 * @param[in] sequence
 *         The routine returns immediately if sequence->status is not
 *         PLASMA_SUCCESS.
 *
 * @param[in] request
 *         Unpacked from the context but not used by this routine.
 *
 ******************************************************************************/
void plasma_pzpack(plasma_context_t *plasma)
{
    PLASMA_Complex64_t *A, *W, *Wl;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    int m, n, m0;
    int i, m1, size, rank, start, end2, bs, mod;
    size_t colbytes;

    plasma_unpack_args_6(m, n, A, m0, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Quick return */
    if ( n <= 1 )
      return;

    m1 = m - m0;

    /* NOTE(review): PLASMA_SIZE / PLASMA_RANK are presumably the number of
     * participating threads and the rank of the calling thread - confirm. */
    size = PLASMA_SIZE;
    rank = PLASMA_RANK;

    /* Distribute columns 0..n-2 as evenly as possible: the first `mod`
     * threads own one more column than the others. This thread owns the
     * `bs` columns starting at `start`. */
    mod   = (n-1) % size;
    bs    = (n-1) / size;
    start = rank * bs;
    if ( rank < mod ) {
        bs++;
    }
    start += min( mod, rank );

    W  = (PLASMA_Complex64_t*)plasma_private_alloc(plasma, (m0*bs), PlasmaComplexDouble);
    Wl = (PLASMA_Complex64_t*)plasma_private_alloc(plasma, m1,      PlasmaComplexDouble);

    /* Size in bytes of one column of A11 */
    colbytes = m1*sizeof(PLASMA_Complex64_t);

    /* Save leftover pieces that are otherwise going to be overwritten */
    CORE_zlacpy( PlasmaUpperLower, m0, bs, &(A[(int64_t)start*m+m1]), m, W, m0 );

    /* Pack A.
     * end2-1 is a multiple of `size`, so every thread runs the same number of
     * iterations of this loop and the barriers stay matched.
     * All element offsets are computed in 64 bits: i*m can exceed INT_MAX for
     * large matrices. */
    end2 = ((n-1) / size) * size + 1;
    for(i=rank+1; i<end2; i+=size) {
        memcpy( Wl, &(A[(int64_t)i*m]), colbytes );
        /* All threads have read their column before any thread writes */
        plasma_barrier(plasma);
        memcpy( &(A[(int64_t)i*m1]), Wl, colbytes );
    }

    /* Last, incomplete round: only the first n-end2 threads have a column to
     * move, but every thread must still take part in the barrier. */
    if ( rank < (n - end2)) {
        i = end2 + rank;
        memcpy( Wl, &(A[(int64_t)i*m]), colbytes );
        plasma_barrier(plasma);
        memcpy( &(A[(int64_t)i*m1]), Wl, colbytes );
    }
    else
        plasma_barrier(plasma);

    /* Restore leftover pieces */
    CORE_zlacpy( PlasmaUpperLower, m0, bs, W, m0,
                 &(A[(int64_t)m1*n+(int64_t)start*m0]), m0 );

    plasma_private_free(plasma, W);
    plasma_private_free(plasma, Wl);
}
示例#3
0
/***************************************************************************//**
 *
 * @ingroup Auxiliary
 *
 *  PLASMA_Finalize - Finalize PLASMA.
 *
 *  Synchronizes the dynamic scheduler, frees the QUARK structures, posts the
 *  PLASMA_ACT_FINALIZE action to the worker threads, joins them, and then
 *  tears down the barriers, the thread attributes, the topology and the
 *  context registered for the calling thread.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval PLASMA_ERR_NOT_INITIALIZED no context is associated with
 *                  the calling thread
 *          \retval other the non-zero code returned by pthread_join(), or the
 *                  status returned by plasma_context_remove()
 *
 ******************************************************************************/
int PLASMA_Finalize()
{
    plasma_context_t *ctx = plasma_context_self();
    void *thread_result;
    int rc;
    int rank;

    if (ctx == NULL) {
        plasma_fatal_error("PLASMA_Finalize()", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Shut the dynamic scheduler down, then release its data structures */
    plasma_dynamic_sync();
    QUARK_Free(ctx->quark);

    /* Post the termination action under the mutex, then wake every worker */
    pthread_mutex_lock(&ctx->action_mutex);
    ctx->action = PLASMA_ACT_FINALIZE;
    pthread_mutex_unlock(&ctx->action_mutex);
    pthread_cond_broadcast(&ctx->action_condt);

    /* Meet the workers at the barrier, then reset the action to stand-by */
    plasma_barrier(ctx);
    ctx->action = PLASMA_ACT_STAND_BY;

    /* Wait for every worker to exit; rank 0 is the calling thread itself */
    rank = 1;
    while (rank < ctx->world_size) {
        rc = pthread_join(ctx->thread_id[rank], &thread_result);
        if (rc != 0) {
            plasma_fatal_error("PLASMA_Finalize", "pthread_join() failed");
            return rc;
        }
        rank++;
    }
    plasma_barrier_finalize(ctx);
    plasma_barrier_bw_finalize(ctx);

    /* Release the affinity binding of the main thread */
    plasma_unsetaffinity();

    /* A failure to destroy the thread attributes is reported, but does not
     * stop the rest of the teardown */
    rc = pthread_attr_destroy(&ctx->thread_attr);
    if (rc != 0)
        plasma_fatal_error("PLASMA_Finalize", "pthread_attr_destroy() failed");

    /* Tear down the topology information */
    plasma_topology_finalize();

    /* Unregister the context of the calling thread */
    rc = plasma_context_remove(ctx, pthread_self());
    if (rc != PLASMA_SUCCESS) {
        plasma_fatal_error("PLASMA_Finalize", "plasma_context_remove() failed");
        return rc;
    }

    /* Reset the concurrency level to 0.
     * Known weakness: the concurrency should only be set when it has not been
     * set already, and only be restored when this library changed it. */
    pthread_setconcurrency( 0 );

    return PLASMA_SUCCESS;
}
示例#4
0
/***************************************************************************//**
 *
 * @ingroup Auxiliary
 *
 *  PLASMA_Init_Affinity - Initialize PLASMA.
 *
 *******************************************************************************
 *
 * @param[in] cores
 *          Number of cores to use (threads to launch).
 *          If cores = 0, cores = PLASMA_NUM_THREADS if it is set, the
 *          system number of core otherwise.
 *
 * @param[in] coresbind
 *          Array to specify where to bind each thread.
 *          Each thread i is binded to coresbind[hwloc(i)] if hwloc is
 *          provided, or to coresbind[i] otherwise.
 *          If coresbind = NULL, coresbind = PLASMA_AFF_THREADS if it
 *          is set, the identity function otherwise.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *
 ******************************************************************************/
int PLASMA_Init_Affinity(int cores, int *coresbind)
{
    plasma_context_t *plasma;
    int status;
    int core;

    /* Create context and insert in the context map */
    plasma = plasma_context_create();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_Init", "plasma_context_create() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    status = plasma_context_insert(plasma, pthread_self());
    if (status != PLASMA_SUCCESS) {
        plasma_fatal_error("PLASMA_Init", "plasma_context_insert() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    /* Init number of cores and topology */
    plasma_topology_init();

    /* Set number of cores */
    if ( cores < 1 ) {
        plasma->world_size = plasma_get_numthreads();
        if ( plasma->world_size == -1 ) {
            plasma->world_size = 1;
            plasma_warning("PLASMA_Init", "Could not find the number of cores: the thread number is set to 1");
        }
    }
    else
      plasma->world_size = cores;

    if (plasma->world_size <= 0) {
        plasma_fatal_error("PLASMA_Init", "failed to get system size");
        return PLASMA_ERR_NOT_FOUND;
    }
    /* Check if not more cores than the hard limit */
    if (plasma->world_size > CONTEXT_THREADS_MAX) {
        plasma_fatal_error("PLASMA_Init", "not supporting so many cores");
        return PLASMA_ERR_INTERNAL_LIMIT;
    }

    /* Get the size of each NUMA node */
    plasma->group_size = plasma_get_numthreads_numa();
    while ( ((plasma->world_size)%(plasma->group_size)) != 0 )
        (plasma->group_size)--;

    /* Initialize barriers */
    plasma_barrier_init(plasma);
    plasma_barrier_bw_init(plasma);

    /* Initialize default thread attributes */
    status = pthread_attr_init(&plasma->thread_attr);
    if (status != 0) {
        plasma_fatal_error("PLASMA_Init", "pthread_attr_init() failed");
        return status;
    }
    /* Set scope to system */
    status = pthread_attr_setscope(&plasma->thread_attr, PTHREAD_SCOPE_SYSTEM);
    if (status != 0) {
        plasma_fatal_error("PLASMA_Init", "pthread_attr_setscope() failed");
        return status;
    }
    /* Set concurrency */
    status = pthread_setconcurrency(plasma->world_size);
    if (status != 0) {
        plasma_fatal_error("PLASMA_Init", "pthread_setconcurrency() failed");
        return status;
    }
    /*  Launch threads */
    memset(plasma->thread_id,   0, CONTEXT_THREADS_MAX*sizeof(pthread_t));
    if (coresbind != NULL) {
        memcpy(plasma->thread_bind, coresbind, plasma->world_size*sizeof(int));
    }
    else {
        plasma_get_affthreads(plasma->thread_bind);
    }
    /* Assign rank and thread ID for the master */
    plasma->thread_rank[0] = 0;
    plasma->thread_id[0] = pthread_self();

    for (core = 1; core < plasma->world_size; core++) {
        plasma->thread_rank[core] = core;
        pthread_create(
            &plasma->thread_id[core],
            &plasma->thread_attr,
             plasma_parallel_section,
             (void*)plasma);
    }

    /* Ensure BLAS are sequential and set thread affinity for the master */
#if defined(PLASMA_WITH_MKL)
#if defined(__ICC) || defined(__INTEL_COMPILER)
    kmp_set_defaults("KMP_AFFINITY=disabled");
#endif
#endif

    /* Initialize the dynamic scheduler */
    plasma->quark =  QUARK_Setup(plasma->world_size);
    plasma_barrier(plasma);

    plasma_setlapack_sequential(plasma);

    return PLASMA_SUCCESS;
}