Example #1
void
erts_thr_progress_init(int no_schedulers, int managed, int unmanaged)
{
    int i, j, um_low, um_high;
    char *ptr;
    size_t cb_sz, intrnl_sz, thr_arr_sz, m_wakeup_size, um_wakeup_size,
	tot_size;

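    /* Cache line aligned sizes for each region of the single allocation made below */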
    intrnl_sz = sizeof(ErtsThrPrgrInternalData);
    intrnl_sz = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(intrnl_sz);

    cb_sz = sizeof(ErtsThrPrgrCallbacks)*(managed+unmanaged);
    cb_sz = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(cb_sz);

    thr_arr_sz = sizeof(ErtsThrPrgrArray)*managed;
    ASSERT(thr_arr_sz == ERTS_ALC_CACHE_LINE_ALIGN_SIZE(thr_arr_sz));

    m_wakeup_size = sizeof(ErtsThrPrgrManagedWakeupData);
    m_wakeup_size += (managed - 1)*sizeof(int);
    m_wakeup_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(m_wakeup_size);

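    /* Two-level wakeup bitmap for unmanaged threads: um_low words with one
     * bit per thread, um_high words with one bit per low-level word. */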
    um_low = (unmanaged - 1)/ERTS_THR_PRGR_BM_BITS + 1;
    um_high = (um_low - 1)/ERTS_THR_PRGR_BM_BITS + 1;

    um_wakeup_size = sizeof(ErtsThrPrgrUnmanagedWakeupData);
    um_wakeup_size += (um_high + um_low)*sizeof(erts_atomic32_t);
    um_wakeup_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(um_wakeup_size);

    tot_size = intrnl_sz;
    tot_size += cb_sz;
    tot_size += thr_arr_sz;
    tot_size += m_wakeup_size*ERTS_THR_PRGR_WAKEUP_DATA_SIZE;
    tot_size += um_wakeup_size*ERTS_THR_PRGR_WAKEUP_DATA_SIZE;

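    /* One permanent cache aligned allocation, carved into regions below */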
    ptr = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_THR_PRGR_IDATA,
					     tot_size);

    intrnl = (ErtsThrPrgrInternalData *) ptr;
    ptr += intrnl_sz;

    erts_atomic32_init_nob(&intrnl->misc.data.lflgs,
			   ERTS_THR_PRGR_LFLG_NO_LEADER);
    erts_atomic32_init_nob(&intrnl->misc.data.block_count,
			   (ERTS_THR_PRGR_BC_FLG_NOT_BLOCKING
			    | (erts_aint32_t) managed));
    erts_atomic_init_nob(&intrnl->misc.data.blocker_event, ERTS_AINT_NULL);
    erts_atomic32_init_nob(&intrnl->misc.data.pref_wakeup_used, 0);
    erts_atomic32_init_nob(&intrnl->misc.data.managed_count, 0);
    erts_atomic32_init_nob(&intrnl->misc.data.managed_id, no_schedulers);
    erts_atomic32_init_nob(&intrnl->misc.data.unmanaged_id, -1);
    intrnl->misc.data.chk_next_ix = 0;
    intrnl->misc.data.umrefc_ix.waiting = -1;
    erts_atomic32_init_nob(&intrnl->misc.data.umrefc_ix.current, 0);

    erts_atomic_init_nob(&intrnl->umrefc[0].refc, (erts_aint_t) 0);
    erts_atomic_init_nob(&intrnl->umrefc[1].refc, (erts_aint_t) 0);

    intrnl->thr = (ErtsThrPrgrArray *) ptr;
    ptr += thr_arr_sz;
    for (i = 0; i < managed; i++)
	init_nob(&intrnl->thr[i].data.current, 0);

    intrnl->managed.callbacks = (ErtsThrPrgrCallbacks *) ptr;
    intrnl->unmanaged.callbacks = &intrnl->managed.callbacks[managed];
    ptr += cb_sz;

    intrnl->managed.no = managed;
    for (i = 0; i < managed; i++) {
	intrnl->managed.callbacks[i].arg = NULL;
	intrnl->managed.callbacks[i].wakeup = NULL;
    }

    intrnl->unmanaged.no = unmanaged;
    for (i = 0; i < unmanaged; i++) {
	intrnl->unmanaged.callbacks[i].arg = NULL;
	intrnl->unmanaged.callbacks[i].wakeup = NULL;
    }

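    /* Wakeup request data: ERTS_THR_PRGR_WAKEUP_DATA_SIZE sets each for
     * managed and unmanaged threads. */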
    for (i = 0; i < ERTS_THR_PRGR_WAKEUP_DATA_SIZE; i++) {
	intrnl->managed.data[i] = (ErtsThrPrgrManagedWakeupData *) ptr;
	erts_atomic32_init_nob(&intrnl->managed.data[i]->len, 0);
	ptr += m_wakeup_size;
    }

    for (i = 0; i < ERTS_THR_PRGR_WAKEUP_DATA_SIZE; i++) {
	erts_atomic32_t *bm;
	intrnl->unmanaged.data[i] = (ErtsThrPrgrUnmanagedWakeupData *) ptr;
	erts_atomic32_init_nob(&intrnl->unmanaged.data[i]->len, 0);
	bm = (erts_atomic32_t *) (ptr + sizeof(ErtsThrPrgrUnmanagedWakeupData));
	intrnl->unmanaged.data[i]->high = bm;
	intrnl->unmanaged.data[i]->high_sz = um_high;
	for (j = 0; j < um_high; j++)
	    erts_atomic32_init_nob(&intrnl->unmanaged.data[i]->high[j], 0);
	intrnl->unmanaged.data[i]->low
	    = &intrnl->unmanaged.data[i]->high[um_high];
	intrnl->unmanaged.data[i]->low_sz = um_low;
	for (j = 0; j < um_low; j++)
	    erts_atomic32_init_nob(&intrnl->unmanaged.data[i]->low[j], 0);
	ptr += um_wakeup_size;
    }
    ERTS_THR_MEMORY_BARRIER;
}
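The function above follows a pattern that recurs throughout ERTS init code: compute a cache-line aligned size for every sub-structure, make one permanent allocation, and carve it into regions by advancing a byte pointer. Below is a minimal standalone sketch of that pattern, assuming nothing from ERTS: CACHE_LINE, ALIGN_UP, struct header and struct percpu are placeholder names, and plain C11 aligned_alloc stands in for erts_alloc_permanent_cache_aligned.

#include <stdlib.h>
#include <stddef.h>

#define CACHE_LINE 64
#define ALIGN_UP(sz) (((sz) + CACHE_LINE - 1) & ~(size_t)(CACHE_LINE - 1))

struct header { long flags; };
struct percpu { long counter; };

static struct header *carve_example(int nthreads)
{
    /* Align each region to a cache line so regions never share a line. */
    size_t hdr_sz = ALIGN_UP(sizeof(struct header));
    size_t arr_sz = ALIGN_UP(sizeof(struct percpu) * (size_t) nthreads);
    char *base = aligned_alloc(CACHE_LINE, hdr_sz + arr_sz);
    struct header *hdr;
    struct percpu *arr;
    int i;

    if (!base)
	return NULL;

    hdr = (struct header *) base;            /* region 1: shared header   */
    arr = (struct percpu *) (base + hdr_sz); /* region 2: per-thread data */

    hdr->flags = 0;
    for (i = 0; i < nthreads; i++)
	arr[i].counter = 0;
    return hdr;
}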
Example #2
erts_sspa_data_t *
erts_sspa_create(size_t blk_sz, int pa_size)
{
    erts_sspa_data_t *data;
    size_t tot_size;
    size_t chunk_mem_size;
    char *p;
    char *chunk_start;
    int cix;
    int no_blocks = pa_size;
    int no_blocks_per_chunk;

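    /* Split the requested number of blocks into one chunk per scheduler;
     * each chunk also gets roughly a quarter of the total as extra headroom. */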
    if (erts_no_schedulers == 1)
	no_blocks_per_chunk = no_blocks;
    else {
	int extra = (no_blocks - 1)/4 + 1;
	if (extra == 0)
	    extra = 1;
	no_blocks_per_chunk = no_blocks;
	no_blocks_per_chunk += extra*erts_no_schedulers;
	no_blocks_per_chunk /= erts_no_schedulers;
    }
    no_blocks = no_blocks_per_chunk * erts_no_schedulers;
    chunk_mem_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_sspa_chunk_header_t));
    chunk_mem_size += blk_sz * no_blocks_per_chunk;
    chunk_mem_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(chunk_mem_size);
    tot_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_sspa_data_t));
    tot_size += chunk_mem_size*erts_no_schedulers;

    p = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_PRE_ALLOC_DATA, tot_size);
    data = (erts_sspa_data_t *) p;
    p += ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_sspa_data_t));
    chunk_start = p;

    data->chunks_mem_size = chunk_mem_size;
    data->start = chunk_start;
    data->end = chunk_start + chunk_mem_size*erts_no_schedulers;

    /* Initialize all chunks */
    for (cix = 0; cix < erts_no_schedulers; cix++) {
	erts_sspa_chunk_t *chnk = erts_sspa_cix2chunk(data, cix);
	erts_sspa_chunk_header_t *chdr = &chnk->aligned.header;
	erts_sspa_blk_t *blk;
	int i;

	erts_atomic_init_nob(&chdr->tail.data.last, (erts_aint_t) &chdr->tail.data.marker);
	erts_atomic_init_nob(&chdr->tail.data.marker.next_atmc, ERTS_AINT_NULL);
	erts_atomic_init_nob(&chdr->tail.data.um_refc[0], 0);
	erts_atomic_init_nob(&chdr->tail.data.um_refc[1], 0);
	erts_atomic32_init_nob(&chdr->tail.data.um_refc_ix, 0);

	chdr->head.no_thr_progress_check = 0;
	chdr->head.used_marker = 1;
	chdr->head.first = &chdr->tail.data.marker;
	chdr->head.unref_end = &chdr->tail.data.marker;
	chdr->head.next.thr_progress = erts_thr_progress_current();
	chdr->head.next.thr_progress_reached = 1;
	chdr->head.next.um_refc_ix = 1;
	chdr->head.next.unref_end = &chdr->tail.data.marker;

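	/* Link the chunk's blocks into the local singly linked free list */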
	p = &chnk->data[0];
	chdr->local.first = (erts_sspa_blk_t *) p;
	blk = (erts_sspa_blk_t *) p;
	for (i = 0; i < no_blocks_per_chunk; i++) {
	    blk = (erts_sspa_blk_t *) p;
	    p += blk_sz;
	    blk->next_ptr = (erts_sspa_blk_t *) p;
	}

	blk->next_ptr = NULL;
	chdr->local.last = blk;
	chdr->local.cnt = no_blocks_per_chunk;
	chdr->local.lim = no_blocks_per_chunk / 3;

	ERTS_SSPA_DBG_CHK_LCL(chdr);
    }

    return data;
}
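The listing below is a later variant of the same erts_sspa_create, extended with an explicit thread count (nthreads) and an optional name that is used to create a thread-specific-data key for callers other than the schedulers.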
erts_sspa_data_t *
erts_sspa_create(size_t blk_sz, int pa_size, int nthreads, const char* name)
{
    erts_sspa_data_t *data;
    size_t tot_size;
    size_t chunk_mem_size;
    char *p;
    char *chunk_start;
    int cix;
    int no_blocks = pa_size;
    int no_blocks_per_chunk;
    size_t aligned_blk_sz;

#if !defined(ERTS_STRUCTURE_ALIGNED_ALLOC)
    /* Force 64-bit alignment... */
    aligned_blk_sz = ((blk_sz - 1) / 8) * 8 + 8;
#else
    /* Alignment of structure is enough... */
    aligned_blk_sz = blk_sz;
#endif

    if (!name) { /* schedulers only variant */
        ASSERT(!nthreads);
        nthreads = erts_no_schedulers;
    }
    else {
        ASSERT(nthreads > 0);
    }

    if (nthreads == 1)
	no_blocks_per_chunk = no_blocks;
    else {
	int extra = (no_blocks - 1)/4 + 1;
	if (extra == 0)
	    extra = 1;
	no_blocks_per_chunk = no_blocks;
	no_blocks_per_chunk += extra * nthreads;
	no_blocks_per_chunk /= nthreads;
    }
    no_blocks = no_blocks_per_chunk * nthreads;
    chunk_mem_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_sspa_chunk_header_t));
    chunk_mem_size += aligned_blk_sz * no_blocks_per_chunk;
    chunk_mem_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(chunk_mem_size);
    tot_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_sspa_data_t));
    tot_size += chunk_mem_size * nthreads;

    p = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_PRE_ALLOC_DATA, tot_size);
    data = (erts_sspa_data_t *) p;
    p += ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_sspa_data_t));
    chunk_start = p;

    data->chunks_mem_size = chunk_mem_size;
    data->start = chunk_start;
    data->end = chunk_start + chunk_mem_size * nthreads;
    data->nthreads = nthreads;

    if (name) { /* thread variant */
        erts_tsd_key_create(&data->tsd_key, (char*)name);
        erts_atomic_init_nob(&data->id_generator, 0);
    }

    /* Initialize all chunks */
    for (cix = 0; cix < nthreads; cix++) {
	erts_sspa_chunk_t *chnk = erts_sspa_cix2chunk(data, cix);
	erts_sspa_chunk_header_t *chdr = &chnk->aligned.header;
	erts_sspa_blk_t *blk;
	int i;

	erts_atomic_init_nob(&chdr->tail.data.last, (erts_aint_t) &chdr->tail.data.marker);
	erts_atomic_init_nob(&chdr->tail.data.marker.next_atmc, ERTS_AINT_NULL);
	erts_atomic_init_nob(&chdr->tail.data.um_refc[0], 0);
	erts_atomic_init_nob(&chdr->tail.data.um_refc[1], 0);
	erts_atomic32_init_nob(&chdr->tail.data.um_refc_ix, 0);

	chdr->head.no_thr_progress_check = 0;
	chdr->head.used_marker = 1;
	chdr->head.first = &chdr->tail.data.marker;
	chdr->head.unref_end = &chdr->tail.data.marker;
	chdr->head.next.thr_progress = erts_thr_progress_current();
	chdr->head.next.thr_progress_reached = 1;
	chdr->head.next.um_refc_ix = 1;
	chdr->head.next.unref_end = &chdr->tail.data.marker;

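	/* Link the chunk's blocks into the local singly linked free list */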
	p = &chnk->data[0];
	chdr->local.first = (erts_sspa_blk_t *) p;
	blk = (erts_sspa_blk_t *) p;
	for (i = 0; i < no_blocks_per_chunk; i++) {
	    blk = (erts_sspa_blk_t *) p;
	    p += aligned_blk_sz;
	    blk->next_ptr = (erts_sspa_blk_t *) p;
	}

	blk->next_ptr = NULL;
	chdr->local.last = blk;
	chdr->local.cnt = no_blocks_per_chunk;
	chdr->local.lim = no_blocks_per_chunk / 3;

	ERTS_SSPA_DBG_CHK_LCL(chdr);
    }

    return data;
}
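When ERTS_STRUCTURE_ALIGNED_ALLOC is not defined, the variant above rounds the block size up to the next multiple of 8 bytes with ((blk_sz - 1) / 8) * 8 + 8. A tiny standalone check of that arithmetic (assumes blk_sz > 0; not ERTS code):

#include <assert.h>
#include <stddef.h>

static size_t round_up_8(size_t blk_sz)
{
    return ((blk_sz - 1) / 8) * 8 + 8;
}

int main(void)
{
    assert(round_up_8(1)  == 8);   /* smallest block still gets 8 bytes   */
    assert(round_up_8(8)  == 8);   /* already aligned sizes are unchanged */
    assert(round_up_8(9)  == 16);
    assert(round_up_8(24) == 24);
    assert(round_up_8(25) == 32);
    return 0;
}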
Example #4
void
erts_init_async(void)
{
    async = NULL;
    if (erts_async_max_threads > 0) {
#if ERTS_USE_ASYNC_READY_Q
	ErtsThrQInit_t qinit = ERTS_THR_Q_INIT_DEFAULT;
#endif
	erts_thr_opts_t thr_opts = ERTS_THR_OPTS_DEFAULT_INITER;
	char *ptr, thr_name[16];
	size_t tot_size = 0;
	int i;

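	/* One cache aligned allocation: the ErtsAsyncData header, per async
	 * thread queues and, when the async ready queue is used, one ready
	 * queue per scheduler. */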
	tot_size += ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsAsyncData));
	tot_size += sizeof(ErtsAlgndAsyncQ)*erts_async_max_threads;
#if ERTS_USE_ASYNC_READY_Q
	tot_size += sizeof(ErtsAlgndAsyncReadyQ)*erts_no_schedulers;
#endif

	ptr = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_ASYNC_DATA,
						 tot_size);

	async = (ErtsAsyncData *) ptr;
	ptr += ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsAsyncData));

	async->init.data.no_initialized = 0;
        erts_mtx_init(&async->init.data.mtx, "async_init_mtx", NIL,
            ERTS_LOCK_FLAGS_CATEGORY_SCHEDULER);
	erts_cnd_init(&async->init.data.cnd);
	erts_atomic_init_nob(&async->init.data.id, 0);

	async->queue = (ErtsAlgndAsyncQ *) ptr;
	ptr += sizeof(ErtsAlgndAsyncQ)*erts_async_max_threads;

#if ERTS_USE_ASYNC_READY_Q

	qinit.live.queue = ERTS_THR_Q_LIVE_LONG;
	qinit.live.objects = ERTS_THR_Q_LIVE_SHORT;
	qinit.notify = erts_notify_check_async_ready_queue;

	async->ready_queue = (ErtsAlgndAsyncReadyQ *) ptr;
	ptr += sizeof(ErtsAlgndAsyncReadyQ)*erts_no_schedulers;

	for (i = 1; i <= erts_no_schedulers; i++) {
	    ErtsAsyncReadyQ *arq = async_ready_q(i);
#if ERTS_USE_ASYNC_READY_ENQ_MTX
            erts_mtx_init(&arq->x.data.enq_mtx, "async_enq_mtx", make_small(i),
                ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_SCHEDULER);
#endif
	    erts_thr_q_finalize_dequeue_state_init(&arq->fin_deq);
	    qinit.arg = (void *) (SWord) i;
	    erts_thr_q_initialize(&arq->thr_q, &qinit);
	}

#endif

	/* Create async threads... */

	thr_opts.detached = 0;
	thr_opts.suggested_stack_size
	    = erts_async_thread_suggested_stack_size;

	thr_opts.name = thr_name;

	for (i = 0; i < erts_async_max_threads; i++) {
	    ErtsAsyncQ *aq = async_q(i);

            erts_snprintf(thr_opts.name, 16, "async_%d", i+1);

	    erts_thr_create(&aq->thr_id, async_main, (void*) aq, &thr_opts);
	}

	/* Wait for async threads to initialize... */

	erts_mtx_lock(&async->init.data.mtx);
	while (async->init.data.no_initialized != erts_async_max_threads)
	    erts_cnd_wait(&async->init.data.cnd, &async->init.data.mtx);
	erts_mtx_unlock(&async->init.data.mtx);

	erts_mtx_destroy(&async->init.data.mtx);
	erts_cnd_destroy(&async->init.data.cnd);

    }
}
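The tail of the function above is a standard startup handshake: the creator loops on a condition variable until no_initialized reaches erts_async_max_threads, while each async thread (in async_main, not shown here) presumably bumps the counter under the mutex and signals. A minimal sketch of the same handshake, assuming plain pthreads instead of the erts_mtx_*/erts_cnd_* wrappers; NWORKERS and worker() are placeholders, not ERTS identifiers.

#include <pthread.h>
#include <stddef.h>

#define NWORKERS 4

static pthread_mutex_t init_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  init_cnd = PTHREAD_COND_INITIALIZER;
static int initialized = 0;

static void *worker(void *arg)
{
    (void) arg;
    /* ... per-thread setup ... */
    pthread_mutex_lock(&init_mtx);
    if (++initialized == NWORKERS)
	pthread_cond_signal(&init_cnd);   /* last worker wakes the creator */
    pthread_mutex_unlock(&init_mtx);
    /* ... main loop ... */
    return NULL;
}

int start_workers(void)
{
    pthread_t tid[NWORKERS];
    int i;

    for (i = 0; i < NWORKERS; i++)
	pthread_create(&tid[i], NULL, worker, NULL);

    pthread_mutex_lock(&init_mtx);
    while (initialized != NWORKERS)       /* guards against spurious wakeups */
	pthread_cond_wait(&init_cnd, &init_mtx);
    pthread_mutex_unlock(&init_mtx);
    return 0;
}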