Example #1
/* internal function */
void update_progress_thread (struct fi_bgq_progress * progress) {


	struct l2atomic_fifo_consumer * consumer = &progress->consumer;
	uint64_t value_rsh3b = 0;

	/* Check if another endpoint should be managed by this progress thread
	 */
	if (l2atomic_fifo_consume(consumer, &value_rsh3b) == 0) {
		struct fi_bgq_ep *bgq_ep = (struct fi_bgq_ep *)(value_rsh3b << 3);

		assert(L2_AtomicLoad(&bgq_ep->async.enabled) != 0);
		assert(L2_AtomicLoad(&bgq_ep->async.active) == 0);

		progress->all_ep[(progress->all_ep_count)++] = bgq_ep;

		if (bgq_ep->rx.caps & FI_TAGGED) {
			progress->tag_ep[(progress->tag_ep_count)++] = bgq_ep;
		}
		if (bgq_ep->rx.caps & FI_MSG) {
			progress->msg_ep[(progress->msg_ep_count)++] = bgq_ep;
		}
		L2_AtomicStore(&bgq_ep->async.active, 1);
	}

	/*
	 * Advance the control code path for each endpoint once, and check
	 * whether async progress has been disabled on any endpoint
	 */
	unsigned i = 0;
	while (i < progress->all_ep_count) {

		struct fi_bgq_ep *bgq_ep = progress->all_ep[i];
		poll_cfifo(bgq_ep, 0);

		if (L2_AtomicLoad(&bgq_ep->async.enabled) == 0) {
			L2_AtomicStore(&bgq_ep->async.active, 0);

			if (bgq_ep->rx.caps & FI_MSG) {
				unsigned n = 0;
				while (progress->msg_ep[n] != bgq_ep) ++n;
				progress->msg_ep[n] = progress->msg_ep[--(progress->msg_ep_count)];
			}

			if (bgq_ep->rx.caps & FI_TAGGED) {
				unsigned n = 0;
				while (progress->tag_ep[n] != bgq_ep) ++n;
				progress->tag_ep[n] = progress->tag_ep[--(progress->tag_ep_count)];
			}

			progress->all_ep[i] = progress->all_ep[--(progress->all_ep_count)];
		} else {
			++i;
		}
	}

	return;
}
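The value_rsh3b variable above (and in Example #5 below) is a pointer packed into a fifo payload: an 8-byte-aligned address always has its low three bits clear, so it survives a right-shift by 3 on the producer side and a left-shift by 3 on the consumer side (the `value_rsh3b << 3` cast above). A self-contained sketch of that round trip, with illustrative names not taken from the provider:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Any 8-byte-aligned object address has its low 3 bits clear... */
    uint64_t object = 0;
    assert(((uint64_t)&object & 0x7) == 0);

    /* ...so it can be packed into a fifo payload by shifting right 3
     * (what the producer stores as "value_rsh3b")... */
    uint64_t value_rsh3b = ((uint64_t)&object) >> 3;

    /* ...and recovered on the consumer side by shifting left 3. */
    uint64_t * p = (uint64_t *)(value_rsh3b << 3);

    assert(p == &object);
    printf("pointer %p round-tripped through a shifted payload\n", (void *)p);
    return 0;
}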
Example #2
/*!
 * \brief Initializes speculation registers before the start of a job.
 */
int Speculation_Init()
{
    int slice;
    L2C_SPECID_t specid;
    
    if(TI_isDD1() || ((GetPersonality()->Kernel_Config.NodeConfig & PERS_ENABLE_DD1_Workarounds) != 0))
    {
        /* DD1 hardware, or DD1 workarounds forced on: leave the
         * speculation domain registers at their current settings. */
    }
    else
    {
        SPEC_SetNumberOfDomains(1);
        SPEC_SetPrivMap(
            L2C_PRIVMAP_DISABLEWRITEFNC(L2C_PRIVMAP_FUNC_NUMDOM)  | 
            L2C_PRIVMAP_DISABLEWRITEFNC(L2C_PRIVMAP_FUNC_PRIVMAP) 
            );
        ppc_msync();

        for(specid=0; specid<128; specid++)
        {
            SPEC_TryChangeState_priv(specid, L2C_IDSTATE_PRED_SPEC | L2C_IDSTATE_INVAL);  
            SPEC_SetConflict_priv(specid, 0);
        }
        ppc_msync();
    }
    App_GetEnvValue("BG_SIMPLEROLLBACK", &SIMPLE_ROLLBACK);
    L2_AtomicStore(&SpecDomainsAllocated, 0);
    domainsConfigured = 0;
    
    // Restore each slice's L2 scrub rate to the captured default
    
    for(slice=0; slice<L2_DCR_num; slice++)
    {
        // Set the L2 scrub rate
        uint64_t l2_dcr_refctrl = DCRReadPriv(L2_DCR(slice, REFCTRL));
        if(!default_l2_first_init)   // capture the hardware default once, on first init
            default_l2_scrub_rate[slice] = L2_DCR__REFCTRL__SCB_INTERVAL_get(l2_dcr_refctrl);
        L2_DCR__REFCTRL__SCB_INTERVAL_insert(l2_dcr_refctrl, default_l2_scrub_rate[slice]);
        DCRWritePriv(L2_DCR(slice, REFCTRL), l2_dcr_refctrl);
    }
    default_l2_first_init = 1;
    
    Speculation_ExitJailMode();
    return 0;
}
Example #3
/* Headers this example needs; the SPI/HWI paths are the conventional
 * BG/Q locations (assumed here, not shown in the original excerpt). */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <omp.h>
#include <hwi/include/bqc/A2_inlines.h>  /* GetTimeBase */
#include <spi/include/l2/atomic.h>       /* L2_AtomicStore, L2_AtomicLoad, L2_AtomicLoadIncrement */
#include <spi/include/kernel/memory.h>   /* Kernel_L2AtomicsAllocate */

int main(int argc, char * argv[])
{
    const int n = 1024;
    int count = (argc>1) ? atoi(argv[1]) : 1000000;

    /* this "activates" the L2 atomic data structures */
    uint64_t * l2_counters = NULL;
    int rc = posix_memalign((void**)&l2_counters, 2*1024*1024, n * sizeof(uint64_t) ); 
    assert(rc==0 && l2_counters != NULL);
    uint64_t rc64 = Kernel_L2AtomicsAllocate(l2_counters, n * sizeof(uint64_t) );
    assert(rc64==0);

    for (int i=0; i<n; i++) {
        L2_AtomicStore(&(l2_counters[i]), 0);
    }

    #pragma omp parallel shared(l2_counters)
    {
        int me = omp_get_thread_num();
        int jmax = n/omp_get_num_threads();
        for (int j=0; j<jmax; j++) {
            #pragma omp barrier
            uint64_t t0 = GetTimeBase();
            for (int i=0; i<count; i++) {
                L2_AtomicLoadIncrement(&(l2_counters[j*me]));
            }
            #pragma omp barrier
            uint64_t t1 = GetTimeBase();
            printf("threads = %d, stride = %d, ticks = %llu \n",
                   omp_get_num_threads(), j, t1-t0);
            fflush(stdout);
        }
    }

    for (int i=0; i<n; i++) {
        uint64_t rval = L2_AtomicLoad(&(l2_counters[i]));
        printf("l2_counter[%d]=%llu\n", i, rval);
    }

    return 0;   
}
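This example and the next both rely on the same setup sequence: reserve 8-byte counters in ordinary memory, register that memory for L2 atomic use with Kernel_L2AtomicsAllocate, and only then touch it with the L2_Atomic* operations. A minimal sketch of that sequence, distilled from the two examples (header paths are the conventional BG/Q SPI locations, an assumption of this sketch):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <spi/include/l2/atomic.h>      /* L2_AtomicStore */
#include <spi/include/kernel/memory.h>  /* Kernel_L2AtomicsAllocate */

uint64_t * make_l2_counters(size_t n)
{
    uint64_t * c = NULL;

    /* Align the region to a 2 MB boundary, as Example #3 does. */
    int rc = posix_memalign((void **)&c, 2*1024*1024, n * sizeof(uint64_t));
    assert(rc == 0 && c != NULL);

    /* The kernel must be told this region will be used with L2 atomics
     * before any L2_Atomic* operation touches it. */
    uint64_t rc64 = Kernel_L2AtomicsAllocate(c, n * sizeof(uint64_t));
    assert(rc64 == 0);

    /* Give every counter a known starting value. */
    for (size_t i = 0; i < n; i++)
        L2_AtomicStore(&c[i], 0);

    return c;
}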
Example #4
/* Note: num_threads, counter, slowcounter, pool, debug, and the
 * fight/slowfight thread functions are file-scope definitions from the
 * original source that are not shown in this excerpt. */
int main(int argc, char * argv[])
{
    num_threads = (argc>1) ? atoi(argv[1]) : 1;
    printf("L2 counter test using %d threads \n", num_threads );

    //printf("sizeof(BGQ_Atomic64_t) = %zu \n", sizeof(BGQ_Atomic64_t) );

    /* this "activates" the L2 atomic data structures */
    Kernel_L2AtomicsAllocate(&counter, sizeof(BGQ_Atomic64_t) );

    L2_AtomicStore(&(counter.atom), 0);
    out64_sync(&(slowcounter.atom), 0); /* zero the plain load/store counter read back below */

    pool = (pthread_t *) malloc( num_threads * sizeof(pthread_t) );
    assert(pool!=NULL);

    /**************************************************/

    for (int i=0; i<num_threads; i++) {
        int rc = pthread_create(&(pool[i]), NULL, &fight, NULL);
        if (rc!=0) {
            printf("pthread error \n");
            fflush(stdout);
            sleep(1);
        }
        assert(rc==0);
    }

    if (debug) {
        printf("threads created \n");
        fflush(stdout);
    }

    for (int i=0; i<num_threads; i++) {
        void * junk;
        int rc = pthread_join(pool[i], &junk);
        if (rc!=0) {
            printf("pthread error \n");
            fflush(stdout);
            sleep(1);
        }
        assert(rc==0);
    }
    
    if (debug) {
        printf("threads joined \n");
        fflush(stdout);
    }

    uint64_t rval = L2_AtomicLoad(&(counter.atom));
    printf("final value of counter is %llu \n", rval);

    /**************************************************/

    for (int i=0; i<num_threads; i++) {
        int rc = pthread_create(&(pool[i]), NULL, &slowfight, NULL);
        if (rc!=0) {
            printf("pthread error \n");
            fflush(stdout);
            sleep(1);
        }
        assert(rc==0);
    }

    printf("threads created \n");
    fflush(stdout);

    for (int i=0; i<num_threads; i++) {
        void * junk;
        int rc = pthread_join(pool[i], &junk);
        if (rc!=0) {
            printf("pthread error \n");
            fflush(stdout);
            sleep(1);
        }
        assert(rc==0);
    }
    
    printf("threads joined \n");
    fflush(stdout);

    rval = in64(&(slowcounter.atom));
    printf("final value of slowcounter is %llu \n", rval);

    /**************************************************/

    free(pool);
 
    return 0;   
}
Example #5
/* internal function */
void * progress_fn (void *arg) {

	struct fi_bgq_progress * progress = (struct fi_bgq_progress *)arg;

	struct fi_bgq_ep ** tag_ep = progress->tag_ep;
	struct fi_bgq_ep ** msg_ep = progress->msg_ep;
	struct fi_bgq_ep ** all_ep = progress->all_ep;

	struct l2atomic_fifo_consumer * consumer = &progress->consumer;
	struct l2atomic_fifo_producer * producer = &progress->producer;
	uint64_t value_rsh3b = 0;

	const unsigned tag_loop = 16;
	const unsigned msg_loop = 4;

	unsigned m, j, i;

	/* first, enable the progress thread control fifo by setting the
	 * HEAD and TAIL to zero and setting the BOUNDS to FIFO_SIZE-1
	 */
	l2atomic_fifo_enable(consumer, producer);


	progress->active = 1;
	fi_bgq_msync(FI_BGQ_MSYNC_TYPE_WO);

	while (progress->enabled) {

		/* Advance performance critical code path for each endpoint multiple times */

		const unsigned tag_ep_count = progress->tag_ep_count;
		const unsigned msg_ep_count = progress->msg_ep_count;

		for (m=0; m<msg_loop; ++m) {
			for (j=0; j<tag_loop; ++j) {
				for (i=0; i<tag_ep_count; ++i) {
					poll_mfifo(tag_ep[i], 0, 0, 0);
					poll_rfifo(tag_ep[i], 0);
				}
			}
			for (i=0; i<msg_ep_count; ++i) {
				poll_noinline(msg_ep[i], 1, 0);
			}
		}

		update_progress_thread(progress);		
	}

	/*
	 * This progress thread has been disabled. Before marking the thread
	 * inactive:
	 * 1. Disable the progress thread control fifo by setting the BOUNDS to zero
	 * 2. Drain the progress thread control fifo of endpoints
	 * 3. Attempt to transfer any endpoints managed by this progress thread
	 *    to another progress thread
	 * 4. If no progress thread accepts an endpoint, disable and deactivate it
	 */
	l2atomic_fifo_disable(consumer, producer);

	while (0 == l2atomic_fifo_drain(consumer, producer, &value_rsh3b)) {
		progress->all_ep[(progress->all_ep_count)++] = (struct fi_bgq_ep *)(value_rsh3b << 3);
	}

	struct fi_bgq_domain *bgq_domain = progress->bgq_domain;
	const unsigned max_threads = bgq_domain->progress.max_threads;

	for (i=0; i<progress->all_ep_count; ++i) {

		value_rsh3b = ((uint64_t)(all_ep[i])) >> 3;

		unsigned p;
		for (p=0; p<max_threads; ++p) {
			if (0 == l2atomic_fifo_produce(&bgq_domain->progress.thread[p].producer, value_rsh3b)) {
				all_ep[i] = NULL;
				break;
			}
		}

		if (all_ep[i] != NULL) {
			/* No active progress threads; disable async progress on this endpoint */
			L2_AtomicStore(&all_ep[i]->async.enabled, 0);
			L2_AtomicStore(&all_ep[i]->async.active, 0);
			all_ep[i] = NULL;
			/* TODO - is this an error or something? */
		}
	}

	/* Deactivate this progress thread and exit */
	progress->active = 0;
	fi_bgq_msync(FI_BGQ_MSYNC_TYPE_WO);

	return NULL;
}
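The comments in this example describe the control-fifo protocol: enabling sets HEAD and TAIL to zero and BOUNDS to FIFO_SIZE-1, while disabling sets BOUNDS to zero, so new produce attempts are refused but the consumer can still drain the backlog. A toy model of those semantics in ordinary memory (no L2 atomics; every name here is illustrative, not the real l2atomic_fifo layout):

#include <stdint.h>
#include <stdio.h>

#define FIFO_SIZE 128

struct toy_fifo { uint64_t head, tail, bounds; uint64_t slot[FIFO_SIZE]; };

/* enable: head = tail = 0, bounds = FIFO_SIZE-1, so producers may enqueue */
static void toy_enable(struct toy_fifo *f) { f->head = f->tail = 0; f->bounds = FIFO_SIZE - 1; }

/* disable: bounds = 0, so every subsequent produce attempt fails */
static void toy_disable(struct toy_fifo *f) { f->bounds = 0; }

/* produce fails (nonzero) once the fifo holds "bounds" entries */
static int toy_produce(struct toy_fifo *f, uint64_t v) {
    if (f->tail - f->head >= f->bounds) return -1;
    f->slot[f->tail++ % FIFO_SIZE] = v;
    return 0;
}

/* consume fails (nonzero) only when the fifo is empty */
static int toy_consume(struct toy_fifo *f, uint64_t *v) {
    if (f->head == f->tail) return -1;
    *v = f->slot[f->head++ % FIFO_SIZE];
    return 0;
}

int main(void) {
    struct toy_fifo f;
    toy_enable(&f);
    toy_produce(&f, 42);
    toy_disable(&f);                      /* new work is now refused... */
    if (toy_produce(&f, 7) != 0)
        printf("produce refused after disable\n");
    uint64_t v;
    while (toy_consume(&f, &v) == 0)      /* ...but the backlog still drains */
        printf("drained %llu\n", (unsigned long long)v);
    return 0;
}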