Пример #1
0
/*
 * Benchmark entry point.
 *
 * Command line: <number of cores> <array size> <number of loops>
 *
 * Spans the domain onto cores 1 .. num_cores-1, allocates the global array
 * and then times performAveraging() for the shared and the local variant
 * over a fixed sweep of loop counts.
 *
 * NOTE(review): the loop count given in argv[3] is validated but then
 * overwritten by the built-in sweep below, so it currently has no effect on
 * the measurements. Kept as is to preserve behaviour; confirm whether the
 * argument should be honoured.
 */
int main(int argc, char* argv[]) {
    if (argc != 4) {
        printf("Incorrect number of arguments, must specify number of cores, array size, number of loops\n");
        exit(-1);
    }

    num_cores = atol(argv[1]);
    if (num_cores <= 0) {
        printf("Invalid number of cores\n");
        exit(-1);
    }

    arraySize = atol(argv[2]);
    if (arraySize <= 0) {
        printf("Invalid array size\n");
        exit(-1);
    }

    loops = atol(argv[3]);
    if (loops < 0) {
        printf("Invalid number of loops\n");
        exit(-1);
    }

    srand(time(NULL));

    debug_printf("Spanning cores\n");

    errval_t err;
    for (int core = 1; core < num_cores; core++) {
        err = domain_new_dispatcher(core, span_cb, NULL);

        if (err_is_fail(err)) {
            // NOTE(review): a failed span is only logged; if all_spanned
            // depends on every core reporting in, the wait below may never
            // finish in that case.
            DEBUG_ERR(err, "failed span %d", core);
        }
    }

    // Nothing to wait for when running on a single core.
    while (num_cores != 1 && !all_spanned) {
        thread_yield();
    }
    debug_printf("Spanning complete!\n");

    globalArray = malloc(arraySize * sizeof(float));
    if (globalArray == NULL) {
        // Without this check the benchmark would dereference NULL later on.
        printf("Failed to allocate array\n");
        exit(-1);
    }

    // Loop counts swept by the benchmark.
    unsigned long loops_[] = {1,100,10000,1000000,100000000,1000000000};
    for (size_t i = 0; i < sizeof loops_ / sizeof loops_[0]; i++) {
        loops = loops_[i];
        float shared = performAveraging(averageSharedBF);
        float local = performAveraging(averageLocalBF);
        printf("SHARED BF (%lu): %fs\n", loops, shared);
        printf("LOCAL BF (%lu): %fs\n", loops, local);
        printf("\n");
    }

    free(globalArray);

    return 0;
}
Пример #2
0
/*
 * Span test entry point.
 *
 * Command line: <number of additional threads>
 *
 * Creates one dispatcher per additional thread on the cores following the
 * current one, measures how many cycles it takes until all of them have
 * reported in, then starts `remote` on each of those cores and enters the
 * message handler loop.
 */
int main(int argc, char *argv[])
{
    errval_t err;
    if (argc != 2) {
        printf("Usage %s: <Num additional threads>\n", argv[0]);
        exit(-1);
    }

    // Reject non-numeric or negative counts instead of silently running
    // with a nonsensical number of cores.
    char *end = NULL;
    long extra = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || extra < 0) {
        printf("Usage %s: <Num additional threads>\n", argv[0]);
        exit(-1);
    }

    int cores = (int)extra + 1;

    NPROC = cores -1;
    BARINIT(barrier, NPROC);

    uint64_t before = rdtsc();
    times[0] = before;

    trace_event(TRACE_SUBSYS_BENCH, TRACE_EVENT_BENCH_PCBENCH, 1);
    for (int i = 1; i < cores; i++) {
        err = domain_new_dispatcher(i + disp_get_core_id(),
                                    domain_spanned_callback,
                                    (void*)(uintptr_t)i);
        if (err_is_fail(err)) {
            USER_PANIC_ERR(err, "domain_new_dispatcher failed");
        }
    }

    // Wait until ndispatchers has reached the number of cores in use.
    while (ndispatchers < cores) {
        thread_yield();
    }
    uint64_t finish = rdtsc();

    trace_event(TRACE_SUBSYS_BENCH, TRACE_EVENT_BENCH_PCBENCH, 0);

    printf("spantest: Done in %"PRIu64" cycles\n", finish-before);

    for(int i = 1; i < cores; i++) {
        // Target the same cores the dispatchers were created on above;
        // using the bare index is only correct when running on core 0.
        err = domain_thread_create_on(i + disp_get_core_id(), remote, NULL);
        if (err_is_fail(err)) {
            // Checked unconditionally so the failure is not lost in
            // builds that compile assert() away.
            USER_PANIC_ERR(err, "domain_thread_create_on failed");
        }
    }

    messages_handler_loop();
    return 0;
}
Пример #3
0
/**
 * \brief initializes a thread on the given core
 *
 * Creates a dispatcher on the core, allocates and maps a frame holding the
 * two message buffers, exports the channel via bomp_accept(), starts the
 * message handler thread on the core and dispatches events on the default
 * waitset until thread->ctrl has been set.
 *
 * \param core          ID of the core on which to create the thread
 * \param stack_size    size of the stack of the thread to be created
 *                      (not used by this function)
 * \param thread        pointer to the thread struct to create
 *
 * \returns SYS_ERR_OK on SUCCESS
 *          errval on FAILURE
 */
errval_t bomp_thread_init(coreid_t core,
                          size_t stack_size,
                          struct bomp_thread *thread)
{
    errval_t err;

    BOMP_DEBUG_THREAD("Creating thread on core %"PRIuCOREID " \n", core);

    // Must start at zero: the wait loop below reads it before the callback
    // (which receives &done as its argument) has necessarily run.
    uint32_t done = 0;

    err = domain_new_dispatcher(core, bomp_thread_init_done, &done);
    if (err_is_fail(err)) {
        BOMP_ERROR("creating new dispatcher on core %" PRIuCOREID " failed\n",
                   core);
        return err;
    }

    while(!done) {
        thread_yield();
    }

    BOMP_DEBUG_THREAD("dispatcher ready. allocating memory for msg channel\n");

    // One frame holds both directions: the first BOMP_CHANNEL_SIZE bytes are
    // the input buffer, the following BOMP_CHANNEL_SIZE bytes the output.
    size_t msg_frame_size;
    err = frame_alloc(&thread->msgframe, 2 * BOMP_CHANNEL_SIZE, &msg_frame_size);
    if (err_is_fail(err)) {
        return err;
    }

    err = vspace_map_one_frame(&thread->msgbuf, msg_frame_size, thread->msgframe,
                               NULL, NULL);
    if (err_is_fail(err)) {
        // NOTE(review): the frame allocated above is not released here.
        return err;
    }

    struct bomp_frameinfo fi = {
        .sendbase = (lpaddr_t)thread->msgbuf + BOMP_CHANNEL_SIZE,
        .inbuf = thread->msgbuf,
        .inbufsize = BOMP_CHANNEL_SIZE,
        .outbuf = ((uint8_t *) thread->msgbuf) + BOMP_CHANNEL_SIZE,
        .outbufsize = BOMP_CHANNEL_SIZE
    };

    BOMP_DEBUG_THREAD("creating channel on %p\n", thread->msgbuf);

    err = bomp_accept(&fi, thread, bomp_thread_accept_cb,
                      get_default_waitset(), IDC_EXPORT_FLAGS_DEFAULT);

    if (err_is_fail(err)) {
        // XXX: error handling - frame and mapping are not cleaned up
        return err;
    }

    BOMP_DEBUG_THREAD("creating thread on core %" PRIuCOREID "\n", core);
    err = domain_thread_create_on(core, bomp_thread_msg_handler, thread->msgbuf);
    if (err_is_fail(err)) {
        // XXX: error handling - frame, mapping and export are not cleaned up
        return err;
    }

    // Process events until thread->ctrl becomes non-NULL (presumably set by
    // bomp_thread_accept_cb once the remote side has connected - confirm).
    while (thread->ctrl == NULL) {
        err = event_dispatch(get_default_waitset());
        if (err_is_fail(err)) {
            USER_PANIC_ERR(err, "event dispatch\n");
        }
    }

    BOMP_DEBUG_THREAD("thread on core %" PRIuCOREID " connected \n", core);

    return thread->thread_err;
}

/**
 * \brief queues an execute message for the given thread and waits until the
 *        message has been sent
 *
 * \param thread    thread whose transmit queue and ICV are used
 * \param fn        function to be carried in the message
 * \param arg       argument to be carried in the message
 * \param tid       thread id to be carried in the message
 *
 * \returns SYS_ERR_OK once the message has been marked as sent
 *          LIB_ERR_MALLOC_FAIL if no message state could be allocated
 */
errval_t bomp_thread_exec(struct bomp_thread *thread,
                          bomp_thread_fn_t fn, void *arg, uint32_t tid)
{
    debug_printf("bomp_thread_exec(%p, %p, %p, %u) %p\n", thread, fn, arg, tid, thread->icvt);
    struct txq_msg_st *msg_st = txq_msg_st_alloc(&thread->txq);
    if (msg_st == NULL) {
        return LIB_ERR_MALLOC_FAIL;
    }

    // Set to non-zero through bomp_msg_st->message_sent; this local must
    // stay alive until that happens, hence the blocking loop below.
    uint32_t msg_sent = 0;

    msg_st->send = execute__tx;
    msg_st->cleanup = (txq_cleanup_fn_t)txq_msg_sent_cb;

    struct bomp_msg_st *bomp_msg_st = (struct bomp_msg_st *)msg_st;

    // Convert pointers via uintptr_t so that they are zero-extended rather
    // than sign-extended when pointers are narrower than 64 bits.
    bomp_msg_st->args.exec.arg = (uint64_t)(uintptr_t)arg;
    bomp_msg_st->args.exec.fn = (uint64_t)(uintptr_t)fn;
    bomp_msg_st->args.exec.tid = tid;
    bomp_msg_st->args.exec.icv = (uint64_t)(uintptr_t)thread->icvt;
    bomp_msg_st->message_sent = &msg_sent;

    txq_send(msg_st);

    // NOTE(review): the result of event_dispatch() is not checked here.
    // Returning early on failure would leave message_sent pointing at a
    // dead stack slot, so the loop is kept as is.
    while(msg_sent == 0) {
        event_dispatch(get_default_waitset());
    }

    return SYS_ERR_OK;
}
Пример #4
0
/** Initialise the tweed library - must be called before any other 
 *  tweed calls
 *
 *  Allocates the worker descriptors and their task stacks, creates one
 *  dispatcher per additional worker on the cores following the current one,
 *  starts a worker thread on each of them and finally runs the main worker
 *  on the calling dispatcher.
 *
 *  \param workers_requested  total number of workers, must be at least 1
 *  \param main_func          function handed to main_worker()
 *  \param main_args          argument handed to main_worker()
 *
 *  \returns the result of main_worker(), or -1 if fewer than one worker was
 *           requested or a memory allocation failed
 */
int init_tweed(int workers_requested,
              int(*main_func)(struct generic_task_desc *,void*), 
              void* main_args) {
    int i, err;
    
    if (workers_requested < 1) {
        fprintf(stderr, 
                "Error initalizing tweed - requested less than 1 worker\n");
        return -1;
    }
    
    num_workers = workers_requested;
    workers = malloc((size_t)num_workers * sizeof(struct worker_desc));
    if (workers == NULL) {
        fprintf(stderr,
                "Error initalizing tweed - out of memory for workers\n");
        return -1;
    }

    // alloc task stack space for all workers, leave space for alignment
    task_stack_space = malloc((size_t)TWEED_TASK_STACK_SIZE *
                              ((size_t)num_workers + 1));
    if (task_stack_space == NULL) {
        fprintf(stderr,
                "Error initalizing tweed - out of memory for task stacks\n");
        free(workers);
        workers = NULL;
        return -1;
    }

    // Round up to the next stack-size boundary; the extra stack allocated
    // above guarantees the aligned region still fits num_workers stacks.
    char * curr_stack_space =
        (char*)(((uintptr_t)task_stack_space + TWEED_TASK_STACK_SIZE)
                & ~(uintptr_t)TWEED_TASK_STACK_MASK);
 
    // Initialize worker data-structures
    for (i=0; i<num_workers; i++) {
        init_worker(i, curr_stack_space);
        curr_stack_space += TWEED_TASK_STACK_SIZE;
    }

    // create dispatchers on all other cores required for num_workers
    for (i=1; i<num_workers; i++) {
        err = domain_new_dispatcher(i + disp_get_core_id(), 
                                    domain_spanned_callback, 
                                    (void*)(uintptr_t)i);
        if (err_is_fail(err)) {
            // NOTE(review): only logged; the wait below may then never
            // reach num_workers.
            DEBUG_ERR(err, "domain_new_dispatcher failed");
            printf("%d failed\n", i);
        }
    }

    // wait for all dispatchers to come up
    while (num_dispatchers < num_workers) {
        messages_wait_and_handle_next();
    }
    num_dispatchers = 1;  // reset

    // start work stealing threads on newly created domains
    for (i = 1; i < num_workers; i++) {
        struct worker_args * args = malloc(sizeof *args);
        if (args == NULL) {
            // Workers started in earlier iterations are left running.
            fprintf(stderr,
                    "Error initalizing tweed - out of memory for worker args\n");
            return -1;
        }
        args->id = i;
        args->origin = disp_get_core_id();

        err = domain_thread_create_on(i + disp_get_core_id(), start_worker_thread, 
                                 args);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "Failed to run a function on remote core");
        }
    }

    // wait until num_dispatchers has reached num_workers again (presumably
    // each started worker thread increments it - confirm in
    // start_worker_thread)
    while (num_dispatchers < num_workers) {
        messages_wait_and_handle_next();
    }

    // now start the main worker on the current dispatcher
    return main_worker(0, main_func, main_args);
}
Пример #5
0
/*
 * Entry point of Multi-core Insense runtime.
 */
int main(int argc, char* argv[]) {
	PRINTFMC("Cache line size: %dB\n", cache_line_size());
	PRINTFMC("Main thread: %u\n", (unsigned) pthread_self());

	errval_t err;
	coreid_t mycore = disp_get_core_id();

	if (argc == 2) {
		num_to_span = atoi(argv[1]);
		if(num_to_span==0)
			all_spanned = true;		

		debug_printf("Spanning onto %d cores\n", num_to_span);
		for (int i = 1; i < num_to_span; i++) {
			err = domain_new_dispatcher(mycore + i, span_cb, NULL);
		    
			if (err_is_fail(err)) {
				DEBUG_ERR(err, "failed span %d", i);
			} 
		}
	} else {
		debug_printf("ERROR: Must specify number of cores to span\n");
		return EXIT_FAILURE;
	}

	posixcompat_pthread_set_placement_fn(rrPlacement);

	while (!all_spanned) {
		thread_yield();
	}

	my_mutex_init(&shared_heap_mutex);
#if HEAPS == HEAP_PRIVATE // Private heaps
	// Initialize mutex
	if (pthread_mutex_init(&thread_lock, NULL ) != 0) {
		PRINTF("Mutex initialization failed.\n");
		return -1;
	}
#endif

	mainThread = pthread_self(); // Note the ID of the main thread.

	// Create a list for storing references to p-threads
	threadList = listCreate();

	// Create map used to store memory locations of small heaps (using Thread safe list)
	SHList = listCreate();

	// Create map used to store memory locations what is allocated using malloc
	mallocList = listCreate();

// Start recording execution time
#if TIMING
	// CPU time
	uint64_t start, end;
	uint64_t tsc_per_ms = 0;
	sys_debug_get_tsc_per_ms(&tsc_per_ms);
	start = rdtsc();
#endif

	// Call primordial_main.
	primordial_main(NULL );

	// Join all p-threads
	if (threadList != NULL ) {
		listJoinThreads(threadList);
	}

// Stop recording execution time
#if TIMING
	end = rdtsc();
	
	uint64_t diff = (end - start) / tsc_per_ms;
	float elapsed = (diff / 1000) + ((diff % 1000) / 1000.0);

	printf("CPU:  %f seconds elapsed\n", elapsed);
#endif

	// Destroy lists and free memory
	listDestroy(threadList);
	listDestroy(SHList);
	listDestroy(mallocList);
#if HEAPS == HEAP_PRIVATE
	pthread_mutex_destroy(&thread_lock); 	// Destroy mutex lock used with pthreads
#endif
	return 0;
}