int main() { for (int i = 0; i < 9; ++i) { done[i] = 0; } print_cpuinfo(); inval_dcache(); core_count = print_processor_info(); //done = malloc(sizeof(unsigned int _SPM *)*core_count); com_spm_size = com_spm_test(); main_mem_size = main_mem_test(); int param = 0; printf("Creating corethreads..."); for(int i = 0; i < get_cpucnt(); i++) { if (i != NOC_MASTER) { corethread_t ct = (corethread_t) i; if(corethread_create(&ct,&slave_tester,(void*)param) != 0){ printf("Corethread %d not created\n",i); } } } puts("OK"); printf("Performing main mem load test..."); fflush(stdout); mem_load_test(); printf("OK\n"); noc_test_master(); int* ret; for (int i = 0; i < get_cpucnt(); ++i) { if (i != NOC_MASTER) { corethread_join((corethread_t)i,(void**)&ret); } } printf("Joined with other cores\n"); return 0; }
int main() { // clear communication areas // cannot use memset() for _SPM pointers! for(int i = 0; i < sizeof(struct msg_t); i++) { ((volatile _SPM char *)spm_in)[i] = 0; ((volatile _SPM char *)spm_out)[i] = 0; } int slave_param = 1; for(int i = 0; i < get_cpucnt(); i++) { if (i != NOC_MASTER) { corethread_t ct = (corethread_t) i; if(corethread_create(&ct,&slave,(void*)slave_param) != 0){ } } } master(); int* ret; for (int i = 0; i < get_cpucnt(); ++i) { if (i != NOC_MASTER) { corethread_join((corethread_t)i,(void**)&ret); } } }
int main() { printf("Hello CMP\n"); int core_id = 1; // The core number static int parameter = 1000; corethread_create(core_id, &work, (void *) ¶meter); blink(2000); // the folowing is not executed in this example int* res; corethread_join( core_id, (void *) &res ); return 0; }
/*
 * Starts writer_init on every core from 1 to get_cpucnt()-1 (each receives
 * its core number as argument), sets `flag`, runs reader() on this core and
 * then joins all writers, adding up their exit values as the retry count.
 *
 * Returns the value produced by reader().
 */
int main() {
  int cpucnt = get_cpucnt();
  printf("cpus:%d\n", cpucnt);

  for (int core = 1; core < cpucnt; core++) {
    /* The core number is passed by value inside the pointer argument. */
    corethread_create(core, &writer_init, (void *)core);
  }

  flag = 1;
  int sum = reader();

  int retries = 0;
  for (int core = 1; core < cpucnt; core++) {
    /*
     * corethread_join() stores a void* through its second argument, so it
     * gets a real void* object to write to. The integer exit value is then
     * recovered from that pointer.
     */
    void *exit_value = 0;
    corethread_join(core, &exit_value);
    retries += (int)(long)exit_value;
  }

  printf("Sum: %d Retries: %d\n", sum, retries);
  return sum;
}
int main() { corethread_t slave1 = 1; corethread_t slave2 = 2; corethread_t slave3 = 3; corethread_t slave4 = 4; corethread_t slave5 = 5; corethread_t slave6 = 6; corethread_t slave7 = 7; corethread_t slave8 = 8; if (!mp_chan_init(&m2s, get_cpuid(), slave4, BUFFER_SIZE, 2)) { abort(); } if (!mp_chan_init(&s2m, slave4, get_cpuid(), BUFFER_SIZE, 2)) { abort(); } if (!mp_communicator_init(&comm, 2, cores, 0)) { abort(); } if (!mp_communicator_init(&comm_world, sizeof(cores_world)/sizeof(cores_world[0]), cores_world, BUFFER_SIZE)) { abort(); } int* ret; /* set up the run */ //corethread_create(&slave,&loop,(void*)roundtrip_slave); /* run appropriate test */ // // TEST_LATENCY // puts("Latency (usecs)"); // corethread_create(&slave4,&loop,(void*)latency_slave); // loop(latency_master); // corethread_join(slave4,(void**)&ret); // // // TEST_ROUNDTRIP // puts("Roundtrip (Transactions/sec)"); // corethread_create(&slave4,&loop,(void*)roundtrip_slave); // loop(roundtrip_master); // corethread_join(slave4,(void**)&ret); // // // TEST_BANDWIDTH // puts("Bandwidth (KB/sec)"); // corethread_create(&slave4,&loop,(void*)bandwidth_slave); // loop(bandwidth_master); // corethread_join(slave4,(void**)&ret); // // TEST_BIBANDWIDTH // puts("Bibandwidth"); // corethread_create(&slave4,&loop,(void*)bibandwidth_slave); // loop(bibandwidth_master); // corethread_join(slave4,(void**)&ret); // // // TEST_REDUCE // puts("Reduce"); // corethread_create(&slave4,&loop,(void*)reduce_slave); // loop(reduce_master); // corethread_join(slave4,(void**)&ret); // // // TEST_ALLREDUCE // puts("Allreduce"); // corethread_create(&slave4,&loop,(void*)allreduce_slave); // loop(allreduce_master); // corethread_join(slave4,(void**)&ret); // // // TEST_ALLTOALL // puts("Alltoall"); // corethread_create(&slave4,&loop,(void*)alltoall_slave); // loop(alltoall_master); // corethread_join(slave4,(void**)&ret); ///////////////////////////////////////////////////////////////////////////// // TEST_BARRIER 
///////////////////////////////////////////////////////////////////////////// puts("Barrier (usecs)"); for(int i = 0; i < sizeof(cores_world)/sizeof(cores_world[0]); i++) { if (i != NOC_MASTER) { if(corethread_create((corethread_t*)&cores_world[i],&loop,(void*)barrier_slave) != 0){ printf("Corethread %d not created\n",i); } } } loop(barrier_master); //puts("Master finished"); for (int i = 0; i < sizeof(cores_world)/sizeof(cores_world[0]); ++i) { if (i != NOC_MASTER) { corethread_join((corethread_t)cores_world[i],(void**)&ret); //printf("Slave %d joined\n",i); } } // ///////////////////////////////////////////////////////////////////////////// // // TEST_BROADCAST // ///////////////////////////////////////////////////////////////////////////// // puts("Broadcast (KB/sec)"); // for(int i = 0; i < sizeof(cores_world)/sizeof(cores_world[0]); i++) { // if (i != NOC_MASTER) { // if(corethread_create((corethread_t*)&cores_world[i],&loop,(void*)broadcast_slave) != 0){ // printf("Corethread %d not created\n",i); // } // } // } // loop(broadcast_master); // //puts("Master finished"); // for (int i = 0; i < sizeof(cores_world)/sizeof(cores_world[0]); ++i) { // if (i != NOC_MASTER) { // corethread_join((corethread_t)cores_world[i],(void**)&ret); // //printf("Slave %d joined\n",i); // } // } exit(0); }
int main() { puts("Master"); corethread_t worker_1 = 1; // For now the core ID int worker_1_param = 1; corethread_create(&worker_1,&func_worker_1,(void*)&worker_1_param); // Create the queuing ports spd_t * chan = mp_create_sport(MP_CHAN_1_ID, SINK, MP_CHAN_1_MSG_SIZE); volatile unsigned long long _SPM * time_sample = mp_alloc(MP_CHAN_1_MSG_SIZE); if (chan == NULL || time_sample == NULL) { DEBUGF(chan); abort(); } // Initialize the communication channels int retval = mp_init_ports(); // TODO: check on retval puts("Initialized ports"); while(slave != 1) { ; } puts("Slave is ready"); unsigned long long min_time_diff = -1; unsigned long long max_time_diff = 0; unsigned long long accum_time_diff = 0; unsigned long long cnt_time_diff = 0; unsigned long long percent = 0; int done = 0; unsigned long long start = get_cpu_usecs(); while(!done) { int success = mp_read(chan,time_sample); unsigned long long time_diff = get_cpu_usecs() - (*time_sample); if (success == 0) { printf("No sample received\n"); } else if ((*time_sample) == 0) { printf("Received empty sample, newest: %u, sample size: %u\n",chan->newest,chan->sample_size); } else { if (time_diff > 2000 ) { // Time difference is larger than a micro second printf("Time sample: %llu\tdiff: %llu\n",*time_sample,time_diff); } cnt_time_diff++; if (time_diff < min_time_diff) { min_time_diff = time_diff; } if (time_diff > max_time_diff) { max_time_diff = time_diff; } accum_time_diff += time_diff; } if (start + percent < get_cpu_usecs()) { percent += RUNTIME/10; printf("+"); fflush(stdout); } if ( start + RUNTIME < get_cpu_usecs()) { done = 1; } } printf("\n"); printf("Status:\n\tMin time diff: %llu\n\tMax time diff: %llu\n\tAvg time diff: %llu\n", min_time_diff,max_time_diff,accum_time_diff/cnt_time_diff); int* res; corethread_join(worker_1,&res); return *res; }
int main() { puts("Master"); corethread_t worker_1 = 2; // For now the core ID int worker_1_param = 1; char send_data[] = "Hello World!, Sending messages is cool!"; char recv_data[40]; // Initialization of message passing buffers // mp_chan_init() return false if local and remote // addresses are not aligned to words if (!mp_chan_init(&chan1, get_cpuid(), worker_1, MP_CHAN_1_BUF_SIZE, MP_CHAN_1_NUM_BUF)) { abort(); } if (!mp_chan_init(&chan2, worker_1, get_cpuid(), MP_CHAN_2_BUF_SIZE, MP_CHAN_2_NUM_BUF)) { abort(); } puts("Initialized buffers"); if (!mp_communicator_init(&comm, 2, cores, 0)) { abort(); } puts("Initialized barrier"); corethread_create(&worker_1,&func_worker_1,(void*)&worker_1_param); int i = 0; // While there is still data to be sent while(i < sizeof(send_data)) { int chunk = 0; if ( sizeof(send_data)-i >= chan1.buf_size) { // If the remaining data is more than the size of the buffer chunk = chan1.buf_size; } else { // The remaining data all fits in a buffer chunk = sizeof(send_data)-i; } // Copy the chunk of data to the write buffer for (int j = 0; j < chunk; ++j) { *((volatile char _SPM *)chan1.write_buf+j) = send_data[i+j]; } // Send the chunk of data mp_send(&chan1); i += chunk; } puts("Messages sent"); mp_barrier(&comm); puts("Barrier reached"); mp_recv(&chan2); puts("Message recv"); // Copy the received data to the recv_data array for(int i = 0; i < sizeof(recv_data)-1; i++) { recv_data[i] = (*((volatile char _SPM *)chan2.read_buf+i)-worker_1_param); } // Acknowledge the received data mp_ack(&chan2); recv_data[39] = '\0'; puts(recv_data); int* res; corethread_join(worker_1,&res); return *res; }
int main() { corethread_t worker_1 = SLAVE_CORE; // For now the core ID corethread_create(&worker_1,&func_worker_1,(void*)&worker_1); puts("Corethread created"); unsigned short int local_phase = 0; min_time = ULONG_MAX; max_time = 0; accum_time = 0; cnt_time = 0; unsigned long long int start = 0; unsigned long long int stop = 0; spd_t * sport1 = mp_create_sport(CHAN_ID_ONE,SINK,SAMPLE_SIZE*sizeof(short)); spd_t * sport2 = mp_create_sport(CHAN_ID_TWO,SOURCE,SAMPLE_SIZE*sizeof(short)); if (sport1 == NULL || sport2 == NULL) { //exit(1); } volatile short _SPM * sample = mp_alloc(SAMPLE_SIZE*sizeof(short)); mp_init_ports(); done = 1; int balance = 0; for (int i = 0; i < SAMPLE_SIZE; ++i) { sample[i] = i; } for (int i = 0; i < ITERATIONS/2; ++i) { mp_write(sport2,sample); for (int i = 0; i < SAMPLE_SIZE; ++i) { sample[i] = i; } } for (int i = 0; i < ITERATIONS/2; ++i) { mp_write(sport2,sample); for (int i = 0; i < SAMPLE_SIZE; ++i) { sample[SAMPLE_SIZE-1-i] = i; } } for (int i = 0; i < ITERATIONS; ++i) { start = get_cpu_usecs(); int ret = mp_read(sport1,sample); stop = get_cpu_usecs(); if (ret == 0) { puts("No value written yet."); } else { unsigned long long int exe_time = stop - start; min_time = (exe_time < min_time) ? exe_time : min_time; max_time = (exe_time > max_time) ? 
exe_time : max_time; accum_time += exe_time; cnt_time++; if (sample[0] == 0) { balance++; for (int i = 0; i < SAMPLE_SIZE; ++i) { if(sample[i] != i) { printf("Error: sample[%i] = %i\n",i,sample[i]); break; } } } else if (sample[0] == SAMPLE_SIZE-1) { balance--; for (int i = 0; i < SAMPLE_SIZE; ++i) { if(sample[SAMPLE_SIZE-1-i] != i) { printf("Error: sample[%i] = %i\n",i,sample[i]); break; } } } else { printf("Wrong sample values sample[0] = %i\n",sample[0]); } } } printf("Local phase: %d\n",local_phase); inval_dcache(); int* res; corethread_join(worker_1,&res); printf("Balance: %i\n",balance); printf("Min time: %llu\tMax time: %llu\tAccumulated time: %llu\nCount time: %llu\tAverage time: %llu\n", min_time,max_time,accum_time,cnt_time,accum_time/cnt_time); puts("Corethread joined"); return *res; }
/*
 * NoC send benchmark.
 *
 * Starts `work` on core 1, creates a SOURCE queuing port and measures, with
 * the clock cycle timer, the distance between a timer read taken just before
 * mp_send() and the externally maintained `end_time`:
 *   - once for a single message (also prints `field`),
 *   - CNT times back to back,
 *   - CNT times with an extra per-iteration delay taken from data_spm[].
 * For both loops the minimum and maximum measured values are printed.
 *
 * `end_time`, `field` and `data_spm` are defined outside this function;
 * presumably the worker updates `end_time`/`field` when a message arrives -
 * TODO confirm against `work`.
 *
 * The order of the volatile timer/deadline accesses is part of the
 * measurement and must not be changed.
 */
void bench_noc() {
  // Pointer to the deadline device
  volatile _IODEV int *dead_ptr = (volatile _IODEV int *) PATMOS_IO_DEADLINE;
  // Measure execution time with the clock cycle timer
  volatile _IODEV int *timer_ptr = (volatile _IODEV int *) (PATMOS_IO_TIMER+4);

  printf("Hello NoC\n");
  printf("We use %d bytes buffers\n", BUF_SIZE);

  int core_id = 1; // The core number
  corethread_create(core_id, &work, NULL);

  int start, val;
  int data = 42;

  // create a channel
  qpd_t *channel = mp_create_qport(1, SOURCE, BUF_SIZE, NUM_BUF);
  // init
  mp_init_ports();

  // This first timer value is overwritten below before it is used.
  start = *timer_ptr;
  // write data into the send buffer
  *(volatile int _SPM *) channel->write_buf = data;
  // Start of the measured interval: taken immediately before the send.
  start = *timer_ptr;
  // send the buffer
  mp_send(channel, 0);
  printf("Data sent\n");
  printf("Returned data is: %d\n", field);
  printf("Took %d cycles\n", end_time - start - 1);

  // Running minimum/maximum of the measured values
  int min = 999999;
  int max = 0;

  printf("NoC in a loop:\n");
  for (int i=0; i<CNT; ++i) {
    start = *timer_ptr;
    *(volatile int _SPM *) channel->write_buf = i;
    start = *timer_ptr;
    mp_send(channel, 0);
    // Write then read the deadline device; per the original note this is
    // used as a delay before end_time is sampled.
    *dead_ptr = 10000; // some delay to see the result
    val = *dead_ptr;
    val = end_time - start - 1;
    // printf("%d ", val);
    if (min>val) min = val;
    if (max<val) max = val;
  }
  printf("\n");
  printf("Min: %d max: %d\n", min, max);

  // Reset the statistics for the second experiment
  min = 999999;
  max = 0;
  // presumably fills data_spm[] with the delays used below - TODO confirm
  do_delay_times();

  printf("NoC in a loop with random delay:\n");
  for (int i=0; i<CNT; ++i) {
    start = *timer_ptr;
    *(volatile int _SPM *) channel->write_buf = i;
    *dead_ptr = data_spm[i];
    val = *dead_ptr; // delay by a random value
    start = *timer_ptr;
    mp_send(channel, 0);
    *dead_ptr = 3000; // some delay to see the result
    val = *dead_ptr;
    // NOTE(review): unlike the first loop, no "- 1" is applied here - confirm
    // whether that is intentional.
    val = end_time - start;
    // printf("%d ", val);
    if (min>val) min = val;
    if (max<val) max = val;
  }
  printf("\n");
  printf("Min: %d max: %d\n", min, max);

  // not really as the worker runs forever
  int* res;
  corethread_join( core_id, (void *) &res );
}
// The main acts as producer int main() { int val = 0; end_flag = 0; for (int i=0; i<CONSUMERS; ++i) { result[i] = 0; started_consumer[i] = 0; finished_consumer[i] = 0; } started_producer = 0; started_fork = 0; finished_producer = 0; finished_fork = 0; printf("Producer/fork/n-consumers benchmark for the S4NOC paper:\n"); printf(" Delay: %d\n", DELAY); printf(" Number of cores: %d\n", get_cpucnt()); printf(" Total packets sent: %d\n", LEN); printf(" Buffer size: %d\n", BUF_LEN); printf("Runnning test:\n"); for (int k=0; k<CONSUMERS; ++k) { corethread_create(CONSUMER_CORE[k], &consumer, (void*) &CONSUMER_ID[k] ); *dead_ptr = 8000; val = *dead_ptr; while(started_consumer[k] == 0) {;} printf(" Consumer-%d is ready.\n", k+1); } corethread_create(FORK_CORE, &fork, NULL); while(started_fork == 0) {;} printf(" Fork is ready.\n"); corethread_create(PRODUCER_CORE, &producer, NULL); while(started_producer == 0) {;} printf(" Producer has started.\n [...]\n"); while(finished_producer == 0) {;} printf(" Producer has finished.\n"); while(finished_fork == 0) {;} printf(" Fork has finished.\n"); for (int i=0; i<CONSUMERS; ++i) { while(finished_consumer[i] == 0) {;} printf(" Consumer-%d has finished.\n", i+1); } /* for (int i=0; i<CONSUMERS; ++i) { printf(" %d %d \n", started_consumer[i], finished_consumer[i] ); } */ *dead_ptr = 8000000; val = *dead_ptr; for (int i=0; i<CONSUMERS; ++i) { printf("Results Consumer-%d: \n", i+1); printf(" %d valid pakets out of of %d received.\n", result[i], LEN); printf(" Reception time of %d cycles -> %g cycles per received packet.\n", time[i], 1. * time[i]/LEN); } // Join threads int *retval; end_flag = 1; corethread_join(PRODUCER_CORE, (void **)&retval); corethread_join(FORK_CORE, (void **)&retval); for (int i=0; i<CONSUMERS; ++i) { corethread_join(CONSUMER_CORE[i], (void **)&retval); } printf("End of program.\n"); return val; }