int main() { for (int i = 0; i < 9; ++i) { done[i] = 0; } print_cpuinfo(); inval_dcache(); core_count = print_processor_info(); //done = malloc(sizeof(unsigned int _SPM *)*core_count); com_spm_size = com_spm_test(); main_mem_size = main_mem_test(); int param = 0; printf("Creating corethreads..."); for(int i = 0; i < get_cpucnt(); i++) { if (i != NOC_MASTER) { corethread_t ct = (corethread_t) i; if(corethread_create(&ct,&slave_tester,(void*)param) != 0){ printf("Corethread %d not created\n",i); } } } puts("OK"); printf("Performing main mem load test..."); fflush(stdout); mem_load_test(); printf("OK\n"); noc_test_master(); int* ret; for (int i = 0; i < get_cpucnt(); ++i) { if (i != NOC_MASTER) { corethread_join((corethread_t)i,(void**)&ret); } } printf("Joined with other cores\n"); return 0; }
int main() { // clear communication areas // cannot use memset() for _SPM pointers! for(int i = 0; i < sizeof(struct msg_t); i++) { ((volatile _SPM char *)spm_in)[i] = 0; ((volatile _SPM char *)spm_out)[i] = 0; } int slave_param = 1; for(int i = 0; i < get_cpucnt(); i++) { if (i != NOC_MASTER) { corethread_t ct = (corethread_t) i; if(corethread_create(&ct,&slave,(void*)slave_param) != 0){ } } } master(); int* ret; for (int i = 0; i < get_cpucnt(); ++i) { if (i != NOC_MASTER) { corethread_join((corethread_t)i,(void**)&ret); } } }
int main() { volatile _SPM int *s4noc = (volatile _SPM int *) (S4NOC_ADDRESS); done = 0; result = 0; started = 0; printf("Number of cores: %d\n", get_cpucnt()); // Receiver for time slot 0 depends in schedule, which depends on number of cores int rcv = get_cpucnt() == 4 ? 3 : 7; corethread_create(rcv, &work, NULL); int credit = 0; while (!started) { ; } for (int i=0; i<LEN/BUF_LEN; ++i) { for (int j=0; j<BUF_LEN; ++j) { // wait for a credit while (!s4noc[RX_READY]) { ; } s4noc[IN_DATA]; // consume it // wait for TX FIFO ready // without it is 24 clock cycles, this costs another 8 cycles // In this case we do not really need it, as we know there will be a free slot // for each received credit /* while (!s4noc[TX_FREE]) { ; } */ s4noc[SEND_SLOT] = 1; } } printf("All tokens sent\n"); // now, after the print, we should be done if (done) { printf("%d sum in %d cycles, %g cycles per word\n", result, time, 1. * time/result); } else { printf("Not done\n"); } // feed more tokens to get the consumer finished while (!done) { s4noc[SEND_SLOT] = 0; } printf("%d out of %d received\n", result, LEN); }
int main() { _iodev_ptr_t sspm = (_iodev_ptr_t) PATMOS_IO_OWNSPM; ok = 1; owner = 0; // start with myself for (int i=1; i<get_cpucnt(); ++i) { corethread_create(i, &work, NULL); } // get first core working owner = 1; printf("Wait for finish\n"); while(owner != 0) ; int id = get_cpuid(); for (int i=0; i<4; ++i) { sspm[4*id + i] = id*0x100 + i; } int val; for (int i=0; i<4; ++i) { val = sspm[4*id + i]; if (id*0x100 + i != val) ok = 0; } // check one core's write data if (sspm[4] != 0x100) ok = 0; if (ok) { printf("Test ok\n"); } else { printf("Test failed\n"); } return 0; }
/*
 * Close a recursive lock.
 *
 * On a single-core configuration this is a no-op; otherwise the wrapped
 * plain lock is closed via __lock_close().
 *
 * Always returns 0.
 */
int __patmos_lock_close_recursive(_LOCK_RECURSIVE_T *lock) {
  const unsigned cores = get_cpucnt();

  if (cores <= 1) {
    return 0;
  }

  __lock_close(lock->lock);
  return 0;
}
/*
 * Acquire a plain lock.
 *
 * With a single core nothing is done. With several cores the caller
 * draws a ticket (one more than the largest ticket currently stored in
 * the lock) and then waits, core by core, until no other core holds a
 * smaller ticket, or an equal ticket with a smaller core ID. The
 * entering/number arrays and the (ticket, id) ordering have the shape of
 * Lamport's bakery algorithm.
 *
 * All lock fields are accessed through an _UNCACHED pointer.
 *
 * Always returns 0.
 */
int __patmos_lock_acquire(_LOCK_T *lock) {
  const unsigned cnt = get_cpucnt();
  if (cnt > 1) {
    const unsigned char id = get_cpuid();
    _UNCACHED _LOCK_T *ll = (_UNCACHED _LOCK_T *)lock;

    // Announce that this core is choosing a ticket, choose it, publish it.
    ll->entering[id] = 1;
    unsigned n = 1 + max(ll);
    ll->number[id] = n;
    ll->entering[id] = 0;

    for (unsigned j = 0; j < cnt; j++) {
      // Wait while core j is in the middle of choosing its ticket.
      while (ll->entering[j]) {
        /* busy wait */
      }
      // Wait while core j holds a ticket (m != 0) that takes priority
      // over ours: a smaller number, or the same number and a smaller ID.
      unsigned m = ll->number[j];
      while ((m != 0) &&
             ((m < n) || ((m == n) && (j < id)))) {
        /* busy wait, only update m */
        m = ll->number[j];
      }
    }

    // invalidate data cache to establish cache coherence
    inval_dcache();
  }
  return 0;
}
int main() { unsigned i; int id = get_cpuid(); int cnt = get_cpucnt(); for (i=0; i<MAX; ++i) data[i] = '#'; for (i=1; i<cnt; ++i) { int core_id = i; // The core number int parameter = 1; // dummy corethread_create(core_id, &work, (void *) ¶meter); } data[id] = id+'0'; for (i=0; i<MAX; ++i) UART_DATA = '.'; // This is a "normal" multicore example where main is executed only // on core 0 for (i=0; i<MAX; ++i) { while ((UART_STATUS & 0x01) == 0); UART_DATA = data[i]; } for(;;); }
/*
 * Return the largest value in ll->number[0 .. get_cpucnt()-1],
 * or 0 when every entry is 0.
 */
static unsigned max(_UNCACHED _LOCK_T *ll) {
  const unsigned cores = get_cpucnt();
  unsigned largest = 0;

  for (unsigned core = 0; core < cores; core++) {
    // Each entry is read exactly once through the uncached pointer.
    unsigned ticket = ll->number[core];
    if (ticket > largest) {
      largest = ticket;
    }
  }

  return largest;
}
/*
 * Release a plain lock.
 *
 * On a single-core configuration this is a no-op; otherwise the calling
 * core's entry in the lock's number[] array is reset to 0 through an
 * uncached pointer.
 *
 * Always returns 0.
 */
int __patmos_lock_release(_LOCK_T *lock) {
  const unsigned cores = get_cpucnt();

  if (cores <= 1) {
    return 0;
  }

  const unsigned char self = get_cpuid();
  _UNCACHED _LOCK_T *shared = (_UNCACHED _LOCK_T *)lock;

  // exit section: give up this core's ticket
  shared->number[self] = 0;

  return 0;
}
/*
 * Initialise a recursive lock.
 *
 * On a single-core configuration this is a no-op; otherwise the wrapped
 * plain lock is initialised and the recursion state is reset to
 * "no owner" (-1) with depth 0, written through an uncached pointer.
 *
 * Always returns 0.
 */
int __patmos_lock_init_recursive(_LOCK_RECURSIVE_T *lock) {
  const unsigned cores = get_cpucnt();

  if (cores <= 1) {
    return 0;
  }

  __lock_init(lock->lock);

  _UNCACHED _LOCK_RECURSIVE_T *shared = (_UNCACHED _LOCK_RECURSIVE_T *)lock;
  shared->owner = -1;
  shared->depth = 0;

  return 0;
}
int print_processor_info() { //puts("CPU info:"); printf("CPU ID: %d\n",get_cpuid()); int platform_cores = get_cpucnt(); printf("Number of cores: %d\n",platform_cores); //printf("Operating frequency: %d MHz\n",(get_cpu_freq()) >> 20); printf("Operating frequency: %d MHz\n",(get_cpu_freq())/1000000); int i = 0; int cores = 1; for(i = 1; i < get_cpucnt(); i++){ if(boot_info->slave[i].status != STATUS_NULL){ cores++; } } printf("Number of cores booted: %d\n",cores); int noc_cores = print_noc_info(); ABORT_IF_FAIL(platform_cores!=noc_cores,"An incorrect noc schedule is used"); ABORT_IF_FAIL(platform_cores!=cores,"Not all cores booted"); return cores; }
/*
 * Main-memory load test.
 *
 * Splits the range (main_mem_size - MINADDR) evenly between all cores
 * and repeatedly runs mem_area_test_uncached() on the calling core's
 * slice for as long as less than 2000 units of get_cpu_usecs() have
 * elapsed. Only on the NOC_MASTER core is the test result passed to
 * ABORT_IF_FAIL; on other cores it is ignored.
 */
void mem_load_test() {
  int slice = (main_mem_size-MINADDR)/get_cpucnt();
  volatile _UNCACHED unsigned int *base = TEST_START + get_cpuid()*slice;

  unsigned int begin = get_cpu_usecs();
  while (get_cpu_usecs() - begin < 2000) {
    if (get_cpuid() != NOC_MASTER) {
      mem_area_test_uncached(base,slice);
    } else {
      ABORT_IF_FAIL(mem_area_test_uncached(base,slice)<0,"FAIL");
    }
  }
}
/*
 * Initialise a plain lock.
 *
 * On a single-core configuration this is a no-op; otherwise the
 * entering[] and number[] entries of every core are cleared through an
 * uncached pointer.
 *
 * Always returns 0.
 */
int __patmos_lock_init(_LOCK_T *lock) {
  const unsigned cores = get_cpucnt();

  if (cores <= 1) {
    return 0;
  }

  _UNCACHED _LOCK_T *shared = (_UNCACHED _LOCK_T *)lock;
  unsigned core;

  // First pass: nobody is choosing a ticket.
  for (core = 0; core < cores; core++) {
    shared->entering[core] = 0;
  }

  // Second pass: nobody holds a ticket.
  for (core = 0; core < cores; core++) {
    shared->number[core] = 0;
  }

  return 0;
}
/*
 * Release one level of a recursive lock.
 *
 * On a single-core configuration this is a no-op. Otherwise the depth
 * counter is decremented; when it reaches 0 the owner is reset to -1 and
 * the wrapped plain lock is released.
 *
 * Always returns 0.
 */
int __patmos_lock_release_recursive(_LOCK_RECURSIVE_T *lock) {
  const unsigned cores = get_cpucnt();

  if (cores <= 1) {
    return 0;
  }

  _UNCACHED _LOCK_RECURSIVE_T *shared = (_UNCACHED _LOCK_RECURSIVE_T *)lock;

  shared->depth--;
  if (shared->depth != 0) {
    // Still held by an outer acquire.
    return 0;
  }

  shared->owner = -1; // reset owner to invalid ID
  __lock_release(lock->lock);

  return 0;
}
/*
 * Acquire one level of a recursive lock.
 *
 * On a single-core configuration this is a no-op. Otherwise, unless the
 * calling core is already recorded as owner with a non-zero depth, the
 * wrapped plain lock is acquired and the caller becomes the owner. The
 * depth counter is incremented in either case.
 *
 * Always returns 0.
 */
int __patmos_lock_acquire_recursive(_LOCK_RECURSIVE_T *lock) {
  const unsigned cores = get_cpucnt();

  if (cores <= 1) {
    return 0;
  }

  const unsigned char self = get_cpuid();
  _UNCACHED _LOCK_RECURSIVE_T *shared = (_UNCACHED _LOCK_RECURSIVE_T *)lock;

  // depth is only read when the owner field matches this core.
  if (!(shared->owner == self && shared->depth != 0)) {
    __lock_acquire(lock->lock);
    shared->owner = self;
  }
  shared->depth++;

  return 0;
}
/*
 * Master side of the libnoc test.
 *
 * For every core other than NOC_MASTER: fill the first eight words at
 * NOC_SPM_BASE with a per-core pattern, send those 32 bytes to the core
 * with noc_write(), spin until done[core] becomes 1, call noc_receive()
 * and check that the eight words at NOC_SPM_BASE + core*8 hold the same
 * pattern.
 *
 * The pattern 0x11223344 * core is computed in unsigned arithmetic: as a
 * signed int product it overflows (undefined behaviour) once core
 * reaches 8.
 */
void noc_test_master() {
  printf("Performing libnoc test...");
  fflush(stdout);

  for (int i = 0; i < get_cpucnt(); ++i) {
    if (i == NOC_MASTER) {
      continue;
    }

    // Same bit pattern as the original int product, without the overflow.
    const unsigned pattern = 0x11223344u * (unsigned)i;

    for (int j = 0; j < 8; ++j) {
      *(NOC_SPM_BASE+j) = pattern;
    }

    noc_write((unsigned)i,(volatile void _SPM *)NOC_SPM_BASE,(volatile void _SPM *)NOC_SPM_BASE,32,1);

    // Wait until core i has set its done[] entry to 1.
    while(done[i] != 1){;}
    noc_receive();

    for (int j = 0; j < 8; ++j){
      ABORT_IF_FAIL(*(NOC_SPM_BASE+(i*8)+j) != pattern ,"Wrong data received");
    }
  }

  printf("OK\n");
}
/*
 * Reader-side entry point.
 *
 * Starts writer_init() on every core other than core 0 (passing the core
 * number as argument), sets `flag`, runs reader(), then joins all
 * writers and adds up the values they returned as the retry count.
 *
 * Returns the sum computed by reader().
 */
int main() {
  int cpucnt = get_cpucnt();
  printf("cpus:%d\n",cpucnt);

  for(int i = 1; i < cpucnt; i++) {
    corethread_create(i,&writer_init,(void *)i);
  }

  flag = 1;
  int sum = reader();

  int retries = 0;
  for(int i = 1; i < cpucnt; i++) {
    // corethread_join() stores a pointer-sized value, so receive it in a
    // real void* instead of writing through the address of an int.
    // Starting at NULL makes a join that stores nothing count as 0
    // retries rather than an indeterminate value.
    void *res = NULL;
    corethread_join(i, &res);
    retries += (int)(long)res;
  }

  printf("Sum: %d Retries: %d\n",sum, retries);
  return sum;
}
// The main function for the other threads on the another cores void work(void* arg) { _iodev_ptr_t sspm = (_iodev_ptr_t) PATMOS_IO_OWNSPM; int id = get_cpuid(); while (id != owner) ; for (int i=0; i<4; ++i) { sspm[4*id + i] = id*0x100 + i; } int val; for (int i=0; i<4; ++i) { val = sspm[4*id + i]; if (id*0x100 + i != val) ok = 0; } if (id < get_cpucnt() - 1) { ++owner; } else { owner = 0; } }
static void slave(void* param) { // clear communication areas // cannot use memset() for _SPM pointers! for(int i = 0; i < sizeof(struct msg_t); i++) { ((volatile _SPM char *)spm_in)[i] = 0; ((volatile _SPM char *)spm_out)[i] = 0; } // wait and poll until message arrives while(!spm_in->ready) { /* spin */ } // PROCESS: add ID to sum_id spm_out->sum = spm_in->sum + get_cpuid(); spm_out->ready = 1; // send to next slave int rcv_id = (get_cpuid()==(get_cpucnt()-1)) ? 0 : get_cpuid()+1; noc_write(rcv_id, spm_in, spm_out, sizeof(struct msg_t), 0); return; }
// Consumer Core
//
// Thread body that never returns. After creating a SINK queuing port
// (id 2) and initialising the ports, it loops forever and, depending on
// the value of TDM_P_COUNTER:
//   - copies MSG_SIZE words from the port's read buffer into data_rd
//     when (TDM_P_COUNTER - TRIGGER_CONS_COMM) is a multiple of
//     CONS_PERIOD,
//   - adds 100 to every word of data_rd when
//     (TDM_P_COUNTER - TRIGGER_CONS_COMP) is a multiple of CONS_PERIOD,
//   - copies data_rd into debug_print_cons on every iteration.
// With MEASUREMENT_MODE defined, TDM_P_COUNTER samples are stored in
// timeStamps_slave2[0..3].
void consumer(void *arg) {

#ifdef MEASUREMENT_MODE
  // start initialization measurement
  timeStamps_slave2[0] = TDM_P_COUNTER;
#endif

  // NOTE(review): id and cnt are not used anywhere in this function.
  int id = get_cpuid();
  int cnt = get_cpucnt();

  int volatile data_rd[MSG_SIZE];

  ///////////////////////////////////////////////////////////////////////////////
  // This section of the task handles with the initializations for buffering
  ///////////////////////////////////////////////////////////////////////////////

  // allocating data to SPM
  qpd_t * chan2 = mp_create_qport(2, SINK, MP_CHAN_BUF_SIZE, MP_CHAN_NUM_BUF);
  mp_init_ports(); // mp init ports

#ifdef MEASUREMENT_MODE
  timeStamps_slave2[1] = TDM_P_COUNTER; // stop the initialization measurement
#endif

  for(;;){

    ///////////////////////////////////////////////////////////////////////////////
    // Communication part of the Task.
    ///////////////////////////////////////////////////////////////////////////////

    // when the time is triggered
    if((TDM_P_COUNTER-TRIGGER_CONS_COMM)%CONS_PERIOD == 0 ){

#ifdef MEASUREMENT_MODE
      timeStamps_slave2[2] = TDM_P_COUNTER;// start the communication measurement
#endif

      // reading the data to the read buffer
      for (int i=0;i<MSG_SIZE;i++){
        data_rd[i] = *(volatile int _SPM*)((int*)chan2->read_buf+i); // read the data
      }

#ifdef MEASUREMENT_MODE
      timeStamps_slave2[3] = TDM_P_COUNTER; //stop the communication measurement
#endif

    }

    ///////////////////////////////////////////////////////////////////////////////
    // Computation part of the Task. Perhaps for an Actuator
    ///////////////////////////////////////////////////////////////////////////////

    if(((TDM_P_COUNTER-TRIGGER_CONS_COMP)%CONS_PERIOD) == 0 ){
      // make data manipulation (but for now dummy) over the data
      for(int i=0;i<MSG_SIZE;i++){
        data_rd[i] += 100;
      }
    }//if

    ///////////////////////////////////////////////////////////////////////////////
    //Print the received data for debugging
    ///////////////////////////////////////////////////////////////////////////////

    // DEBUG_PRINT_CONS is defined right here, so the copy below is always
    // compiled in; the macro stays defined for the rest of the file.
#define DEBUG_PRINT_CONS
#ifdef DEBUG_PRINT_CONS
    for(int i=0;i<MSG_SIZE;i++){
      debug_print_cons[i] = data_rd[i];
    }
#endif

  }//for

}
// The main acts as producer int main() { int val = 0; end_flag = 0; for (int i=0; i<CONSUMERS; ++i) { result[i] = 0; started_consumer[i] = 0; finished_consumer[i] = 0; } started_producer = 0; started_fork = 0; finished_producer = 0; finished_fork = 0; printf("Producer/fork/n-consumers benchmark for the S4NOC paper:\n"); printf(" Delay: %d\n", DELAY); printf(" Number of cores: %d\n", get_cpucnt()); printf(" Total packets sent: %d\n", LEN); printf(" Buffer size: %d\n", BUF_LEN); printf("Runnning test:\n"); for (int k=0; k<CONSUMERS; ++k) { corethread_create(CONSUMER_CORE[k], &consumer, (void*) &CONSUMER_ID[k] ); *dead_ptr = 8000; val = *dead_ptr; while(started_consumer[k] == 0) {;} printf(" Consumer-%d is ready.\n", k+1); } corethread_create(FORK_CORE, &fork, NULL); while(started_fork == 0) {;} printf(" Fork is ready.\n"); corethread_create(PRODUCER_CORE, &producer, NULL); while(started_producer == 0) {;} printf(" Producer has started.\n [...]\n"); while(finished_producer == 0) {;} printf(" Producer has finished.\n"); while(finished_fork == 0) {;} printf(" Fork has finished.\n"); for (int i=0; i<CONSUMERS; ++i) { while(finished_consumer[i] == 0) {;} printf(" Consumer-%d has finished.\n", i+1); } /* for (int i=0; i<CONSUMERS; ++i) { printf(" %d %d \n", started_consumer[i], finished_consumer[i] ); } */ *dead_ptr = 8000000; val = *dead_ptr; for (int i=0; i<CONSUMERS; ++i) { printf("Results Consumer-%d: \n", i+1); printf(" %d valid pakets out of of %d received.\n", result[i], LEN); printf(" Reception time of %d cycles -> %g cycles per received packet.\n", time[i], 1. * time[i]/LEN); } // Join threads int *retval; end_flag = 1; corethread_join(PRODUCER_CORE, (void **)&retval); corethread_join(FORK_CORE, (void **)&retval); for (int i=0; i<CONSUMERS; ++i) { corethread_join(CONSUMER_CORE[i], (void **)&retval); } printf("End of program.\n"); return val; }
/*
 * Master-core entry point of the producer/intermediate/consumer demo.
 *
 * Configures and enables the NoC, starts producer(), intermediate() and
 * consumer() on cores 1, 2 and 3, and then loops forever printing the
 * debug arrays filled by the intermediate and consumer threads. With
 * MEASUREMENT defined it also samples TDM_P_COUNTER around the
 * initialisation and prints the recorded time stamps on every loop
 * iteration.
 *
 * The loop never exits, so the trailing `return 0` is not reached.
 */
int main() {

#ifdef MEASUREMENT
  timeStamps_master[0] = TDM_P_COUNTER; //start master initialization measurement
#endif

  noc_configure();
  noc_enable();

  // NOTE(review): i and cnt are only used inside the MULTICORE section
  // below; id is not used at all.
  unsigned i;
  int slave_param = 1;
  int id = get_cpuid();
  int cnt = get_cpucnt();

#ifdef MEASUREMENT
  timeStamps_master[1] = TDM_P_COUNTER; //stop master initialization measurement
#endif

  // PROD_CONS is defined right here, so the three threads below are
  // always started.
#define PROD_CONS
#ifdef PROD_CONS
  corethread_create(1, &producer, (void*)slave_param);
  corethread_create(2, &intermediate, (void*)slave_param);
  corethread_create(3, &consumer, (void*)slave_param);
#endif

  // This defines MULTICORE_N, not MULTICORE, so unless MULTICORE is
  // defined elsewhere the section below is compiled out.
#define MULTICORE_N
#ifdef MULTICORE
  for (i=2; i<cnt; ++i) {
    int core_id = i; // The core number
    corethread_create(core_id, &slave, (void*)slave_param);
  }
#endif

  // NOTE(review): "Threats" in the message below looks like a typo for
  // "Threads"; left untouched because it is runtime output.
  printf("Threats are started!\n");

  for(;;){

    // PRINT_ARRAY is defined right here, so the debug dump is always
    // compiled in.
#define PRINT_ARRAY
#ifdef PRINT_ARRAY
    for(int i=0;i<MSG_SIZE;i++){
      printf("The Intermediate modified: data[%d] = %d \n",i, debug_print_interm[i]);
      printf("The Consumer modified: data[%d] = %d \n",i, debug_print_cons[i]);
    }
#endif

#ifdef MEASUREMENT
    // Producer timing metrics
    printf("-----------Producer Timing Metrics--------------------\n");
    printf("Producer starts at %d TDM cycles\n", timeStamps_slave1[0]);
    printf("Producer end of computation at %d TDM cycles\n", timeStamps_slave1[1]);
    printf("Producer initialization Latency is %d TDM cycles\n", timeStamps_slave1[1]-timeStamps_slave1[0]);
    printf("Producer triggered at %d TDM cycles\n", timeStamps_slave1[2]);
    printf("Producer polls for %d TDM cycles\n", timeStamps_slave1[2]-timeStamps_slave1[1]);
    printf("Producer stops at %d TDM cycles\n", timeStamps_slave1[3]);
    printf("Producer communication Latency is %d TDM cycles\n", timeStamps_slave1[3]-timeStamps_slave1[2]);

    // slave 2 timing metrics
    printf("-----------Slave 2 Timing Metrics--------------------\n");
    printf("Slave 2 starts at %d TDM cycles\n", timeStamps_slave2[0]);
    printf("Slave 2 end of computation at %d TDM cycles\n", timeStamps_slave2[1]);
    printf("Slave 2 initialization Latency is %d TDM cycles\n", timeStamps_slave2[1]-timeStamps_slave2[0]);
    printf("Slave 2 triggered at %d TDM cycles\n", timeStamps_slave2[2]);
    printf("Slave 2 polls for %d TDM cycles\n", timeStamps_slave2[2]-timeStamps_slave2[1]);
    printf("Slave 2 stops at %d TDM cycles\n", timeStamps_slave2[3]);
    printf("Slave 2 communication Latency is %d TDM cycles\n", timeStamps_slave2[3]-timeStamps_slave2[2]);

    // Consumer timing metrics
    printf("-----------Consumer Timing Metrics--------------------\n");
    printf("Consumer starts at %d TDM cycles\n", timeStamps_slave3[0]);
    printf("Consumer end of computation at %d TDM cycles\n", timeStamps_slave3[1]);
    printf("Consumer initialization Latency is %d TDM cycles\n", timeStamps_slave3[1]-timeStamps_slave3[0]);
    printf("Consumer triggered at %d TDM cycles\n", timeStamps_slave3[2]);
    printf("Consumer polls for %d TDM cycles\n", timeStamps_slave3[2]-timeStamps_slave3[1]);
    printf("Consumer stops at %d TDM cycles\n", timeStamps_slave3[3]);
    printf("Consumer communication Latency is %d TDM cycles\n", timeStamps_slave3[3]-timeStamps_slave3[2]);

    //master
    printf("-----------Master Timing Metrics--------------------\n");
    printf("Master starts at %d TDM cycles\n", timeStamps_master[0]);
    printf("The master initialization Latency is %d TDM cycles\n", timeStamps_master[1]-timeStamps_master[0]);
    // The end-to-end figure is measured from the master's start stamp to
    // slave 2's "triggered" stamp.
    printf("The End to End latency is %d TDM cycles\n", timeStamps_slave2[2]-timeStamps_master[0]);
#endif

  }

  return 0;
}