void loop(void* arg) { int (*test)(int, int) = (int (*)(int, int))arg; int i, j; int res; //communicator_t* loc_com = &comm; communicator_t* loc_com = &comm_world; //if (test == barrier_master || test == barrier_slave || // test == broadcast_master || test == broadcast_slave) { // loc_com = &comm_world; //} //communicator_t stack_com; //stack_com.barrier_set = loc_com->barrier_set; //for(int i = 0; i < loc_com->count; i++) { // stack_com.addr[i] = loc_com->addr[i]; //} //stack_com.count = loc_com->count; //stack_com.msg_size = loc_com->msg_size; DEBUGGER("Initial test run\n"); /* Allow routing/cache setup ahead of time */ test(1,1024); DEBUGGER("Initial test run done\n"); for( i=0; i < num_sizes; i++){ if (flush & FLUSH_BETWEEN_SIZES) { inval_dcache(); inval_mcache(); } for( j=1; j <= repeat_count; j++){ if (flush & FLUSH_BETWEEN_REPEATS) { inval_dcache(); inval_mcache(); } mp_barrier(loc_com); //mp_barrier(&stack_com); res = test(iterations, sizes[i]); my_two_printf("%u\t%i\n",sizes[i],res); mp_barrier(loc_com); //mp_barrier(&stack_com); } } inval_dcache(); inval_mcache(); if(get_cpuid() != 0){ int ret = 0; corethread_exit(&ret); } return; }
/*
 * Acquire `lock` on behalf of the calling core.
 *
 * When get_cpucnt() reports more than one core, the caller (index
 * get_cpuid()) flags itself in entering[], takes a ticket one larger than
 * max(ll), and then busy-waits for every core j until j is no longer choosing
 * a ticket and does not hold a ticket that goes first. A non-zero ticket goes
 * first if it is smaller, or equal with a lower core index. This is the
 * structure of Lamport's bakery algorithm.
 *
 * All lock fields are accessed through an _UNCACHED pointer. Once the lock is
 * held, the data cache is invalidated. With a single core nothing is done.
 *
 * NOTE(review): max() is defined elsewhere; presumably it returns the largest
 * ticket currently stored in number[] - confirm.
 *
 * Returns 0 in all cases.
 */
int __patmos_lock_acquire(_LOCK_T *lock) {
  const unsigned cnt = get_cpucnt();

  if (cnt > 1) {
    const unsigned char id = get_cpuid();
    _UNCACHED _LOCK_T *ll = (_UNCACHED _LOCK_T *)lock;

    /* Draw a ticket; entering[id] is set while the ticket is being chosen. */
    ll->entering[id] = 1;
    unsigned n = 1 + max(ll);
    ll->number[id] = n;
    ll->entering[id] = 0;

    for (unsigned j = 0; j < cnt; j++) {
      while (ll->entering[j]) {
        /* busy wait */
      }
      unsigned m = ll->number[j];
      while ((m != 0) &&
             ((m < n) || ((m == n) && (j < id)))) {
        /* busy wait, only update m */
        m = ll->number[j];
      }
    }

    /* invalidate data cache to establish cache coherence */
    inval_dcache();
  }

  return 0;
}
int main() { for (int i = 0; i < 9; ++i) { done[i] = 0; } print_cpuinfo(); inval_dcache(); core_count = print_processor_info(); //done = malloc(sizeof(unsigned int _SPM *)*core_count); com_spm_size = com_spm_test(); main_mem_size = main_mem_test(); int param = 0; printf("Creating corethreads..."); for(int i = 0; i < get_cpucnt(); i++) { if (i != NOC_MASTER) { corethread_t ct = (corethread_t) i; if(corethread_create(&ct,&slave_tester,(void*)param) != 0){ printf("Corethread %d not created\n",i); } } } puts("OK"); printf("Performing main mem load test..."); fflush(stdout); mem_load_test(); printf("OK\n"); noc_test_master(); int* ret; for (int i = 0; i < get_cpucnt(); ++i) { if (i != NOC_MASTER) { corethread_join((corethread_t)i,(void**)&ret); } } printf("Joined with other cores\n"); return 0; }
/*
 * Slave side of the latency test.
 *
 * Receives `cnt` messages of `bytes` bytes on m2s, invalidating the caches
 * before each receive when FLUSH_BETWEEN_ITERATIONS is set in `flush`, and
 * then sends a single message on s2m.
 *
 * Returns 0.
 */
int latency_slave(int cnt, int bytes) {
  int remaining = cnt;

  while (remaining > 0) {
    if (flush & FLUSH_BETWEEN_ITERATIONS) {
      inval_dcache();
      inval_mcache();
    }
    mp_recv_size(&m2s, bytes);
    remaining--;
  }

  mp_send(&s2m);
  return 0;
}
/*
 * Time `cnt` iterations of the per-iteration cache flush on its own.
 *
 * The loop body only invalidates the caches, and only when
 * FLUSH_BETWEEN_ITERATIONS is set in `flush`; otherwise an empty loop is
 * timed.
 *
 * Returns TIMER_ELAPSED for the loop, converted to int.
 */
int calibrate_cache_flush(int cnt) {
  int pass = 0;
  int flush_cost;

  TIMER_START;
  while (pass < cnt) {
    if (flush & FLUSH_BETWEEN_ITERATIONS) {
      inval_dcache();
      inval_mcache();
    }
    pass++;
  }
  TIMER_STOP;

  flush_cost = TIMER_ELAPSED;
  return flush_cost;
}
int barrier_slave(int cnt, int bytes) { int i; for (i=0; i<cnt; i++) { if (flush & FLUSH_BETWEEN_ITERATIONS) { inval_dcache(); //inval_mcache(); } mp_barrier(&comm_world); } return 0; }
/*
 * Slave side of the broadcast test: calls mp_broadcast(&comm_world, 1)
 * `cnt` times. `bytes` is not used.
 *
 * The caches are invalidated before each call when
 * FLUSH_BETWEEN_ITERATIONS is set in `flush`.
 *
 * Returns 0.
 */
int broadcast_slave(int cnt, int bytes) {
  for (int left = cnt; left > 0; --left) {
    if (flush & FLUSH_BETWEEN_ITERATIONS) {
      inval_dcache();
      inval_mcache();
    }
    mp_broadcast(&comm_world, 1);
  }

  return 0;
}
/*
 * Slave side of the round-trip test.
 *
 * For each of `cnt` rounds, receives `bytes` bytes on m2s and sends `bytes`
 * bytes back on s2m. The caches are invalidated at the start of a round when
 * FLUSH_BETWEEN_ITERATIONS is set in `flush`.
 *
 * Returns 0.
 */
int roundtrip_slave(int cnt, int bytes) {
  int round_no = 0;

  while (round_no < cnt) {
    if (flush & FLUSH_BETWEEN_ITERATIONS) {
      inval_dcache();
      inval_mcache();
    }

    mp_recv_size(&m2s, bytes);
    mp_send_size(&s2m, bytes);

    ++round_no;
  }

  return 0;
}
int barrier_master(int cnt, int bytes) { int i; int total = 0; TIMER_START; for (i=0; i<cnt; i++) { if (flush & FLUSH_BETWEEN_ITERATIONS) { inval_dcache(); //inval_mcache(); } mp_barrier(&comm_world); } TIMER_STOP; total = TIMER_ELAPSED; total -= calibrate_cache_flush(cnt); return(total/cnt); /* usec */ }
/*
 * Master side of the broadcast test.
 *
 * Times `cnt` calls of mp_broadcast(&comm_world, 1), subtracts the value
 * measured by calibrate_cache_flush(cnt), and returns
 * cnt * bytes * 1000000 / (time * 1024) (KB/sec, per the original
 * annotation). The quotient is computed in integer arithmetic, the same way
 * bandwidth_master does it.
 *
 * The caches are invalidated before each call when
 * FLUSH_BETWEEN_ITERATIONS is set in `flush`.
 *
 * Returns 0 when the calibration value is at least as large as the measured
 * time, so that there is neither a division by zero nor an unsigned
 * wrap-around in the corrected time.
 */
int broadcast_master(int cnt, int bytes) {
  int i;
  int flush_cost;
  unsigned long long total = 0;

  TIMER_START;
  for (i = 0; i < cnt; i++) {
    if (flush & FLUSH_BETWEEN_ITERATIONS) {
      inval_dcache();
      inval_mcache();
    }
    mp_broadcast(&comm_world, 1);
  }
  TIMER_STOP;

  /* Read the elapsed time before calibrating: calibrate_cache_flush() runs
   * TIMER_START/TIMER_STOP itself. */
  total = TIMER_ELAPSED;
  flush_cost = calibrate_cache_flush(cnt);

  if (flush_cost >= 0 && total <= (unsigned long long)flush_cost) {
    return 0; /* nothing measurable is left after the correction */
  }
  total -= (unsigned long long)flush_cost;

  return (int)(((unsigned long long)cnt*bytes*1000000)/(total*1024)); /* KB/sec */
}
/*
 * Master side of the round-trip test.
 *
 * For each of `cnt` rounds, sends `bytes` bytes on m2s and receives `bytes`
 * bytes on s2m. The total time, minus the value measured by
 * calibrate_cache_flush(cnt), is turned into cnt * 1000000 / time
 * (transactions/sec, per the original annotation).
 *
 * The caches are invalidated at the start of a round when
 * FLUSH_BETWEEN_ITERATIONS is set in `flush`.
 *
 * Returns 0 when the calibration value is at least as large as the measured
 * time, so that there is neither a division by zero nor an unsigned
 * wrap-around in the corrected time.
 */
int roundtrip_master(int cnt, int bytes) {
  int i;
  int flush_cost;
  unsigned long long total = 0;

  TIMER_START;
  for (i = 0; i < cnt; i++) {
    if (flush & FLUSH_BETWEEN_ITERATIONS) {
      inval_dcache();
      inval_mcache();
    }
    mp_send_size(&m2s, bytes);
    mp_recv_size(&s2m, bytes);
  }
  TIMER_STOP;

  /* Read the elapsed time before calibrating: calibrate_cache_flush() runs
   * TIMER_START/TIMER_STOP itself. */
  total = TIMER_ELAPSED;
  flush_cost = calibrate_cache_flush(cnt);

  if (flush_cost >= 0 && total <= (unsigned long long)flush_cost) {
    return 0; /* nothing measurable is left after the correction */
  }
  total -= (unsigned long long)flush_cost;

  return (int)(((unsigned long long)cnt*1000000) / total); /* Transactions/sec */
}
/*
 * Master side of the latency test.
 *
 * Times `cnt` sends of `bytes` bytes on m2s; the timer is stopped before the
 * closing mp_recv()/mp_ack() on s2m. The elapsed time, minus the value
 * measured by calibrate_cache_flush(cnt), is divided by `cnt` and returned.
 *
 * The caches are invalidated before each send when
 * FLUSH_BETWEEN_ITERATIONS is set in `flush`.
 *
 * Returns 0 when cnt is 0, or when the calibration value is at least as large
 * as the measured time. This avoids a division by zero and an unsigned
 * wrap-around in the corrected time.
 */
int latency_master(int cnt, int bytes) {
  int i;
  int flush_cost;
  unsigned long long total = 0;

  TIMER_START;
  for (i = 0; i < cnt; i++) {
    if (flush & FLUSH_BETWEEN_ITERATIONS) {
      inval_dcache();
      inval_mcache();
    }
    mp_send_size(&m2s,bytes);
  }
  TIMER_STOP;

  /* The closing handshake is outside the timed region. */
  mp_recv(&s2m);
  mp_ack(&s2m);

  /* Read the elapsed time before calibrating: calibrate_cache_flush() runs
   * TIMER_START/TIMER_STOP itself. */
  total = TIMER_ELAPSED;
  flush_cost = calibrate_cache_flush(cnt);

  if (cnt == 0) {
    return 0;
  }
  if (flush_cost >= 0 && total <= (unsigned long long)flush_cost) {
    return 0; /* nothing measurable is left after the correction */
  }
  total -= (unsigned long long)flush_cost;

  return (int)(total/cnt);
}
/*
 * Master side of the bandwidth test.
 *
 * Times `cnt` sends of `bytes` bytes on m2s plus the closing
 * mp_recv()/mp_ack() on s2m. The elapsed time, minus the value measured by
 * calibrate_cache_flush(cnt), is turned into
 * cnt * bytes * 1000000 / (time * 1024) (KB/sec, per the original
 * annotation).
 *
 * The caches are invalidated before each send when
 * FLUSH_BETWEEN_ITERATIONS is set in `flush`.
 *
 * Returns 0 when the calibration value is at least as large as the measured
 * time, so that there is neither a division by zero nor an unsigned
 * wrap-around in the corrected time.
 */
int bandwidth_master(int cnt, int bytes) {
  int i;
  int flush_cost;
  unsigned long long total = 0;

  TIMER_START;
  for (i = 0; i < cnt; i++) {
    if (flush & FLUSH_BETWEEN_ITERATIONS) {
      inval_dcache();
      inval_mcache();
    }
    mp_send_size(&m2s, bytes);
  }
  /* The closing handshake is part of the timed region. */
  mp_recv(&s2m);
  mp_ack(&s2m);
  TIMER_STOP;

  /* Read the elapsed time before calibrating: calibrate_cache_flush() runs
   * TIMER_START/TIMER_STOP itself. */
  total = TIMER_ELAPSED;
  flush_cost = calibrate_cache_flush(cnt);

  if (flush_cost >= 0 && total <= (unsigned long long)flush_cost) {
    return 0; /* nothing measurable is left after the correction */
  }
  total -= (unsigned long long)flush_cost;

  return (int)(((unsigned long long)cnt*bytes*1000000)/(total*1024)); /* KB/sec */
}
int main() { corethread_t worker_1 = SLAVE_CORE; // For now the core ID corethread_create(&worker_1,&func_worker_1,(void*)&worker_1); puts("Corethread created"); unsigned short int local_phase = 0; min_time = ULONG_MAX; max_time = 0; accum_time = 0; cnt_time = 0; unsigned long long int start = 0; unsigned long long int stop = 0; spd_t * sport1 = mp_create_sport(CHAN_ID_ONE,SINK,SAMPLE_SIZE*sizeof(short)); spd_t * sport2 = mp_create_sport(CHAN_ID_TWO,SOURCE,SAMPLE_SIZE*sizeof(short)); if (sport1 == NULL || sport2 == NULL) { //exit(1); } volatile short _SPM * sample = mp_alloc(SAMPLE_SIZE*sizeof(short)); mp_init_ports(); done = 1; int balance = 0; for (int i = 0; i < SAMPLE_SIZE; ++i) { sample[i] = i; } for (int i = 0; i < ITERATIONS/2; ++i) { mp_write(sport2,sample); for (int i = 0; i < SAMPLE_SIZE; ++i) { sample[i] = i; } } for (int i = 0; i < ITERATIONS/2; ++i) { mp_write(sport2,sample); for (int i = 0; i < SAMPLE_SIZE; ++i) { sample[SAMPLE_SIZE-1-i] = i; } } for (int i = 0; i < ITERATIONS; ++i) { start = get_cpu_usecs(); int ret = mp_read(sport1,sample); stop = get_cpu_usecs(); if (ret == 0) { puts("No value written yet."); } else { unsigned long long int exe_time = stop - start; min_time = (exe_time < min_time) ? exe_time : min_time; max_time = (exe_time > max_time) ? 
exe_time : max_time; accum_time += exe_time; cnt_time++; if (sample[0] == 0) { balance++; for (int i = 0; i < SAMPLE_SIZE; ++i) { if(sample[i] != i) { printf("Error: sample[%i] = %i\n",i,sample[i]); break; } } } else if (sample[0] == SAMPLE_SIZE-1) { balance--; for (int i = 0; i < SAMPLE_SIZE; ++i) { if(sample[SAMPLE_SIZE-1-i] != i) { printf("Error: sample[%i] = %i\n",i,sample[i]); break; } } } else { printf("Wrong sample values sample[0] = %i\n",sample[0]); } } } printf("Local phase: %d\n",local_phase); inval_dcache(); int* res; corethread_join(worker_1,&res); printf("Balance: %i\n",balance); printf("Min time: %llu\tMax time: %llu\tAccumulated time: %llu\nCount time: %llu\tAverage time: %llu\n", min_time,max_time,accum_time,cnt_time,accum_time/cnt_time); puts("Corethread joined"); return *res; }