/*
 * Exercise explicit cache pushes between cores 2 and 3: core 2 writes a
 * pattern and pushes it to core 3, core 3 verifies it, rewrites it and
 * pushes it back, and core 2 verifies the round trip.
 */
void mc_main(void)
{
    xprintf("[%02u]: mc_main\n", corenum());
    hw_barrier();

    /* Stage 1: core 2 fills the buffer and ships it to core 3. */
    if (corenum() == 2) {
        for (unsigned int n = 0; n < 1024; n++)
            test_numbers[n] = n;
        cache_pushMem(3, &test_numbers, 1024 * sizeof(int));
        xprintf("[%02u]: Pushed to core 3\n", corenum());
    }
    hw_barrier();

    /* Stage 2: core 3 checks the pattern, rewrites it, ships it back. */
    if (corenum() == 3) {
        for (unsigned int n = 0; n < 1024; n++) {
            if (test_numbers[n] != (int)n)
                xprintf("[%02u]: invalid test_numbers[%u] = %d\n", corenum(), n, test_numbers[n]);
            test_numbers[n] = n + 1024;
        }
        cache_pushMem(2, &test_numbers, 1024 * sizeof(int));
        xprintf("[%02u]: Pushed back to core 2\n", corenum());
    }
    hw_barrier();

    /* Stage 3: core 2 verifies the returned data. */
    if (corenum() == 2) {
        for (unsigned int n = 0; n < 1024; n++) {
            if (test_numbers[n] != (int)(n + 1024))
                xprintf("[%02u]: invalid test_numbers[%u] = %d\n", corenum(), n, test_numbers[n]);
        }
    }

    xprintf("[%02u]: Done\n", corenum());
}
/*
 * sm_barrier: generation-counting barrier built on shared memory plus
 * inter-core semaphores.  Shared state: `nbarrier` (arrival count) and
 * `ngen` (generation number), protected by sem_barrier_mutex; waiters
 * sleep on one of two wait semaphores chosen by generation parity so
 * that consecutive barrier episodes do not interfere.
 */
void sm_barrier(void)
{
    // Barrier implementation using shared memory.
    unsigned mygen;
    icSema_P(sem_barrier_mutex);
    /* Pull fresh copies of the shared counters before reading them. */
    cache_invalidateMem(&nbarrier, sizeof(nbarrier));
    cache_invalidateMem(&ngen, sizeof(ngen));
    if (DEBUG)
        xprintf("%u: rgn_barrier enter %u %u %u\n", corenum(), enetCorenum()-2, ngen, nbarrier);
    mygen = ngen;   /* remember the generation we arrived in */
    nbarrier++;     /* announce our arrival */
    cache_flushMem(&nbarrier, sizeof(nbarrier));
    for (;;) {
        /* A generation change means the barrier already released. */
        if (ngen != mygen)
            break;
        /* All participants have arrived.  enetCorenum()-2 is used as the
         * participant count throughout -- presumably cores 0 and 1 do not
         * take part; TODO confirm against the platform setup. */
        if (nbarrier >= enetCorenum() -2)
            break;
        if (DEBUG)
            xprintf("%u: rgn_barrier wait %u %u %u\n", corenum(), enetCorenum()-2, ngen, nbarrier);
        /* Drop the mutex and block on the wait semaphore for this
         * generation's parity; on wakeup, retake the mutex and re-read
         * the shared counters before re-testing the exit conditions. */
        icSema_V(sem_barrier_mutex);
        icSema_P((mygen & 1) ? sem_barrier_wait1 : sem_barrier_wait0);
        icSema_P(sem_barrier_mutex);
        cache_invalidateMem(&ngen, sizeof(ngen));
        cache_invalidateMem(&nbarrier, sizeof(nbarrier));
        if (DEBUG)
            xprintf("%u: rgn_barrier wait done %u %u %u\n", corenum(), enetCorenum()-2, ngen, nbarrier);
    }
    /* If the generation is still ours, we are the last arriver: reset the
     * count and open the next generation, then publish both. */
    if (ngen == mygen) {
        nbarrier = 0;
        ngen++;
        cache_flushMem(&nbarrier, sizeof(nbarrier));
        cache_flushMem(&ngen, sizeof(ngen));
    }
    if (DEBUG)
        xprintf("%u: rgn_barrier return %u %u %u\n", corenum(), enetCorenum()-2, ngen, nbarrier);
    /* Wake one waiter on our generation's semaphore (each woken core wakes
     * the next in turn), then release the mutex.
     * NOTE(review): the debug strings say "rgn_barrier" although the
     * function is sm_barrier -- runtime text left untouched. */
    icSema_V((mygen & 1) ? sem_barrier_wait1 : sem_barrier_wait0);
    icSema_V(sem_barrier_mutex);
}
void mc_main(void) { xprintf("[%02u]: mc_main\n", corenum()); hw_barrier(); if (corenum() == 2) { int path[NRTOWNS]; // current (partial) tour path int visited[NRTOWNS]; // count of tours of particular length int best_path[NRTOWNS];// current best tour path int min; // cost of best tour path // initialization min = 10000; for (unsigned int i = 0; i < NRTOWNS; i++) visited[i] = 0; path[0] = 0; // starting town, we are finidng a cycle so just choose town 0 xprintf("[%02u]: Starting TSP ...\n", corenum()); const unsigned int start_cycle = *cycleCounter; tsp(1, 0, path, visited, best_path, &min); // find a min cost tour const unsigned int end_cycle = *cycleCounter; // print results xprintf("[%02u]: computation time (in CPU cycles): %u\n", corenum(), end_cycle - start_cycle); xprintf("[%02u]: shortest path length is %d\n", corenum(), min); xprintf("[%02u]: best path found: ", corenum()); for (unsigned int i = 0; i < NRTOWNS; i++) printf("%d ", best_path[i]); xprintf("\n"); xprintf("level\tvisited\n"); for (unsigned int i = 0; i < NRTOWNS; i++) printf("%d\t%d\n",i,visited[i]); } }
void test1(void (*barrier)(void), const char *type) { if (corenum() == 2) { xprintf("[%02u]: test1 for %s barrier start\n", corenum(), type); } barrier(); for (unsigned int i = 0; i < 10; i++) { barrier(); // record i went through c_[corenum()].instance++; cache_flushMem(&c_[corenum()], sizeof(struct state)); if (DEBUG) xprintf("[%02u]: passed barier and increased state %u\n", corenum(), i); barrier(); for (unsigned int j = 2; j < enetCorenum(); j++) { int jinstance; cache_invalidateMem(&c_[j], sizeof(struct state)); jinstance = c_[j].instance; if (jinstance != c_[corenum()].instance) { xprintf("[%02u]: %s barrier failed: my instance is %d and %d's is %d\n", corenum(), type, c_[corenum()].instance, j, jinstance); assert(0); } } } barrier(); if (corenum() == 2) { xprintf("[%02u]: test1 for %s barrier passed\n", corenum(), type); } }
void schedule(){ int i, done; int *pData = (int *)(corenum() * 512 + 0x4000); Task *pre_task = s_processor->cur_task; Task *task_to_run= rm_schedule(timer, s_processor); int t = timer%10; putchar(48+t); putchar(10); if(task_to_run!=NULL){ myprintf("%s\n",task_to_run->name); } timer++; for(i=0;i<3;i++) dump_task(s_processor->tasks+i); /* pre_task=s_tasks+currentTask;*/ //currentTask = (currentTask + 1) %3; //task_to_run=s_tasks+currentTask; if (task_to_run != pre_task){ if (pre_task != NULL){ //switch task //save previous task state for (i = 0; i < 32; i++){ pre_task->saved_state[i] = pData[i]; } } if (task_to_run != NULL){ //restore current task state for (i = 0; i < 32; i++){ pData[i] = task_to_run->saved_state[i]; //is_idle=0; } } /* else*/ //{ //pData[31]=idle; //is_idle=1; /*}*/ } }
/*
 * Entry point run on every core.  Core 2 builds the processor and task
 * structures, installs the timer interrupt handler, starts task_3 and
 * then parks; every other core just spins forever.
 */
void mc_main() {
    /* NOTE(review): target-specific instruction -- purpose not derivable
     * from this file (possibly a trace/debug opcode); confirm vs. the ISA. */
    __asm__("j7 7");
    int core = corenum();
    if(core == 2){
        wait_after_done = 0;
        s_processor = new_processor("Processor1");
        s_tasks = new_tasks(names, 3);
        //puts(s_tasks[0].name);
        //puts(s_tasks[1].name);
        //puts(s_tasks[2].name);
        /* Debug: emit each task's index as a single digit. */
        putchar(s_tasks[0].index+'0');
        putchar(s_tasks[1].index+'0');
        putchar(s_tasks[2].index+'0');
        init_tasks(s_processor, s_tasks, 3);
        /* for(int i=0;i<3;i++)*/
        /*{*/
        /*j=0;*/
        /*while(s_tasks[i].name[j])*/
        /*putchar(s_tasks[i].name[j++]);*/
        /*putchar(10);*/
        /*}*/
        wait_after_done = 1;
        // Called concurrently in all cores except core #1, after mc_init returns
        //set timer interrupt handler
        set_handlerR((int)isr);
        //set timer interrupt intervals
        set_timerInterval(CLOCK_TO_MS);
        //set interrupt enable
        /* NOTE(review): 'f' looks like a leftover debug marker -- confirm
         * before removing. */
        putchar('f');
        set_mask(0);
        task_3();
        while (1);  /* never returns; further scheduling happens in the timer ISR */
    }
    else
        while(1);   /* non-core-2 cores park here */
}
/* Per-core initialization hook: just announce which core ran it. */
void mc_init(void)
{
    const unsigned int core = corenum();
    xprintf("[%02u]: mc_init\n", core);
}
void display_topology(struct cpudata *head) { struct cpudata *cpu; unsigned int threads_per_socket; unsigned int i; char *sockets; int num_sockets = 0; /* For now, we only support topology parsing on Intel. */ if (head->vendor != VENDOR_INTEL) return; if (debug == 1) { cpu = head; printf("cpu->phys_proc_id: "); for (i = 0; i < nrCPUs; i++) { printf("%d, ", cpu->phys_proc_id); cpu = cpu->next; } printf("\n"); cpu = head; printf("cpu->x86_max_cores: "); for (i = 0; i < nrCPUs; i++) { printf("%d, ", cpu->x86_max_cores); cpu = cpu->next; } printf("\n"); cpu = head; printf("cpu->cpu_core_id: "); for (i = 0; i < nrCPUs; i++) { printf("%d, ", cpu->cpu_core_id); cpu = cpu->next; } printf("\n"); } sockets = malloc(nrCPUs); if (sockets == NULL) return; for (i = 0; i < nrCPUs; i++) sockets[i]=0; cpu = head; for (i = 0; i < nrCPUs; i++) { sockets[cpu->cpu_core_id]++; cpu = cpu->next; } for (i = 0; i < nrCPUs; i++) { if (debug == 1) printf("Socket %u: %u threads\n", i, (unsigned int) sockets[i]); if (sockets[i] != 0) /* only count populated sockets */ num_sockets++; } /* Print a topology summary */ cpu = head; printf("Total processor threads: %d\n", sockets[0] * num_sockets); printf("This system has %d ", num_sockets); threads_per_socket = sockets[0]; if (cpu->flags_edx & X86_FEATURE_HT) if (cpu->num_siblings > 1) threads_per_socket = sockets[0]/2; if (nrCPUs == 1) { /* Handle the single CPU case */ printf("processor"); } else { const char *p; p = corenum(threads_per_socket); if (strncmp("?", p, 1)) printf("%s-core processor", corenum(threads_per_socket)); else printf("%u-core processor", threads_per_socket); if (num_sockets > 1) printf("s"); } if (cpu->flags_edx & X86_FEATURE_HT && cpu->num_siblings > 1) printf(" with hyper-threading (%d threads per core)", cpu->num_siblings); free(sockets); }
/* Run the barrier stress test against both barrier implementations. */
void mc_main()
{
    xprintf("[%02u]: mc_main\n", corenum());

    test1(sm_barrier, "shared memory");  /* software (shared-memory) barrier */
    test1(hw_barrier, "hardware");       /* hardware barrier */
}