/*
 * Timing harness: samples gethrtime_x86() immediately before and after a
 * single call to empty() and prints the difference.
 *
 * The command-line arguments are accepted but not used.
 * Always returns 0.
 */
int main(int argc, char const *argv[])
{
    double before;
    double after;

    before = gethrtime_x86();
    empty();
    after = gethrtime_x86();

    /* NOTE(review): the unit is whatever gethrtime_x86() reports. */
    printf("Time: %0.8f\n", after - before);

    return 0;
}
void *work_thread(void *id){ int i,j,k; double pivotVal; double hrtime1, hrtime2; int task_id = *((int *) id); barrier(task_num); //wait for all threads to come and then start if(task_id == 0){ hrtime1 = gethrtime_x86(); } for(i=0; i<nsize; i++){ if(task_id == i % task_num){ getPivot(nsize,i); // select corresponding thread to find pivot in row } barrier(task_num); //wait for all threads finish pivotVal = matrix[i][i]; for (j = i + 1 ; j < nsize; j++){ if(task_id == j % task_num){ pivotVal = matrix[j][i]; matrix[j][i] = 0.0; for (k = i + 1 ; k < nsize; k++){ matrix[j][k] -= pivotVal * matrix[i][k]; } B[j] -= pivotVal * B[i]; } } barrier(task_num); } hrtime2 = gethrtime_x86(); if(task_id==0){ printf("Hrtime = %f seconds\n", hrtime2 - hrtime1); } return NULL; }
int main(int argc, char **argv){ setbuf(stdout,0); int num = sysconf(_SC_NPROCESSORS_CONF); printf("system has %d processor(s)\n", num); cpu_set_t mask,get; CPU_ZERO(&mask); CPU_SET(0, &mask); int i,j ; int ite = 0; ite_converge = 0; hrtimer_converge = 0.0; int retcode,allone; void *retval; double hrtimer_tmp,hrtime_start,hrtime_end,l2normtotal; if ( argc != 4) {printf("[usage] SOR <matrix> <thread> <w>\n"); return 0;} if (pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask) < 0) { fprintf(stderr, "set thread affinity failed\n"); } thread_num = atoi(argv[2]); w = atof(argv[3]); nsize = initMatrix(argv[1]); initRHS(nsize); initResult(nsize); int workload = (int)ceil((double)nsize/(double)thread_num) ; pthread_t thread[thread_num]; int *seq = (int *)malloc(thread_num*sizeof(int)); for(i=0;i<thread_num;i++) seq[i] = i; shuffle(seq,thread_num); for(j=1;j<thread_num;j++) { retcode = pthread_create(&thread[j], NULL, task, new int(j)); if (retcode != 0) fprintf (stderr, "create thread failed %d\n", retcode); } sense_reverse_barrier(0); // start at the same time hrtime_start = gethrtime_x86(); printf("Finish reading file. 
Start computing...\n"); while(converge == 0) { ticket_acquire(&flags[0].tlock); if(flags[0].test == 1) { solve(0,workload-1,0,1); flags[0].flag = 0; flags[0].test = 0; sense_reverse_barrier(0); } else { hrtimer_tmp = gethrtime_x86(); ite ++; solve(0,workload-1,0,0); hrtimer_compute[0] += gethrtime_x86() - hrtimer_tmp; } if(token == 0 && flags[0].flag == 1&&converge==0) { //observe all flags allone = 1; for(j=0;j<thread_num;j++) { allone = allone & flags[j].flag; } if(allone == 1) { ite_converge ++; shuffle(seq,thread_num); for(i=0;i<thread_num;i++) if(seq[i] != 0) { ticket_acquire(&flags[seq[i]].tlock); flags[seq[i]].test = 1; ticket_release(&flags[seq[i]].tlock); } hrtimer_tmp = gethrtime_x86(); solve(0,workload-1,0,1); hrtimer_converge += gethrtime_x86() - hrtimer_tmp; sense_reverse_barrier(0); l2normtotal = 0.0; for(j=0;j<thread_num;j++) l2normtotal += flags[j].l2norm; if(l2normtotal < EPS) { converge = 1; } else { token = (token + 1) % thread_num; } } } ticket_release(&flags[0].tlock); } printf ("#%d finished\n",0); iteration[0] = ite; for(j=1;j<thread_num;j++) { retcode = pthread_join(thread[j], &retval); iteration[j] = *(int *)retval; if (retcode != 0) fprintf (stderr, "join failed %d\n", retcode); } hrtime_end = gethrtime_x86(); CPU_ZERO(&get); if (pthread_getaffinity_np(pthread_self(), sizeof(get), &get) < 0) fprintf(stderr, "get thread affinity failed\n"); for (j = 0; j < 24; j++) if (CPU_ISSET(j, &get)) printf("thread %d is running in processor %d sched_getcpu %d\n", -1, j,sched_getcpu()); #ifdef VERIFY FILE *res; res = fopen ("ASOR result", "w"); for (i = 0; i < nsize; i++) { fprintf (res, "[%d] = %lf\n", i+1, X[i]); } #endif double total = hrtime_end-hrtime_start; printf("Total time:%.16lfs\n",total); printf("Computation time:%.16lfs\n",max(thread_num,hrtimer_compute)); printf("Convergence time:%.16lfs\n",hrtimer_converge); printf("Syn time:%.16lfs\n",total-max(thread_num,hrtimer_compute)-hrtimer_converge); for(i=0;i<thread_num;i++) 
printf("Iteration[%d]: %d\t\tComputation time:%.16lfs\n",i,iteration[i],hrtimer_compute[i]); printf("#Converge iteration:%d\n",ite_converge); printf("sizeof(cflag) = %lu\n",sizeof(cflag)); return 0; }
static void * task(void *arg) { int index = *((int *) arg); cpu_set_t mask,get; CPU_ZERO(&mask); CPU_SET(index, &mask); int i,j,k,loop,allone; double l2normtotal; int counter = 0; int workload = (int)ceil((double)nsize/(double)thread_num) ; int start = workload*(index); int end = (index + 1)*workload-1; double hrtimer_tmp; int *seq = (int *)malloc(thread_num*sizeof(int)); for(i=0;i<thread_num;i++) seq[i] = i; if (pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask) < 0) { fprintf(stderr, "set thread affinity failed\n"); } sense_reverse_barrier(index); int ite = 0; while(converge==0) { ticket_acquire(&flags[index].tlock); if(flags[index].test == 1) { solve(start,end,index,1); flags[index].flag = 0; flags[index].test = 0; sense_reverse_barrier(index); } else { hrtimer_tmp = gethrtime_x86(); ite ++; solve(start,end,index,0); hrtimer_compute[index] += gethrtime_x86() - hrtimer_tmp; } if(token == index && flags[index].flag == 1&&converge==0) { //observe all flags allone = 1; for(j=0;j<thread_num;j++) { allone = allone & flags[j].flag; } if(allone == 1) { ite_converge ++; shuffle(seq,thread_num); for(i=0;i<thread_num;i++) if(seq[i] != index) { ticket_acquire(&flags[seq[i]].tlock); flags[seq[i]].test = 1; ticket_release(&flags[seq[i]].tlock); } hrtimer_tmp = gethrtime_x86(); solve(start,end,index,1); hrtimer_converge += gethrtime_x86() - hrtimer_tmp; sense_reverse_barrier(index); l2normtotal = 0.0; for(j=0;j<thread_num;j++) { l2normtotal += flags[j].l2norm; } if(l2normtotal < EPS) { converge = 1; } else { token = (token + 1) % thread_num; } } } ticket_release(&flags[index].tlock); } printf ("#%d finished\n",index); CPU_ZERO(&get); if (pthread_getaffinity_np(pthread_self(), sizeof(get), &get) < 0) { fprintf(stderr, "get thread affinity failed\n"); } for (j = 0; j < 24; j++) { if (CPU_ISSET(j, &get)) { printf("thread %d is running in processor %d sched_getcpu %d\n", index, j,sched_getcpu()); } } return new int(ite); }