/*
 * Thread entry point for the parallel Gaussian elimination.
 *
 * lp points to an int holding this thread's task id. The [begin, end]
 * slice handled by the thread is derived from the globals nsize and
 * task_num. The phases (initialization, elimination) are separated by
 * calls to barrier(task_num); task 0 additionally runs solveGauss() and
 * prints the elapsed wall-clock time.
 *
 * Always returns NULL.
 */
void *work_thread(void *lp)
{
    int task_id = *((int *) lp);
    int begin, end;
    struct timeval start, finish;

    /* get the divided task */
    begin = (nsize * task_id) / task_num + 1;
    end = (nsize * (task_id + 1)) / task_num;

    /* Only task 0 measures time; `start` is read only under the same test. */
    if (task_id == 0)
        gettimeofday(&start, NULL);

    fprintf(stderr, "thread %d: begin %d, end %d\n", task_id, begin, end);

    barrier(task_num);

    /* initialization: the right-hand side is set up by task 0 only */
    if (task_id == 0)
        initRHS(nsize, begin, end);
    barrier(task_num);

    initResult(nsize, begin, end);
    barrier(task_num);

    /* Gauss compute */
    computeGauss(nsize, task_id);
    barrier(task_num);
    /* Gauss computation done */

    if (task_id == 0) {
        /*
         * Per the original author's note, the solve stage has data
         * dependencies between its steps, so it is run by thread 0 alone.
         */
        solveGauss(nsize);

        gettimeofday(&finish, NULL);
        printf("Elapsed time: %.2f seconds\n",
               (((finish.tv_sec * 1000000.0) + finish.tv_usec) -
                ((start.tv_sec * 1000000.0) + start.tv_usec)) / 1000000.0);
    }

    /* pthreads stores the start routine's return value; make it defined. */
    return NULL;
}
int main(int argc, char *argv[]) { int i; struct timeval start, finish; double error; pthread_attr_t attr; pthread_t *tid; int *id; if (argc < 2) { fprintf(stderr, "usage: %s <matrixfile>\n", argv[0]); exit(-1); } // for getting the threads if(argc == 3) { task_num = strtol(argv[2], NULL, 10); } nsize = initMatrix(argv[1]); initRHS(nsize); initResult(nsize); // create threads id = (int *) malloc (sizeof (int) * task_num); tid = (pthread_t *) malloc (sizeof (pthread_t) * task_num); if (!id || !tid) errexit ("out of shared memory"); pthread_attr_init (&attr); pthread_attr_setscope (&attr, PTHREAD_SCOPE_SYSTEM); for (i = 1; i < task_num; i++) { id[i] = i; pthread_create (&tid[i], &attr, work_thread, &id[i]); } id[0]=0; work_thread(&id[0]); // wait for all threads to finish for (i = 1; i < task_num; i++) pthread_join (tid[i], NULL); solveGauss(nsize); error = 0.0; for (i = 0; i < nsize; i++) { double error__ = (X__[i]==0.0) ? 1.0 : fabs((X[i]-X__[i])/X__[i]); if (error < error__) { error = error__; } } fprintf(stdout, "Error: %e\n", error); }
int main(int argc, char **argv){ setbuf(stdout,0); int num = sysconf(_SC_NPROCESSORS_CONF); printf("system has %d processor(s)\n", num); cpu_set_t mask,get; CPU_ZERO(&mask); CPU_SET(0, &mask); int i,j ; int ite = 0; ite_converge = 0; hrtimer_converge = 0.0; int retcode,allone; void *retval; double hrtimer_tmp,hrtime_start,hrtime_end,l2normtotal; if ( argc != 4) {printf("[usage] SOR <matrix> <thread> <w>\n"); return 0;} if (pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask) < 0) { fprintf(stderr, "set thread affinity failed\n"); } thread_num = atoi(argv[2]); w = atof(argv[3]); nsize = initMatrix(argv[1]); initRHS(nsize); initResult(nsize); int workload = (int)ceil((double)nsize/(double)thread_num) ; pthread_t thread[thread_num]; int *seq = (int *)malloc(thread_num*sizeof(int)); for(i=0;i<thread_num;i++) seq[i] = i; shuffle(seq,thread_num); for(j=1;j<thread_num;j++) { retcode = pthread_create(&thread[j], NULL, task, new int(j)); if (retcode != 0) fprintf (stderr, "create thread failed %d\n", retcode); } sense_reverse_barrier(0); // start at the same time hrtime_start = gethrtime_x86(); printf("Finish reading file. 
Start computing...\n"); while(converge == 0) { ticket_acquire(&flags[0].tlock); if(flags[0].test == 1) { solve(0,workload-1,0,1); flags[0].flag = 0; flags[0].test = 0; sense_reverse_barrier(0); } else { hrtimer_tmp = gethrtime_x86(); ite ++; solve(0,workload-1,0,0); hrtimer_compute[0] += gethrtime_x86() - hrtimer_tmp; } if(token == 0 && flags[0].flag == 1&&converge==0) { //observe all flags allone = 1; for(j=0;j<thread_num;j++) { allone = allone & flags[j].flag; } if(allone == 1) { ite_converge ++; shuffle(seq,thread_num); for(i=0;i<thread_num;i++) if(seq[i] != 0) { ticket_acquire(&flags[seq[i]].tlock); flags[seq[i]].test = 1; ticket_release(&flags[seq[i]].tlock); } hrtimer_tmp = gethrtime_x86(); solve(0,workload-1,0,1); hrtimer_converge += gethrtime_x86() - hrtimer_tmp; sense_reverse_barrier(0); l2normtotal = 0.0; for(j=0;j<thread_num;j++) l2normtotal += flags[j].l2norm; if(l2normtotal < EPS) { converge = 1; } else { token = (token + 1) % thread_num; } } } ticket_release(&flags[0].tlock); } printf ("#%d finished\n",0); iteration[0] = ite; for(j=1;j<thread_num;j++) { retcode = pthread_join(thread[j], &retval); iteration[j] = *(int *)retval; if (retcode != 0) fprintf (stderr, "join failed %d\n", retcode); } hrtime_end = gethrtime_x86(); CPU_ZERO(&get); if (pthread_getaffinity_np(pthread_self(), sizeof(get), &get) < 0) fprintf(stderr, "get thread affinity failed\n"); for (j = 0; j < 24; j++) if (CPU_ISSET(j, &get)) printf("thread %d is running in processor %d sched_getcpu %d\n", -1, j,sched_getcpu()); #ifdef VERIFY FILE *res; res = fopen ("ASOR result", "w"); for (i = 0; i < nsize; i++) { fprintf (res, "[%d] = %lf\n", i+1, X[i]); } #endif double total = hrtime_end-hrtime_start; printf("Total time:%.16lfs\n",total); printf("Computation time:%.16lfs\n",max(thread_num,hrtimer_compute)); printf("Convergence time:%.16lfs\n",hrtimer_converge); printf("Syn time:%.16lfs\n",total-max(thread_num,hrtimer_compute)-hrtimer_converge); for(i=0;i<thread_num;i++) 
printf("Iteration[%d]: %d\t\tComputation time:%.16lfs\n",i,iteration[i],hrtimer_compute[i]); printf("#Converge iteration:%d\n",ite_converge); printf("sizeof(cflag) = %lu\n",sizeof(cflag)); return 0; }
int main(int argc, char *argv[]) { int i; struct timeval start, finish; double error; pthread_attr_t attr; pthread_t *tid; int *id; //Ensure that the program takes two parameters // first param is the input matrix and the second is the number of processors for the parallel run if (argc != 3) { fprintf(stderr, "usage: %s <matrixfile> <#ofProcesses>\n", argv[0]); exit(-1); } task_num = atoi(argv[2]); nsize = initMatrix(argv[1]); initRHS(nsize); initResult(nsize); gettimeofday(&start, 0); // create threads id = (int *) malloc (sizeof (int) * task_num); tid = (pthread_t *) malloc (sizeof (pthread_t) * task_num); if (!id || !tid) errexit ("out of shared memory"); pthread_attr_init (&attr); pthread_attr_setscope (&attr, PTHREAD_SCOPE_SYSTEM); for (i = 1; i < task_num; i++) { id[i] = i; pthread_create (&tid[i], &attr, work_thread, &id[i]); } id[0]=0; work_thread(&id[0]); // wait for all threads to finish for (i = 1; i < task_num; i++) pthread_join (tid[i], NULL); gettimeofday(&finish, 0); solveGauss(nsize); fprintf(stdout, "Time: %f seconds\n", (finish.tv_sec - start.tv_sec) + (finish.tv_usec - start.tv_usec)*0.000001); error = 0.0; for (i = 0; i < nsize; i++) { double error__ = (X__[i]==0.0) ? 1.0 : fabs((X[i]-X__[i])/X__[i]); if (error < error__) { error = error__; } } fprintf(stdout, "Error: %e\n", error); //File output // float timeOutput = ((finish.tv_sec - start.tv_sec) + (finish.tv_usec - start.tv_usec)*0.000001); // FILE *pthreadTimingFile = fopen("pthread1Timing.txt", "a"); // fprintf(pthreadTimingFile, "%s %d %f %G\n", argv[1], task_num, timeOutput, error); // fclose(pthreadTimingFile); return 0; }