// Runs one complete transfer for the given DMA channel, then applies the
// channel's reload / repeat / disable rules.
//
// - The transfer unit is Word with an address step of 4 when
//   dma.control.size is set, otherwise Half with a step of 2.
// - sequential() is cleared before the first unit, set once the first unit
//   has been written, and cleared again when the transfer is finished.
// - The loop is a do/while: a unit is always moved before run.length is
//   decremented and tested.
// - Source modes 0/1 increment/decrement the source; any other value leaves
//   it untouched. Target modes 0 and 3 increment, 1 decrements, any other
//   value leaves it untouched; mode 3 additionally restores run.target from
//   dma.target once the transfer ends.
void CPU::dma_transfer(Registers::DMA& dma) {
  unsigned size = dma.control.size ? Word : Half;
  unsigned seek = dma.control.size ? 4 : 2;

  sequential() = false;
  do {
    // Read one unit from the source, charging the bus cost first.
    step(bus.speed(dma.run.source, size));
    uint32 data = bus.read(dma.run.source, size);

    // Write it to the target, again charging the bus cost first.
    step(bus.speed(dma.run.target, size));
    bus.write(dma.run.target, size, data);
    sequential() = true;

    // The mode fields are re-read on every iteration, after the write above.
    const unsigned srcmode = dma.control.sourcemode;
    if(srcmode == 0) {
      dma.run.source += seek;
    } else if(srcmode == 1) {
      dma.run.source -= seek;
    }

    const unsigned dstmode = dma.control.targetmode;
    if(dstmode == 0 || dstmode == 3) {
      dma.run.target += seek;
    } else if(dstmode == 1) {
      dma.run.target -= seek;
    }
  } while(--dma.run.length);
  sequential() = false;

  // Post-transfer bookkeeping.
  if(dma.control.targetmode == 3) dma.run.target = dma.target;
  if(dma.control.repeat == 1) dma.run.length = dma.length;
  if(dma.control.repeat == 0) dma.control.enable = false;
}
/*
 * Entry point: parse options, then merge the named files either serially
 * (-s) or in parallel (default).
 *
 *   -d list   use the characters of `list` as delimiters; the list is passed
 *             through tr() and its return value stored in delimcnt
 *   -s        process the files one after another via sequential()
 *
 * When no -d option was given the delimiter defaults to a single tab.
 * At least one file operand is required; with none, usage() is called
 * (presumably it prints a synopsis and exits -- confirm in usage()).
 */
int
main(int argc, char *argv[])
{
	extern char *optarg;
	extern int optind;
	int ch, seq;

	seq = 0;
	while ((ch = getopt(argc, argv, "d:s")) != -1)
		switch(ch) {
		case 'd':
			delimcnt = tr(delim = optarg);
			break;
		case 's':
			seq = 1;
			break;
		case '?':
		default:
			usage();
		}
	argc -= optind;
	argv += optind;

	/* Reject an empty operand list instead of silently doing nothing. */
	if (*argv == NULL)
		usage();

	if (!delim) {
		delimcnt = 1;
		delim = "\t";
	}

	if (seq)
		sequential(argv);
	else
		parallel(argv);
	exit(0);
}
void* parallel( void *thread_num ) { // Sync and wait for all threads to be created and execution can start pthread_mutex_lock( &sync_mutex ); int thread = thread_count++; int offset = thread * chunk; pthread_cond_signal( &main_cv ); pthread_cond_wait( &sync_cv, &sync_mutex ); pthread_mutex_unlock( &sync_mutex ); sequential( input + offset, chunk, pOutput + offset ); partials[thread] = pOutput[offset + chunk - 1]; pthread_mutex_lock( &sync_mutex ); thread_count--; if( thread_count == 0) { pthread_cond_signal( &main_cv); } pthread_cond_wait( &sync_cv, &sync_mutex ); pthread_mutex_unlock( &sync_mutex ); int i; if( thread == 0 ) pthread_exit(0); for( i = 0; i < chunk; ++i) { pOutput[offset+i] += bias[thread-1]; } pthread_exit(0); }
int main(int argc, char *argv[]) { struct fdescr *dsc; Rune *delim; size_t i, len; int seq = 0, ret = 0; char *adelim = "\t"; ARGBEGIN { case 's': seq = 1; break; case 'd': adelim = EARGF(usage()); break; default: usage(); } ARGEND; if (!argc) usage(); /* populate delimiters */ unescape(adelim); delim = ereallocarray(NULL, utflen(adelim) + 1, sizeof(*delim)); len = utftorunestr(adelim, delim); if (!len) usage(); /* populate file list */ dsc = ereallocarray(NULL, argc, sizeof(*dsc)); for (i = 0; i < argc; i++) { if (!strcmp(argv[i], "-")) { argv[i] = "<stdin>"; dsc[i].fp = stdin; } else if (!(dsc[i].fp = fopen(argv[i], "r"))) { eprintf("fopen %s:", argv[i]); } dsc[i].name = argv[i]; } if (seq) { sequential(dsc, argc, delim, len); } else { parallel(dsc, argc, delim, len); } for (i = 0; i < argc; i++) if (dsc[i].fp != stdin && fshut(dsc[i].fp, argv[i])) ret |= fshut(dsc[i].fp, argv[i]); ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>"); return ret; }
/*
 * Instrumented test region "parallel" on communicator `comm`.
 *
 * Sequence: query size and rank, broadcast one int from the highest rank,
 * call sequential(rank) three times with a barrier between consecutive
 * calls, all-reduce the accumulated double with MPI_SUM, then run p2p().
 * The broadcast value and the reduced sum are not used afterwards.
 */
void parallel(MPI_Comm comm) {
  int rank = 0, nprocs, payload = 42;
  double local_sum = 0.0, global_sum = 0.0;
  int round;

  ELG_USER_START("parallel");

  MPI_Comm_size(comm, &nprocs);
  MPI_Comm_rank(comm, &rank);
  MPI_Bcast(&payload, 1, MPI_INT, nprocs - 1, comm);

  /* Three work phases, separated (not preceded or followed) by barriers. */
  for (round = 0; round < 3; round++) {
    if (round > 0) {
      MPI_Barrier(comm);
    }
    local_sum += sequential(rank);
  }

  MPI_Allreduce(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM, comm);
  p2p(comm);

  ELG_USER_END("parallel");
}
/*
 * Test region bracketed by VT_begin(4) / VT_end(4) on communicator `comm`.
 *
 * Order of operations: size and rank query, broadcast of a single int rooted
 * at the last rank, three sequential(rank) calls with an MPI_Barrier after
 * the first and after the second, an MPI_SUM all-reduce of the accumulated
 * double, and finally p2p(). Neither the broadcast value nor the reduced
 * total is read again in this function.
 */
void parallel(MPI_Comm comm) {
  int me = 0, world, token = 42;
  double partial = 0.0, total = 0.0;
  int barriers_left;

  VT_begin(4);

  MPI_Comm_size(comm, &world);
  MPI_Comm_rank(comm, &me);
  MPI_Bcast(&token, 1, MPI_INT, world - 1, comm);

  /* Work, barrier, work, barrier, work. */
  barriers_left = 2;
  for (;;) {
    partial += sequential(me);
    if (barriers_left == 0) {
      break;
    }
    MPI_Barrier(comm);
    barriers_left--;
  }

  MPI_Allreduce(&partial, &total, 1, MPI_DOUBLE, MPI_SUM, comm);
  p2p(comm);

  VT_end(4);
}
int main(int argc, char *argv[]) { int ch, rval, seq; wchar_t *warg; const char *arg; size_t len; setlocale(LC_CTYPE, ""); seq = 0; while ((ch = getopt(argc, argv, "d:s")) != -1) switch(ch) { case 'd': arg = optarg; len = mbsrtowcs(NULL, &arg, 0, NULL); if (len == (size_t)-1) err(1, "delimiters"); warg = malloc((len + 1) * sizeof(*warg)); if (warg == NULL) err(1, NULL); arg = optarg; len = mbsrtowcs(warg, &arg, len + 1, NULL); if (len == (size_t)-1) err(1, "delimiters"); delimcnt = tr(delim = warg); break; case 's': seq = 1; break; case '?': default: usage(); } argc -= optind; argv += optind; if (*argv == NULL) usage(); if (!delim) { delimcnt = 1; delim = tab; } if (seq) rval = sequential(argv); else rval = parallel(argv); exit(rval); }
/*
 * Calls sequential(h, j); the call is preceded by an `analysis_check` pragma
 * that lists square[h][j] under defined(...) and square[h][j],
 * square[h-1][j], square[h][j-1], h and j under upper_exposed(...).
 *
 * NOTE(review): the pragma text reads "assert assert" and its line
 * continuation appears collapsed onto one line -- confirm against the
 * original formatting before relying on it.
 */
void ompss(int h, int j) { #pragma analysis_check assert assert defined(square[h][j]) \ upper_exposed(square[h][j], square[h-1][j], square[h][j], square[h][j-1], h, j) sequential(h, j); }
// Entry point for the parallel execution path. In this block it has no
// implementation of its own: it passes `policy`, `first`, `last` and `in`
// unchanged to sequential() and returns that result.
// NOTE(review): `policy` is passed on as a named variable, not via
// std::forward -- confirm that matches sequential()'s signature.
static Value parallel(ExPolicy && policy, InIter first, InIter last, InIter in) { return sequential(policy, first, last, in); }
// Entry point for the parallel execution path. It delegates to sequential()
// with the same `policy`, `first`, `last` and `init` arguments.
// The return expression joins a temporary hpx::util::void_guard<result_type>
// and the sequential() call with a comma; presumably this allows the
// statement to compile when sequential() returns void -- confirm against the
// definition of hpx::util::void_guard.
static hpx::util::unused_type parallel(ExPolicy && policy, InIter first, InIter last, T && init) { return hpx::util::void_guard<result_type>(), sequential(policy, first, last, init); }
int main(void) { struct DSQueue *job_queue, *results_queue; pthread_t *producers, *consumers; struct job *prod_jobs, *cons_jobs; int i; int *result, result_sum; if (SEQUENTIAL) { sequential(); exit(0); } /* If there is a bad value from sysconf, just assume 1. */ cpus = (int) sysconf(_SC_NPROCESSORS_ONLN); cpus = cpus == 0 ? 1 : cpus; /* Compute the number of producer/consumer threads to start. */ num_producers = num_producers == 0 ? cpus : num_producers; num_consumers = num_consumers == 0 ? cpus : num_consumers; /* Initialize thread safe queues. `job_queue` has a capacity equivalent * to the buffer size set by the user. `results_queue` always has a capacity * equivalent to the number of consumer threads so that every consumer * can send a value to `results_queue` without blocking. */ job_queue = ds_queue_create(BUFFER); results_queue = ds_queue_create(num_consumers); assert(producers = malloc(num_producers * sizeof(*producers))); assert(consumers = malloc(num_consumers * sizeof(*consumers))); assert(prod_jobs = malloc(num_producers * sizeof(*prod_jobs))); assert(cons_jobs = malloc(num_consumers * sizeof(*cons_jobs))); /* Create `num_producers` jobs and threads. * Similarly for `num_consumers`. */ for (i = 0; i < num_producers; i++) { prod_jobs[i] = job_create(i, job_queue, results_queue); pthread_create(&(producers[i]), NULL, producer, (void*) &(prod_jobs[i])); } for (i = 0; i < num_consumers; i++) { cons_jobs[i] = job_create(i, job_queue, results_queue); pthread_create(&(consumers[i]), NULL, consumer, (void*) &(cons_jobs[i])); } /* Wait for all of the producers to finish producing. */ for (i = 0; i < num_producers; i++) assert(0 == pthread_join(producers[i], NULL)); /* Now that the producers are done, no more values will be sent to * `job_queue`, and therefore, it should be closed. (Values can still * be read from a closed queue.) */ ds_queue_close(job_queue); /* Free the producer jobs. 
*/ for (i = 0; i < num_producers; i++) free(prod_jobs[i].id); free(prod_jobs); /* Now wait for the consumers to finish consuming. */ for (i = 0; i < num_consumers; i++) assert(0 == pthread_join(consumers[i], NULL)); /* Now that the consumers are done, no more values will be sent to * `results_queue`, and therefore, it should be closed. */ ds_queue_close(results_queue); /* Free the consumer jobs. */ for (i = 0; i < num_consumers; i++) free(cons_jobs[i].id); free(cons_jobs); /* Read all values in `results_queue`, and sum them up to get the total * number of consumed items. */ result_sum = 0; while (NULL != (result = (int*) ds_queue_pop(results_queue))) { result_sum += *result; free(result); } /* Print out the total number of produced and consumed items. * In this example, these numbers should always be equal. * N.B. Watch out for integer division! */ printf("Total produced: %d\n", (JOBS / num_producers) * num_producers); printf("Total consumed: %d\n", result_sum); free(consumers); free(producers); ds_queue_free(job_queue); ds_queue_free(results_queue); return 0; }
int main( int argc, char **argv ) { void *status; struct timespec start, stop; int sTime, pTime; int padding, size; sscanf( argv[1], "%d", &n ); sscanf( argv[2], "%d", &numProc ); padding = n % numProc; size = sizeof(int) * (n + padding); chunk = ( n + padding ) / numProc; input = (int*) malloc( size ); sOutput = (int*) malloc( size ); pOutput = (int*) malloc( size ); bias = (int*) malloc( sizeof(int) * numProc ); partials = (int*) malloc( sizeof(int) * numProc ); memset( input , 0, size ); memset( sOutput , 0, size ); memset( pOutput , 0, size ); memset( bias , 0, sizeof(int) * numProc ); memset( partials , 0, sizeof(int) * numProc ); //int *pOutput = new int[n] int i; for( i = 0; i < n; i++ ) { input[i] = abs(rand() % 1000); } //print_scan( input, n ); // Sequential clock_gettime( CLOCK_MONOTONIC, &start ); sequential( input, n, sOutput ); clock_gettime( CLOCK_MONOTONIC, &stop ); sTime = stop.tv_nsec - start.tv_nsec;; printf("Sequential version took: %d nanoseconds\n", sTime ); // Parallel Setup pthread_t *threads = (pthread_t*) malloc( sizeof(pthread_t) * numProc ); pthread_cond_init (&sync_cv, NULL); pthread_cond_init (&main_cv, NULL); pthread_mutex_init(&sync_mutex, NULL); pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); for( i = 0; i < numProc; ++i ){ pthread_create( &threads[i], &attr, parallel, NULL ); } pthread_attr_destroy(&attr); pthread_mutex_lock( &sync_mutex ); while( thread_count < numProc ){ pthread_cond_wait( &main_cv, &sync_mutex ); } // Parallel execution clock_gettime( CLOCK_MONOTONIC, &start ); pthread_cond_broadcast( &sync_cv ); pthread_cond_wait( &main_cv, &sync_mutex ); sequential( partials, numProc, bias ); pthread_cond_broadcast( &sync_cv ); pthread_mutex_unlock( &sync_mutex ); for( i = 0; i < numProc; ++i ){ pthread_join( threads[i], &status ); } clock_gettime( CLOCK_MONOTONIC, &stop ); pTime = stop.tv_nsec - start.tv_nsec; printf( "Parallel version took: %d nanoseconds\n", pTime ); //print_scan( 
sOutput, n ); //print_scan( pOutput, n ); for( i = 0; i < n; ++i ){ if( sOutput[i] != pOutput[i] ){ printf( "Output does not match!" ); } } printf( "Speedup = %f \n", (sTime * 1.0) / (pTime * 1.0) ); return 0; }
int main (int argc, char **argv) { if (argc!=8 && argc!=12) { printf("Usage: ./a.out #threads m T t FILE θ enable/disable xmin ymin len Len\nExiting...\n"); exit(1); } THREADS = atoi(argv[1]); MASS = atof(argv[2]); STEPS = atoi(argv[3]); TIME = atof(argv[4]); THETA = atoi(argv[6]); if (strcmp(argv[7], "enable")==0) { if (argc!=12) { printf("Miss 4 arguments.\nExiting...\n\n"); exit(1); } XMIN = atof(argv[8]); YMIN = atof(argv[9]); CLEN = atof(argv[10]); WLEN = atof(argv[11]); } else if (strcmp(argv[7], "disable")==0) { en = 0; printf("Xwindow disabled.\n"); } else { printf("please enter 'enable' or 'disable'.\nExiting...\n\n"); exit(1); } // planet *planets; FILE *file = fopen(argv[5], "r"); dev = fopen("d.txt", "w"); if (file==0) { printf("Testcase missing.\nExiting...\n\n"); exit(1); } else { fscanf(file, "%d", &STARS); planets = (planet*)malloc(sizeof(planet)*STARS); int i; for (i=0; i<STARS; i++) { fscanf(file, "%lf %lf %lf %lf", &planets[i].x, &planets[i].y, &planets[i].vx, &planets[i].vy); if (planets[i].x>maxx) maxx = planets[i].x; if (planets[i].x<minx) minx = planets[i].x; if (planets[i].y>maxy) maxy = planets[i].y; if (planets[i].y<miny) miny = planets[i].y; } disx = maxx - minx; disy = maxy - miny; fclose(file); } /* Handle Xwindow */ if (en) { int screen; display = XOpenDisplay(NULL); if(display == NULL) { printf("cannot open display\nExiting...\n\n"); exit(1); } screen = DefaultScreen(display); win = XCreateSimpleWindow(display, RootWindow(display, screen), 0, 0, WLEN, WLEN, 0, BLACK, BLACK); XMapWindow(display, win); XSync(display, 0); gc = XCreateGC(display, win, 0, 0); XSetForeground(display, gc, WHITE); XFlush(display); } /* Sequential */ // fprintf(dev, "tree = %d\n", tree->num_body); // fprintf(dev, "tree: nw = %d, ne = %d, sw = %d, se = %d\n", tree->nw->num_body, tree->ne->num_body, tree->sw->num_body, tree->se->num_body); // fprintf(dev, "treenw: nw = %d, ne = %d, sw = %d, se = %d\n", tree->nw->nw->num_body, tree->nw->ne->num_body, 
tree->nw->sw->num_body, tree->nw->se->num_body); // fprintf(dev, "treene: nw = %d, ne = %d, sw = %d, se = %d\n", tree->ne->nw->num_body, tree->ne->ne->num_body, tree->ne->sw->num_body, tree->ne->se->num_body); // fprintf(dev, "treene: nw cxn = %.3lf, cyn = %.3lf\n", tree->ne->nw->cxn, tree->ne->nw->cyn); // fprintf(dev, "treenenw: nw = %d, ne = %d, sw = %d, se = %d\n", tree->ne->nw->nw->num_body, tree->ne->nw->ne->num_body, tree->ne->nw->sw->num_body, tree->ne->nw->se->num_body); // fprintf(dev, "treesw: nw = %d, ne = %d, sw = %d, se = %d\n", tree->sw->nw->num_body, tree->sw->ne->num_body, tree->sw->sw->num_body, tree->sw->se->num_body); // fprintf(dev, "root cxn = %.3lf, cyn = %.3lf\n", tree->cxn, tree->cyn); // fprintf(dev, "treenw: cxn = %.3lf, cyn = %.3lf\n", tree->nw->cxn, tree->nw->cyn); // fprintf(dev, "treenenw: cxn = %.3lf, cyn = %.3lf\n", tree->ne->nw->cxn, tree->ne->nw->cyn); struct timespec tstart, tstop; int i, j; clock_gettime(CLOCK_REALTIME, &tstart); for (i=0; i<STEPS; i++) { build_tree(); sequential(); freetree(tree); if (en) { XClearWindow(display, win); for (j=0; j<STARS; j++) { planet *p = &(planets[j]); XFillArc(display, win, gc, (p->x-XMIN)*WLEN/CLEN, WLEN-(p->y-YMIN)*WLEN/CLEN, 2, 2, 0, 360*64); } XFlush(display); usleep(5000); } } clock_gettime(CLOCK_REALTIME, &tstop); double sdur = 1000000*(tstop.tv_sec-tstart.tv_sec)+(tstop.tv_nsec-tstart.tv_nsec)/1000; printf("\nIt took %.5lf seconds to finish sequential caculation.\n", sdur/1000000); fclose(dev); return 0; }
int main(int argc, char * argv[]) { //checking for desired number of threads if (argc != 1) { if (strcmp(argv[1],"-t") || (threads = atoi(argv[2])) < 2) { printf("Usage: %s [-t <NUMBER_OF_THREADS>]\n Default Number of Threads: 2\n" , argv[0]); exit(-1); } printf("Pthreads and OpenMP Calculations will be done with %i threads.\n", threads); } struct timeval start, end; matrix firsts[MAX_TEST_CASES]; matrix seconds[MAX_TEST_CASES]; matrix results[MAX_TEST_CASES]; //parses all Testmatrices and stores them in two arrays // int num; //the actual number of testcases printf("Reading matrices from InputFile...\n"); for (int i = 0; i < MAX_TEST_CASES; ++i) { int end = parseMatrices(PATH_TO_TESTS, i, &firsts[i], &seconds[i]); if (end == 0) { printf("Parsing went wrong\n"); exit(0); } } FILE* performance; //first, sequential computation printf("Sequential calculation...\n"); gettimeofday(&start, NULL); for (int i = 0; i < MAX_TEST_CASES; ++i) { sequential(&firsts[i], &seconds[i], &results[i]); } gettimeofday(&end, NULL); //prints the results of sequential computation to file printf("Printing results...\n"); for (int i = 0; i < MAX_TEST_CASES; ++i) { if(!printMatrix(&results[i], PATH_TO_RESULTS)) { printf("Theres a problem with the output stream.\n"); exit(0); } } printf("Sequential calculation complete. 
Check %s for results\n" , PATH_TO_RESULTS); performance = fopen(PATH_TO_TIMES, "w"); fprintf(performance, "Sequential Implementation took %.3lf seconds for all testcases.\n" , getDifference(start, end)); //openMP Implementation is up next matrix ompresults[MAX_TEST_CASES]; printf("OpenMP calculation...\n"); gettimeofday(&start, NULL); for (int i = 0; i < MAX_TEST_CASES; ++i) { openMP(&(firsts[i]), &(seconds[i]), &(ompresults[i]), threads); } gettimeofday(&end, NULL); for (int i = 0; i < MAX_TEST_CASES; ++i) { if(!compareMatrices(&results[i], &ompresults[i])) { printf("OMP-Implementation has faults!"); exit(0); } } fprintf(performance, "OpenMP-Implementation took %.3lf seconds for all testcases.\n" , getDifference(start, end)); //lastly, pthreads implementation matrix ptresults[MAX_TEST_CASES]; printf("Posix-Threads calculation...\n"); gettimeofday(&start, NULL); for (int i = 0; i < MAX_TEST_CASES; ++i) { multithreaded(&firsts[i], &seconds[i], &ptresults[i], threads); } gettimeofday(&end, NULL); for (int i = 0; i < MAX_TEST_CASES; ++i) { if(!compareMatrices(&results[i], &ptresults[i])) { printf("Pthread-Implementation has faults!"); exit(0); } } fprintf(performance, "Pthreads-Implementation took %.3lf seconds for all testcases.\n" , getDifference(start, end)); //cleaning up printf("Cleaning up...\n"); fclose(performance); for (int i = 0; i < MAX_TEST_CASES; ++i) { free(firsts[i].values); free(seconds[i].values); free(results[i].values); free(ompresults[i].values); free(ptresults[i].values); } printf("All done, see %s for a summarization of each implementation's performance.\n" , PATH_TO_TIMES); exit(0); }