/* ----------------------------------------------------------------------- */ int main(int argc, char *argv[]) { int STEP, NUMTHREADS; double total_time; char *PARAM_NAMES[NUM_ARGS] = {"Size (in K)"}; char *TIMERS_NAMES[NUM_TIMERS] = {"Total_time" }; char *DEFAULT_VALUES[NUM_ARGS] = {"2048 K"}; NUMTHREADS = omp_get_max_threads(); OSCR_init (NUMTHREADS, "Quicksort", "Use 'qsort' <size (in K)>", NUM_ARGS, PARAM_NAMES, DEFAULT_VALUES , NUM_TIMERS, NUM_TIMERS, TIMERS_NAMES, argc, argv); SIZE = OSCR_getarg_int(1); if (SIZE > MAXSIZE) { printf("Size: %d Maximum size: %d\n", SIZE, MAXSIZE); exit(-1); } /* Default: DEFAULT_SIZE */ for (STEP = 0; STEP < NUM_STEPS; STEP++) { initialize(array, STEP); OSCR_timer_start(0); qs(array, 0, SIZE-1); OSCR_timer_stop(0); testit(array); } total_time = OSCR_timer_read(0); OSCR_report(1, TIMERS_NAMES); printf("\n \t# THREADS \tSIZE \tSTEPS \tTIME (secs.) \n"); printf("\t%d \t\t%d \t%d \t%14.6lf \n", NUMTHREADS, SIZE, NUM_STEPS, total_time); } /* main */
int main(int argc, char **argv){ double *u, *f, dx, dy; double dt, mflops; int NUMTHREADS; char *PARAM_NAMES[NUM_ARGS] = {"Grid dimension: X dir =", "Grid dimension: Y dir =", "Helmhotlz constant =", "Successive over-relaxation parameter =", "error tolerance for iterative solver =", "Maximum iterations for solver ="}; char *TIMERS_NAMES[NUM_TIMERS] = {"Total_time"}; char *DEFAULT_VALUES[NUM_ARGS] = {"5000", "5000", "0.8", "1.0", "1e-7", "1000"}; NUMTHREADS = omp_get_max_threads(); OSCR_init (NUMTHREADS, "Jacobi Solver v1", "Use 'jacobi01' <n> <m> <alpha> <relax> <tol> <mits>", NUM_ARGS, PARAM_NAMES, DEFAULT_VALUES , NUM_TIMERS, NUM_TIMERS, TIMERS_NAMES, argc, argv); n = OSCR_getarg_int(1); m = OSCR_getarg_int(2); alpha = OSCR_getarg_double(3); relax = OSCR_getarg_double(4); tol = OSCR_getarg_double(5); mits = OSCR_getarg_int(6); printf("-> %d, %d, %g, %g, %g, %d\n", n, m, alpha, relax, tol, mits); u = (double *) OSCR_malloc(n*m*sizeof(double)); f = (double *) OSCR_malloc(n*m*sizeof(double)); /* arrays are allocated and initialzed */ initialize(n, m, alpha, &dx, &dy, u, f); /* Solve Helmholtz eqiation */ OSCR_timer_start(0); jacobi(n, m, dx, dy, alpha, relax, u,f, tol, mits); OSCR_timer_stop(0); dt = OSCR_timer_read(0); // printf(" elapsed time : %12.6f\n", dt); mflops = (0.000001*mits*(m-2)*(n-2)*13) / dt; // printf(" MFlops : %12.6g (%d, %d, %d, %g)\n",mflops, mits, m, n, dt); error_check(n, m, alpha, dx, dy, u, f); OSCR_report(1, TIMERS_NAMES); return 0; }
/*
 * PARALLEL LOOP
 *
 * Works on a vector of size*nthreads doubles initialised to V[i] = i.
 * Each of the numiter iterations first snapshots V into oldV and then
 * replaces every element except the last with f(V[i], oldV[i+1]), so each
 * update reads its right-hand neighbour's value from before the iteration.
 * Both passes are OpenMP parallel-for loops with a static schedule.
 * Timer 0 brackets the iterations only.
 *
 * Local names are kept as they are because debug_V.c is textually
 * included under DEBUG and may refer to them.
 */
void loop(int nthreads, int size, int numiter) {
    int i, iter;
    int totalSize = size*nthreads;
    int lastIdx;

    /* Working vector and its per-iteration snapshot */
    double *V = (double *)OSCR_calloc(totalSize, sizeof(double));
    double *oldV = (double *)OSCR_calloc(totalSize, sizeof(double));

    /* Fill the vector with its own indices */
    for (i = 0; i < totalSize; i++) {
        V[i] = (double)i;
    }

    /* The final element has no right-hand neighbour and is never updated */
    lastIdx = totalSize - 1;

    OSCR_timer_start(0);

    for (iter = 0; iter < numiter; iter++) {

        /* Snapshot the whole vector */
#pragma omp parallel for default(none) shared(V,oldV,totalSize) private(i) schedule(static)
        for (i = 0; i < totalSize; i++) {
            oldV[i] = V[i];
        }

        /* Update each element from the snapshot of its right neighbour */
#pragma omp parallel for default(none) shared(V,oldV,lastIdx) private(i) schedule(static)
        for (i = 0; i < lastIdx; i++) {
            V[i] = f(V[i], oldV[i+1]);
        }
    }

    OSCR_timer_stop(0);

    /* Optional dump of the vector */
#ifdef DEBUG
#include "debug_V.c"
#endif
}
/* ----------------------------------------------------------------------- IMPLEMENTATION * ----------------------------------------------------------------------- */ int main(int argc, char **argv) { int i, j, NUMTHREADS; long inside, /* no. of points inside the Mandelbrot set */ outside; /* no. of points outside the Mandelbrot set */ double area, error, ztemp, total_time; complex z; char *PARAM_NAMES[NUM_ARGS] = {"Number of points"}; char *TIMERS_NAMES[NUM_TIMERS] = {"Total_time"}; char *DEFAULT_VALUES[NUM_ARGS] = {"4092"}; NUMTHREADS = omp_get_max_threads(); OSCR_init (NUMTHREADS, "Mandelbrot set area", "Use 'mandel' <Number of points>", NUM_ARGS, PARAM_NAMES, DEFAULT_VALUES , NUM_TIMERS, NUM_TIMERS, TIMERS_NAMES, argc, argv); NPOINTS = OSCR_getarg_int(1); /* Default: DEFAULT_NPOINTS */ points = (complex *)OSCR_calloc(NPOINTS, sizeof(complex)); NUMTHREADS = omp_get_max_threads(); /*1. Generate NPOINTS random points in the complex plane */ srandom(31416); for (i = 0; i < NPOINTS; i++) { points[i].re = -2.0 + 2.5 * random() / OSCR_RAND_MAX; points[i].im = 1.125 * random() / OSCR_RAND_MAX; } /* * 2. Monte Carlo sampling * 2a. Outer loop runs over NPOINTS, initialise z=c * 2b. Inner loop has the iteration z=z*z+c, and threshold test */ OSCR_timer_start(0); outside = 0; #pragma omp parallel for default(none) reduction(+:outside) \ private(i, j, ztemp, z) shared(NPOINTS, points) for(i = 0; i < NPOINTS; i++) { z.re = points[i].re; z.im = points[i].im; for (j = 0; j < MAXITER; j++) { ztemp = (z.re * z.re) - (z.im * z.im) + points[i].re; z.im = z.re * z.im * 2 + points[i].im; z.re = ztemp; if (z.re * z.re + z.im * z.im > THRESOLD) { outside++; break; } } /* for j */ } /* for i */ inside = (long)NPOINTS - outside; /*3. Calculate area and error */ /* The area is proportional to 2 * the area of the rectangle * no. 
of points inside it */ /* The error is inversely proportional to the square root of the number of test cases */ area = 2.0 * (2.5 * 1.125) * inside / NPOINTS; error = area / sqrt(NPOINTS); OSCR_timer_stop(0); total_time = OSCR_timer_read(0); /* 4. Output the Results */ OSCR_report(1, TIMERS_NAMES); printf("\n \t# THREADS NPOINTS AREA \t\t\tERROR \t\tTIME (secs.)\n"); // printf("\t%d \t%d \t%16.12f %16.12f \t%lf\n", NUMTHREADS, NPOINTS, area, error, total_time); printf("\t%d \t%d \t%16.12f %16.12f \t\n", NUMTHREADS, NPOINTS, area, error); return 0; }
/** main function with initialization, command line argument parsing, * memory allocation, OpenMP setup, wall--clock time measurement. */ int main(int argc, char *argv[]) { std::vector < int >myVec; int numThreads; int numEntries; int switchThresh; char *PARAM_NAMES[NUM_ARGS] = {"Number of integer to sort:", "Number of threads:", "SwitchThresh:"}; char *TIMERS_NAMES[NUM_TIMERS] = {"Total_time" }; char *DEFAULT_VALUES[NUM_ARGS] = {"10000000", "1", "1000"}; /* used for time measurements */ double accTime; numThreads = omp_get_max_threads(); OSCR_init (numThreads, "QuickSort", "", NUM_ARGS, PARAM_NAMES, DEFAULT_VALUES , NUM_TIMERS, NUM_TIMERS, TIMERS_NAMES, argc, argv); numEntries = OSCR_getarg_int(1); numThreads = OSCR_getarg_int(2); switchThresh = OSCR_getarg_int(3); /* and run with the specified number of threads */ omp_set_num_threads(numThreads); /* initialize random number generator to fixed seed. this is done, so * that every run of the algorithm is sorting the exact same vector. * this way, we can compare runs easily */ //std::srand( std::time(0) ); std::srand(123); /* Reserve sufficient capacity for vector once and for all */ myVec.reserve(myVec.size() + numEntries); /* fill the vector with random numbers */ for (int i = 0; i < numEntries; ++i) { myVec.push_back(std::rand()); } /* Start measuring the time */ OSCR_timer_start(0); /* sort vector in parallel */ # pragma omp parallel shared (myVec, switchThresh, numThreads) { # pragma intel omp taskq { # pragma intel omp task { myQuickSort(myVec, 0, myVec.size() - 1, switchThresh); } } } /* Finish time measurement */ OSCR_timer_stop(0); /* calculate elapsed time */ accTime = OSCR_timer_read(0); /* determine and print out, whether or not the vector was sorted ok */ if (vectorValidate(myVec)) std::cout << "\nSuccess, wall-clock time: " << accTime << "\n\n"; else std::cout << "\nSorting FAILED!" << "\n\n"; OSCR_report(); return 0; }
/*
 * PARALLEL LOOP
 *
 * Works on a vector of size*nthreads doubles initialised to V[i] = i.
 * In each of the numiter iterations a parallel region is opened in which
 * thread t owns the slice V[t*size .. (t+1)*size-1].  Every thread except
 * the last first saves the first element of the next slice, all threads
 * meet at a barrier, and then each one updates its slice left to right
 * with V[i] = f(V[i], V[i+1]), using the saved value for the slice's last
 * element.  The very last element of the vector is never updated.
 * Timer 0 brackets the iterations only.
 *
 * NOTE(review): the slicing assumes the parallel region really runs with
 * nthreads threads -- confirm that the caller sets the team size.
 *
 * Local names are kept as they are because debug_V.c is textually
 * included under DEBUG and may refer to them.
 */
void loop(int nthreads, int size, int numiter) {
    /* VARIABLES */
    int i,iter;
    int thread;
    int limitL, limitR;

    /* DECLARE VECTOR AND ANCILLARY DATA STRUCTURES */
    double *V=NULL;
    double border;
    int totalSize = size*nthreads;

    V = (double *)OSCR_calloc(totalSize, sizeof(double));

    /* 1. INITIALIZE VECTOR */
    for (i=0; i<totalSize; i++) {
        V[i]= 0.0 + i;
    }

    /* 2. START TIMER */
    OSCR_timer_start(0);

    /* 3. ITERATIONS LOOP */
    for(iter=0; iter<numiter; iter++) {

        /* 3.1. PROCESS IN PARALLEL */
#pragma omp parallel default(none) shared(V,size,nthreads) private(thread,limitL,limitR,border,i)
        {
            /* 3.1.1. GET NUMBER OF THREAD */
            thread = omp_get_thread_num();

            /* 3.1.2. COMPUTE LIMIT INDEX */
            limitL = thread*size;
            limitR = (thread+1)*size-1;

            /* 3.1.3. COPY OTHER THREAD's NEIGHBOR ELEMENT
             * The last thread has no right-hand neighbour: V[limitR+1]
             * would be one past the end of the vector, so it must skip
             * the copy (it never uses `border`, see 3.1.6). */
            if (thread != nthreads-1) border = V[limitR+1];

            /* 3.1.4. SYNCHRONIZE BEFORE UPDATING LOCAL PART
             * All border values must be saved before any neighbour
             * starts overwriting the first element of its slice. */
#pragma omp barrier

            /* 3.1.5. COMPUTE LOCAL UPDATES */
            for (i=limitL; i<limitR; i++) {
                V[i] = f( V[i], V[i+1] );
            }

            /* 3.1.6. COMPUTE LAST ELEMENT (EXCEPT LAST THREAD) */
            if (thread != nthreads-1) V[limitR] = f( V[limitR], border );

            /* 3.1.7. END PARALLEL REGION */
        }

        /* 3.2. END ITERATIONS LOOP */
    }

    /* 4. STOP TIMER */
    OSCR_timer_stop(0);

    /* 5. WRITE VECTOR (DEBUG) */
#ifdef DEBUG
#include "debug_V.c"
#endif

    /* 6. END */
}
/** main function with initialization, command line argument parsing, * memory allocation, OpenMP setup, wall--clock time measurement. */ int main(int argc, char *argv[]) { std::vector < int >myVec; std::stack < std::pair < int, int > >globalTodoStack; int numThreads; int numEntries; int switchThresh; char *PARAM_NAMES[NUM_ARGS] = {(char *)"Number of integer to sort:", (char *)"Number of threads:", (char *)"SwitchThresh:"}; char *TIMERS_NAMES[NUM_TIMERS] = {(char *)"Total_time" }; char *DEFAULT_VALUES[NUM_ARGS] = {(char *)"100", (char *)"2", (char *)"10"}; /* this number indicates, how many threads are doing useful work atm. */ int numBusyThreads = 1; /* used for time measurements */ double accTime; /* used for performance measurements */ std::vector < int >globalStackWrite; numThreads = omp_get_max_threads(); OSCR_init (numThreads, (char *)"QuickSort", (char *)"", NUM_ARGS, PARAM_NAMES, DEFAULT_VALUES , NUM_TIMERS, NUM_TIMERS, TIMERS_NAMES, argc, argv); numEntries = OSCR_getarg_int(1); numThreads = OSCR_getarg_int(2); switchThresh = OSCR_getarg_int(3); /* initialize the performance measures */ for (int i = 0; i < numThreads; ++i) { globalStackWrite.push_back(0); } /* and run with the specified number of threads */ omp_set_num_threads(numThreads); /* initialize random number generator to fixed seed. this is done, so * that every run of the algorithm is sorting the exact same vector. 
* this way, we can compare runs easily */ //std::srand( std::time(0) ); std::srand(123); /* Reserve sufficient capacity for vector once and for all */ myVec.reserve(myVec.size() + numEntries); /* fill the vector with random numbers */ for (int i = 0; i < numEntries; ++i) { myVec.push_back(std::rand()); } /* Start measuring the time */ OSCR_timer_start(0); /* sort vector in parallel */ # pragma omp parallel shared(myVec, globalTodoStack, numThreads, \ switchThresh, numBusyThreads, globalStackWrite) { /* start sorting with only one thread, the others wait for the stack * to fill up */ if (0 == omp_get_thread_num()) { myQuickSort(myVec, 0, myVec.size() - 1, switchThresh, globalTodoStack, numBusyThreads, numThreads, globalStackWrite); } else { myQuickSort(myVec, 0, 0, switchThresh, globalTodoStack, numBusyThreads, numThreads, globalStackWrite); } } /* Finish time measurement */ OSCR_timer_stop(0); /* calculate elapsed time */ accTime = OSCR_timer_read(0); /* determine and print out, whether or not the vector was sorted ok */ if (vectorValidate(myVec)) std::cout << "\nSuccess, wall-clock time: " << accTime << "\n\n"; else std::cout << "\nSorting FAILED!" << "\n\n"; int globalStackWriteSum = 0; /* sum up and print out all performance measures */ for (int i = 0; i < numThreads; ++i) { globalStackWriteSum += globalStackWrite[i]; std::cout << i << ".: gSW: " << globalStackWrite[i] << "\n"; } std::cout << std:: endl << "Total: gSW: " << globalStackWriteSum << "\n\n"; OSCR_report(); return 0; }
/*
 * Graph search. Test if a path exists from a source node to a target node.
 *
 * Parallelization method: shared-memory workers farm.  A shared pool holds
 * the nodes still to be explored.  Every worker repeatedly takes a node
 * from the pool; if it is the target the search ends, otherwise the node's
 * not-yet-queued successors are pushed to the pool.  The search also ends
 * when all nthreads workers are idle on an empty pool (target unreachable).
 *
 * All reads and writes of the shared state (pool, searched, found,
 * num_waiting) happen inside the unnamed critical section, so no worker
 * ever inspects a termination flag while another one is changing it.
 *
 * Timer 0 brackets the parallel search; the result is printed to stdout.
 *
 * NOTE(review): termination on "not found" requires num_waiting to reach
 * nthreads, so the parallel region must really run with nthreads
 * threads -- confirm that the caller sets the team size.
 */
void testPath(int nthreads, int source, int target, tg graph) {
    /* SHARED STRUCTURES */
    Bool *searched=NULL;    /* searched[v]: node v has already been queued */
    Astack pool;            /* nodes waiting to be explored */
    Bool found = FALSE;     /* set once a worker pops the target */
    int ind;

    /* ENDING CONTROL: workers currently idle on an empty pool */
    int num_waiting=0;

    /* 1. ALLOCATE MEMORY FOR ANCILLARY STRUCTURES */
    pool = Ast_init();
    searched = OSCR_calloc(tg_nodes(graph), sizeof(Bool));
    for (ind=0; ind<tg_nodes(graph); ind++) {
        searched[ind]=FALSE;
    }

    /* 2. INIT "nodes to explore" POOL WITH THE source ID */
    Ast_push(pool, source);

    /* 3. START TIMER */
    OSCR_timer_start(0);

    /* 4. SPAWN WORKERS */
#pragma omp parallel default(none) \
        shared(nthreads,num_waiting,graph,searched,pool,target,found)
    {
        Bool waiting = FALSE;       /* this worker is counted in num_waiting */
        Bool done = FALSE;          /* this worker has seen the end condition */
        tg_task next=TG_NULLID;     /* node currently held by this worker */
        task_list succs;
        int num_succs;
        int s;
#ifdef DEBUG
        int numPops=0;
        int numNoPops=0;
        int thread = omp_get_thread_num();
#endif

        /* WORKER WORKS UNTIL:
         *   ALL WORKERS ARE WAITING (TARGET NOT FOUND)
         *   OR SOMEONE FINDS THE TARGET */
        while ( !done ) {

            /* 1. CHECK FOR THE END AND GET NEXT ELEMENT TO PROCESS.
             *    ALL POOL AND FLAG OPERATIONS ARE MONITORIZED */
#pragma omp critical
            {
                if ( found || num_waiting == nthreads ) {
                    /* 1.1. SEARCH IS OVER */
                    done = TRUE;
                }
                else if ( Ast_more(pool) ) {
                    /* 1.2. ELEMENTS IN THE POOL: GET NEXT */
                    next = Ast_pop(pool);
#ifdef DEBUG
                    numPops++;
#endif
                    /* IF WAITING, CHANGE STATE */
                    if ( waiting ) {
                        waiting = FALSE;
                        num_waiting--;
                    }
                }
                else {
                    /* 1.3. EMPTY POOL: IF NOT WAITING, CHANGE STATE */
#ifdef DEBUG
                    numNoPops++;
#endif
                    if ( !waiting ) {
                        waiting = TRUE;
                        num_waiting++;
                    }
                }
            }

            /* Nothing in hand: either the search is over (loop exits) or
             * the pool was empty (poll again). */
            if ( next == TG_NULLID ) continue;

            /* 2. PROCESS next ELEMENT */
            if ( next == target ) {
                /* 2.1. TARGET FOUND: END ALL */
#pragma omp critical
                found = TRUE;
            }
            else {
                /* 2.2. GET SUCCESSORS; A NODE WITHOUT ANY NEEDS NO WORK */
                num_succs = tg_succ_num(graph, next);
                if ( num_succs > 0 ) {
                    succs = tg_succ(graph, next);

                    /* 2.3. PUSH SUCCS TO POOL: MONITORIZED OPERATION */
#pragma omp critical
                    {
                        for (s=0; s<num_succs; s++) {
                            tg_task vp = succs[s];

                            /* PUSH ONLY NON-EXPLORED NODES */
                            if ( ! searched[ vp ] ) {
                                searched[ vp ] = TRUE;
                                Ast_push(pool, vp);
                            }
                        }
                    }
                }
            }

            /* 2.4. END PROCESSING ELEMENT */
            next = TG_NULLID;
        }

#ifdef DEBUG
        printf("#DEBUG Thread %d ENDING ----> Pops: %d, NoPops: %d\n",thread,numPops,numNoPops);
#endif

        /* WORKERS END: PARALLEL REGION */
    }

    /* 5. STOP TIMER */
    OSCR_timer_stop(0);

    /* 6. WRITE RESULT */
    printf("\nPath(%d,%d) = %d\n\n", source, target, found);

    /* 7. END */
}
/*
 * PARALLEL LOOP
 *
 * Works on a vector of size*nthreads doubles initialised to V[i] = i.
 * A single parallel region is opened in which thread t owns the slice
 * V[t*size .. (t+1)*size-1].  Inside it numiter+nthreads-1 rounds are
 * executed; in round r thread t is active only while 0 <= r-t < numiter,
 * so every thread performs numiter updates, each starting one round after
 * the thread to its left.  An active thread updates its slice left to
 * right with V[i] = f(V[i], V[i-1]); the first element of a slice uses the
 * left neighbour's last element as saved at the start of the round.
 * Timer 0 brackets the parallel region.
 *
 * Local names are kept as they are because debug_V.c is textually
 * included under DEBUG and may refer to them.
 */
void loop(int nthreads, int size, int numiter) {
    int i, iter;
    int thread;
    int limitL, limitR;
    double border;
    int totalSize = size*nthreads;

    /* Working vector, filled with its own indices */
    double *V = (double *)OSCR_calloc(totalSize, sizeof(double));
    for (i = 0; i < totalSize; i++) {
        V[i] = (double)i;
    }

    OSCR_timer_start(0);

#pragma omp parallel default(none) shared(V,size,nthreads,numiter) private(iter,thread,limitL,limitR,border,i)
    {
        int stage;   /* index of this thread's own iteration in a round */

        /* Slice owned by this thread */
        thread = omp_get_thread_num();
        limitL = thread*size;
        limitR = limitL + size - 1;

        /* nthreads-1 extra rounds let the pipeline drain */
        for (iter = 0; iter < (numiter+nthreads-1); iter++) {

            /* Save the left neighbour's last element */
            if (thread > 0) border = V[limitL-1];

            /* Nobody may update before every border has been saved */
#pragma omp barrier

            /* Only threads whose pipeline stage is in range do work */
            stage = iter - thread;
            if (stage >= 0 && stage < numiter) {

                /* First element: uses the saved border (thread 0 has none) */
                if (thread > 0) V[limitL] = f( V[limitL], border );

                /* Remaining elements: each uses its freshly updated
                 * left-hand element */
                for (i = limitL+1; i <= limitR; i++) {
                    V[i] = f( V[i], V[i-1] );
                }
            }

            /* Updates must be complete before the next round's copy */
#pragma omp barrier
        }
    }

    OSCR_timer_stop(0);

    /* Optional dump of the vector */
#ifdef DEBUG
#include "debug_V.c"
#endif
}