/*
 *
 * PARALLEL LOOP
 *
 */
void loop(int nthreads, int size, int numiter) {
    /* VARIABLES */
    int i, iter;

    /* DECLARE VECTOR AND ANCILLARY DATA STRUCTURES */
    double *V = NULL;
    double *oldV = NULL;
    int totalSize = size * nthreads;

    V = (double *)OSCR_calloc(totalSize, sizeof(double));
    oldV = (double *)OSCR_calloc(totalSize, sizeof(double));

    /* 1. INITIALIZE VECTOR */
    for (i = 0; i < totalSize; i++) {
        V[i] = 0.0 + i;
    }

    /* 2. START TIMER */
    OSCR_timer_start(0);

    /* 3. ITERATIONS LOOP */
    for (iter = 0; iter < numiter; iter++) {

        /* 3.1. DUPLICATE THE FULL ARRAY IN PARALLEL */
        #pragma omp parallel for default(none) shared(V,oldV,totalSize) private(i) schedule(static)
        for (i = 0; i < totalSize; i++) {
            oldV[i] = V[i];
        }

        /* 3.2. INNER LOOP: PROCESS ELEMENTS IN PARALLEL */
        #pragma omp parallel for default(none) shared(V,oldV,totalSize) private(i) schedule(static)
        for (i = 0; i < totalSize - 1; i++) {
            V[i] = f(V[i], oldV[i+1]);
        }

    /* 3.3. END ITERATIONS LOOP */
    }

    /* 4. STOP TIMER */
    OSCR_timer_stop(0);

    /* 5. WRITE VECTOR (DEBUG) */
#ifdef DEBUG
#include "debug_V.c"
#endif

    /* 6. END */
}
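/*
 * The loop above applies an element-update function f() that the benchmark
 * defines elsewhere, together with the OSCR_* helpers of the OmpSCR harness.
 * A minimal stand-in for f(), so the fragment compiles on its own; the body
 * below is an assumption, not the original (any pure function of the two
 * neighbor values would serve the same timing purpose):
 */
double f(double a, double b) {
    /* combine an element with its neighbor's value */
    return (a + b) / 2.0;
}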
/* -----------------------------------------------------------------------
 * IMPLEMENTATION
 * ----------------------------------------------------------------------- */
int main(int argc, char **argv) {
    int i, j, NUMTHREADS;
    long inside,   /* no. of points inside the Mandelbrot set  */
         outside;  /* no. of points outside the Mandelbrot set */
    double area, error, ztemp, total_time;
    complex z;
    char *PARAM_NAMES[NUM_ARGS] = {"Number of points"};
    char *TIMERS_NAMES[NUM_TIMERS] = {"Total_time"};
    char *DEFAULT_VALUES[NUM_ARGS] = {"4092"};

    NUMTHREADS = omp_get_max_threads();
    OSCR_init(NUMTHREADS, "Mandelbrot set area",
              "Use 'mandel' <Number of points>",
              NUM_ARGS, PARAM_NAMES, DEFAULT_VALUES,
              NUM_TIMERS, NUM_TIMERS, TIMERS_NAMES, argc, argv);

    NPOINTS = OSCR_getarg_int(1);   /* Default: DEFAULT_NPOINTS */
    points = (complex *)OSCR_calloc(NPOINTS, sizeof(complex));

    /* 1. Generate NPOINTS random points in the complex plane */
    srandom(31416);
    for (i = 0; i < NPOINTS; i++) {
        points[i].re = -2.0 + 2.5 * random() / OSCR_RAND_MAX;
        points[i].im = 1.125 * random() / OSCR_RAND_MAX;
    }

    /*
     * 2. Monte Carlo sampling
     *    2a. Outer loop runs over NPOINTS, initialise z=c
     *    2b. Inner loop has the iteration z=z*z+c, and threshold test
     */
    OSCR_timer_start(0);
    outside = 0;

    #pragma omp parallel for default(none) reduction(+:outside) \
            private(i, j, ztemp, z) shared(NPOINTS, points)
    for (i = 0; i < NPOINTS; i++) {
        z.re = points[i].re;
        z.im = points[i].im;
        for (j = 0; j < MAXITER; j++) {
            ztemp = (z.re * z.re) - (z.im * z.im) + points[i].re;
            z.im = z.re * z.im * 2 + points[i].im;
            z.re = ztemp;
            if (z.re * z.re + z.im * z.im > THRESOLD) {
                outside++;
                break;
            }
        } /* for j */
    } /* for i */

    inside = (long)NPOINTS - outside;

    /* 3. Calculate area and error */
    /* The area estimate is 2 * the area of the sampling rectangle *
       the fraction of points inside the set */
    /* The error is inversely proportional to the square root of the
       number of test cases */
    area = 2.0 * (2.5 * 1.125) * inside / NPOINTS;
    error = area / sqrt(NPOINTS);

    OSCR_timer_stop(0);
    total_time = OSCR_timer_read(0);

    /* 4. Output the results */
    OSCR_report(1, TIMERS_NAMES);
    printf("\n \t# THREADS NPOINTS AREA \t\t\tERROR \t\tTIME (secs.)\n");
    printf("\t%d \t%d \t%16.12f %16.12f \t%lf\n",
           NUMTHREADS, NPOINTS, area, error, total_time);

    return 0;
}
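/*
 * main() above assumes the following declarations, normally provided by the
 * OmpSCR benchmark headers. A minimal sketch so the fragment is self-contained;
 * the values of MAXITER, THRESOLD and OSCR_RAND_MAX here are assumptions, not
 * the originals (THRESOLD keeps the spelling of the identifier as used in the
 * code):
 */
#define NUM_ARGS      1
#define NUM_TIMERS    1
#define MAXITER       1000          /* assumed iteration cap            */
#define THRESOLD      4.0           /* assumed |z|^2 escape threshold   */
#define OSCR_RAND_MAX 2147483647.0  /* assumed max value of random()    */

typedef struct {
    double re, im;
} complex;

int NPOINTS;            /* number of random sample points */
complex *points = NULL; /* samples in the complex plane   */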
/*
 *
 * Graph search: test whether a path exists from a source node to a target node.
 *
 * Parallelization method: shared-memory workers farm
 *
 */
void testPath(int nthreads, int source, int target, tg graph) {
    /* SHARED STRUCTURES */
    Bool *searched = NULL;
    Astack pool;
    Bool found = FALSE;
    int ind;

    /* ENDING CONTROL */
    int num_waiting = 0;

    /* 1. ALLOCATE MEMORY FOR ANCILLARY STRUCTURES */
    pool = Ast_init();
    searched = OSCR_calloc(tg_nodes(graph), sizeof(Bool));
    for (ind = 0; ind < tg_nodes(graph); ind++) {
        searched[ind] = FALSE;
    }

    /* 2. INIT "nodes to explore" POOL WITH THE source ID */
    Ast_push(pool, source);

    /* 3. START TIMER */
    OSCR_timer_start(0);

    /* 4. SPAWN WORKERS */
    #pragma omp parallel default(none) \
            shared(nthreads,num_waiting,graph,searched,pool,target,found)
    {
        Bool waiting = FALSE;
        tg_task next = TG_NULLID;
        task_list succs;
        int num_succs;
        int ind;
#ifdef DEBUG
        int numPops = 0;
        int numNoPops = 0;
        int thread = omp_get_thread_num();
#endif

        /* A WORKER WORKS UNTIL:
         *   ALL WORKERS ARE WAITING (TARGET NOT FOUND),
         *   OR SOMEONE FINDS THE TARGET
         */
        while (num_waiting != nthreads && !found) {

            /* 1. GET NEXT ELEMENT TO PROCESS (OR WAIT UNTIL MORE ELEMENTS) */
            while (next == TG_NULLID && num_waiting != nthreads && !found) {
                /* ALL POOL OPERATIONS ARE MONITORED */
                #pragma omp critical
                {
                    /* 1.1. CHECK THE POOL */
                    if (Ast_more(pool)) {
                        /* 1.1.1. ELEMENTS IN THE POOL: GET NEXT */
                        next = Ast_pop(pool);
#ifdef DEBUG
                        numPops++;
#endif
                        /* 1.1.2. IF WAITING, CHANGE STATE */
                        if (waiting) {
                            waiting = FALSE;
                            num_waiting--;
                        }
                    }
                    else {
                        /* 1.1.3. EMPTY POOL: IF NOT WAITING, CHANGE STATE */
#ifdef DEBUG
                        numNoPops++;
#endif
                        if (!waiting) {
                            waiting = TRUE;
                            num_waiting++;
                        }
                    }
                /* OMP END CRITICAL: MONITORED OPERATION */
                }
            } /* END GET next ELEMENT FROM THE POOL */

            /* 2. PROCESS next ELEMENT */
            if (next != TG_NULLID) {
                /* 2.1. TARGET FOUND: END ALL */
                if (next == target) {
                    found = TRUE;
                }
                /* 2.2. NO SUCCESSORS: END */
                else if (tg_succ_num(graph, next) == 0) {
                    next = TG_NULLID;
                }
                /* 2.3. GET SUCCESSORS LIST AND PUSH IT TO THE POOL */
                else {
                    /* 2.3.1. GET SUCCS LIST */
                    num_succs = tg_succ_num(graph, next);
                    succs = tg_succ(graph, next);

                    /* 2.3.2. PUSH SUCCS TO POOL: MONITORED OPERATION */
                    #pragma omp critical
                    if (num_succs > 0) {
                        for (ind = 0; ind < num_succs; ind++) {
                            tg_task vp = succs[ind];
                            /* PUSH ONLY NON-EXPLORED NODES */
                            if (!searched[vp]) {
                                searched[vp] = TRUE;
                                Ast_push(pool, vp);
                            }
                        }
                    /* END OMP CRITICAL: MONITORED OPERATION */
                    }
                }

                /* 2.4. END PROCESSING ELEMENT */
                next = TG_NULLID;
            }
        } /* END PROCESSING */

#ifdef DEBUG
        printf("#DEBUG Thread %d ENDING ----> Pops: %d, NoPops: %d\n",
               thread, numPops, numNoPops);
#endif
    /* WORKERS END: PARALLEL REGION */
    }

    /* 5. STOP TIMER */
    OSCR_timer_stop(0);

    /* 6. WRITE RESULT */
    printf("\nPath(%d,%d) = %d\n\n", source, target, found);

    /* 7. END */
}
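/*
 * The worker farm treats "Astack" as an opaque pool of node IDs with four
 * operations: Ast_init, Ast_more, Ast_push and Ast_pop. A minimal sketch of
 * such a stack (layout and growth policy are assumptions, not the original
 * OmpSCR implementation; requires <stdlib.h> for realloc):
 */
typedef struct AstackStruct {
    tg_task *items;    /* stored node IDs      */
    int      top;      /* number of stored IDs */
    int      capacity; /* allocated slots      */
} *Astack;

Astack Ast_init(void) {
    Astack s = OSCR_calloc(1, sizeof(struct AstackStruct));
    s->capacity = 1024;
    s->top = 0;
    s->items = OSCR_calloc(s->capacity, sizeof(tg_task));
    return s;
}

Bool Ast_more(Astack s) { return s->top > 0; }

void Ast_push(Astack s, tg_task v) {
    if (s->top == s->capacity) {   /* grow geometrically when full */
        s->capacity *= 2;
        s->items = realloc(s->items, s->capacity * sizeof(tg_task));
    }
    s->items[s->top++] = v;
}

tg_task Ast_pop(Astack s) {
    return s->items[--s->top];     /* caller checks Ast_more() first */
}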
/*
 *
 * PARALLEL LOOP
 *
 */
void loop(int nthreads, int size, int numiter) {
    /* VARIABLES */
    int i, iter;
    int thread;
    int limitL, limitR;

    /* DECLARE VECTOR AND ANCILLARY DATA STRUCTURES */
    double *V = NULL;
    double border;
    int totalSize = size * nthreads;

    V = (double *)OSCR_calloc(totalSize, sizeof(double));

    /* 1. INITIALIZE VECTOR */
    for (i = 0; i < totalSize; i++) {
        V[i] = 0.0 + i;
    }

    /* 2. START TIMER */
    OSCR_timer_start(0);

    /* 3. ITERATIONS LOOP */
    for (iter = 0; iter < numiter; iter++) {

        /* 3.1. PROCESS IN PARALLEL */
        #pragma omp parallel default(none) shared(V,size,nthreads) \
                private(thread,limitL,limitR,border,i)
        {
            /* 3.1.1. GET NUMBER OF THREAD */
            thread = omp_get_thread_num();

            /* 3.1.2. COMPUTE LIMIT INDEXES */
            limitL = thread * size;
            limitR = (thread + 1) * size - 1;

            /* 3.1.3. COPY THE NEIGHBOR THREAD's BORDER ELEMENT
             *        (EXCEPT THE LAST THREAD, WHICH HAS NO RIGHT NEIGHBOR) */
            if (thread != nthreads - 1)
                border = V[limitR + 1];

            /* 3.1.4. SYNCHRONIZE BEFORE UPDATING LOCAL PART */
            #pragma omp barrier

            /* 3.1.5. COMPUTE LOCAL UPDATES */
            for (i = limitL; i < limitR; i++) {
                V[i] = f(V[i], V[i+1]);
            }

            /* 3.1.6. COMPUTE LAST ELEMENT (EXCEPT LAST THREAD) */
            if (thread != nthreads - 1)
                V[limitR] = f(V[limitR], border);

        /* 3.1.7. END PARALLEL REGION */
        }

    /* 3.2. END ITERATIONS LOOP */
    }

    /* 4. STOP TIMER */
    OSCR_timer_stop(0);

    /* 5. WRITE VECTOR (DEBUG) */
#ifdef DEBUG
#include "debug_V.c"
#endif

    /* 6. END */
}
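/*
 * For reference: the parallel region above computes the same result as the
 * oldV-based version shown earlier, i.e. each element is combined with the
 * value its right neighbor had at the start of the iteration. Snapshotting a
 * single "border" element per thread replaces the full array copy. A
 * sequential equivalent (hypothetical helper, for clarity only):
 */
void loop_seq_right(int totalSize, int numiter, double *V) {
    int i, iter;
    for (iter = 0; iter < numiter; iter++) {
        /* left-to-right sweep: V[i+1] is read before it is updated */
        for (i = 0; i < totalSize - 1; i++) {
            V[i] = f(V[i], V[i+1]);
        }
    }
}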
/*
 *
 * PARALLEL LOOP
 *
 */
void loop(int nthreads, int size, int numiter) {
    /* VARIABLES */
    int i, iter;
    int thread;
    int limitL, limitR;

    /* DECLARE VECTOR AND ANCILLARY DATA STRUCTURES */
    double *V = NULL;
    double border;
    int totalSize = size * nthreads;

    V = (double *)OSCR_calloc(totalSize, sizeof(double));

    /* 1. INITIALIZE VECTOR */
    for (i = 0; i < totalSize; i++) {
        V[i] = 0.0 + i;
    }

    /* 2. START TIMER */
    OSCR_timer_start(0);

    /* 3. PROCESS IN PARALLEL */
    #pragma omp parallel default(none) shared(V,size,nthreads,numiter) \
            private(iter,thread,limitL,limitR,border,i)
    {
        /* 3.1. GET MY THREAD NUMBER IN THE GROUP */
        thread = omp_get_thread_num();

        /* 3.2. COMPUTE MY LIMIT INDEXES */
        limitL = thread * size;
        limitR = (thread + 1) * size - 1;

        /* 3.3. ITERATIONS LOOP
         *      (nthreads-1 EXTRA ITERATIONS TO EMPTY THE PIPELINE) */
        for (iter = 0; iter < (numiter + nthreads - 1); iter++) {

            /* 3.3.1. COPY THE NEIGHBOR THREAD's BORDER ELEMENT */
            if (thread != 0)
                border = V[limitL - 1];

            /* 3.3.2. SYNCHRONIZE BEFORE UPDATING LOCAL PART */
            #pragma omp barrier

            /* 3.3.3. COMPUTE LOCAL UPDATES
             *        (ONLY ACTIVE THREADS - CHECK PIPELINE STAGE) */
            if (thread <= iter && thread > (iter - numiter)) {
                /* 3.3.3.1. COMPUTE FIRST ELEMENT (EXCEPT THREAD 0) */
                if (thread != 0)
                    V[limitL] = f(V[limitL], border);

                /* 3.3.3.2. COMPUTE THE REST OF THE ELEMENTS */
                for (i = limitL + 1; i <= limitR; i++) {
                    V[i] = f(V[i], V[i-1]);
                }
            }

            /* 3.3.4. SYNCHRONIZE BEFORE COPYING UPDATED BORDER ELEMENT */
            #pragma omp barrier

        /* 3.3.5. END ITERATIONS LOOP */
        }

    /* 3.4. END PARALLEL REGION */
    }

    /* 4. STOP TIMER */
    OSCR_timer_stop(0);

    /* 5. WRITE VECTOR (DEBUG) */
#ifdef DEBUG
#include "debug_V.c"
#endif

    /* 6. END */
}
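/*
 * For reference: here the dependence runs through *already updated* values
 * (V[i-1] is the new value), so the iterations cannot simply run concurrently;
 * the code above staggers them across threads as a software pipeline, with
 * thread t performing its iteration k at step k+t, which is why nthreads-1
 * extra steps are needed to drain it. A sequential equivalent (hypothetical
 * helper, for clarity only):
 */
void loop_seq_left(int totalSize, int numiter, double *V) {
    int i, iter;
    for (iter = 0; iter < numiter; iter++) {
        /* V[i-1] was already updated in this same iteration */
        for (i = 1; i < totalSize; i++) {
            V[i] = f(V[i], V[i-1]);
        }
    }
}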