Example #1
/* ----------------------------------------------------------------------- */
int main(int argc, char *argv[]) {
  int STEP, NUMTHREADS;
  double total_time;
  char *PARAM_NAMES[NUM_ARGS] = {"Size (in K)"};
  char *TIMERS_NAMES[NUM_TIMERS] = {"Total_time" };
  char *DEFAULT_VALUES[NUM_ARGS] = {"2048 K"};


  NUMTHREADS = omp_get_max_threads();
  OSCR_init (NUMTHREADS, "Quicksort", "Use 'qsort' <size (in K)>", NUM_ARGS,
    PARAM_NAMES, DEFAULT_VALUES , NUM_TIMERS, NUM_TIMERS, TIMERS_NAMES,
    argc, argv);

  SIZE = OSCR_getarg_int(1);
  if (SIZE > MAXSIZE) {
    printf("Size: %d Maximum size: %d\n", SIZE, MAXSIZE);
    exit(-1);
  }
  /* Default: DEFAULT_SIZE */
  for (STEP = 0; STEP < NUM_STEPS; STEP++) {
    initialize(array, STEP);
    OSCR_timer_start(0);
    qs(array, 0, SIZE-1);
    OSCR_timer_stop(0);
    testit(array);
  }
  total_time = OSCR_timer_read(0);
  OSCR_report(1, TIMERS_NAMES);
  printf("\n \t# THREADS \tSIZE \tSTEPS \tTIME (secs.) \n");
  printf("\t%d \t\t%d \t%d \t%14.6lf \n", NUMTHREADS, SIZE, NUM_STEPS, total_time);

  return 0;
} /* main */
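The driver above relies on qs(), array, and SIZE, which are defined elsewhere in the benchmark. For reference, here is a minimal task-based parallel quicksort in the same spirit; the Hoare-style partition and the CUTOFF threshold are assumptions for illustration, not the benchmark's actual implementation.

#include <omp.h>
#include <stdio.h>

#define CUTOFF 1000   /* below this range size, skip task creation (assumed) */

static void qs(int *v, int lo, int hi) {
  if (lo >= hi) return;
  int pivot = v[(lo + hi) / 2], i = lo, j = hi;
  while (i <= j) {                       /* Hoare-style partition */
    while (v[i] < pivot) i++;
    while (v[j] > pivot) j--;
    if (i <= j) { int t = v[i]; v[i] = v[j]; v[j] = t; i++; j--; }
  }
  /* recurse as tasks; small ranges stay serial to limit task overhead */
  #pragma omp task shared(v) if (j - lo > CUTOFF)
  qs(v, lo, j);
  #pragma omp task shared(v) if (hi - i > CUTOFF)
  qs(v, i, hi);
  #pragma omp taskwait
}

int main(void) {
  int a[8] = {5, 3, 7, 1, 8, 2, 6, 4};
  #pragma omp parallel
  #pragma omp single            /* one thread seeds the task tree */
  qs(a, 0, 7);
  for (int i = 0; i < 8; i++) printf("%d ", a[i]);
  printf("\n");
  return 0;
}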
Example #2
int main(int argc, char **argv){
    double *u, *f, dx, dy;
    double dt, mflops;
    int NUMTHREADS;
    char *PARAM_NAMES[NUM_ARGS] = {"Grid dimension: X dir =", "Grid dimension: Y dir =", "Helmholtz constant =",
                                   "Successive over-relaxation parameter =",
                                   "error tolerance for iterative solver =", "Maximum iterations for solver ="};
    char *TIMERS_NAMES[NUM_TIMERS] = {"Total_time"};
    char *DEFAULT_VALUES[NUM_ARGS] = {"5000", "5000", "0.8", "1.0", "1e-7", "1000"};



   NUMTHREADS = omp_get_max_threads();
   OSCR_init (NUMTHREADS, "Jacobi Solver v1", "Use 'jacobi01' <n> <m> <alpha> <relax> <tol> <mits>", NUM_ARGS,
                PARAM_NAMES, DEFAULT_VALUES , NUM_TIMERS, NUM_TIMERS, TIMERS_NAMES,
                argc, argv);

    n = OSCR_getarg_int(1);
    m = OSCR_getarg_int(2);
    alpha = OSCR_getarg_double(3);
    relax = OSCR_getarg_double(4);
    tol = OSCR_getarg_double(5);
    mits = OSCR_getarg_int(6);

    printf("-> %d, %d, %g, %g, %g, %d\n",
           n, m, alpha, relax, tol, mits);

    u = (double *) OSCR_malloc(n*m*sizeof(double));
    f = (double *) OSCR_malloc(n*m*sizeof(double));


    /* arrays are allocated and initialized */
    initialize(n, m, alpha, &dx, &dy, u, f);


    /* Solve Helmholtz equation */
    OSCR_timer_start(0);
    jacobi(n, m, dx, dy, alpha, relax, u,f, tol, mits);

    OSCR_timer_stop(0);
    dt = OSCR_timer_read(0);

   // printf(" elapsed time : %12.6f\n", dt);
    mflops = (0.000001*mits*(m-2)*(n-2)*13) / dt;
  //  printf(" MFlops       : %12.6g (%d, %d, %d, %g)\n",mflops, mits, m, n, dt);

    error_check(n, m, alpha, dx, dy, u, f);

    OSCR_report(1, TIMERS_NAMES);

  return 0;
}
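The jacobi() routine itself, and the globals n, m, alpha, relax, tol, and mits, are defined in other files of the benchmark. As orientation, here is a sketch of one plausible shape for the solver, using the textbook 5-point Helmholtz discretization with two parallel loops per sweep; the coefficients and convergence test are standard, but details may differ from the benchmark's version.

#include <math.h>
#include <stdlib.h>

static void jacobi(int n, int m, double dx, double dy, double alpha,
                   double relax, double *u, double *f,
                   double tol, int mits) {
  double *uold = (double *)malloc((size_t)n * m * sizeof(double));
  double ax = 1.0 / (dx * dx);              /* x-direction coefficient */
  double ay = 1.0 / (dy * dy);              /* y-direction coefficient */
  double b  = -2.0 * (ax + ay) - alpha;     /* central coefficient */

  for (int k = 0; k < mits; k++) {
    double error = 0.0;

    /* copy the current iterate */
    #pragma omp parallel for
    for (int j = 0; j < m; j++)
      for (int i = 0; i < n; i++)
        uold[j * n + i] = u[j * n + i];

    /* relaxed Jacobi sweep over interior points */
    #pragma omp parallel for reduction(+:error)
    for (int j = 1; j < m - 1; j++)
      for (int i = 1; i < n - 1; i++) {
        double resid = (ax * (uold[j * n + i - 1] + uold[j * n + i + 1])
                      + ay * (uold[(j - 1) * n + i] + uold[(j + 1) * n + i])
                      + b * uold[j * n + i] - f[j * n + i]) / b;
        u[j * n + i] = uold[j * n + i] - relax * resid;
        error += resid * resid;
      }

    if (sqrt(error) / (n * m) < tol) break;  /* converged */
  }
  free(uold);
}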
Example #3
/*
*
* PARALLEL LOOP
*
*/
void loop(int nthreads, int size, int numiter) {
/* VARIABLES */
int i,iter;

/* DECLARE VECTOR AND ANCILLARY DATA STRUCTURES */
double *V=NULL;
double *oldV=NULL;
int totalSize = size*nthreads;

V = (double *)OSCR_calloc(totalSize, sizeof(double));
oldV = (double *)OSCR_calloc(totalSize, sizeof(double));

/* 1. INITIALIZE VECTOR */
for (i=0; i<totalSize; i++) {
    V[i]= 0.0 + i;
    }

/* 2. START TIMER */
OSCR_timer_start(0);

/* 3. ITERATIONS LOOP */
for(iter=0; iter<numiter; iter++) {

    /* 3.1. DUPLICATE THE FULL ARRAY IN PARALLEL */
#pragma omp parallel for default(none) shared(V,oldV,totalSize) private(i) schedule(static)
    for (i=0; i<totalSize; i++) {
        oldV[i] = V[i];
        }

    /* 3.2. INNER LOOP: PROCESS ELEMENTS IN PARALLEL */
#pragma omp parallel for default(none) shared(V,oldV,totalSize) private(i) schedule(static)
    for (i=0; i<totalSize-1; i++) {
        V[i] = f(V[i],oldV[i+1]);
        }

    /* 3.3. END ITERATIONS LOOP */
    }


/* 4. STOP TIMER */
OSCR_timer_stop(0);

/* 5. WRITE VECTOR (DEBUG) */
#ifdef DEBUG
#include "debug_V.c"
#endif

/* 6. END */
}
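The OSCR_* calls come from the OmpSCR measurement harness, and f() lives in the benchmark's kernel file. The stand-ins below are purely illustrative, just enough to compile and time the loop on its own; the real harness adds argument parsing and reporting, and the real f() is a different function.

#include <stdlib.h>
#include <omp.h>

static double t_start[8], t_elapsed[8];

static void OSCR_timer_start(int t) { t_start[t] = omp_get_wtime(); }
static void OSCR_timer_stop(int t)  { t_elapsed[t] += omp_get_wtime() - t_start[t]; }
static double OSCR_timer_read(int t) { return t_elapsed[t]; }
static void *OSCR_calloc(size_t nmemb, size_t size) { return calloc(nmemb, size); }

/* hypothetical element update: any pure function of (mine, right neighbor) fits */
static double f(double mine, double right) { return 0.5 * (mine + right); }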
Example #4
/* -----------------------------------------------------------------------
                          IMPLEMENTATION
 * ----------------------------------------------------------------------- */
int main(int argc, char **argv) {
  int i, j, NUMTHREADS;
  long inside,   /* no. of points inside the Mandelbrot set */
       outside;  /* no. of points outside the Mandelbrot set */
  double area, error, ztemp, total_time;
  complex z;
  char *PARAM_NAMES[NUM_ARGS] = {"Number of points"};
  char *TIMERS_NAMES[NUM_TIMERS] = {"Total_time"};
  char *DEFAULT_VALUES[NUM_ARGS] = {"4092"};


   NUMTHREADS = omp_get_max_threads();
   OSCR_init (NUMTHREADS, "Mandelbrot set area", "Use 'mandel' <Number of points>", NUM_ARGS,
                PARAM_NAMES, DEFAULT_VALUES , NUM_TIMERS, NUM_TIMERS, TIMERS_NAMES,
                argc, argv);

  NPOINTS = OSCR_getarg_int(1);
  /* Default: DEFAULT_NPOINTS */

  points = (complex *)OSCR_calloc(NPOINTS, sizeof(complex));

/*1. Generate NPOINTS random points in the complex plane */
  srandom(31416);
  for (i = 0; i < NPOINTS; i++) {
    points[i].re = -2.0 + 2.5 * random() / OSCR_RAND_MAX;
    points[i].im = 1.125 * random() / OSCR_RAND_MAX;
  }

/* 2. Monte Carlo sampling
 *    2a. Outer loop runs over NPOINTS, initialise z=c
 *    2b. Inner loop has the iteration z=z*z+c, and threshold test
 */
  OSCR_timer_start(0);
  outside = 0;
#pragma omp parallel for default(none) reduction(+:outside)       \
	                       private(i, j, ztemp, z) shared(NPOINTS, points)
  for(i = 0; i < NPOINTS; i++) {
    z.re = points[i].re;
    z.im = points[i].im;
    for (j = 0; j < MAXITER; j++) {
      ztemp = (z.re * z.re) - (z.im * z.im) + points[i].re;
      z.im = z.re * z.im * 2 + points[i].im;
      z.re = ztemp;
      if (z.re * z.re + z.im * z.im > THRESOLD) {
        outside++;
        break;
      }
    } /* for j */
  } /* for i */
  inside = (long)NPOINTS - outside;

  /* 3. Calculate area and error */
  /* The area is proportional to 2 * the area of the rectangle * no. of points inside it */
  /* The error is inversely proportional to the square root of the number of test cases */
  area = 2.0 * (2.5 * 1.125) * inside / NPOINTS;
  error = area / sqrt(NPOINTS);
  OSCR_timer_stop(0);
  total_time = OSCR_timer_read(0);

  /* 4. Output the Results */
  OSCR_report(1, TIMERS_NAMES);
  printf("\n \t# THREADS NPOINTS AREA \t\t\tERROR \t\tTIME (secs.)\n");
  printf("\t%d \t%d \t%16.12f %16.12f \t%lf\n", NUMTHREADS, NPOINTS, area, error, total_time);


  return 0;
}
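This file references several globals and macros declared elsewhere in the benchmark. For orientation, here are declarations consistent with the accesses above; the MAXITER value is an assumption, and THRESOLD keeps its original (misspelled) name because the code refers to it by that identifier.

#define MAXITER   10000              /* iteration cap per point (assumed value) */
#define THRESOLD  4.0                /* squared escape radius for |z| > 2 */
#define OSCR_RAND_MAX 2147483647.0   /* random() range on typical platforms */

typedef struct { double re, im; } complex;

complex *points;   /* Monte Carlo sample points, allocated in main */
int      NPOINTS;  /* number of sample points */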
Example #5
/** main function with initialization, command line argument parsing,
  * memory allocation, OpenMP setup, wall-clock time measurement.
  */
int main(int argc, char *argv[])
{

	std::vector<int> myVec;
	int numThreads;
	int numEntries;
	int switchThresh;
	char *PARAM_NAMES[NUM_ARGS] = {"Number of integers to sort:", "Number of threads:", "SwitchThresh:"};
	char *TIMERS_NAMES[NUM_TIMERS] = {"Total_time" };
	char *DEFAULT_VALUES[NUM_ARGS] = {"10000000", "1", "1000"};


	/* used for time measurements */
	double accTime;

	numThreads = omp_get_max_threads();
	OSCR_init (numThreads, "QuickSort", "", NUM_ARGS,
		PARAM_NAMES, DEFAULT_VALUES , NUM_TIMERS, NUM_TIMERS, TIMERS_NAMES,
		argc, argv);

	numEntries = OSCR_getarg_int(1);
	numThreads = OSCR_getarg_int(2);
	switchThresh = OSCR_getarg_int(3);

	/* and run with the specified number of threads */
	omp_set_num_threads(numThreads);

	/* Initialize the random number generator with a fixed seed so that
	 * every run sorts exactly the same vector; this makes runs easy to
	 * compare. */
	//std::srand( std::time(0) );
	std::srand(123);

	/* Reserve sufficient capacity for vector once and for all */
	myVec.reserve(myVec.size() + numEntries);

	/* fill the vector with random numbers */
	for (int i = 0; i < numEntries; ++i) {
		myVec.push_back(std::rand());
	}

	/* Start measuring the time */
	OSCR_timer_start(0);

	/* sort vector in parallel */
#	pragma omp parallel shared (myVec, switchThresh, numThreads)
	{
#		pragma intel omp taskq
		{
#			pragma intel omp task
			{
				myQuickSort(myVec, 0, myVec.size() - 1, switchThresh);
			}
		}
	}

	/* Finish time measurement */
	OSCR_timer_stop(0);

	/* calculate elapsed time */
	accTime = OSCR_timer_read(0);

	/* determine and print out, whether or not the vector was sorted ok */
	if (vectorValidate(myVec))
		std::cout << "\nSuccess, wall-clock time: " << accTime << "\n\n";
	else
		std::cout << "\nSorting FAILED!" << "\n\n";

	OSCR_report();
	return 0;
}
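The two "intel omp taskq" pragmas above are an obsolete Intel workqueuing extension. Standard OpenMP (3.0 and later) expresses the same "one thread seeds the work" pattern with single plus task. A minimal sketch of the equivalent region, assuming myQuickSort spawns further tasks internally for its recursive calls:

#include <omp.h>
#include <vector>

/* declaration matching the call above; the benchmark defines it elsewhere */
void myQuickSort(std::vector<int> &v, int lo, int hi, int switchThresh);

void sortInParallel(std::vector<int> &myVec, int switchThresh)
{
#	pragma omp parallel shared (myVec, switchThresh)
	{
#		pragma omp single nowait	/* one thread creates the root task */
		{
#			pragma omp task
			myQuickSort(myVec, 0, (int)myVec.size() - 1, switchThresh);
		}
	}	/* the region's implicit barrier waits for all outstanding tasks */
}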
Example #6
/*
*
* PARALLEL LOOP
*
*/
void loop(int nthreads, int size, int numiter) {
/* VARIABLES */
int i,iter;
int thread;
int limitL, limitR;

/* DECLARE VECTOR AND ANCILLARY DATA STRUCTURES */
double *V=NULL;
double border;
int totalSize = size*nthreads;

V = (double *)OSCR_calloc(totalSize, sizeof(double));

/* 1. INITIALIZE VECTOR */
for (i=0; i<totalSize; i++) {
	V[i]= 0.0 + i;
	}

/* 2. GET TIMER */
OSCR_timer_start(0);

/* 3. ITERATIONS LOOP */
for(iter=0; iter<numiter; iter++) {

	/* 3.1. PROCESS IN PARALLEL */
#pragma omp parallel default(none) shared(V,size,nthreads,numiter) private(iter,thread,limitL,limitR,border,i)
		{
		/* 3.1.1. GET NUMBER OF THREAD */
		thread = omp_get_thread_num();

		/* 3.1.2. COMPUTE LIMIT INDEX */
		limitL = thread*size;
		limitR = (thread+1)*size-1;

		/* 3.1.3. COPY NEIGHBOR'S BORDER ELEMENT (EXCEPT LAST THREAD) */
		if (thread != nthreads-1) border = V[limitR+1];

		/* 3.1.4. SYNCHRONIZE BEFORE UPDATING LOCAL PART */
#pragma omp barrier

		/* 3.1.5. COMPUTE LOCAL UPDATES */
		for (i=limitL; i<limitR; i++) {
			V[i] = f( V[i], V[i+1] );
			}
		/* 3.1.6. COMPUTE LAST ELEMENT (EXCEPT LAST THREAD) */
		if (thread != nthreads-1)
			V[limitR] = f( V[limitR], border );
		
		/* 3.1.7. END PARALLEL REGION */
		}

	/* 3.2. END ITERATIONS LOOP */
	}


/* 4. STOP TIMER */
OSCR_timer_stop(0);

/* 5. WRITE VECTOR (DEBUG) */
#ifdef DEBUG
#include "debug_V.c"
#endif

/* 6. END */
}
Example #7
/** main function with initialization, command line argument parsing,
  * memory allocation, OpenMP setup, wall-clock time measurement.
  */
int main(int argc, char *argv[])
{

    std::vector<int> myVec;
    std::stack<std::pair<int, int> > globalTodoStack;
    int numThreads;
    int numEntries;
    int switchThresh;
    char *PARAM_NAMES[NUM_ARGS] = {(char *)"Number of integers to sort:", (char *)"Number of threads:", (char *)"SwitchThresh:"};
    char *TIMERS_NAMES[NUM_TIMERS] = {(char *)"Total_time" };
    char *DEFAULT_VALUES[NUM_ARGS] = {(char *)"100", (char *)"2", (char *)"10"};

    /* number of threads currently doing useful work */
    int numBusyThreads = 1;

    /* used for time measurements */
    double accTime;

    /* used for performance measurements */
    std::vector<int> globalStackWrite;



    numThreads = omp_get_max_threads();
    OSCR_init (numThreads, (char *)"QuickSort", (char *)"", NUM_ARGS,
         PARAM_NAMES, DEFAULT_VALUES , NUM_TIMERS, NUM_TIMERS, TIMERS_NAMES,
         argc, argv);

    numEntries = OSCR_getarg_int(1);
    numThreads = OSCR_getarg_int(2);
    switchThresh = OSCR_getarg_int(3);

    /* initialize the performance measures */
    for (int i = 0; i < numThreads; ++i) {
        globalStackWrite.push_back(0);
    }

    /* and run with the specified number of threads */
    omp_set_num_threads(numThreads);

    /* Initialize the random number generator with a fixed seed so that
     * every run sorts exactly the same vector; this makes runs easy to
     * compare. */
    //std::srand( std::time(0) );
    std::srand(123);

    /* Reserve sufficient capacity for vector once and for all */
    myVec.reserve(myVec.size() + numEntries);

    /* fill the vector with random numbers */
    for (int i = 0; i < numEntries; ++i) {
        myVec.push_back(std::rand());
    }

    /* Start measuring the time */
    OSCR_timer_start(0);
    /* sort vector in parallel */
#   pragma omp parallel shared(myVec, globalTodoStack, numThreads, \
        switchThresh, numBusyThreads, globalStackWrite)
    {
        /* start sorting with only one thread, the others wait for the stack
         * to fill up
         */
        if (0 == omp_get_thread_num()) {
            myQuickSort(myVec, 0, myVec.size() - 1, switchThresh,
                        globalTodoStack, numBusyThreads, numThreads,
                        globalStackWrite);
        } else {
            myQuickSort(myVec, 0, 0, switchThresh, globalTodoStack,
                        numBusyThreads, numThreads, globalStackWrite);
        }
    }

    /* Finish time measurement */
    OSCR_timer_stop(0);

    /* calculate elapsed time */
    accTime = OSCR_timer_read(0);

    /* determine and print out, whether or not the vector was sorted ok */
    if (vectorValidate(myVec))
        std::cout << "\nSuccess, wall-clock time: " << accTime << "\n\n";
    else
        std::cout << "\nSorting FAILED!" << "\n\n";

    int globalStackWriteSum = 0;
    /* sum up and print out all performance measures */
    for (int i = 0; i < numThreads; ++i) {
        globalStackWriteSum += globalStackWrite[i];
        std::cout << i << ".: gSW: " << globalStackWrite[i] << "\n";
    }
    std::cout << std::endl << "Total: gSW: " << globalStackWriteSum << "\n\n";
    OSCR_report();
    return 0;
}
Example #8
/*
 * Graph search: test whether a path exists from a source node to a target node.
 *
 * Parallelization method: shared-memory worker farm
 */
void testPath(int nthreads, int source, int target, tg graph) {
/* SHARED STRUCTURES */
Bool    *searched=NULL;
Astack  pool;
Bool    found = FALSE;
int ind;

/* ENDING CONTROL */
int     num_waiting=0;

/* 1. ALLOCATE MEMORY FOR ANCILLARY STRUCTURES */
pool = Ast_init();
searched = OSCR_calloc(tg_nodes(graph), sizeof(Bool));

for (ind=0; ind<tg_nodes(graph); ind++) { searched[ind]=FALSE; }

/* 2. INIT "nodes to explore" POOL WITH THE source ID */
Ast_push(pool, source);


/* 3. START TIMER */
OSCR_timer_start(0);


/* 4. SPAWN WORKERS */
#pragma omp parallel default(none)                  \
    shared(nthreads,num_waiting,graph,searched,pool,target,found)   
{
Bool        waiting = FALSE;
tg_task     next=TG_NULLID;
task_list   succs;
int     num_succs;
int     ind;
#ifdef DEBUG
int     numPops=0;
int     numNoPops=0;
int     thread = omp_get_thread_num();
#endif

/* WORKER WORKS UNTIL:
 *  ALL WORKERS ARE WAITING (TARGET NOT FOUND) 
 *  OR SOMEONE FINDS THE TARGET
 */
while ( num_waiting != nthreads && !found ) {

    /* 1. GET NEXT ELEMENT TO PROCESS (OR WAIT UNTIL MORE ELEMENTS) */
    while( next == TG_NULLID && num_waiting != nthreads && !found) {

        /* ALL POOL OPERATIONS ARE MONITORED */
        #pragma omp critical
            {
            /* 1.1. CHECK THE POOL */
            if ( Ast_more(pool) ) {
                /* 1.1.1. ELEMENTS IN THE POOL: GET NEXT */
                next = Ast_pop(pool);
#ifdef DEBUG
numPops++;
#endif

                /* 1.1.2. IF WAITING, CHANGE STATE */
                if ( waiting ) { 
                    waiting = FALSE; 
                    num_waiting--; 
                    }
                }
            else {
                /* 1.1.3. EMPTY POOL: IF NOT WAITING, CHANGE STATE */
#ifdef DEBUG
numNoPops++;
#endif
                if ( !waiting ) { 
                    waiting = TRUE; 
                    num_waiting++; 
                    }
                }
            /* OMP END CRITICAL: MONITORED OPERATION */
            }

        } /* END GET next ELEMENT FROM THE POOL */


    /* 2. PROCESS next ELEMENT */
    if ( next != TG_NULLID ) {

        /* 2.1. TARGET FOUND: END ALL */
        if (next == target) { found = TRUE; }

        /* 2.2. NO SUCCESSORS: END */
        else if ( tg_succ_num(graph, next) == 0 ) { next = TG_NULLID; }

        /* 2.3. GET SUCCESSORS LIST AND PUSH IT TO THE POOL */
        else {
            /* 2.3.1. GET SUCCS LIST */
            num_succs = tg_succ_num(graph, next);
            succs = tg_succ(graph, next);

            /* 2.3.2. PUSH SUCCS TO POOL: MONITORED OPERATION */
            #pragma omp critical
            if ( num_succs > 0 ) {
                for(ind=0; ind<num_succs; ind++) {
                    tg_task vp = succs[ind];

                    /* PUSH ONLY NON-EXPLORED NODES */
                    if ( ! searched[ vp ] ) {
                        searched[ vp ] = TRUE;
                        Ast_push(pool, vp);
                        }
                    }
                /* END OMP CRITICAL: MONITORED OPERATION */
                }
            }

        /* 2.4. END PROCESSING ELEMENT */
        next = TG_NULLID;
        } 
    } /* END PROCESSING */

#ifdef DEBUG
printf("#DEBUG Thread %d ENDING ----> Pops: %d, NoPops: %d\n",thread,numPops,numNoPops);
#endif

/* WORKERS END: PARALLEL REGION */
}

/* 5. STOP TIMER */
OSCR_timer_stop(0);

/* 6. WRITE RESULT */
printf("\nPath(%d,%d) = %d\n\n", source, target, found);

/* 7. END */
}
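The pool above is an abstract stack whose four operations (Ast_init, Ast_more, Ast_push, Ast_pop) are provided by the benchmark. Below is a minimal array-backed sketch of that interface, assuming node ids are plain ints; note that the workers already wrap every pool call in an omp critical section, so the stack itself needs no internal locking.

#include <stdlib.h>

typedef struct AstackRec { int *data; int top, cap; } *Astack;

Astack Ast_init(void) {
    Astack s = malloc(sizeof *s);
    s->cap = 1024;
    s->top = 0;
    s->data = malloc(s->cap * sizeof *s->data);
    return s;
}

int Ast_more(Astack s) { return s->top > 0; }       /* any work left? */

void Ast_push(Astack s, int v) {
    if (s->top == s->cap) {                         /* grow on demand */
        s->cap *= 2;
        s->data = realloc(s->data, s->cap * sizeof *s->data);
    }
    s->data[s->top++] = v;
}

int Ast_pop(Astack s) { return s->data[--s->top]; }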
Example #9
/*
*
* PARALLEL LOOP
*
*/
void loop(int nthreads, int size, int numiter) {
/* VARIABLES */
int i,iter;

int thread;
int limitL, limitR;

/* DECLARE VECTOR AND ANCILLARY DATA STRUCTURES */
double *V=NULL;
double border;
int totalSize = size*nthreads;

V = (double *)OSCR_calloc(totalSize, sizeof(double));

/* 1. INITIALIZE VECTOR */
for (i=0; i<totalSize; i++) {
	V[i]= 0.0 + i;
	}

/* 2. START TIMER */
OSCR_timer_start(0);

/* 3. PROCESS IN PARALLEL */
#pragma omp parallel default(none) shared(V,size,nthreads,numiter) private(iter,thread,limitL,limitR,border,i)
	{

	/* 3.1. GET MY NUMBER OF THREAD IN THE GROUP */
	thread = omp_get_thread_num();

	/* 3.2. COMPUTE MY LIMIT INDEX */
	limitL = thread*size;
	limitR = (thread+1)*size-1;

	/* 3.3. ITERATIONS LOOP (+nthreads EXTRA ITER. TO EMPTY THE PIPELINE) */
	for(iter=0; iter<(numiter+nthreads-1); iter++) {

		/* 3.3.1. COPY NEIGHBOR'S BORDER ELEMENT FROM THE PREVIOUS THREAD (EXCEPT THREAD 0) */
		if (thread != 0) border = V[limitL-1];

		/* 3.3.2. SYNCHRONIZE BEFORE UPDATING LOCAL PART */
#pragma omp barrier

		/* 3.3.3. COMPUTE LOCAL UPDATES */
		/* 	(ONLY ACTIVE THREADS - CHECK PIPELINE STAGE) */
		if ( thread<=iter && thread>(iter-numiter) ) {
			/* 3.3.3.1. COMPUTE FIRST ELEMENT (EXCEPT THREAD 0) */
			if (thread != 0)
				V[limitL] = f( V[limitL], border );

			/* 3.3.3.2. COMPUTE THE REST OF ELEMENTS */
			for (i=limitL+1; i<=limitR; i++) {
				V[i] = f( V[i], V[i-1] );
				}
			}
		
		/* 3.3.4. SYNCHRONIZE BEFORE COPYING UPDATED BORDER ELEMENT  */
#pragma omp barrier

		/* 3.3.5. END ITERATIONS LOOP */
		}

	/* 3.4. END PARALLEL REGION */
	}

/* 4. STOP TIMER */
OSCR_timer_stop(0);

/* 5. WRITE VECTOR (DEBUG) */
#ifdef DEBUG
#include "debug_V.c"
#endif

/* 6. END */
}
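In the benchmark, loop() is called by the harness after argument parsing. A minimal hand-written driver for experimentation (the per-thread size and iteration count are arbitrary values, and the OSCR stand-ins sketched after Example #3 supply the timer and allocation calls):

#include <omp.h>

void loop(int nthreads, int size, int numiter);   /* defined above */

int main(void) {
    int nthreads = omp_get_max_threads();
    loop(nthreads, 1000, 100);   /* 1000 elements per thread, 100 iterations */
    return 0;
}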