Esempio n. 1
0
// Row-by-row minimum-sum sweep over `wall`.
//
// After init(argc, argv), two int buffers of `cols` entries are used in
// ping-pong fashion: for every row transition t -> t+1 the previous row's
// results (src) are combined with wall[t+1] into dst, where each dst[n] is
// wall[t+1][n] plus the smallest of src[n-1], src[n], src[n+1] (neighbours
// outside [0, cols-1] are ignored).
//
// `result`, `cols`, `rows`, `wall` and `data` are not declared in this block;
// presumably they are file-level variables filled in by init() -- TODO confirm.
//
// NOTE(review): this definition is truncated in the visible source (no closing
// brace follows the last statement), so anything after the two delete[]
// statements is unknown.
void run(int argc, char** argv)
{
    init(argc, argv);

    // NOTE(review): `cycles` is never read or written in the visible portion.
    unsigned long long cycles;

    int *src, *dst, *temp;
    int min;

    // dst starts as `result`; src is a scratch row allocated with new[]
    // (C++ allocation, so this file is C++ despite any .c naming).
    // NOTE(review): no matching delete[] for this scratch buffer is visible
    // in the portion of the function shown here.
    dst = result;
    src = new int[cols];

#ifdef GEM5_WORK
    // Region-of-interest start markers, only compiled when GEM5_WORK is defined.
    m5_work_begin(0, 0);
    m5_dumpreset_stats(0, 0);
#endif

#ifdef TIMING
    double start_time = gettime();
#endif

    for (int t = 0; t < rows-1; t++) {
        // Swap roles: the row written in the previous step becomes the input.
        temp = src;
        src = dst;
        dst = temp;
        // Each iteration reads only src/wall and writes only its own dst[n];
        // `min` is made private so every thread has its own copy.
        #pragma omp parallel for private(min)
        for(int n = 0; n < cols; n++){
            min = src[n];
            // Left neighbour exists for every column except the first.
            if (n > 0)
                min = MIN(min, src[n-1]);
            // Right neighbour exists for every column except the last.
            if (n < cols-1)
                min = MIN(min, src[n+1]);
            dst[n] = wall[t+1][n]+min;
        }
    }

#ifdef TIMING
    double end_time = gettime();
    printf("ROI Runtime: %f\n", end_time - start_time);
#endif

#ifdef GEM5_WORK
    // Region-of-interest end markers, mirroring the begin markers above.
    m5_dumpreset_stats(0, 0);
    m5_work_end(0, 0);
#endif

#ifdef OUTPUT
    // Print the first `cols` entries of data, then the final result row.
    for (int i = 0; i < cols; i++)
            printf("%d ",data[i]);
    printf("\n") ;
    for (int i = 0; i < cols; i++)
            printf("%d ",dst[i]);
    printf("\n") ;
#endif

    // Release the buffers named data and wall (allocated outside this block).
    delete [] data;
    delete [] wall;
Esempio n. 2
0
/*
 * Hands argc/argv to parse_int_args() together with a two-element uint64_t
 * buffer, then forwards both buffer elements, in order, to
 * m5_dumpreset_stats().
 */
void
do_dump_reset_stats(int argc, char *argv[])
{
    /* Number of integer slots handed to parse_int_args(). */
    enum { NUM_INT_ARGS = 2 };
    uint64_t parsed[NUM_INT_ARGS];

    parse_int_args(argc, argv, parsed, NUM_INT_ARGS);

    m5_dumpreset_stats(parsed[0], parsed[1]);
}
/**
* Allocates count*size bytes on behalf of particleFilter and terminates the
* program with a diagnostic when the request overflows size_t or cannot be
* satisfied, so callers never receive an unusable NULL buffer.
* @param count The number of elements requested
* @param size The size in bytes of one element
* @param what A short label for the buffer, used only in the error message
* @return The allocated block (NULL is possible only for a zero-byte request)
*/
static void * pf_checked_alloc(size_t count, size_t size, const char * what){
	if(size != 0 && count > ((size_t)-1)/size){
		fprintf(stderr, "particleFilter: allocation size overflow for %s\n", what);
		exit(EXIT_FAILURE);
	}
	void * block = malloc(count*size);
	//malloc(0) may legitimately return NULL, so only non-empty requests can fail
	if(block == NULL && count*size != 0){
		fprintf(stderr, "particleFilter: out of memory allocating %s\n", what);
		exit(EXIT_FAILURE);
	}
	return block;
}
/**
* The implementation of the particle filter using OpenMP for many frames
* @see http://openmp.org/wp/
* @note This function is designed to work with a video of several frames. In addition, it references a provided MATLAB function which takes the video, the objxy matrix and the x and y arrays as arguments and returns the likelihoods
* @note Timing information and the per-frame estimate (XE, YE, distance) are printed to stdout. If Nparticles is not positive an error is printed to stderr and the function returns without doing any work; if a buffer cannot be allocated the program exits with EXIT_FAILURE.
* @param I The video to be run
* @param IszX The x dimension of the video
* @param IszY The y dimension of the video
* @param Nfr The number of frames
* @param seed The seed array used for random number generation
* @param Nparticles The number of particles to be used
*/
void particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Nparticles){

	//every per-particle buffer below is sized by Nparticles and the resampling
	//step reads weights[0] unconditionally, so a non-positive count is rejected
	if(Nparticles <= 0){
		fprintf(stderr, "particleFilter: Nparticles must be positive (got %d)\n", Nparticles);
		return;
	}

	int max_size = IszX*IszY*Nfr;
	long long start = get_time();
	//original particle centroid
	double xe = roundDouble(IszY/2.0);
	double ye = roundDouble(IszX/2.0);
	
	//expected object locations, compared to center
	int radius = 5;
	int diameter = radius*2 - 1;
	int * disk = (int *)pf_checked_alloc((size_t)diameter*(size_t)diameter, sizeof(int), "disk");
	strelDisk(disk, radius);
	//count the cells of the disk mask that are set to 1
	int countOnes = 0;
	int x, y;
	for(x = 0; x < diameter; x++){
		for(y = 0; y < diameter; y++){
			if(disk[x*diameter + y] == 1)
				countOnes++;
		}
	}
	//two doubles per set cell; filled in by getneighbors
	double * objxy = (double *)pf_checked_alloc((size_t)countOnes*2, sizeof(double), "objxy");
	getneighbors(disk, countOnes, objxy, radius);
	
	long long get_neighbors = get_time();
	printf("TIME TO GET NEIGHBORS TOOK: %f\n", elapsed_time(start, get_neighbors));
	//initial weights are all equal (1/Nparticles)
	double * weights = (double *)pf_checked_alloc((size_t)Nparticles, sizeof(double), "weights");
	#pragma omp parallel for shared(weights, Nparticles) private(x)
	for(x = 0; x < Nparticles; x++){
		weights[x] = 1/((double)(Nparticles));
	}
	long long get_weights = get_time();
	printf("TIME TO GET WEIGHTSTOOK: %f\n", elapsed_time(get_neighbors, get_weights));
	//per-particle work buffers; likelihood is (re)set to 0 inside the frame loop
	double * likelihood = (double *)pf_checked_alloc((size_t)Nparticles, sizeof(double), "likelihood");
	double * arrayX = (double *)pf_checked_alloc((size_t)Nparticles, sizeof(double), "arrayX");
	double * arrayY = (double *)pf_checked_alloc((size_t)Nparticles, sizeof(double), "arrayY");
	double * xj = (double *)pf_checked_alloc((size_t)Nparticles, sizeof(double), "xj");
	double * yj = (double *)pf_checked_alloc((size_t)Nparticles, sizeof(double), "yj");
	double * CDF = (double *)pf_checked_alloc((size_t)Nparticles, sizeof(double), "CDF");
	double * u = (double *)pf_checked_alloc((size_t)Nparticles, sizeof(double), "u");
	//one linear index into I per (particle, disk cell) pair
	int * ind = (int*)pf_checked_alloc((size_t)countOnes*(size_t)Nparticles, sizeof(int), "ind");
	//every particle starts at the initial centroid
	#pragma omp parallel for shared(arrayX, arrayY, xe, ye) private(x)
	for(x = 0; x < Nparticles; x++){
		arrayX[x] = xe;
		arrayY[x] = ye;
	}
	int k;
	
	printf("TIME TO SET ARRAYS TOOK: %f\n", elapsed_time(get_weights, get_time()));

#ifdef GEM5_WORK
	m5_work_begin(0, 0);
	m5_dumpreset_stats(0, 0);
#endif

	int indX, indY;
	//frame 0 is skipped: processing starts with the second frame
	for(k = 1; k < Nfr; k++){
		long long set_arrays = get_time();
		//apply motion model
		//draws sample from motion model (random walk): arrayX drifts by +1 with
		//noise scaled by 5, arrayY drifts by -2 with noise scaled by 2
		#pragma omp parallel for shared(arrayX, arrayY, Nparticles, seed) private(x)
		for(x = 0; x < Nparticles; x++){
			arrayX[x] += 1 + 5*randn(seed, x);
			arrayY[x] += -2 + 2*randn(seed, x);
		}
		long long error = get_time();
		printf("TIME TO SET ERROR TOOK: %f\n", elapsed_time(set_arrays, error));
		//particle filter likelihood
		#pragma omp parallel for shared(likelihood, I, arrayX, arrayY, objxy, ind) private(x, y, indX, indY)
		for(x = 0; x < Nparticles; x++){
			//compute the likelihood: remember our assumption is that you know
			// foreground and the background image intensity distribution.
			// Notice that we consider here a likelihood ratio, instead of
			// p(z|x). It is possible in this case. why? a hometask for you.
			//calc ind
			for(y = 0; y < countOnes; y++){
				indX = roundDouble(arrayX[x]) + objxy[y*2 + 1];
				indY = roundDouble(arrayY[x]) + objxy[y*2];
				//fabs folds a negative linear index onto its magnitude;
				//indices at or beyond max_size fall back to element 0
				ind[x*countOnes + y] = fabs(indX*IszY*Nfr + indY*Nfr + k);
				if(ind[x*countOnes + y] >= max_size)
					ind[x*countOnes + y] = 0;
			}
			likelihood[x] = 0;
			for(y = 0; y < countOnes; y++)
				likelihood[x] += (pow((I[ind[x*countOnes + y]] - 100),2) - pow((I[ind[x*countOnes + y]]-228),2))/50.0;
			likelihood[x] = likelihood[x]/((double) countOnes);
		}
		long long likelihood_time = get_time();
		printf("TIME TO GET LIKELIHOODS TOOK: %f\n", elapsed_time(error, likelihood_time));
		// update & normalize weights
		// using equation (63) of Arulampalam Tutorial
		#pragma omp parallel for shared(Nparticles, weights, likelihood) private(x)
		for(x = 0; x < Nparticles; x++){
			weights[x] = weights[x] * exp(likelihood[x]);
		}
		long long exponential = get_time();
		printf("TIME TO GET EXP TOOK: %f\n", elapsed_time(likelihood_time, exponential));
		double sumWeights = 0;
		#pragma omp parallel for private(x) reduction(+:sumWeights)
		for(x = 0; x < Nparticles; x++){
			sumWeights += weights[x];
		}
		long long sum_time = get_time();
		printf("TIME TO SUM WEIGHTS TOOK: %f\n", elapsed_time(exponential, sum_time));
		#pragma omp parallel for shared(sumWeights, weights) private(x)
		for(x = 0; x < Nparticles; x++){
			weights[x] = weights[x]/sumWeights;
		}
		long long normalize = get_time();
		printf("TIME TO NORMALIZE WEIGHTS TOOK: %f\n", elapsed_time(sum_time, normalize));
		xe = 0;
		ye = 0;
		// estimate the object location by expected values
		#pragma omp parallel for private(x) reduction(+:xe, ye)
		for(x = 0; x < Nparticles; x++){
			xe += arrayX[x] * weights[x];
			ye += arrayY[x] * weights[x];
		}
		long long move_time = get_time();
		printf("TIME TO MOVE OBJECT TOOK: %f\n", elapsed_time(normalize, move_time));
		printf("XE: %lf\n", xe);
		printf("YE: %lf\n", ye);
		//euclidean distance between the estimate and the initial centroid
		double distance = sqrt( pow((double)(xe-(int)roundDouble(IszY/2.0)),2) + pow((double)(ye-(int)roundDouble(IszX/2.0)),2) );
		printf("%lf\n", distance);
		//display(hold off for now)
		
		//pause(hold off for now)
		
		//resampling
		
		//running sum of the normalized weights; inherently sequential
		CDF[0] = weights[0];
		for(x = 1; x < Nparticles; x++){
			CDF[x] = weights[x] + CDF[x-1];
		}
		long long cum_sum = get_time();
		printf("TIME TO CALC CUM SUM TOOK: %f\n", elapsed_time(move_time, cum_sum));
		//evenly spaced thresholds u[x] = u1 + x/Nparticles with one random offset u1
		double u1 = (1/((double)(Nparticles)))*randu(seed, 0);
		#pragma omp parallel for shared(u, u1, Nparticles) private(x)
		for(x = 0; x < Nparticles; x++){
			u[x] = u1 + x/((double)(Nparticles));
		}
		long long u_time = get_time();
		printf("TIME TO CALC U TOOK: %f\n", elapsed_time(cum_sum, u_time));
		int j, i;
		
		//pick a source particle for every threshold; when findIndex reports -1
		//the last particle is used
		#pragma omp parallel for shared(CDF, Nparticles, xj, yj, u, arrayX, arrayY) private(i, j)
		for(j = 0; j < Nparticles; j++){
			i = findIndex(CDF, Nparticles, u[j]);
			if(i == -1)
				i = Nparticles-1;
			xj[j] = arrayX[i];
			yj[j] = arrayY[i];
			
		}
		long long xyj_time = get_time();
		printf("TIME TO CALC NEW ARRAY X AND Y TOOK: %f\n", elapsed_time(u_time, xyj_time));
		
		//copy the resampled particles back and restore uniform weights
		//(kept sequential, as in the original)
		for(x = 0; x < Nparticles; x++){
			//reassign arrayX and arrayY
			arrayX[x] = xj[x];
			arrayY[x] = yj[x];
			weights[x] = 1/((double)(Nparticles));
		}
		long long reset = get_time();
		printf("TIME TO RESET WEIGHTS TOOK: %f\n", elapsed_time(xyj_time, reset));
	}

#ifdef GEM5_WORK
	m5_dumpreset_stats(0, 0);
	m5_work_end(0, 0);
#endif

	free(disk);
	free(objxy);
	free(weights);
	free(likelihood);
	free(xj);
	free(yj);
	free(arrayX);
	free(arrayY);
	free(CDF);
	free(u);
	free(ind);
}
Esempio n. 4
0
/*
 * Entry point of the multi-threaded memory access benchmark.
 *
 * Parses the command line into the file-level settings (numThreads, arraySize,
 * numIters, parallelAccesses, parallelism, stride, uncached, dumpStats),
 * allocates bigArray either with malloc or, with -u, by mapping /dev/mem,
 * fills it with its own indices, starts numThreads `worker` threads, joins
 * them, and finally prints the lap times (stderr) plus average and standard
 * deviation (stdout).
 *
 * Returns 0 on success and -1 when the barrier cannot be created; invalid
 * command lines and resource failures terminate the process with status 1.
 */
int main(int argc, char** argv) {

    unsigned i;
    int fd = 0;
    pthread_t *threads;
    pthread_attr_t pthread_custom_attr;

    for (int index = 0; index < argc; index++) {
        if (strcmp(argv[index], "-t") == 0) {
            if (argc > index+1) {
                numThreads = atoi(argv[++index]);
            } else {
                printf("ERROR: Must specify number of threads to -t option\n");
                exit(1);
            }
        } else if (strcmp(argv[index], "-a") == 0) {
            if (argc > index+1) {
                arraySize = atoi(argv[++index]);
            } else {
                printf("ERROR: Must specify number of array elements to -a option\n");
                exit(1);
            }
        } else if (strcmp(argv[index], "-d") == 0) {
            dumpStats = true;
        } else if (strcmp(argv[index], "-h") == 0) {
            printf("Usage: %s [options]\n\nOptions:\n", argv[0]);
            printf("-a #     Specify number of array elements\n");
            printf("-d       Dump and reset stats before and after the threads run\n");
            printf("-h       Print this help and exit\n");
            printf("-i #     Specify number of iterations to run\n");
            printf("-p #     Specify number of parallel accesses per-thread\n");
            printf("-q #     Specify per-thread memory-level parallelism\n");
            printf("-s #     Specify memory access stride in ints\n");
            printf("-t #     Specify number of threads\n");
            printf("-u       Use uncacheable memory\n");
            exit(0);
        } else if (strcmp(argv[index], "-i") == 0) {
            if (argc > index+1) {
                numIters = atoi(argv[++index]);
            } else {
                printf("ERROR: Must specify number of iterations to -i option\n");
                exit(1);
            }
        } else if (strcmp(argv[index], "-p") == 0) {
            if (argc > index+1) {
                parallelAccesses = atoi(argv[++index]);
            } else {
                printf("ERROR: Must specify number of memory parallel accesses to -p option\n");
                exit(1);
            }
        } else if (strcmp(argv[index], "-q") == 0) {
            if (argc > index+1) {
                parallelism = atoi(argv[++index]);
            } else {
                printf("ERROR: Must specify number of memory-level parallelism to -q option\n");
                exit(1);
            }
        } else if (strcmp(argv[index], "-s") == 0) {
            if (argc > index+1) {
                stride = atoi(argv[++index]);
            } else {
                printf("ERROR: Must specify int stride to -s option\n");
                exit(1);
            }
        } else if (strcmp(argv[index], "-u") == 0) {
            uncached = true;
        }
    }

    /* parallelism is used as a modulo divisor and numThreads sizes a VLA,
     * the barrier and two heap arrays, so both must be at least 1. */
    if (parallelism < 1) {
        printf("ERROR: Memory-level parallelism (-q) must be at least 1\n");
        exit(1);
    }
    if (numThreads < 1) {
        printf("ERROR: Number of threads (-t) must be at least 1\n");
        exit(1);
    }

    /* Round the access count down to a multiple of the parallelism. */
    parallelAccesses -= (parallelAccesses % parallelism);
    int threadIDs[numThreads];
    pthread_attr_init(&pthread_custom_attr);
    /* -a is applied per thread: the shared array holds that many elements
     * for each thread. */
    arraySize *= numThreads;
    if (!uncached) {
        bigArray = (int *) malloc(arraySize * sizeof(int));
        if (bigArray == NULL && arraySize != 0) {
            fprintf(stderr, "malloc failed for bigArray\n");
            exit(1);
        }
    } else {
        fd = open("/dev/mem", O_CREAT | O_RDWR | O_SYNC, 0755);
        if (fd < 0) {
            fprintf(stderr, "Open failed uncached\n");
            exit(1);
        }
        bigArray = (int*) mmap(0x0, arraySize * sizeof(int), (PROT_READ | PROT_WRITE), (MAP_SHARED), fd, 0);
        if (bigArray == MAP_FAILED) {
            fprintf (stderr, "mmap uncached\n");
            /* NOTE(review): this removes the /dev/mem node itself; kept as in
             * the original, but confirm it is intended outside a simulator. */
            unlink("/dev/mem");
            exit(1);
        }
    }
    smallArray = (int *) malloc(numThreads * sizeof(int));
    threads = (pthread_t *) malloc(numThreads * sizeof(pthread_t));
    lapTimes = (double*) malloc(numIters * sizeof(double));
    /* numThreads >= 1 here, so NULL always means failure for the first two;
     * lapTimes may legitimately be NULL only for a zero-iteration run. */
    if (smallArray == NULL || threads == NULL ||
        (lapTimes == NULL && numIters != 0)) {
        fprintf(stderr, "malloc failed for per-thread or lap-time buffers\n");
        exit(1);
    }

    printf("Number of threads: %u\n", numThreads);
    printf("Number of iterations: %u\n", numIters);
    printf("Memory-level parallelism: %u\n", parallelism);
    printf("Number of accesses: %u\n", parallelAccesses);
    printf("Number of array elements: %llu\n", arraySize);
    printf("Size of bigArray (MB): %.3f\n", (double)(arraySize * sizeof(int))/(1024.0*1024.0));
    /* sizeof yields size_t, whose matching conversion is %zu. */
    printf("Data stride: %u int (%zuB) = %zuB\n", stride, sizeof(int), (size_t)(stride * sizeof(int)));
    for (i = 0; i < arraySize; i++) {
        bigArray[i] = i;
    }
    if (pthread_barrier_init(&barr, NULL, numThreads)) {
        printf("Could not create a barrier\n");
        return -1;
    }

    if (dumpStats) m5_dumpreset_stats(0, 0);
    for (i = 0; i < numThreads; i++) {
        threadIDs[i] = i;
        /* Joining a thread that was never created is undefined, so a failed
         * creation aborts the run instead of continuing. */
        if (pthread_create(&threads[i], &pthread_custom_attr, worker, &threadIDs[i]) != 0) {
            fprintf(stderr, "Could not create thread %u\n", i);
            exit(1);
        }
    }
    for (i = 0; i < numThreads; i++) {
        pthread_join(threads[i], NULL);
    }
    if (dumpStats) m5_dumpreset_stats(0, 0);

    double stdev = 0.0;
    averageLap /= numIters;
    fprintf(stderr, "%d %d %llu", numThreads, parallelAccesses, arraySize);
    for (i = 0; i < numIters; i++) {
        fprintf(stderr, " %.6f", lapTimes[i]);
        /* NOTE(review): laps 0 and 1 are left out of the squared-deviation
         * sum while the divisor below is still numIters -- confirm intended. */
        if (i > 1) {
            double temp = averageLap - lapTimes[i];
            stdev += temp * temp;
        }
    }
    fprintf(stderr, "\n");
    stdev = sqrt(stdev/numIters);
    double percent = 100 * stdev / averageLap;
    printf("AVG: %.5f\n", averageLap);
    printf("STDEV: %f (%f%%)\n", stdev, percent);

    /* All workers have been joined, so the barrier and the attribute object
     * are no longer in use and can be released. */
    pthread_barrier_destroy(&barr);
    pthread_attr_destroy(&pthread_custom_attr);

    if (!uncached) {
        free(bigArray);
    } else {
        munmap(bigArray, arraySize * sizeof(int));
        if (fd != 0)
            close(fd);
        /* NOTE(review): see the note at the mmap failure path above. */
        unlink("/dev/mem");
    }
    free(smallArray);
    free(threads);
    free(lapTimes);
    return 0;
}