Esempio n. 1
0
/*
 * Thread body for a CPU-bound worker.
 *
 * Calls fib(10) `iteration` times, then prints the elapsed time reported by
 * get_usecs() and the per-iteration average.  The shared counter `sum` is
 * incremented between DET_LOCK/DET_UNLOCK on `mutexsum` once on entry, once
 * every 100 iterations, and once on exit.
 *
 * v: thread argument, not used.
 * Returns NULL.
 */
void *
my_worker2(void *v)
{
	int i;
	unsigned int start, dur;

	(void)v;

	DET_LOCK(&mutexsum);
	sum++;
	DET_UNLOCK(&mutexsum);

	start = get_usecs();
	for ( i = 0; i < iteration; i++ ) {
		if ( i % 100 == 0 ) {
			DET_LOCK(&mutexsum);
			sum++;
			DET_UNLOCK(&mutexsum);
		}

		fib(10);   // average time = x
	}
	dur = get_usecs() - start;
	/* dur is unsigned, so it is printed with %u rather than %d */
	printf("%s:dur = %u, avg = %f\n", __FUNCTION__, dur, (float)dur / iteration );

	DET_LOCK(&mutexsum);
	sum++;
	DET_UNLOCK(&mutexsum);

	return NULL;
}
Esempio n. 2
0
/*
 * Thread body for an I/O-bound worker.
 *
 * Opens "file.dat" read-only into the global g_fd (the process exits if the
 * open fails), then performs up to `iteration` read() calls and prints the
 * elapsed time and the per-iteration average.  Every 100 iterations the
 * shared counter `sum` is incremented between DET_LOCK/DET_UNLOCK on
 * `mutexsum`.  The loop stops early when read() returns 0 or a negative
 * value.
 *
 * v: thread argument, not used.
 * Returns NULL.
 */
void * my_worker1(void *v)
{
	int i;
	unsigned start, dur;
	char buf[1024];

	(void)v;

	g_fd = open("file.dat", O_RDONLY);
	if ( g_fd < 0 ) {
		perror("open failed");
		exit(1);
	}
	start = get_usecs();
	for ( i = 0; i < iteration; i++ ) {
		int cnt;
		/* never ask read() for more bytes than buf can hold */
		size_t want = (size_t)size < sizeof(buf) ? (size_t)size : sizeof(buf);

		if ( i % 100 == 0) {
			DET_LOCK(&mutexsum);
			sum++;
			DET_UNLOCK(&mutexsum);
		}

		cnt = read(g_fd, buf, want); // average time = x
		if ( cnt <= 0 ) {
			perror("finish");
			break;
		}
	}
	dur = get_usecs() - start;
	/* dur is unsigned, so it is printed with %u rather than %d */
	printf("%s:dur = %u, avg = %f\n", __FUNCTION__, dur, (float)dur / iteration );

	return NULL;
}
Esempio n. 3
0
// Program entry point.
//
// Initialises the hupcpp runtime, runs MAX_STEPS rounds in which only the
// process with global rank 0 calls calculate_forces() inside a
// hupcpp::finish_spmd() scope, measures the wall time of those rounds with
// get_usecs(), optionally verifies the work counters (VERIFY_SHORT), and
// shuts the runtime down after a barrier.  Always returns 0.
int main(int argc,char **argv) {
	hupcpp::init(&argc, &argv);

#if 0
	if(argc!=5) {
		printf("(ERROR) USAGE: ./a.out <total bodies> <tileSize> <in data file> <out data file>\n");
		hupcpp::finalize();
	}
	assert(atoi(argv[1]) == NUMBODIES);
	assert(atoi(argv[2]) == TILESIZE);
	//Init(argv[3]);
#endif

	const long t_begin = get_usecs();
	int step = 0;
	while(step < MAX_STEPS) {
		hupcpp::finish_spmd([=]() {
			// only the root rank does the force computation
			if(upcxx::global_myrank() ==0) {
				calculate_forces();
			}
		});
		++step;
	}
	const long t_end = get_usecs();
	// elapsed wall time in seconds
	double elapsed_sec = ((double)(t_end - t_begin))/1000000;
#ifdef VERIFY_SHORT
	// add up this process's per-worker counters ...
	counter_t local_total = 0;
	for(int w=0; w<MAX_HCPP_WORKERS; ++w) {
		local_total += TOTAL_COUNTS[w];
	}
	// ... and reduce them onto rank 0
	counter_t global_total;
	upcxx::reduce<counter_t>(&local_total, &global_total, 1, 0, UPCXX_SUM, UPCXX_ULONG_LONG);
	if(upcxx::global_myrank() == 0) {
		const counter_t expected = NUMBODIES * MAX_STEPS;
		const char* verdict = (global_total == expected) ? "PASSED" : "FAILED";
		printf("Test %s, Time = %0.3f\n",verdict,elapsed_sec);
	}
#endif

#if 0
	// gather result from all other processes using upcxx_reduce
	float accx_all[NUMBODIES], accy_all[NUMBODIES], accz_all[NUMBODIES];
	upcxx::upcxx_reduce<float>(accx, accx_all, NUMBODIES, 0, UPCXX_SUM, UPCXX_FLOAT);
	upcxx::upcxx_reduce<float>(accy, accy_all, NUMBODIES, 0, UPCXX_SUM, UPCXX_FLOAT);
	upcxx::upcxx_reduce<float>(accz, accz_all, NUMBODIES, 0, UPCXX_SUM, UPCXX_FLOAT);

	if(upcxx::global_myrank() ==0) {
		printf("0: Computation done\n");
		printf("Test Passed=%d\n",verify_result(argv[4],accx_all));
	}
#endif

	hupcpp::barrier();
	hupcpp::finalize();
	return 0;
}
Esempio n. 4
0
/*
 * Parse a time specification and convert it to scheduler ticks.
 *
 * psched ticks are not always microseconds (this depends on the kernel's
 * PSCHED_CLOCK configuration), so the value parsed by get_usecs() is passed
 * through tc_core_usec2tick() before being stored.
 *
 * ticks: receives the converted value on success.
 * str:   text handed to get_usecs().
 * Returns 0 on success, -1 if get_usecs() reports a non-zero status.
 */
static int get_ticks(__u32 *ticks, const char *str)
{
    unsigned usecs;
    int rc = get_usecs(&usecs, str);

    if (rc != 0) {
        return -1;
    }

    *ticks = tc_core_usec2tick(usecs);
    return 0;
}
Esempio n. 5
0
/*
 * Print the benchmark summary and terminate the process with status 0.
 *
 * Reports the bytes read (g_nread), the time elapsed since g_start, the
 * resulting bandwidth in MB/s and the average time per CACHE_LINE_SIZE-byte
 * chunk in nanoseconds.
 *
 * The elapsed time is kept in a double: a float has only 24 bits of
 * mantissa, so microsecond counts above roughly 16.7 seconds were rounded.
 *
 * NOTE(review): this function appears to be installed as a signal handler
 * elsewhere in the file; printf() and exit() are not async-signal-safe.
 *
 * param: signal number when used as a handler; not used.
 */
void quit(int param)
{
	double dur_in_sec;
	double bw;
	double dur = get_usecs() - g_start;

	(void)param;

	dur_in_sec = dur / 1000000;
	printf("g_nread(bytes read) = %lld\n", (long long)g_nread);
	printf("elapsed = %.2f sec ( %.0f usec )\n", dur_in_sec, dur);
	bw = (double)g_nread / dur_in_sec / 1024 / 1024;
	printf("CPU%d: B/W = %.2f MB/s | ",cpuid, bw);
	/* usec -> nsec, divided by the number of whole cache lines read */
	printf("CPU%d: average = %.2f ns\n", cpuid, (dur*1000)/(g_nread/CACHE_LINE_SIZE));
	exit(0);
}
/*
 * Parse a service curve given as "[m1 RATE] [d TIME] m2 RATE".
 *
 * "m1" and "d" are optional and default to 0; "m2" is mandatory.  On success
 * the three values are stored in *sc and the caller's argument cursor
 * (*argcp / *argvp) is advanced to the last consumed argument.
 *
 * Returns 0 on success, -1 when "m2" is missing or a value fails to parse
 * (explain1() is called with the offending keyword in the latter case).
 *
 * The local names argc/argv are kept as-is: NEXT_ARG() is invoked without
 * arguments, so it presumably operates on locals with exactly these names.
 */
static int
hfsc_get_sc1(int *argcp, char ***argvp, struct tc_service_curve *sc)
{
	char **argv = *argvp;
	int argc = *argcp;
	unsigned int slope1 = 0, delay = 0, slope2 = 0;

	/* optional first-segment slope */
	if (!matches(*argv, "m1")) {
		NEXT_ARG();
		if (get_rate(&slope1, *argv) < 0) {
			explain1("m1");
			return -1;
		}
		NEXT_ARG();
	}

	/* optional length of the first segment */
	if (!matches(*argv, "d")) {
		NEXT_ARG();
		if (get_usecs(&delay, *argv) < 0) {
			explain1("d");
			return -1;
		}
		NEXT_ARG();
	}

	/* mandatory second-segment slope */
	if (matches(*argv, "m2") != 0)
		return -1;

	NEXT_ARG();
	if (get_rate(&slope2, *argv) < 0) {
		explain1("m2");
		return -1;
	}

	sc->m1 = slope1;
	sc->d  = delay;
	sc->m2 = slope2;

	*argvp = argv;
	*argcp = argc;
	return 0;
}
Esempio n. 7
0
/*
 * Parse the command-line options of the TBF qdisc and append the resulting
 * attributes to the netlink message n.
 *
 * Keywords handled by the loop below: limit, latency, burst/buffer/maxburst,
 * mtu/minburst, mpu, rate, peakrate and help.  Each value may be given only
 * once; "limit" and "latency" are mutually exclusive.
 *
 * qu:   not referenced in this function.
 * argc: number of remaining arguments.
 * argv: remaining arguments.
 * n:    netlink message that receives a TCA_OPTIONS attribute followed by
 *       TCA_TBF_PARMS, TCA_TBF_RTAB and, when a peak rate was given,
 *       TCA_TBF_PTAB.
 *
 * Returns 0 on success (also when no option at all was given, in which case
 * nothing is added to n) and -1 on any parse or validation error, after
 * printing a diagnostic to stderr or calling explain()/explain1().
 */
static int tbf_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
{
	int ok=0;
	struct tc_tbf_qopt opt;
	__u32 rtab[256];
	__u32 ptab[256];
	unsigned buffer=0, mtu=0, mpu=0, latency=0;
	int Rcell_log=-1, Pcell_log = -1; 
	struct rtattr *tail;

	memset(&opt, 0, sizeof(opt));

	/* one keyword/value pair per iteration; ok counts the accepted pairs */
	while (argc > 0) {
		if (matches(*argv, "limit") == 0) {
			NEXT_ARG();
			if (opt.limit || latency) {
				fprintf(stderr, "Double \"limit/latency\" spec\n");
				return -1;
			}
			if (get_size(&opt.limit, *argv)) {
				explain1("limit");
				return -1;
			}
			ok++;
		} else if (matches(*argv, "latency") == 0) {
			NEXT_ARG();
			if (opt.limit || latency) {
				fprintf(stderr, "Double \"limit/latency\" spec\n");
				return -1;
			}
			if (get_usecs(&latency, *argv)) {
				explain1("latency");
				return -1;
			}
			ok++;
		} else if (matches(*argv, "burst") == 0 ||
			strcmp(*argv, "buffer") == 0 ||
			strcmp(*argv, "maxburst") == 0) {
			NEXT_ARG();
			if (buffer) {
				fprintf(stderr, "Double \"buffer/burst\" spec\n");
				return -1;
			}
			if (get_size_and_cell(&buffer, &Rcell_log, *argv) < 0) {
				explain1("buffer");
				return -1;
			}
			ok++;
		} else if (strcmp(*argv, "mtu") == 0 ||
			   strcmp(*argv, "minburst") == 0) {
			NEXT_ARG();
			if (mtu) {
				fprintf(stderr, "Double \"mtu/minburst\" spec\n");
				return -1;
			}
			if (get_size_and_cell(&mtu, &Pcell_log, *argv) < 0) {
				explain1("mtu");
				return -1;
			}
			ok++;
		} else if (strcmp(*argv, "mpu") == 0) {
			NEXT_ARG();
			if (mpu) {
				fprintf(stderr, "Double \"mpu\" spec\n");
				return -1;
			}
			if (get_size(&mpu, *argv)) {
				explain1("mpu");
				return -1;
			}
			ok++;
		} else if (strcmp(*argv, "rate") == 0) {
			NEXT_ARG();
			if (opt.rate.rate) {
				fprintf(stderr, "Double \"rate\" spec\n");
				return -1;
			}
			if (get_rate(&opt.rate.rate, *argv)) {
				explain1("rate");
				return -1;
			}
			ok++;
		} else if (matches(*argv, "peakrate") == 0) {
			NEXT_ARG();
			if (opt.peakrate.rate) {
				fprintf(stderr, "Double \"peakrate\" spec\n");
				return -1;
			}
			if (get_rate(&opt.peakrate.rate, *argv)) {
				explain1("peakrate");
				return -1;
			}
			ok++;
		} else if (strcmp(*argv, "help") == 0) {
			explain();
			return -1;
		} else {
			fprintf(stderr, "What is \"%s\"?\n", *argv);
			explain();
			return -1;
		}
		argc--; argv++;
	}

	/* no option given at all: succeed without touching n */
	if (!ok)
		return 0;

	if (opt.rate.rate == 0 || !buffer) {
		fprintf(stderr, "Both \"rate\" and \"burst\" are required.\n");
		return -1;
	}
	if (opt.peakrate.rate) {
		if (!mtu) {
			fprintf(stderr, "\"mtu\" is required, if \"peakrate\" is requested.\n");
			return -1;
		}
	}

	if (opt.limit == 0 && latency == 0) {
		fprintf(stderr, "Either \"limit\" or \"latency\" are required.\n");
		return -1;
	}

	/*
	 * No explicit limit: derive one from the latency as
	 * rate * latency / 1000000 + burst (latency presumably in
	 * microseconds, given the get_usecs() parser).  With a peak rate the
	 * same estimate is made from peakrate and mtu and the smaller of the
	 * two is used.
	 */
	if (opt.limit == 0) {
		double lim = opt.rate.rate*(double)latency/1000000 + buffer;
		if (opt.peakrate.rate) {
			double lim2 = opt.peakrate.rate*(double)latency/1000000 + mtu;
			if (lim2 < lim)
				lim = lim2;
		}
		opt.limit = lim;
	}

	/* tc_calc_rtable() fills the table and returns the cell log, negative on failure */
	if ((Rcell_log = tc_calc_rtable(opt.rate.rate, rtab, Rcell_log, mtu, mpu)) < 0) {
		fprintf(stderr, "TBF: failed to calculate rate table.\n");
		return -1;
	}
	opt.buffer = tc_calc_xmittime(opt.rate.rate, buffer);
	opt.rate.cell_log = Rcell_log;
	opt.rate.mpu = mpu;
	if (opt.peakrate.rate) {
		if ((Pcell_log = tc_calc_rtable(opt.peakrate.rate, ptab, Pcell_log, mtu, mpu)) < 0) {
			fprintf(stderr, "TBF: failed to calculate peak rate table.\n");
			return -1;
		}
		opt.mtu = tc_calc_xmittime(opt.peakrate.rate, mtu);
		opt.peakrate.cell_log = Pcell_log;
		opt.peakrate.mpu = mpu;
	}

	/*
	 * Remember where the TCA_OPTIONS attribute will start (the current
	 * aligned end of the message) so that its length can be patched once
	 * the attributes that follow have been appended.
	 * NOTE(review): the payload length 1024 presumably equals
	 * sizeof(rtab)/sizeof(ptab); the return values of addattr_l() are
	 * not checked.
	 */
	tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len));
	addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
	addattr_l(n, 2024, TCA_TBF_PARMS, &opt, sizeof(opt));
	addattr_l(n, 3024, TCA_TBF_RTAB, rtab, 1024);
	if (opt.peakrate.rate)
		addattr_l(n, 4096, TCA_TBF_PTAB, ptab, 1024);
	/* make TCA_OPTIONS span everything appended after it */
	tail->rta_len = (((void*)n)+NLMSG_ALIGN(n->nlmsg_len)) - (void*)tail;
	return 0;
}
Esempio n. 8
0
/*
 * Memory bandwidth benchmark entry point.
 *
 * Options handled here:
 *   -m <KB>          size of the memory region (stored in g_mem_size, bytes)
 *   -a read|write    access type (any other value exits with status 1)
 *   -t <sec>         run time before SIGALRM ends the benchmark (0 = no alarm)
 *   -c <cpu>         pin the process to <cpu> modulo the configured CPU count
 *   -r <prio>        switch to SCHED_FIFO with the given priority
 *   -p <prio>        set the process priority via setpriority()
 *   -i <n>           stop after the iteration counter reaches n (0 = unlimited)
 *   -h               call usage()
 * The option string also accepts n, f, l and x; they are ignored here.
 *
 * After allocating and initialising the region, bench_read()/bench_write()
 * is called in a loop until the iteration limit is hit or a signal calls
 * quit().  The function ends by calling quit(0).
 */
int main(int argc, char *argv[])
{
	int64_t sum = 0;
	unsigned finish = 5;
	int prio = 0;
	int num_processors;
	int acc_type = READ;
	int opt;
	cpu_set_t cmask;
	int iterations = 0;
	int i;
	struct sched_param param;

	/*
	 * get command line options
	 */
	while ((opt = getopt(argc, argv, "m:a:n:t:c:i:p:r:f:l:xh")) != -1) {
		switch (opt) {
		case 'm': /* set memory size */
			g_mem_size = 1024 * strtol(optarg, NULL, 0);
			break;
		case 'a': /* set access type */
			if (!strcmp(optarg, "read"))
				acc_type = READ;
			else if (!strcmp(optarg, "write"))
				acc_type = WRITE;
			else
				exit(1);
			break;

		case 't': /* set time in secs to run */
			finish = strtol(optarg, NULL, 0);
			break;

		case 'c': /* set CPU affinity */
			cpuid = strtol(optarg, NULL, 0);
			num_processors = sysconf(_SC_NPROCESSORS_CONF);
			CPU_ZERO(&cmask);
			CPU_SET(cpuid % num_processors, &cmask);
			/* the second argument is the size of the mask in bytes,
			   not the number of processors */
			if (sched_setaffinity(0, sizeof(cmask), &cmask) < 0)
				perror("error");
			else
				fprintf(stderr, "assigned to cpu %d\n", cpuid);
			break;

		case 'r': /* set real-time (SCHED_FIFO) priority */
			prio = strtol(optarg, NULL, 0);
			param.sched_priority = prio; /* 1(low)- 99(high) for SCHED_FIFO or SCHED_RR
						        0 for SCHED_OTHER or SCHED_BATCH */
			if(sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
				perror("sched_setscheduler failed");
			}
			break;
		case 'p': /* set priority */
			prio = strtol(optarg, NULL, 0);
			if (setpriority(PRIO_PROCESS, 0, prio) < 0)
				perror("error");
			else
				fprintf(stderr, "assigned priority %d\n", prio);
			break;
		case 'i': /* iterations */
			iterations = strtol(optarg, NULL, 0);
			break;
		case 'h':
			usage(argc, argv);
			break;
		default: /* n, f, l, x and unknown options are ignored */
			break;
		}
	}

	/*
	 * allocate contiguous region of memory
	 */
	g_mem_ptr = (int *)malloc(g_mem_size);
	if (g_mem_ptr == NULL) {
		/* without this check the memset below would dereference NULL */
		perror("malloc");
		exit(1);
	}

	memset((char *)g_mem_ptr, 1, g_mem_size);

	for (i = 0; i < g_mem_size / sizeof(int); i++)
		g_mem_ptr[i] = i;

	/* print experiment info before starting */
	printf("memsize=%d KB, type=%s, cpuid=%d\n",
	       g_mem_size/1024,
	       ((acc_type==READ) ?"read": "write"),
		cpuid);
	printf("stop at %u\n", finish);

	/* set signals to terminate once time has been reached */
	signal(SIGINT, &quit);
	if (finish > 0) {
		signal(SIGALRM, &quit);
		alarm(finish);
	}

	/*
	 * actual memory access
	 */
	g_start = get_usecs();
	for (i=0;; i++) {
		switch (acc_type) {
		case READ:
			sum += bench_read();
			break;
		case WRITE:
			sum += bench_write();
			break;
		}

		if (iterations > 0 && i >= iterations)
			break;
	}
	printf("total sum = %ld\n", (long)sum);
	quit(0);
	return 0;
}
/*
 * Parse a service curve given as "[umax SIZE] [dmax TIME] rate RATE" and
 * convert it to the (m1, d, m2) form stored in *sc.
 *
 * "umax" and "dmax" are optional and default to 0; "rate" is mandatory.
 * A non-zero umax together with a zero dmax is rejected.
 *
 * Returns 0 on success with *argcp / *argvp advanced to the last consumed
 * argument, or -1 on a missing "rate", an unparsable value, or the
 * umax-without-dmax case.
 *
 * The local names argc/argv are kept as-is: NEXT_ARG() is invoked without
 * arguments, so it presumably operates on locals with exactly these names.
 */
static int
hfsc_get_sc2(int *argcp, char ***argvp, struct tc_service_curve *sc)
{
	char **argv = *argvp;
	int argc = *argcp;
	unsigned int umax = 0, dmax = 0, rate = 0;
	double first_slope = 0.0;
	int concave = 0;

	if (!matches(*argv, "umax")) {
		NEXT_ARG();
		if (get_size(&umax, *argv) < 0) {
			explain1("umax");
			return -1;
		}
		NEXT_ARG();
	}

	if (!matches(*argv, "dmax")) {
		NEXT_ARG();
		if (get_usecs(&dmax, *argv) < 0) {
			explain1("dmax");
			return -1;
		}
		NEXT_ARG();
	}

	/* "rate" is mandatory */
	if (matches(*argv, "rate") != 0)
		return -1;

	NEXT_ARG();
	if (get_rate(&rate, *argv) < 0) {
		explain1("rate");
		return -1;
	}

	if (umax != 0 && dmax == 0) {
		fprintf(stderr, "HFSC: umax given but dmax is zero.\n");
		return -1;
	}

	/* slope of the first segment is umax/dmax; only defined for dmax != 0 */
	if (dmax != 0) {
		first_slope = ceil(umax * 1000000.0 / dmax);
		concave = first_slope > rate;
	}

	if (concave) {
		/*
		 * concave curve: the first segment is steeper than the
		 * long-term rate and the segments meet at dmax
		 */
		sc->m1 = first_slope; /* in bps */
		sc->d  = dmax;
	} else {
		/*
		 * convex curve: the first segment is flat and the segments
		 * meet at dmax - umax / rate
		 */
		sc->m1 = 0;
		sc->d  = ceil(dmax - umax * 1000000.0 / rate); /* in usec */
	}
	sc->m2 = rate;

	*argvp = argv;
	*argcp = argc;
	return 0;
}
// Find the sub-matrix with the largest element sum in a square integer
// matrix read from a file, using OpenMP to parallelise over the top row.
//
// argv[1] is the input file.  An optional "-t <n>" pair anywhere on the
// command line sets the OpenMP thread count (default 2).
//
// Input format: the dimension, followed by dim*dim whitespace-separated
// integers in row-major order.
//
// Returns 0 on success.  Returns 1 when the dimension or an element cannot
// be read, when the dimension is not positive, or when usage() returns
// after a missing argument / unreadable file.
//
// NOTE(review): mat, ps and outmat are variable-length arrays on the stack,
// so very large dimensions may exhaust the stack.
int main(int argc, char* argv[]) {
    if(argc < 2) {
        usage(argv[0]);
        return 1;   // only reached if usage() returns
    }

    int num_threads = 2;
    if(argc > 2) {
        for(int i = 0; i < argc; i++) {
            if(strcmp(argv[i], "-t") == 0 && argc > i+1) {
                num_threads = atoi(argv[i+1]);
            }
        }
    }
    omp_set_num_threads(num_threads);

    // Open files
    FILE* input_file = fopen(argv[1], "r");
    if(input_file == NULL) {
        usage(argv[0]);
        return 1;   // only reached if usage() returns
    }

    // Read the matrix; the dimension must be validated before it sizes the VLAs
    int dim = 0;
    if(fscanf(input_file, "%d\n", &dim) != 1 || dim <= 0) {
        fprintf(stderr, "invalid matrix dimension in %s\n", argv[1]);
        fclose(input_file);
        return 1;
    }
    int mat[dim][dim];
    int element = 0;
    for(int i=0; i<dim; i++) {
        for(int j=0; j<dim; j++) {
            int got;
            if (j != (dim-1))
                got = fscanf(input_file, "%d\t", &element);
            else
                got = fscanf(input_file, "%d\n",&element);
            if (got != 1) {
                fprintf(stderr, "malformed matrix data in %s (row %d, column %d)\n",
                        argv[1], i, j);
                fclose(input_file);
                return 1;
            }
            mat[i][j] = element;
        }
    }

#ifdef _PRINT_INFO
    // Print the matrix
    printf("Input matrix [%d]\n", dim);
    for(int i=0; i<dim; i++) {
        for(int j=0; j<dim; j++) {
            printf("%d\t", mat[i][j]);
        }
        printf("\n");
    }
#endif

    // Algorithm based on information obtained here:
    // http://stackoverflow.com/questions/2643908/getting-the-submatrix-with-maximum-sum
    long alg_start = get_usecs();

    // Compute vertical prefix sum: ps[i][j] = mat[0][j] + ... + mat[i][j]
    int ps[dim][dim];

    for (int j=0; j<dim; j++) {
        ps[0][j] = mat[0][j];
        for (int i=1; i<dim; i++) {
            ps[i][j] = ps[i-1][j] + mat[i][j];
        }
    }

#ifdef _PRINT_INFO
    // Print the matrix
    printf("Vertical prefix sum matrix [%d]\n", dim);
    for(int i=0; i<dim; i++) {
        for(int j=0; j<dim; j++) {
            printf("%d\t", ps[i][j]);
        }
        printf("\n");
    }
#endif

    int max_sum = mat[0][0];
    int top = 0, left = 0, bottom = 0, right = 0;

    // Auxiliary variables (each thread gets its own copy, see the pragma)
    int sum[dim];
    int pos[dim];
    int local_max;

    #pragma omp parallel for private(sum, pos, local_max) schedule(static, 10)
    for (int i=0; i<dim; i++) {
        for (int k=i; k<dim; k++) {
            // Kadane over all columns with the i..k rows
            clear(sum, dim);
            clear(pos, dim);
            local_max = 0;

            // We keep track of the position of the max value over each Kadane execution
            // Notice that we do not keep track of the max value, but only its position
            sum[0] = ps[k][0] - (i==0 ? 0 : ps[i-1][0]);
            for (int j=1; j<dim; j++) {
                if (sum[j-1] > 0) {
                    sum[j] = sum[j-1] + ps[k][j] - (i==0 ? 0 : ps[i-1][j]);
                    pos[j] = pos[j-1];
                }
                else {
                    sum[j] = ps[k][j] - (i==0 ? 0 : ps[i-1][j]);
                    pos[j] = j;
                }
                if (sum[j] > sum[local_max]) {
                    local_max = j;
                }
            } // Kadane ends here

            // the shared best-so-far is compared and updated inside the critical section
            #pragma omp critical
            if (sum[local_max] > max_sum) {
                // sum[local_max] is the new max value
                // the corresponding submatrix goes from rows i..k.
                // and from columns pos[local_max]..local_max
                max_sum = sum[local_max];
                top = i;
                left = pos[local_max];
                bottom = k;
                right = local_max;
            }
        }
    }

    // Compose the output matrix
    int outmat_row_dim = bottom - top + 1;
    int outmat_col_dim = right - left + 1;
    int outmat[outmat_row_dim][outmat_col_dim];
    for(int i=top, k=0; i<=bottom; i++, k++) {
        for(int j=left, l=0; j<=right ; j++, l++) {
            outmat[k][l] = mat[i][j];
        }
    }


    long alg_end = get_usecs();

    // Print output matrix
    printf("Sub-matrix [%dX%d] with max sum = %d, top = %d, bottom = %d, left = %d, right = %d\n", outmat_row_dim, outmat_col_dim, max_sum, top, bottom, left, right);
#ifdef _PRINT_INFO
    for(int i=0; i<outmat_row_dim; i++) {
        for(int j=0; j<outmat_col_dim; j++) {
            printf("%d\t", outmat[i][j]);
        }
        printf("\n");
    }
#endif

    printf("%s,arg(%s),%s,%f sec, threads: %d\n", argv[0], argv[1], "CHECK_NOT_PERFORMED", ((double)(alg_end-alg_start))/1000000, num_threads);

    // Release resources
    fclose(input_file);

    return 0;
}
Esempio n. 11
0
/*
 * Find the sub-matrix of mat with the largest element sum.
 *
 * mat:    dim x dim input matrix.
 * ps:     dim x dim scratch matrix, filled by precomp_matrix() with the
 *         vertical prefix sums of mat.
 * outmat: the incoming value is not used; the parameter is overwritten with
 *         a matrix from alloc_matrix() that is released before returning.
 * dim:    matrix dimension.
 *
 * For every pair of rows (first..last) a one-dimensional maximum-subarray
 * scan is run over the column sums of that row band.  The elapsed time in
 * seconds is stored in the global `runtime`; the result is only printed
 * when PRINT_RESULT / _PRINT_INFO are defined.
 */
void 
max_sub_arr(
	int **mat, 
	int **ps, 
	int **outmat, 
	int dim) 
{
	int best = mat[0][0];
	int top = 0, left = 0, bottom = 0, right = 0;
	long t_begin, t_end;

	/* scratch arrays for the per-band scan */
	int run[dim];     /* best sum of a column range ending at each column */
	int origin[dim];  /* first column of that range */
	int peak;         /* column at which run[] is largest so far */

	t_begin = get_usecs();

	/* precompute vertical prefix sum */
	precomp_matrix(mat, ps, dim);

#ifdef _PRINT_INFO
	/* Print the matrix */
	printf("[INFO] Vertical prefix sum matrix [%d]\n", dim);
	print_matrix(ps, dim, dim);
#endif

	for (int first = 0; first < dim; first++) {
		for (int last = first; last < dim; last++) {
			/* scan over all columns of the row band first..last */
			clear(run, dim);
			clear(origin, dim);
			peak = 0;

			/* only the position of the maximum is tracked, not its
			 * value; the value is read back from run[peak] */
			run[0] = ps[last][0] - (first == 0 ? 0 : ps[first-1][0]);
			for (int col = 1; col < dim; col++) {
				/* prefix sum of the rows above the band */
				int above = (first == 0) ? 0 : ps[first-1][col];

				if (run[col-1] > 0) {
					/* extending the previous range helps */
					run[col] = run[col-1] + ps[last][col] - above;
					origin[col] = origin[col-1];
				} else {
					/* start a new range at this column */
					run[col] = ps[last][col] - above;
					origin[col] = col;
				}

				if (run[col] > run[peak])
					peak = col;
			}

			if (run[peak] > best) {
				/* new overall maximum: rows first..last,
				 * columns origin[peak]..peak */
				best = run[peak];
				top = first;
				left = origin[peak];
				bottom = last;
				right = peak;
			}
		}
	}

	/* FIXME - Question: Do we need to compute the output matrix? */
	/* Compose the output matrix */

	int outmat_row_dim = bottom - top + 1;
	int outmat_col_dim = right - left + 1;
	outmat = alloc_matrix(outmat_row_dim, outmat_col_dim);

	for (int src = top, dst = 0; src <= bottom; src++, dst++) {
#ifdef OPTIMIZE
		int j=left;

		memcpy((void*) outmat[dst], (void*)(&mat[src][j]),
				sizeof(**mat)*(right-left+1));
#else
		for (int j = left, l = 0; j <= right; j++, l++) {
			outmat[dst][l] = mat[src][j];
		}
#endif
	}

	t_end = get_usecs();
	runtime = (double)(t_end-t_begin)/1000000;

#ifdef PRINT_RESULT
	/* prints the usual result */
	printf("[RESULT] Sub-matrix [%dX%d] in %f sec\n",
			outmat_row_dim, outmat_col_dim, runtime);
	printf(" -> max sum=%d, left=%d, top=%d right=%d, bottom=%d\n",
			best, left, top, right, bottom);
#endif

#ifdef _PRINT_INFO
	/* print output matrix */
	printf("[INFO] output matrix: \n");
	print_matrix(outmat, outmat_row_dim, outmat_col_dim);
#endif

	free_matrix(outmat, outmat_row_dim);
}
Esempio n. 12
0
/*
 * Memory access benchmark entry point (variant with access patterns and an
 * optional /dev/mem mapping).
 *
 * Options handled here:
 *   -m <KB>              size of the memory region (g_mem_size, in bytes)
 *   -a read|write|rdwr   access type (any other value exits with status 1)
 *   -n Seq|Row|Bank      access pattern; sets g_indx / g_next.  Row and Bank
 *                        are only available when one of the
 *                        P4080_MCTRL_INTRV_* macros is non-zero
 *   -t <sec>             run time before SIGALRM ends the run (0 = no alarm)
 *   -c <cpu>             pin the process to <cpu> modulo the CPU count
 *   -p <prio>            set the process priority via setpriority()
 *   -i <n>               stop after the iteration counter reaches n
 *   -l <label>           store a copy of the label in g_label
 *   -f <file>            open <file> in append mode into g_fd
 *   -x                   map the region from /dev/mem instead of malloc()
 *   -h                   call usage()
 *
 * The function ends by calling quit(0).
 */
int main(int argc, char *argv[])
{
	long sum = 0;
	unsigned finish = 5;
	int prio = 0;
	int num_processors;
	int acc_type = READ;
	int opt;
	cpu_set_t cmask;
	int use_mmap = 0;
	int iterations = 0;
	int i;

	/*
	 * get command line options
	 */
	while ((opt = getopt(argc, argv, "m:a:n:t:c:i:p:f:l:xh")) != -1) {
		switch (opt) {
		case 'm': /* set memory size */
			g_mem_size = 1024 * strtol(optarg, NULL, 0);
			break;
		case 'a': /* set access type */
			if (!strcmp(optarg, "read"))
				acc_type = READ;
			else if (!strcmp(optarg, "write"))
				acc_type = WRITE;
			else if (!strcmp(optarg, "rdwr"))
				acc_type = RDWR;
			else
				exit(1);
			break;

		case 'n': /* set access pattern */
			/* sequential */
			if( strcmp(optarg,"Seq") == 0 ) {
				g_indx = 0;
				g_next = (CACHE_LINE_SIZE/4);
			}
			/* same bank */
#if P4080_MCTRL_INTRV_NONE
			else if( strcmp(optarg,"Row") == 0 ) {
				g_indx = 0;
				g_next = (CACHE_LINE_SIZE/4) * 1024;

			}
			/* diff bank */
			else if( strcmp(optarg,"Bank") == 0 ) {
				g_indx = 128*(CACHE_LINE_SIZE/4);
				g_next = (CACHE_LINE_SIZE/4) * 1024;
			}
#elif P4080_MCTRL_INTRV_CLCS
			else if( strcmp(optarg,"Row") == 0 ) {
				g_indx = 0;
				g_next = (CACHE_LINE_SIZE/4) * 1024 * 8;// 2^19
			}
			/* diff bank */
			else if( strcmp(optarg,"Bank") == 0 ) {
				g_indx = 256*(CACHE_LINE_SIZE/4); // 2^16
				g_next = (CACHE_LINE_SIZE/4) * 1024 * 8;// 2^19
			}
#endif
			else
				exit(1);
			break;

		case 't': /* set time in secs to run */
			finish = strtol(optarg, NULL, 0);
			break;

		case 'c': /* set CPU affinity */
			cpuid = strtol(optarg, NULL, 0);
			num_processors = sysconf(_SC_NPROCESSORS_CONF);
			CPU_ZERO(&cmask);
			CPU_SET(cpuid % num_processors, &cmask);
			/* the second argument is the size of the mask in bytes,
			   not the number of processors */
			if (sched_setaffinity(0, sizeof(cmask), &cmask) < 0)
				perror("error");
			else
				fprintf(stderr, "assigned to cpu %d\n", cpuid);
			break;

		case 'p': /* set priority */
			prio = strtol(optarg, NULL, 0);
			if (setpriority(PRIO_PROCESS, 0, prio) < 0)
				perror("error");
			else
				fprintf(stderr, "assigned priority %d\n", prio);
			break;
		case 'i': /* iterations */
			iterations = strtol(optarg, NULL, 0);
			break;
		case 'l': /* set label */
			g_label = strdup(optarg);
			break;

		case 'f': /* set file descriptor */
			g_fd = fopen(optarg, "a+");
			if (g_fd == NULL)
				perror("error");
			break;
		case 'x': /* mapping to /dev/mem. !! DANGEROUS !! */
			use_mmap = 1;
			break;
		case 'h':
			usage(argc, argv);
			break;
		}
	}

	/* scale the start index by the CPU id chosen with -c */
	g_indx *= cpuid;

	/*
	 * allocate contiguous region of memory
	 */
	if (use_mmap) {
		/* open /dev/mem for accessing memory in physical addr. */
		int fd = -1;
		unsigned long offset;

		fprintf(stderr, "Use mmap| g_indx: 0x%x g_next: 0x%x\n", g_indx, g_next);
		fd = open("/dev/mem", O_RDWR | O_SYNC);
		if(fd == -1) {
			fprintf(stderr, "ERROR Opening /dev/mem\n");
			exit(1);
		}
		/* offset variable is used to allocate each cpu to a different offset
		   from each other */
		offset = ADDR_2ND_RANK; /*  + cpuid*g_mem_size;*/
		fprintf(stderr, "offset: %p\n", (void *)offset);
		/* use mmap to allocate each cpu to the specific address in memory */
		g_mem_ptr = (int *)mmap(NULL, g_mem_size, PROT_READ|PROT_WRITE,
					MAP_SHARED, fd, offset);
		/* mmap() reports failure with MAP_FAILED, never with NULL */
		if(g_mem_ptr == MAP_FAILED) {
			fprintf(stderr, "could not allocate memarea");
			exit(1);
		}
		fprintf(stderr, "mmap was successful: addr=%p\n", g_mem_ptr);
	} else {
		printf("Use standard malloc\n");
		g_mem_ptr = (int *)malloc(g_mem_size);
		if (g_mem_ptr == NULL) {
			/* without this check the loop below would dereference NULL */
			perror("malloc");
			exit(1);
		}
	}

	for (i = 0; i < g_mem_size / sizeof(int); i++)
		g_mem_ptr[i] = i;

	memset((char *)g_mem_ptr, 1, g_mem_size);
	fprintf(stderr, "VADDR: %p-%p\n", (char *)g_mem_ptr, (char *)g_mem_ptr + g_mem_size);

	/* print experiment info before starting */
	printf("memsize=%d KB, type=%s, cpuid=%d\n",
	       g_mem_size/1024,
	       ((acc_type==READ) ?"read":
		(acc_type==WRITE)? "write" :
		(acc_type==RDWR) ? "rdwr" : "worst"),
		cpuid);
	printf("stop at %u\n", finish);

	/* set signals to terminate once time has been reached */
	signal(SIGINT, &quit);
	if (finish > 0) {
		signal(SIGALRM, &quit);
		alarm(finish);
	}

	/*
	 * actual memory access
	 */
	g_start = get_usecs();
	for (i=0;; i++) {
		switch (acc_type) {
		case READ:
			sum += bench_read();
			break;
		case WRITE:
			sum += bench_write();
			break;
		case RDWR:
			sum += bench_rdwr();
			break;
		}

		if (iterations > 0 && i >= iterations)
			break;
	}
	printf("total sum = %ld\n", sum);
	quit(0);
	return 0;
}