예제 #1
0
파일: bench.c 프로젝트: ulysseses/sr_exp
double lcod() {
	// Allocate patches
	float *patches = (float*)malloc(N*K*sizeof(float));
	// Allocate patches_sr
	float *patches_sr = (float*)malloc(N*F*sizeof(float));
	// Allocate LR dictionary
	float *dict_lr = (float*)malloc(K*C*sizeof(float));
	// Allocate HR dictionary
	float *dict_hr = (float*)malloc(C*F*sizeof(float));
	// Allocate S matrix
	float *s = (float*)malloc(C*C*sizeof(float));
	
	// Allocate B
	float *b = (float*)malloc(N*C*sizeof(float));
	// Allocate Z
	float *z = (float*)calloc(N*C, sizeof(float));
	// Allocate Z_hat
	float *z_hat = (float*)malloc(N*C*sizeof(float));
	// Allocate tmp
	float *tmp = (float*)malloc(N*C*sizeof(float));
	
	// Perform Coordinate Descent
	double start_time = omp_get_wtime();
	dot(patches, dict_lr, b, N, K, C);
	for (int t=0; t<T; t++) {
		st(b, z_hat, N, C, THRESH);
		int k;
		if (t > 1) {
			sub(z_hat, z, tmp, N, C);
			k = greedy_k(tmp, N, C);
			outer_update(tmp, s, b, N, C, k);
		} else {
			k = greedy_k(z_hat, N, C);
			outer_update(z_hat, s, b, N, C, k);
		}
		slice_update(z, z_hat, N, C, k);
	}
	st(b, z, N, C, THRESH);
	
	// Project back to HR space
	dot(z, dict_hr, patches_sr, N, C, F);
	double duration = omp_get_wtime();
	
	// De-allocate
	free(patches);
	free(patches_sr);
	free(dict_lr);
	free(dict_hr);
	free(s);
	free(b);
	free(z);
	free(z_hat);
	free(tmp);

	return duration;
}
int main (int argc, char *argv[])
{
	void inidat();
	float  ***array;        /* array for grid */
	int	taskid,                     /* this task's unique id */
		numtasks,                   /* number of tasks */
		averow,rows,offset,extra,   /* for sending rows of data */
		dest, source,               /* to - from for message send-receive */
		left,right,        /* neighbor tasks */
		msgtype,                    /* for message types */
		rc,start,end,               /* misc */
		i,x,y,z,it,size,t_sqrt;              /* loop variables */
	MPI_Status status;
   	MPI_Datatype dt,dt2; 
    MPI_Request req, req2,req3,req4,req5;
    double t1,t2;

/* First, find out my taskid and how many tasks are running */
   	MPI_Init(&argc,&argv);
   	MPI_Comm_size(MPI_COMM_WORLD,&numtasks);
   	MPI_Comm_rank(MPI_COMM_WORLD,&taskid);

   	/*Set number of threads */
	omp_set_num_threads(atoi(argv[1])); // Use n threads for all consecutive parallel regions
	omp_set_nested(1);

	if (taskid == 0)
   	{
   		//printf("Grid size: X= %d  Y= %d  Time steps= %d\n",NXPROB,NYPROB,STEPS);
   		t1 = MPI_Wtime();
   	}
   	i = 0;
   	while(i*i < (NXPROB*NYPROB)/numtasks)
   		i++;
   	size = i;
   	i = 0;
   	while(i*i<numtasks)
   		i++;
   	t_sqrt = i;
   	MPI_Type_contiguous(size+2,MPI_FLOAT, &dt); 
	MPI_Type_commit(&dt);
	MPI_Type_vector(size+2,1,size+2,MPI_FLOAT,&dt2);
	MPI_Type_commit(&dt2); 
	array = malloc(2*sizeof(float**));
	for (i = 0;i<2;i++){
		array[i] = malloc((2+size)*sizeof(float*));
		array[i][0] = malloc(((2+size)*(2+size))*sizeof(float));
		for (x = 1;x<2+size;x++){
			array[i][x] = &(array[i][0][x*(2+size)]);
		}
	}
	for (z=0; z<2; z++){
		for (x=0; x<2+size; x++){
			for (y=0; y<2+size; y++){
				array[z][x][y] = 0.0;
			}
		}
	}
	z = 0;
	inidat(NXPROB,NYPROB,array[z],size*(taskid/t_sqrt),size*(taskid%t_sqrt),size);
	for (i = 1; i <= STEPS; i++)
	{
		if (taskid/t_sqrt != 0) //not first row
		{
			MPI_Isend(array[z][1],1,dt,taskid-t_sqrt,100, MPI_COMM_WORLD, &req);
			MPI_Irecv(array[z][0],1,dt,taskid-t_sqrt,100, MPI_COMM_WORLD, &req2);
		}
		if (taskid/t_sqrt != t_sqrt-1) //not last row
		{
			MPI_Isend(array[z][size],1,dt,taskid+t_sqrt,100, MPI_COMM_WORLD, &req);
			MPI_Irecv(array[z][size+1],1,dt,taskid+t_sqrt,100, MPI_COMM_WORLD, &req3);
		}
		if(taskid%t_sqrt != 0) //not last column
		{
			MPI_Isend(&array[z][0][1],1,dt2,taskid-1,100, MPI_COMM_WORLD, &req);
			MPI_Irecv(&array[z][0][0],1,dt2,taskid-1,100, MPI_COMM_WORLD, &req4);
		}
		if(taskid%t_sqrt != t_sqrt-1) //not last column
		{
			MPI_Isend(&array[z][0][size],1,dt2,taskid+1,100, MPI_COMM_WORLD, &req);
			MPI_Irecv(&array[z][0][size+1],1,dt2,taskid+1,100, MPI_COMM_WORLD, &req5);
		}
		inner_update(size,array[z],array[1-z]);
		if (taskid/t_sqrt != 0) 
			MPI_Wait(&req2,&status);
		if (taskid/t_sqrt != t_sqrt-1) 
			MPI_Wait(&req3,&status);
		if(taskid%t_sqrt != 0) 
			MPI_Wait(&req4,&status);
		if(taskid%t_sqrt != t_sqrt-1) 
			MPI_Wait(&req5,&status);
		outer_update(size,taskid,t_sqrt,array[z],array[1-z]);
		z = 1-z;
	}
	if (taskid == 0){
		t2 = MPI_Wtime();
		printf("MPI_Wtime measured: %1.2f\n", t2-t1);
	} 
	for (i = 0;i<2;i++){
		free(array[i][0]);
		free(array[i]);
	}
	free(array);
	MPI_Type_free(&dt);
	MPI_Type_free(&dt2);
	MPI_Finalize();
}