double lcod() { // Allocate patches float *patches = (float*)malloc(N*K*sizeof(float)); // Allocate patches_sr float *patches_sr = (float*)malloc(N*F*sizeof(float)); // Allocate LR dictionary float *dict_lr = (float*)malloc(K*C*sizeof(float)); // Allocate HR dictionary float *dict_hr = (float*)malloc(C*F*sizeof(float)); // Allocate S matrix float *s = (float*)malloc(C*C*sizeof(float)); // Allocate B float *b = (float*)malloc(N*C*sizeof(float)); // Allocate Z float *z = (float*)calloc(N*C, sizeof(float)); // Allocate Z_hat float *z_hat = (float*)malloc(N*C*sizeof(float)); // Allocate tmp float *tmp = (float*)malloc(N*C*sizeof(float)); // Perform Coordinate Descent double start_time = omp_get_wtime(); dot(patches, dict_lr, b, N, K, C); for (int t=0; t<T; t++) { st(b, z_hat, N, C, THRESH); int k; if (t > 1) { sub(z_hat, z, tmp, N, C); k = greedy_k(tmp, N, C); outer_update(tmp, s, b, N, C, k); } else { k = greedy_k(z_hat, N, C); outer_update(z_hat, s, b, N, C, k); } slice_update(z, z_hat, N, C, k); } st(b, z, N, C, THRESH); // Project back to HR space dot(z, dict_hr, patches_sr, N, C, F); double duration = omp_get_wtime(); // De-allocate free(patches); free(patches_sr); free(dict_lr); free(dict_hr); free(s); free(b); free(z); free(z_hat); free(tmp); return duration; }
int main (int argc, char *argv[]) { void inidat(); float ***array; /* array for grid */ int taskid, /* this task's unique id */ numtasks, /* number of tasks */ averow,rows,offset,extra, /* for sending rows of data */ dest, source, /* to - from for message send-receive */ left,right, /* neighbor tasks */ msgtype, /* for message types */ rc,start,end, /* misc */ i,x,y,z,it,size,t_sqrt; /* loop variables */ MPI_Status status; MPI_Datatype dt,dt2; MPI_Request req, req2,req3,req4,req5; double t1,t2; /* First, find out my taskid and how many tasks are running */ MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&numtasks); MPI_Comm_rank(MPI_COMM_WORLD,&taskid); /*Set number of threads */ omp_set_num_threads(atoi(argv[1])); // Use n threads for all consecutive parallel regions omp_set_nested(1); if (taskid == 0) { //printf("Grid size: X= %d Y= %d Time steps= %d\n",NXPROB,NYPROB,STEPS); t1 = MPI_Wtime(); } i = 0; while(i*i < (NXPROB*NYPROB)/numtasks) i++; size = i; i = 0; while(i*i<numtasks) i++; t_sqrt = i; MPI_Type_contiguous(size+2,MPI_FLOAT, &dt); MPI_Type_commit(&dt); MPI_Type_vector(size+2,1,size+2,MPI_FLOAT,&dt2); MPI_Type_commit(&dt2); array = malloc(2*sizeof(float**)); for (i = 0;i<2;i++){ array[i] = malloc((2+size)*sizeof(float*)); array[i][0] = malloc(((2+size)*(2+size))*sizeof(float)); for (x = 1;x<2+size;x++){ array[i][x] = &(array[i][0][x*(2+size)]); } } for (z=0; z<2; z++){ for (x=0; x<2+size; x++){ for (y=0; y<2+size; y++){ array[z][x][y] = 0.0; } } } z = 0; inidat(NXPROB,NYPROB,array[z],size*(taskid/t_sqrt),size*(taskid%t_sqrt),size); for (i = 1; i <= STEPS; i++) { if (taskid/t_sqrt != 0) //not first row { MPI_Isend(array[z][1],1,dt,taskid-t_sqrt,100, MPI_COMM_WORLD, &req); MPI_Irecv(array[z][0],1,dt,taskid-t_sqrt,100, MPI_COMM_WORLD, &req2); } if (taskid/t_sqrt != t_sqrt-1) //not last row { MPI_Isend(array[z][size],1,dt,taskid+t_sqrt,100, MPI_COMM_WORLD, &req); MPI_Irecv(array[z][size+1],1,dt,taskid+t_sqrt,100, MPI_COMM_WORLD, &req3); } if(taskid%t_sqrt != 0) //not last column { MPI_Isend(&array[z][0][1],1,dt2,taskid-1,100, MPI_COMM_WORLD, &req); MPI_Irecv(&array[z][0][0],1,dt2,taskid-1,100, MPI_COMM_WORLD, &req4); } if(taskid%t_sqrt != t_sqrt-1) //not last column { MPI_Isend(&array[z][0][size],1,dt2,taskid+1,100, MPI_COMM_WORLD, &req); MPI_Irecv(&array[z][0][size+1],1,dt2,taskid+1,100, MPI_COMM_WORLD, &req5); } inner_update(size,array[z],array[1-z]); if (taskid/t_sqrt != 0) MPI_Wait(&req2,&status); if (taskid/t_sqrt != t_sqrt-1) MPI_Wait(&req3,&status); if(taskid%t_sqrt != 0) MPI_Wait(&req4,&status); if(taskid%t_sqrt != t_sqrt-1) MPI_Wait(&req5,&status); outer_update(size,taskid,t_sqrt,array[z],array[1-z]); z = 1-z; } if (taskid == 0){ t2 = MPI_Wtime(); printf("MPI_Wtime measured: %1.2f\n", t2-t1); } for (i = 0;i<2;i++){ free(array[i][0]); free(array[i]); } free(array); MPI_Type_free(&dt); MPI_Type_free(&dt2); MPI_Finalize(); }