// check sizes of send and recv buffers and adjust, if necessary void check_buff_size(void) { int i, j, max_send, max_comm, max_recv; for (max_send = max_comm = max_recv = i = 0; i < 3; i++) { if (nonblocking) { if (s_buf_num[i] > max_send) max_send = s_buf_num[i]; } else for (j = 0; j < num_comm_partners[i]; j++) if (send_size[i][j] > max_send) max_send = send_size[i][j]; if (num_comm_partners[i] > max_comm) max_comm = num_comm_partners[i]; if (r_buf_num[i] > max_recv) max_recv = r_buf_num[i]; } if (max_send > s_buf_size) { s_buf_size = (int) (2.0*((double) max_send)); free(send_buff); send_buff = (double *) ma_malloc(s_buf_size*sizeof(double), __FILE__, __LINE__); } if (max_recv > r_buf_size) { r_buf_size = (int) (2.0*((double) max_recv)); free(recv_buff); recv_buff = (double *) ma_malloc(r_buf_size*sizeof(double), __FILE__, __LINE__); } if (max_comm > max_num_req) { free(request); max_num_req = (int) (2.0*((double) max_comm)); request = (MPI_Request *) ma_malloc(max_num_req*sizeof(MPI_Request), __FILE__, __LINE__); if (nonblocking) { free(s_req); s_req = (MPI_Request *) ma_malloc(max_num_req*sizeof(MPI_Request), __FILE__, __LINE__); } } }
static int ssl_thread_init() { int i, max= CRYPTO_num_locks(); if (LOCK_crypto == NULL) { if (!(LOCK_crypto= (pthread_mutex_t *)ma_malloc(sizeof(pthread_mutex_t) * max, MYF(0)))) return 1; for (i=0; i < max; i++) pthread_mutex_init(&LOCK_crypto[i], NULL); } #if (OPENSSL_VERSION_NUMBER < 0x10000000) CRYPTO_set_id_callback(my_cb_threadid); #else CRYPTO_THREADID_set_callback(my_cb_threadid); #endif CRYPTO_set_locking_callback(my_cb_locking); return 0; }
// Routines to add and delete entries from the communication list that is // used to exchange values for ghost cells. void add_comm_list(int dir, int block_f, int pe, int fcase, int pos, int pos1) { int i, j, s_len, r_len, *tmp; /* set indexes for send and recieve to determine length of message: * for example, if we send a whole face to a quarter face, we will * recieve a message sent from a quarter face to a whole face and * use 2 as index for the send and 3 for the recv. * We can use same index except for offset */ if (fcase >= 10) /* +- direction encoded in fcase */ i = fcase - 10; else i = fcase; switch (i) { case 0: s_len = r_len = comm_vars*msg_len[dir][0]; break; case 1: s_len = r_len = comm_vars*msg_len[dir][1]; break; case 2: case 3: case 4: case 5: s_len = comm_vars*msg_len[dir][2]; r_len = comm_vars*msg_len[dir][3]; break; case 6: case 7: case 8: case 9: s_len = comm_vars*msg_len[dir][3]; r_len = comm_vars*msg_len[dir][2]; break; } for (i = 0; i < num_comm_partners[dir]; i++) if (comm_partner[dir][i] >= pe) break; /* i is being used below as an index where information about this * block should go */ if (i < num_comm_partners[dir] && comm_partner[dir][i] == pe) { send_size[dir][i] += s_len; recv_size[dir][i] += r_len; for (j = num_comm_partners[dir]-1; j > i; j--) comm_index[dir][j]++; comm_num[dir][i]++; } else { // make sure arrays are long enough // move stuff i and above up one if (num_comm_partners[dir] == max_comm_part[dir]) { max_comm_part[dir] = (int)(2.0*((double) (num_comm_partners[dir]+1))); tmp = (int *) ma_malloc(max_comm_part[dir]*sizeof(int), __FILE__, __LINE__); for (j = 0; j < i; j++) tmp[j] = comm_partner[dir][j]; for (j = i; j < num_comm_partners[dir]; j++) tmp[j+1] = comm_partner[dir][j]; free(comm_partner[dir]); comm_partner[dir] = tmp; tmp = (int *) ma_malloc(max_comm_part[dir]*sizeof(int), __FILE__, __LINE__); for (j = 0; j < i; j++) tmp[j] = send_size[dir][j]; for (j = i; j < num_comm_partners[dir]; j++) tmp[j+1] = send_size[dir][j]; free(send_size[dir]); send_size[dir] = tmp; tmp = (int *) ma_malloc(max_comm_part[dir]*sizeof(int), __FILE__, __LINE__); for (j = 0; j < i; j++) tmp[j] = recv_size[dir][j]; for (j = i; j < num_comm_partners[dir]; j++) tmp[j+1] = recv_size[dir][j]; free(recv_size[dir]); recv_size[dir] = tmp; tmp = (int *) ma_malloc(max_comm_part[dir]*sizeof(int), __FILE__, __LINE__); for (j = 0; j <= i; j++) // Note that this one is different tmp[j] = comm_index[dir][j]; for (j = i; j < num_comm_partners[dir]; j++) tmp[j+1] = comm_index[dir][j] + 1; free(comm_index[dir]); comm_index[dir] = tmp; tmp = (int *) ma_malloc(max_comm_part[dir]*sizeof(int), __FILE__, __LINE__); for (j = 0; j < i; j++) tmp[j] = comm_num[dir][j]; for (j = i; j < num_comm_partners[dir]; j++) tmp[j+1] = comm_num[dir][j]; free(comm_num[dir]); comm_num[dir] = tmp; } else { for (j = num_comm_partners[dir]; j > i; j--) { comm_partner[dir][j] = comm_partner[dir][j-1]; send_size[dir][j] = send_size[dir][j-1]; recv_size[dir][j] = recv_size[dir][j-1]; comm_index[dir][j] = comm_index[dir][j-1] + 1; comm_num[dir][j] = comm_num[dir][j-1]; } } if (i == num_comm_partners[dir]) if (i == 0) comm_index[dir][i] = 0; else comm_index[dir][i] = comm_index[dir][i-1] + comm_num[dir][i-1]; num_comm_partners[dir]++; comm_partner[dir][i] = pe; send_size[dir][i] = s_len; recv_size[dir][i] = r_len; comm_num[dir][i] = 1; // still have to put info into arrays } if ((num_cases[dir]+1) > max_num_cases[dir]) { max_num_cases[dir] = (int)(2.0*((double) (num_cases[dir]+1))); tmp = (int *) ma_malloc(max_num_cases[dir]*sizeof(int), __FILE__, __LINE__); for (j = 0; j < num_cases[dir]; j++) tmp[j] = comm_block[dir][j]; free(comm_block[dir]); comm_block[dir] = tmp; tmp = (int *) ma_malloc(max_num_cases[dir]*sizeof(int), __FILE__, __LINE__); for (j = 0; j < num_cases[dir]; j++) tmp[j] = comm_face_case[dir][j]; free(comm_face_case[dir]); comm_face_case[dir] = tmp; tmp = (int *) ma_malloc(max_num_cases[dir]*sizeof(int), __FILE__, __LINE__); for (j = 0; j < num_cases[dir]; j++) tmp[j] = comm_pos[dir][j]; free(comm_pos[dir]); comm_pos[dir] = tmp; tmp = (int *) ma_malloc(max_num_cases[dir]*sizeof(int), __FILE__, __LINE__); for (j = 0; j < num_cases[dir]; j++) tmp[j] = comm_pos1[dir][j]; free(comm_pos1[dir]); comm_pos1[dir] = tmp; tmp = (int *) ma_malloc(max_num_cases[dir]*sizeof(int), __FILE__, __LINE__); for (j = 0; j < num_cases[dir]; j++) tmp[j] = comm_send_off[dir][j]; free(comm_send_off[dir]); comm_send_off[dir] = tmp; tmp = (int *) ma_malloc(max_num_cases[dir]*sizeof(int), __FILE__, __LINE__); for (j = 0; j < num_cases[dir]; j++) tmp[j] = comm_recv_off[dir][j]; free(comm_recv_off[dir]); comm_recv_off[dir] = tmp; } if (comm_index[dir][i] == num_cases[dir]) { // at end comm_block[dir][num_cases[dir]] = block_f; comm_face_case[dir][num_cases[dir]] = fcase; comm_pos[dir][num_cases[dir]] = pos; comm_pos1[dir][num_cases[dir]] = pos1; comm_send_off[dir][num_cases[dir]] = s_buf_num[dir]; comm_recv_off[dir][num_cases[dir]] = r_buf_num[dir]; } else { for (j = num_cases[dir]; j > comm_index[dir][i]+comm_num[dir][i]-1; j--){ comm_block[dir][j] = comm_block[dir][j-1]; comm_face_case[dir][j] = comm_face_case[dir][j-1]; comm_pos[dir][j] = comm_pos[dir][j-1]; comm_pos1[dir][j] = comm_pos1[dir][j-1]; comm_send_off[dir][j] = comm_send_off[dir][j-1] + s_len; comm_recv_off[dir][j] = comm_recv_off[dir][j-1] + r_len; } for (j = comm_index[dir][i]+comm_num[dir][i]-1; j >= comm_index[dir][i]; j--) if (j == comm_index[dir][i] || comm_pos[dir][j-1] < pos || (comm_pos[dir][j-1] == pos && comm_pos1[dir][j-1] < pos1)) { comm_block[dir][j] = block_f; comm_face_case[dir][j] = fcase; comm_pos[dir][j] = pos; comm_pos1[dir][j] = pos1; if (j == num_cases[dir]) { comm_send_off[dir][j] = s_buf_num[dir]; comm_recv_off[dir][j] = r_buf_num[dir]; } // else comm_[send,recv]_off[j] values are correct break; } else { comm_block[dir][j] = comm_block[dir][j-1]; comm_face_case[dir][j] = comm_face_case[dir][j-1]; comm_pos[dir][j] = comm_pos[dir][j-1]; comm_pos1[dir][j] = comm_pos1[dir][j-1]; comm_send_off[dir][j] = comm_send_off[dir][j-1] + s_len; comm_recv_off[dir][j] = comm_recv_off[dir][j-1] + r_len; } } num_cases[dir]++; s_buf_num[dir] += s_len; r_buf_num[dir] += r_len; }
// Initialize the problem and setup initial blocks. void init(void) { int n, var, i, j, k, l, m, o, size, dir, i1, i2, j1, j2, k1, k2, ib, jb, kb; int start[num_pes], pos[3][num_pes], pos1[npx][npy][npz], set, num, npx1, npy1, npz1, pes, fact, fac[25], nfac, f; block *bp; tol = pow(10.0, ((double) -error_tol)); p2[0] = p8[0] = 1; for (i = 0; i < (num_refine+1); i++) { p8[i+1] = p8[i]*8; p2[i+1] = p2[i]*2; sorted_index[i] = 0; } sorted_index[num_refine+1] = 0; block_start[0] = 0; local_max_b = global_max_b = init_block_x*init_block_y*init_block_z; num = num_pes*global_max_b; for (i = 1; i <= num_refine; i++) { block_start[i] = block_start[i-1] + num; num *= 8; num_blocks[i] = 0; local_num_blocks[i] = 0; } /* initialize for communication arrays, which are initialized below */ zero_comm_list(); x_block_half = x_block_size/2; y_block_half = y_block_size/2; z_block_half = z_block_size/2; if (!code) { /* for E/W (X dir) messages: 0: whole -> whole (7), 1: whole -> whole (27), 2: whole -> quarter, 3: quarter -> whole */ msg_len[0][0] = msg_len[0][1] = y_block_size*z_block_size; msg_len[0][2] = msg_len[0][3] = y_block_half*z_block_half; /* for N/S (Y dir) messages */ msg_len[1][0] = x_block_size*z_block_size; msg_len[1][1] = (x_block_size+2)*z_block_size; msg_len[1][2] = msg_len[1][3] = x_block_half*z_block_half; /* for U/D (Z dir) messages */ msg_len[2][0] = x_block_size*y_block_size; msg_len[2][1] = (x_block_size+2)*(y_block_size+2); msg_len[2][2] = msg_len[2][3] = x_block_half*y_block_half; } else if (code == 1) { /* for E/W (X dir) messages */ msg_len[0][0] = msg_len[0][1] = (y_block_size+2)*(z_block_size+2); msg_len[0][2] = (y_block_half+1)*(z_block_half+1); msg_len[0][3] = (y_block_half+2)*(z_block_half+2); /* for N/S (Y dir) messages */ msg_len[1][0] = msg_len[1][1] = (x_block_size+2)*(z_block_size+2); msg_len[1][2] = (x_block_half+1)*(z_block_half+1); msg_len[1][3] = (x_block_half+2)*(z_block_half+2); /* for U/D (Z dir) messages */ msg_len[2][0] = msg_len[2][1] = (x_block_size+2)*(y_block_size+2); msg_len[2][2] = (x_block_half+1)*(y_block_half+1); msg_len[2][3] = (x_block_half+2)*(y_block_half+2); } else { /* for E/W (X dir) messages */ msg_len[0][0] = msg_len[0][1] = (y_block_size+2)*(z_block_size+2); msg_len[0][2] = (y_block_half+1)*(z_block_half+1); msg_len[0][3] = (y_block_size+2)*(z_block_size+2); /* for N/S (Y dir) messages */ msg_len[1][0] = msg_len[1][1] = (x_block_size+2)*(z_block_size+2); msg_len[1][2] = (x_block_half+1)*(z_block_half+1); msg_len[1][3] = (x_block_size+2)*(z_block_size+2); /* for U/D (Z dir) messages */ msg_len[2][0] = msg_len[2][1] = (x_block_size+2)*(y_block_size+2); msg_len[2][2] = (x_block_half+1)*(y_block_half+1); msg_len[2][3] = (x_block_size+2)*(y_block_size+2); } /* Determine position of each core in initial mesh */ npx1 = npx; npy1 = npy; npz1 = npz; for (i = 0; i < 3; i++) for (j = 0; j < num_pes; j++) pos[i][j] = 0; nfac = factor(num_pes, fac); max_num_req = num_pes; request = (MPI_Request *) ma_malloc(max_num_req*sizeof(MPI_Request), __FILE__, __LINE__); if (nonblocking) s_req = (MPI_Request *) ma_malloc(max_num_req*sizeof(MPI_Request), __FILE__, __LINE__); pes = 1; start[0] = 0; num = num_pes; comms = (MPI_Comm *) ma_malloc((nfac+1)*sizeof(MPI_Comm), __FILE__, __LINE__); me = (int *) ma_malloc((nfac+1)*sizeof(int), __FILE__, __LINE__); np = (int *) ma_malloc((nfac+1)*sizeof(int), __FILE__, __LINE__); comms[0] = MPI_COMM_WORLD; me[0] = my_pe; np[0] = num_pes; // initialize for (n = 0, i = nfac; i > 0; i--, n++) { fact = fac[i-1]; dir = find_dir(fact, npx1, npy1, npz1); if (dir == 0) npx1 /= fact; else if (dir == 1) npy1 /= fact; else npz1 /= fact; num /= fact; set = me[n]/num; MPI_Comm_split(comms[n], set, me[n], &comms[n+1]); MPI_Comm_rank(comms[n+1], &me[n+1]); MPI_Comm_size(comms[n+1], &np[n+1]); for (j = pes-1; j >= 0; j--) for (k = 0; k < fact; k++) { m = j*fact + k; if (!k) start[m] = start[j]; else start[m] = start[m-1] + num; for (l = start[m], o = 0; o < num; l++, o++) pos[dir][l] = pos[dir][l]*fact + k; } pes *= fact; } for (i = 0; i < num_pes; i++) pos1[pos[0][i]][pos[1][i]][pos[2][i]] = i; max_active_block = init_block_x*init_block_y*init_block_z; num_active = max_active_block; global_active = num_active*num_pes; num_parents = max_active_parent = 0; size = p2[num_refine+1]; /* block size is p2[num_refine+1-level] * smallest block is size p2[1], so can find * its center */ mesh_size[0] = npx*init_block_x*size; max_mesh_size = mesh_size[0]; mesh_size[1] = npy*init_block_y*size; if (mesh_size[1] > max_mesh_size) max_mesh_size = mesh_size[1]; mesh_size[2] = npz*init_block_z*size; if (mesh_size[2] > max_mesh_size) max_mesh_size = mesh_size[2]; if ((num_pes+1) > max_mesh_size) max_mesh_size = num_pes + 1; bin = (int *) ma_malloc(max_mesh_size*sizeof(int), __FILE__, __LINE__); gbin = (int *) ma_malloc(max_mesh_size*sizeof(int), __FILE__, __LINE__); if (stencil == 7) f = 0; else f = 1; for (o = n = k1 = k = 0; k < npz; k++) for (k2 = 0; k2 < init_block_z; k1++, k2++) for (j1 = j = 0; j < npy; j++) for (j2 = 0; j2 < init_block_y; j1++, j2++) for (i1 = i = 0; i < npx; i++) for (i2 = 0; i2 < init_block_x; i1++, i2++, n++) { m = pos1[i][j][k]; if (m == my_pe) { bp = &blocks[o]; bp->level = 0; bp->number = n; bp->parent = -1; bp->cen[0] = i1*size + size/2; bp->cen[1] = j1*size + size/2; bp->cen[2] = k1*size + size/2; add_sorted_list(o, n, 0); for (var = 0; var < num_vars; var++) for (ib = 1; ib <= x_block_size; ib++) for (jb = 1; jb <= y_block_size; jb++) for (kb = 1; kb <= z_block_size; kb++) bp->array[var][ib][jb][kb] = ((double) rand())/((double) RAND_MAX); if (i2 == 0) if (i == 0) { /* 0 boundary */ bp->nei_level[0] = -2; bp->nei[0][0][0] = 0; } else { /* boundary with neighbor core */ bp->nei_level[0] = 0; bp->nei[0][0][0] = -1 - pos1[i-1][j][k]; add_comm_list(0, o, pos1[i-1][j][k], 0+f, bp->cen[2]*mesh_size[1]+bp->cen[1], bp->cen[0] - size/2); } else { /* neighbor on core */ bp->nei_level[0] = 0; bp->nei[0][0][0] = o - 1; } bp->nei_refine[0] = 0; if (i2 == (init_block_x - 1)) if (i == (npx - 1)) { /* 1 boundary */ bp->nei_level[1] = -2; bp->nei[1][0][0] = 0; } else { /* boundary with neighbor core */ bp->nei_level[1] = 0; bp->nei[1][0][0] = -1 - pos1[i+1][j][k]; add_comm_list(0, o, pos1[i+1][j][k], 10+f, bp->cen[2]*mesh_size[1]+bp->cen[1], bp->cen[0] + size/2); } else { /* neighbor on core */ bp->nei_level[1] = 0; bp->nei[1][0][0] = o + 1; } bp->nei_refine[1] = 0; if (j2 == 0) if (j == 0) { /* 0 boundary */ bp->nei_level[2] = -2; bp->nei[2][0][0] = 0; } else { /* boundary with neighbor core */ bp->nei_level[2] = 0; bp->nei[2][0][0] = -1 - pos1[i][j-1][k]; add_comm_list(1, o, pos1[i][j-1][k], 0+f, bp->cen[2]*mesh_size[0]+bp->cen[0], bp->cen[1] - size/2); } else { /* neighbor on core */ bp->nei_level[2] = 0; bp->nei[2][0][0] = o - init_block_x; } bp->nei_refine[2] = 0; if (j2 == (init_block_y - 1)) if (j == (npy - 1)) { /* 1 boundary */ bp->nei_level[3] = -2; bp->nei[3][0][0] = 0; } else { /* boundary with neighbor core */ bp->nei_level[3] = 0; bp->nei[3][0][0] = -1 - pos1[i][j+1][k]; add_comm_list(1, o, pos1[i][j+1][k], 10+f, bp->cen[2]*mesh_size[0]+bp->cen[0], bp->cen[1] + size/2); } else { /* neighbor on core */ bp->nei_level[3] = 0; bp->nei[3][0][0] = o + init_block_x; } bp->nei_refine[3] = 0; if (k2 == 0) if (k == 0) { /* 0 boundary */ bp->nei_level[4] = -2; bp->nei[4][0][0] = 0; } else { /* boundary with neighbor core */ bp->nei_level[4] = 0; bp->nei[4][0][0] = -1 - pos1[i][j][k-1]; add_comm_list(2, o, pos1[i][j][k-1], 0+f, bp->cen[1]*mesh_size[0]+bp->cen[0], bp->cen[2] - size/2); } else { /* neighbor on core */ bp->nei_level[4] = 0; bp->nei[4][0][0] = o - init_block_x*init_block_y; } bp->nei_refine[4] = 0; if (k2 == (init_block_z - 1)) if (k == (npz - 1)) { /* 1 boundary */ bp->nei_level[5] = -2; bp->nei[5][0][0] = 0; } else { /* boundary with neighbor core */ bp->nei_level[5] = 0; bp->nei[5][0][0] = -1 - pos1[i][j][k+1]; add_comm_list(2, o, pos1[i][j][k+1], 10+f, bp->cen[1]*mesh_size[0]+bp->cen[0], bp->cen[2] + size/2); } else { /* neighbor on core */ bp->nei_level[5] = 0; bp->nei[5][0][0] = o + init_block_x*init_block_y; } bp->nei_refine[5] = 0; o++; } } check_buff_size(); for (var = 0; var < num_vars; var++) grid_sum[var] = check_sum(var); }